Belle II Software release-09-00-01
ZMQEventProcessor.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <framework/pcore/ProcHelper.h>
10#include <framework/pcore/GlobalProcHandler.h>
11#include <framework/pcore/zmq/messages/ZMQDefinitions.h>
12#include <framework/pcore/zmq/utils/ZMQAddressUtils.h>
13#include <framework/pcore/zmq/messages/ZMQMessageFactory.h>
14#include <framework/pcore/PathUtils.h>
15
16#include <framework/pcore/ZMQEventProcessor.h>
17#include <framework/pcore/DataStoreStreamer.h>
18#include <framework/pcore/RbTuple.h>
19
20#include <framework/core/Environment.h>
21#include <framework/logging/LogSystem.h>
22
23#include <framework/database/DBStore.h>
24#include <framework/database/Database.h>
25#include <framework/core/RandomNumbers.h>
26#include <framework/core/MetadataService.h>
27#include <framework/gearbox/Unit.h>
28#include <framework/utilities/Utils.h>
29
30#include <TROOT.h>
31
32#include <sys/stat.h>
33
34#include <csignal>
35#include <fstream>
36
37using namespace std;
38using namespace Belle2;
39
40namespace {
48 static int g_signalReceived = 0;
49
51 static ZMQEventProcessor* g_eventProcessorForSignalHandling = nullptr;
52
53 static void cleanupAndRaiseSignal(int signalNumber)
54 {
55 if (g_eventProcessorForSignalHandling) {
56 g_eventProcessorForSignalHandling->cleanup();
57 }
58 // uninstall current handler and call default one.
59 signal(signalNumber, SIG_DFL);
60 raise(signalNumber);
61 }
62
63 static void storeSignal(int signalNumber)
64 {
65 if (signalNumber == SIGINT) {
66 EventProcessor::writeToStdErr("\nStopping basf2 gracefully...\n");
67 }
68
69 // We do not want to remove the first signal
70 if (g_signalReceived == 0) {
71 g_signalReceived = signalNumber;
72 }
73 }
74
76 std::string g_socketAddress = "";
77
78 void deleteSocketFiles()
79 {
80 if (not GlobalProcHandler::isProcess(ProcType::c_Monitor) and not GlobalProcHandler::isProcess(ProcType::c_Init)) {
81 return;
82 }
83
84 const std::vector<ZMQAddressType> socketAddressList = {ZMQAddressType::c_input, ZMQAddressType::c_output, ZMQAddressType::c_pub, ZMQAddressType::c_sub, ZMQAddressType::c_control};
85 const auto seperatorPos = g_socketAddress.find("://");
86
87 if (seperatorPos == std::string::npos or seperatorPos + 3 >= g_socketAddress.size()) {
88 return;
89 }
90
91 const std::string filename(g_socketAddress.substr(seperatorPos + 3));
92
93 struct stat buffer;
94 for (const auto socketAdressType : socketAddressList) {
95 const std::string socketAddress(ZMQAddressUtils::getSocketAddress(filename, socketAdressType));
96 if (stat(socketAddress.c_str(), &buffer) == 0) {
97 remove(socketAddress.c_str());
98 }
99 }
100 }
101} // namespace
102
104{
105 B2ASSERT("You are having two instances of the ZMQEventProcessor running! This is not possible",
106 not g_eventProcessorForSignalHandling);
107 g_eventProcessorForSignalHandling = this;
108
109 // Make sure to remove the sockets
110 g_socketAddress = Environment::Instance().getZMQSocketAddress();
111 std::atexit(deleteSocketFiles);
112}
113
115{
116 cleanup();
117 g_eventProcessorForSignalHandling = nullptr;
118}
119
120void ZMQEventProcessor::process(const PathPtr& path, long maxEvent)
121{
122 // Concerning signal handling:
123 // * During the initialization, we just raise the signal without doing any cleanup etc.
124 // * During the event execution, we will not allow for any signal in all processes except the parent process.
125 // Here, we catch sigint and clean up the processes AND WHAT DO WE DO IN THE OTHER CASES?
126 // * During cleanup, we will just ignore sigint, but the rest will be raised
127
128 if (path->isEmpty()) {
129 return;
130 }
131
132 const int numProcesses = Environment::Instance().getNumberProcesses();
133 if (numProcesses == 0) {
134 B2FATAL("ZMQEventProcessor::process() called for serial processing! Most likely a bug in Framework.");
135 }
136
137 // Split the path into input, main and output. A nullptr means, the path should not be used
138 PathPtr inputPath, mainPath, outputPath;
139 std::tie(inputPath, mainPath, outputPath) = PathUtils::splitPath(path);
140 const ModulePtr& histogramManager = PathUtils::getHistogramManager(inputPath);
141
142 // Check for existence of HLTZMQ2Ds module in input path to set DAQ environment
143 for (const ModulePtr& module : inputPath->getModules()) {
144 if (module->getName() == "HLTZMQ2Ds") {
146 B2INFO("ZMQEventProcessor : DAQ environment set");
147 break;
148 }
149 }
150
151 if (not mainPath or mainPath->isEmpty()) {
152 B2WARNING("Cannot run any modules in parallel (no c_ParallelProcessingCertified flag), falling back to single-core mode.");
153 EventProcessor::process(path, maxEvent);
154 return;
155 }
156
157 // inserts Rx/Tx modules into path (sets up IPC structures)
158 const ModulePtrList& moduleList = PathUtils::preparePaths(inputPath, mainPath, outputPath);
159
160 // Run the initialization of the modules and the histogram manager
161 initialize(moduleList, histogramManager);
162
163 // The main part: fork into the different processes and run!
164 const ModulePtrList& terminateGlobally = PathUtils::getTerminateGloballyModules(moduleList);
165 forkAndRun(maxEvent, inputPath, mainPath, outputPath, terminateGlobally);
166
167 installMainSignalHandlers(cleanupAndRaiseSignal);
168 // Run the final termination and cleanup with error check
169 terminateAndCleanup(histogramManager);
170}
171
// First step of process(): initialize the histogram manager (if given) and all
// modules in moduleList, then refuse to continue if errors were logged.
// NOTE(review): this listing has gaps in its line numbering (188 and 194-198).
// The declaration of numLogError is not visible, so the block does not compile
// as shown; restore the missing lines from the original file.
172void ZMQEventProcessor::initialize(const ModulePtrList& moduleList, const ModulePtr& histogramManager)
173{
174 if (histogramManager) {
175 histogramManager->initialize();
176 }
177 // from now on the datastore is available
178 processInitialize(moduleList, true);
179
180 B2INFO("ZMQEventProcessor : processInitialize done");
181
182 // Don't start processing in case of no master module
183 if (!m_master) {
184 B2ERROR("There is no module that provides event and run numbers. You must either add the EventInfoSetter module to your path, or, if using an input module, read EventMetaData objects from file.");
185 }
186
187 // Check if errors appeared. If yes, don't start the event processing.
// NOTE(review): listing line 188 is missing here; presumably it defines
// numLogError from the LogSystem error counter -- confirm against the original.
189 if (numLogError != 0) {
190 B2FATAL(numLogError << " ERROR(S) occurred! The processing of events will not be started.");
191 }
192
193 // TODO: I do not really understand what is going on here...
// NOTE(review): listing lines 194-198 are missing here; the statements the TODO
// above refers to are not visible.
199 // disable ROOT's management of TFiles
200 // clear list, but don't actually delete the objects
201 gROOT->GetListOfFiles()->Clear("nodelete");
202}
203
205{
206 cleanup();
207
208 if (histogramManager) {
209 B2INFO("HistoManager:: adding histogram files");
211 }
212
213 // did anything bad happen?
214 if (g_signalReceived) {
215 if (g_signalReceived == SIGINT) {
216 B2RESULT("Processing aborted via signal " << g_signalReceived <<
217 ", terminating. Output files have been closed safely and should be readable.");
218 } else {
219 B2ERROR("Processing aborted via signal " << g_signalReceived <<
220 ", terminating. Output files have been closed safely and should be readable.");
221 }
222 // re-raise the signal
223 installSignalHandler(g_signalReceived, SIG_DFL);
224 raise(g_signalReceived);
225 }
226}
227
// Run the input path. Returns immediately if there is no (or an empty) input
// path. The visible tail of the function processes the input path and then
// ends the process with exit(0).
// NOTE(review): listing lines 234, 236, 241 and 243-244 are missing. The
// closing brace at 238 has no visible opening statement, so the condition that
// separates the returning process from the input process is not shown here
// (presumably GlobalProcHandler::startInputProcess() -- confirm).
228void ZMQEventProcessor::runInput(const PathPtr& inputPath, const ModulePtrList& terminateGlobally, long maxEvent)
229{
230 if (not inputPath or inputPath->isEmpty()) {
231 return;
232 }
233
235 // This is not the input process, clean up datastore to not contain the first event
237 return;
238 }
239
240 // The default will be to not do anything on signals...
// NOTE(review): the statements belonging to the comment above (listing lines
// 241, 243-244) are missing.
242
245
246 processPath(inputPath, terminateGlobally, maxEvent);
247 B2DEBUG(30, "Finished an input process");
// The process running the input path never returns to the caller.
248 exit(0);
249}
250
// Run the output path. Returns immediately if there is no (or an empty) output
// path. The visible tail processes the output path, publishes one statistics
// message built from the streamed data store and ends the process with exit(0).
// NOTE(review): listing lines 261, 266 and 268 are missing. The closing brace
// at 263 has no visible opening statement (presumably a check on
// GlobalProcHandler::startOutputProcess() -- confirm).
251void ZMQEventProcessor::runOutput(const PathPtr& outputPath, const ModulePtrList& terminateGlobally, long maxEvent)
252{
// The pub/sub addresses are derived from the configured base socket address.
253 const auto& socketAddress = Environment::Instance().getZMQSocketAddress();
254 const auto pubSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_pub));
255 const auto subSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_sub));
256
257 if (not outputPath or outputPath->isEmpty()) {
258 return;
259 }
260
262 return;
263 }
264
265 // The default will be to not do anything on signals...
// NOTE(review): the statements belonging to the comment above (listing lines
// 266, 268) are missing.
267
269
270 // Set the rx module as main module
// The first module of the output path becomes m_master.
271 m_master = outputPath->getModules().begin()->get();
272
273 processPath(outputPath, terminateGlobally, maxEvent);
274
275 // Send the statistics to the process monitor
276 StreamHelper streamer;
277 ZMQClient zmqClient;
278
279 // TODO: true?
280 streamer.initialize(0, true);
281 zmqClient.initialize(pubSocketAddress, subSocketAddress);
282
283 // TODO: make sure to only send statistics!
284 const auto& evtMessage = streamer.stream();
285 auto message = ZMQMessageFactory::createMessage(EMessageTypes::c_statisticMessage, evtMessage);
286 zmqClient.publish(std::move(message));
287
288 B2DEBUG(30, "Finished an output process");
// The process running the output path never returns to the caller.
289 exit(0);
290}
// Start numProcesses worker processes. Where startWorkerProcesses() returns
// false, the function waits for a running worker and returns; otherwise it
// processes the main path and ends the process with exit(0).
// Does nothing for numProcesses == 0.
// NOTE(review): listing lines 305 and 312-313 are missing; the statements that
// belonged there are not visible.
291void ZMQEventProcessor::runWorker(unsigned int numProcesses, const PathPtr& inputPath, const PathPtr& mainPath,
292 const ModulePtrList& terminateGlobally, long maxEvent)
293{
294 if (numProcesses == 0) {
295 return;
296 }
297
298 if (not GlobalProcHandler::startWorkerProcesses(numProcesses)) {
299 // Make sure the worker process is running until we go on
300 m_processMonitor.waitForRunningWorker(Environment::Instance().getZMQMaximalWaitingTime());
301 return;
302 }
303
304 // The default will be to not do anything on signals...
// NOTE(review): the statement belonging to the comment above (listing line
// 305) is missing.
306
307 if (inputPath and not inputPath->isEmpty()) {
308 // set Rx as master
// With an input path present, the first module of the main path becomes m_master.
309 m_master = mainPath->getModules().begin()->get();
310 }
311
314
315 processPath(mainPath, terminateGlobally, maxEvent);
316 B2DEBUG(30, "Finished a worker process");
// A worker process never returns to the caller.
317 exit(0);
318}
319
// Run the event loop for localPath in the current process and terminate its
// modules afterwards. The modules in terminateGlobally are prepended to the
// local module list (if not already present) before processTerminate().
// NOTE(review): listing lines 326-327 are missing, so the processCore() call
// below is cut off after its fourth argument.
320void ZMQEventProcessor::processPath(const PathPtr& localPath, const ModulePtrList& terminateGlobally, long maxEvent)
321{
322 ModulePtrList localModules = localPath->buildModulePathList();
// The effective limit is computed by getMaximumEventNumber().
323 maxEvent = getMaximumEventNumber(maxEvent);
324 // we are not using the default signal handler, so the processCore can not throw any exception because if sigint...
325 processCore(localPath, localModules, maxEvent, GlobalProcHandler::isProcess(ProcType::c_Input),
328
329 B2DEBUG(30, "terminate process...");
330 PathUtils::prependModulesIfNotPresent(&localModules, terminateGlobally);
331 processTerminate(localModules);
332}
333
334
// Monitoring stage: initialize the process monitor, install storeSignal as
// signal handler, start the requested workers and loop until the monitor
// reports the end, a worker failure requires stopping, or no workers remain.
// NOTE(review): listing lines 338, 349-350, 354-355, 370, 372 and 377 are
// missing. Several closing braces below have no visible opening statement and
// the calls announced by some comments are not shown; restore them from the
// original file.
335void ZMQEventProcessor::runMonitoring(const PathPtr& inputPath, const PathPtr& mainPath, const ModulePtrList& terminateGlobally,
336 long maxEvent)
337{
// NOTE(review): the condition for this early return (listing line 338) is missing.
339 return;
340 }
341
342 const auto& environment = Environment::Instance();
343
344 B2DEBUG(30, "Will now start process monitor...");
345 const int numProcesses = environment.getNumberProcesses();
346 m_processMonitor.initialize(numProcesses);
347
348 // Make sure the input process is running until we go on
// NOTE(review): the wait call and its check (listing lines 349-350) are missing.
351 return;
352 }
353 // Make sure the output process is running until we go on
// NOTE(review): the wait call and its check (listing lines 354-355) are missing.
356 return;
357 }
358
// From here on signals are only stored in g_signalReceived (see storeSignal).
359 installMainSignalHandlers(storeSignal);
360
361 // at least start the number of workers requested
362 runWorker(m_processMonitor.needMoreWorkers(), inputPath, mainPath, terminateGlobally, maxEvent);
363
364 const auto& restartFailedWorkers = environment.getZMQRestartFailedWorkers();
365 const auto& failOnFailedWorkers = environment.getZMQFailOnFailedWorkers();
366
367 B2DEBUG(30, "Will now start main loop...");
368 while (true) {
369 // check multicast for messages and kill workers if requested
// NOTE(review): the call for the comment above (listing line 370) is missing.
371 // check the child processes, if one has died
// NOTE(review): the call for the comment above (listing line 372) is missing.
373 // check if we have received any signal from the user or OS. Kill the processes if not SIGINT.
374 m_processMonitor.checkSignals(g_signalReceived);
375
376 // If we have received a SIGINT signal or the last process is gone, we can end smoothly
// NOTE(review): the condition guarding this break (listing line 377) is missing.
378 break;
379 }
380
381 // Test if we need more workers
382 const unsigned int neededWorkers = m_processMonitor.needMoreWorkers();
383 if (neededWorkers > 0) {
384 B2DEBUG(30, "restartFailedWorkers = " << restartFailedWorkers);
// Three policies, checked in this order: restart, stop on failure, stop when no worker is left.
385 if (restartFailedWorkers) {
386 B2DEBUG(30, ".... Restarting a new worker");
387 B2ERROR(".... Restarting a new worker process");
388 runWorker(neededWorkers, inputPath, mainPath, terminateGlobally, maxEvent);
389 } else if (failOnFailedWorkers) {
390 B2ERROR("A worker failed. Will try to end the process smoothly now.");
391 break;
392 } else if (not m_processMonitor.hasWorkers()) {
393 B2WARNING("All workers have died and you did not request to restart them. Going down now.");
394 break;
395 }
396 }
397 }
398
399 B2DEBUG(30, "Finished the monitoring process");
400}
401
402void ZMQEventProcessor::forkAndRun(long maxEvent, const PathPtr& inputPath, const PathPtr& mainPath, const PathPtr& outputPath,
403 const ModulePtrList& terminateGlobally)
404{
405 const int numProcesses = Environment::Instance().getNumberProcesses();
406 GlobalProcHandler::initialize(numProcesses);
407
408 const auto& socketAddress = Environment::Instance().getZMQSocketAddress();
409
410 const auto pubSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_pub));
411 const auto subSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_sub));
412 const auto controlSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_control));
413
414 // We catch all signals and store them into a variable. This is used during the main loop then.
415 // From now on, we have to make sure to clean up behind us
416 installMainSignalHandlers(cleanupAndRaiseSignal);
417 m_processMonitor.subscribe(pubSocketAddress, subSocketAddress, controlSocketAddress);
418
419 runInput(inputPath, terminateGlobally, maxEvent);
420 runOutput(outputPath, terminateGlobally, maxEvent);
421 runMonitoring(inputPath, mainPath, terminateGlobally, maxEvent);
422}
423
// Body of ZMQEventProcessor::cleanup() (declared as "void cleanup()").
// The visible part skips the cleanup with a debug message in some process
// types and otherwise ends by deleting the socket files.
// NOTE(review): listing lines 424 (function header), 426 (condition of the
// early return) and 430-431 are missing; restore them from the original file.
425{
427 B2DEBUG(30, "Not running cleanup, as I am in process type " << GlobalProcHandler::getProcessName());
428 return;
429 }
// NOTE(review): listing lines 430-431 are missing here.
432
433 deleteSocketFiles();
434}
435
// Event loop of one process: call processEvent() once per event until it
// reports the end of processing or maxEvent (if > 0) events were handled.
// Exactly one of isInputProcess / isWorkerProcess / isOutputProcess selects
// how processEvent() is called; if none is set the function logs and returns.
// NOTE(review): listing lines 439, 470, 475 and 481 are missing; the
// statements that belonged there are not visible.
436void ZMQEventProcessor::processCore(const PathPtr& startPath, const ModulePtrList& modulePathList, long maxEvent,
437 bool isInputProcess, bool isWorkerProcess, bool isOutputProcess)
438{
440 m_moduleList = modulePathList;
441
442 //Remember the previous event meta data, and identify end of data meta data
443 m_previousEventMetaData.setEndOfData(); //invalid start state
444
445 const bool collectStats = !Environment::Instance().getNoStats();
446
447 //Loop over the events
448 long currEvent = 0;
449 bool endProcess = false;
450 while (!endProcess) {
451 if (collectStats)
452 m_processStatisticsPtr->startGlobal();
453
// A fresh iterator over the start path is created for every event.
454 PathIterator moduleIter(startPath);
455
456 if (isInputProcess) {
// Input process: the master module is skipped for the first event only.
457 endProcess = ZMQEventProcessor::processEvent(moduleIter, isInputProcess && currEvent == 0);
458 } else if (isWorkerProcess) {
// Worker process: the third argument is true only for the first event in the DAQ environment.
459 endProcess = ZMQEventProcessor::processEvent(moduleIter, false,
460 isWorkerProcess && currEvent == 0 && Environment::Instance().getZMQDAQEnvironment());
461 } else if (isOutputProcess) {
// Output process: the fourth argument is true only for the first event in the DAQ environment.
462 endProcess = ZMQEventProcessor::processEvent(moduleIter, false, false,
463 isOutputProcess && currEvent == 0 && Environment::Instance().getZMQDAQEnvironment());
464 } else {
465 B2INFO("processCore : should not come here. Specified path is invalid");
466 return;
467 }
468
469 //Delete event related data in DataStore
// NOTE(review): the call for the comment above (listing line 470) is missing.
471
472 currEvent++;
473 if ((maxEvent > 0) && (currEvent >= maxEvent)) endProcess = true;
474 if (collectStats)
// NOTE(review): the statement controlled by the "if" above (listing line 475) is missing.
476 } //end event loop
477
478 //End last run
479 m_eventMetaDataPtr.create();
480 B2INFO("processCore : End Last Run. calling processEndRun()");
// NOTE(review): listing line 481 is missing; going by the log message it is presumably the processEndRun() call -- confirm.
482}
483
484
// Process one event: walk the modules of moduleIter, call event() on them and
// handle end-of-data, run changes and module conditions.
// Returns true if end-of-data was seen (stop the event loop), false otherwise.
// Throws StoppedBySignalException if a signal was stored in g_signalReceived.
//   skipMasterModule - do not call event() on m_master
//   WorkerPath / OutputPath - set by processCore() for the first event of a
//                             worker / output process in the DAQ environment
// NOTE(review): many listing lines are missing in this block (488, 490, 521,
// 542, 544, 547-548, 558-559, 566, 568, 579-580, 585, 590, 592, 599).
// Several statements are therefore cut off and braces do not balance as
// shown; restore the missing lines from the original file before editing.
485bool ZMQEventProcessor::processEvent(PathIterator moduleIter, bool skipMasterModule, bool WorkerPath, bool OutputPath)
486{
487 double time = Utils::getClock() / Unit::s;
// NOTE(review): the condition around the status update (listing lines 488, 490) is missing.
489 MetadataService::Instance().addBasf2Status("running event loop");
491 }
492
493 const bool collectStats = !Environment::Instance().getNoStats();
494
495 while (!moduleIter.isDone()) {
496 Module* module = moduleIter.get();
497
498 // run the module ... unless we don't want to
499 if (module != m_master) {
500 callEvent(module);
501 } else if (!skipMasterModule) {
502 callEvent(module);
503 } else
504 B2INFO("Skipping execution of module " << module->getName());
505
506 if (!m_eventMetaDataPtr) {
507 return false;
508 }
509
510 //Check for end of data
511 if (m_eventMetaDataPtr->isEndOfData()) {
512 // Immediately leave the loop and terminate (true)
513 B2INFO("isEndOfData. Return");
514 return true;
515 }
516
517 //Handle EventMetaData changes by master module
518 if (module == m_master && !skipMasterModule) {
519
520 //initialize random number state for the event
// NOTE(review): the call for the comment above (listing line 521) is missing.
522
523 // Worker Path
524 if (WorkerPath) {
525 B2INFO("Worker Path and First Event!");
526 if (Environment::Instance().isZMQDAQFirstEvent(m_eventMetaDataPtr->getExperiment(), m_eventMetaDataPtr->getRun())) {
527 B2INFO("Worker path processing for ZMQDAQ first event.....Skip to the end of path");
528 B2INFO(" --> exp = " << m_eventMetaDataPtr->getExperiment() << " run = " << m_eventMetaDataPtr->getRun());
// Advance the iterator to the module named "ZMQTxWorker".
// NOTE(review): this loop does not check moduleIter.isDone(); if no module
// with that name is in the path it will not terminate normally.
529 while (true) {
530 module = moduleIter.get();
531 if (module->getName() == "ZMQTxWorker") break;
532 moduleIter.next();
533 }
534 continue;
535 }
536 }
537
538 // Check for EndOfRun
539 if (!WorkerPath && !OutputPath) {
540 if (m_eventMetaDataPtr->isEndOfRun()) {
541 B2INFO("===> EndOfRun : calling processEndRun(); isEndOfRun = " << m_eventMetaDataPtr->isEndOfRun());
// NOTE(review): listing lines 542 and 544 are missing around the next comment.
543 // Store the current event meta data for the next round
545 // Leave this event, but not the full processing (false)
546 return false;
// NOTE(review): listing lines 547-548 are missing; the branch the following lines belong to is not visible.
549 B2INFO("===> EndOfData : ----> Run change request to the same run!!! Skip this event.");
550 return false;
551 }
552 B2INFO("===> EndOfData : calling processBeginRun(); isEndOfData = " << m_previousEventMetaData.isEndOfData() <<
553 " isEndOfRun = " << m_previousEventMetaData.isEndOfRun());
554 B2INFO("--> cur run = " << m_eventMetaDataPtr->getRun() << " <- prev run = " << m_previousEventMetaData.getRun());
555 B2INFO("--> cur evt = " << m_eventMetaDataPtr->getEvent() << " <- prev evt = " << m_previousEventMetaData.getEvent());
556 // The run number should not be 0
557 if (m_eventMetaDataPtr->getRun() != 0) {
// NOTE(review): the body of this branch (listing lines 558-559) is missing.
560 } else {
561 return false;
562 }
563 }
564
// NOTE(review): the two definitions below are cut off (listing lines 566 and 568 are missing).
565 const bool runChanged = ((m_eventMetaDataPtr->getExperiment() != m_previousEventMetaData.getExperiment()) or
567 const bool runChangedWithoutNotice = runChanged and not m_previousEventMetaData.isEndOfData()
569 // if (runChangedWithoutNotice && !g_first_round) {
570 if (runChangedWithoutNotice) {
571 if (collectStats)
572 m_processStatisticsPtr->suspendGlobal();
573
574 B2INFO("===> Run Change (possibly offline) : calling processEndRun() and processBeginRun()");
575 B2INFO("--> cur run = " << m_eventMetaDataPtr->getRun() << " <- prev run = " << m_previousEventMetaData.getRun());
576 B2INFO("--> cur evt = " << m_eventMetaDataPtr->getEvent() << " <- prev evt = " << m_previousEventMetaData.getEvent());
577 B2INFO("--> runChanged = " << runChanged << " runChangedWithoutNotice = " << runChangedWithoutNotice);
578
// NOTE(review): listing lines 579-580 are missing; going by the log message above they are presumably the processEndRun()/processBeginRun() calls -- confirm.
581
582 if (collectStats)
583 m_processStatisticsPtr->resumeGlobal();
584 }
// NOTE(review): listing line 585 is missing here.
586 } else
587 B2INFO("Skipping begin/end run processing");
588
589 //make sure we use the event dependent generator again
// NOTE(review): listing lines 590 and 592 are missing here.
591
593
594 } else if (!WorkerPath && !OutputPath) {
595 //Check for a second master module. Cannot do this if we skipped the
596 //master module as the EventMetaData is probably set before we call this
597 //function
// NOTE(review): the rest of the condition below (listing line 599) is missing.
598 if (!skipMasterModule && m_eventMetaDataPtr &&
600 B2FATAL("Two modules setting EventMetaData were discovered: " << m_master->getName() << " and " << module->getName());
601 }
602 }
603
// A signal stored by storeSignal() aborts the event via an exception.
604 if (g_signalReceived != 0) {
605 throw StoppedBySignalException(g_signalReceived);
606 }
607
608 //Check for the module conditions, evaluate them and if one is true switch to the new path
609 if (module->evalCondition()) {
610 PathPtr condPath = module->getConditionPath();
611 //continue with parent Path after condition path is executed?
612 if (module->getAfterConditionPath() == Module::EAfterConditionPath::c_Continue) {
613 moduleIter = PathIterator(condPath, moduleIter);
614 } else {
615 moduleIter = PathIterator(condPath);
616 }
617 } else {
618 moduleIter.next();
619 }
620 } //end module loop
621 return false;
622}
623
// Body of ZMQEventProcessor::processBeginRun (declared as
// "void processBeginRun(bool skipDB=false)"): marks the run as started,
// updates the DBStore unless skipDB is set and calls beginRun() on every
// module in m_moduleList with the module's log configuration active.
// NOTE(review): listing lines 624 (function header), 636, 647 and 653 are
// missing; restore them from the original file.
625{
626 MetadataService::Instance().addBasf2Status("beginning run");
627
// processEndRun() only does work while this flag is set.
628 m_inRun = true;
629
630 LogSystem& logSystem = LogSystem::Instance();
631 m_processStatisticsPtr->startGlobal();
632
633 if (!skipDB) DBStore::Instance().update();
634
635 // initialize random generator for end run
// NOTE(review): the comment above says "end run" inside the begin-run
// function -- likely a copy/paste slip; the call itself (listing line 636) is
// missing, so this cannot be confirmed here.
637
638 for (const ModulePtr& modPtr : m_moduleList) {
639 Module* module = modPtr.get();
640
641 //Set the module dependent log level
642 logSystem.updateModule(&(module->getLogConfig()), module->getName());
643
644 //Do beginRun() call
645 m_processStatisticsPtr->startModule();
646 module->beginRun();
// NOTE(review): listing line 647 is missing (presumably the matching statistics stop call -- confirm).
648
649 //Set the global log level
650 logSystem.updateModule(nullptr);
651 }
652
// NOTE(review): listing line 653 is missing here.
654}
655
656
// Body of ZMQEventProcessor::processEndRun (declared as "void processEndRun()"):
// does nothing unless a run is in progress; otherwise calls endRun() on every
// module in m_moduleList and restores the event meta data afterwards.
// NOTE(review): listing lines 657 (function header), 659, 671, 682 and 689 are
// missing; restore them from the original file.
658{
660
// Nothing to do if no run was started via processBeginRun().
661 if (!m_inRun)
662 return;
663 m_inRun = false;
664
665 LogSystem& logSystem = LogSystem::Instance();
666 m_processStatisticsPtr->startGlobal();
667
// Keep a copy of the current event meta data; it is written back after the loop.
668 const EventMetaData newEventMetaData = *m_eventMetaDataPtr;
669
670 //initialize random generator for end run
// NOTE(review): the call for the comment above (listing line 671) is missing.
672
673 for (const ModulePtr& modPtr : m_moduleList) {
674 Module* module = modPtr.get();
675
676 //Set the module dependent log level
677 logSystem.updateModule(&(module->getLogConfig()), module->getName());
678
679 //Do endRun() call
680 m_processStatisticsPtr->startModule();
681 module->endRun();
// NOTE(review): listing line 682 is missing (presumably the matching statistics stop call -- confirm).
683
684 //Set the global log level
685 logSystem.updateModule(nullptr);
686 }
687 *m_eventMetaDataPtr = newEventMetaData;
688
// NOTE(review): listing line 689 is missing here.
690}
static void removeSideEffects()
call clear() and removeSideEffects() for all Mergeable objects in datastore (for c_Persistent durabil...
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
Definition: DataStore.h:59
static DataStore & Instance()
Instance of singleton Store.
Definition: DataStore.cc:54
void setInitializeActive(bool active)
Setter for m_initializeActive.
Definition: DataStore.cc:94
void invalidateData(EDurability durability)
Clears all registered StoreEntry objects of a specified durability, invalidating all objects.
Definition: DataStore.cc:715
const std::string & getZMQSocketAddress() const
Socket address to use in ZMQ.
Definition: Environment.h:261
int getNumberProcesses() const
Returns the number of worker processes which should be used for the parallel processing.
Definition: Environment.h:158
bool getNoStats() const
Disable collection of statistics during event processing.
Definition: Environment.h:200
void setZMQDAQEnvironment(bool zmqDAQ)
Set DAQ environment.
Definition: Environment.h:352
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Store event, run, and experiment numbers.
Definition: EventMetaData.h:33
void setEndOfData()
Marks the end of the data processing.
int getRun() const
Run Getter.
unsigned int getEvent() const
Event Getter.
bool isEndOfRun() const
is end-of-run set? (see setEndOfRun()).
int getExperiment() const
Experiment Getter.
bool isEndOfData() const
is end-of-data set? (see setEndOfData()).
Exception thrown when execution is stopped by a signal.
void processInitialize(const ModulePtrList &modulePathList, bool setEventInfo=true)
Initializes the modules.
bool m_inRun
Are we currently in a run? If yes, processEndRun() needs to do something.
double m_lastMetadataUpdate
Time in seconds of last call for metadata update in event loop.
static void installSignalHandler(int sig, void(*fn)(int))
Install a signal handler 'fn' for given signal.
StoreObjPtr< ProcessStatistics > m_processStatisticsPtr
Also used in a number of places.
void process(const PathPtr &startPath, long maxEvent=0)
Processes the full module chain, starting with the first module in the given path.
void callEvent(Module *module)
Calls event() on one single module, setting up logging and statistics as needed.
void processTerminate(const ModulePtrList &modulePathList)
Terminates the modules.
static void writeToStdErr(const char msg[])
async-safe method to write something to STDERR.
const Module * m_master
The master module that determines the experiment/run/event number.
StoreObjPtr< EventMetaData > m_eventMetaDataPtr
EventMetaData is used by processEvent()/processCore().
ModulePtrList m_moduleList
List of all modules in order initialized.
long getMaximumEventNumber(long maxEvent) const
Calculate the maximum event number out of the argument from command line and the environment.
double m_metadataUpdateInterval
Minimal time difference in seconds for metadata updates in event loop.
static void installMainSignalHandlers(void(*fn)(int)=nullptr)
Install signal handler for INT, TERM and QUIT signals.
static bool startOutputProcess(bool local=false)
Fork and initialize an output process.
static bool startInputProcess()
Fork and initialize an input process.
static void initialize(unsigned int nWorkerProc)
Create a new process handler, which will handle nWorkerProc processes.
static bool startMonitoringProcess()
Fork and initialize a monitoring process.
static bool startWorkerProcesses(unsigned int numProcesses)
Fork and initialize as many worker processes as requested.
static bool isProcess(ProcType procType)
Return true if the process is of type procType.
static std::string getProcessName()
Get a human readable name for this process. (input, event, output...).
@ c_Error
Error: for things that went wrong and have to be fixed.
Definition: LogConfig.h:30
Class for logging debug, info and error messages.
Definition: LogSystem.h:46
void updateModule(const LogConfig *moduleLogConfig=nullptr, const std::string &moduleName="")
Sets the log configuration to the given module log configuration and sets the module name This method...
Definition: LogSystem.h:191
int getMessageCounter(LogConfig::ELogLevel logLevel) const
Returns the number of logging calls per log level.
Definition: LogSystem.cc:161
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:31
void addBasf2Status(const std::string &message="")
Add metadata of basf2 status.
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
@ c_EndRun
Counting time/calls in endRun()
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
Base class for Modules.
Definition: Module.h:72
const std::string & getName() const
Returns the name of the module.
Definition: Module.h:187
std::list< ModulePtr > getModules() const override
no submodules, return empty list
Definition: Module.h:506
Iterator over a Path (returning Module pointers).
Definition: PathIterator.h:26
void next()
increment.
Definition: PathIterator.h:49
bool isDone() const
Are we finished iterating?
Definition: PathIterator.h:72
Module * get() const
dereference.
Definition: PathIterator.h:75
static ModulePtrList getTerminateGloballyModules(const ModulePtrList &modules)
Return only modules which have the TerminateGlobally Module flag set.
Definition: PathUtils.cc:196
static ModulePtrList preparePaths(PathPtr &inputPath, PathPtr &mainPath, PathPtr &outputPath)
Adds internal zmq modules to the paths.
Definition: PathUtils.cc:112
static std::tuple< PathPtr, PathPtr, PathPtr > splitPath(const PathPtr &path)
Split the given path into the input, main and output path (in this order) by looking at the paralle...
Definition: PathUtils.cc:16
static ModulePtr getHistogramManager(PathPtr &inputPath)
Find the histogram manager in the paths and return it.
Definition: PathUtils.cc:97
static void prependModulesIfNotPresent(ModulePtrList *modules, const ModulePtrList &prependModules)
Prepend given 'prependModules' to 'modules', if they're not already present.
Definition: PathUtils.cc:206
void checkMulticast(int timeout=0)
check multicast for messages and kill workers if requested
void waitForRunningWorker(int timeout)
Block until either the worker process is running or the timeout (in seconds) is reached.
bool hasEnded() const
If we have received a SIGINT signal or the last process is gone, we can end smoothly.
void checkSignals(int g_signalReceived)
check if we have received any signal from the user or OS. Kill the processes if not SIGINT.
void waitForRunningInput(int timeout)
Block until either the input process is running or the timeout (in seconds) is reached.
void checkChildProcesses()
check the child processes, if one has died
void terminate()
Terminate the processing.
void waitForRunningOutput(int timeout)
Block until either the output process is running or the timeout (in seconds) is reached.
void subscribe(const std::string &pubSocketAddress, const std::string &subSocketAddress, const std::string &controlSocketAddress)
Start listening for process management information on the given address.
bool hasWorkers() const
Check if there is at least one running worker.
void killProcesses(unsigned int timeout)
Ask all processes to terminate. If not, kill them after timeout seconds.
void initialize(unsigned int requestedNumberOfWorkers)
Init the processing with that many workers.
unsigned int needMoreWorkers() const
Compare our current list of workers of how many we want to have.
void reset()
Reset the internal state.
static void initializeEndRun()
Initialize run independent random generator for end run.
static void initializeBeginRun()
Initialize run independent random generator for begin run.
static void useEventDependent()
Set Event dependent Random Generator as current one.
static void initializeEvent(bool force=false)
Initialize event information.
static RbTupleManager & Instance()
Access to singleton.
Definition: RbTuple.cc:40
int hadd(bool deleteflag=true)
Functions to add up all histogram files.
Definition: RbTuple.cc:138
Helper class for data store serialization.
Definition: StreamHelper.h:23
std::unique_ptr< EvtMessage > stream(bool addPersistentDurability=true, bool streamTransientObjects=true)
Stream the data store into an event message.
Definition: StreamHelper.cc:29
void initialize(int compressionLevel, bool handleMergeable)
Initialize this class. Call this e.g. in the first event.
Definition: StreamHelper.cc:18
static const double s
[second]
Definition: Unit.h:95
static std::string getSocketAddress(const std::string &socketAddress, ZMQAddressType socketPart)
Create a full socket address for the given type from a random socket address, by adding a suffix.
A helper class for communicating over ZMQ. Includes a multicast and (if needed) also a data socket.
Definition: ZMQClient.h:22
void publish(AZMQMessage message) const
Publish the message to the multicast.
Definition: ZMQClient.h:53
void initialize(const std::string &pubSocketAddress, const std::string &subSocketAddress, const std::string &socketAddress, bool bind)
Initialize the multicast and a data socket of the given type.
Definition: ZMQClient.cc:53
This class provides the core event processing loop for parallel processing with ZMQ.
ProcessMonitor m_processMonitor
Instance of the process monitor.
void processEndRun()
Calls EndRun function.
void runMonitoring(const PathPtr &inputPath, const PathPtr &mainPath, const ModulePtrList &terminateGlobally, long maxEvent)
Start the monitoring (without forking)
void processBeginRun(bool skipDB=false)
Calls BeginRun function.
void cleanup()
clean up IPC resources (should only be called in one process).
void process(const PathPtr &spath, long maxEvent)
Processes the full module chain using parallel processing, starting with the first module in the give...
void terminateAndCleanup(const ModulePtr &histogramManager)
Last step in the process: run the termination and cleanup (kill all remaining processes)
void runWorker(unsigned int numProcesses, const PathPtr &inputPath, const PathPtr &mainPath, const ModulePtrList &terminateGlobally, long maxEvent)
Fork out the N worker process.
void initialize(const ModulePtrList &moduleList, const ModulePtr &histogramManager)
First step in the process: init the module in the list.
bool processEvent(PathIterator moduleIter, bool skipMasterModule, bool Worker=false, bool output=false)
Calls Event function.
void processCore(const PathPtr &startPath, const ModulePtrList &modulePathList, long maxEvent=0, bool isInputProcess=true, bool isWorkerProcess=false, bool isOutputProcess=false)
Process modules in the path.
void runInput(const PathPtr &inputPath, const ModulePtrList &terminateGlobally, long maxEvent)
Fork out the input process.
void runOutput(const PathPtr &outputPath, const ModulePtrList &terminateGlobally, long maxEvent)
Fork out the output process.
void forkAndRun(long maxEvent, const PathPtr &inputPath, const PathPtr &mainPath, const PathPtr &outputPath, const ModulePtrList &terminateGlobally)
Second step in the process: fork out the processes we need to have and call the event loop.
virtual ~ZMQEventProcessor()
Make sure we remove all sockets cleanly.
EventMetaData m_previousEventMetaData
Stores previous eventMetaData.
void processPath(const PathPtr &localPath, const ModulePtrList &terminateGlobally, long maxEvent)
Basic function run in every process: process the event loop of the given path.
ZMQEventProcessor()
Init the socket cleaning at exit.
static auto createMessage(const std::string &msgIdentity, const EMessageTypes msgType, const std::unique_ptr< EvtMessage > &eventMessage)
Create an ID Message out of an identity, the type and an event message.
std::shared_ptr< Path > PathPtr
Defines a pointer to a path object as a shared pointer (std::shared_ptr).
Definition: Path.h:35
static DBStore & Instance()
Instance of a singleton DBStore.
Definition: DBStore.cc:28
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a shared pointer (std::shared_ptr).
Definition: Module.h:43
std::list< ModulePtr > ModulePtrList
Defines a std::list of shared module pointers.
Definition: Module.h:584
void updateEvent()
Updates all intra-run dependent objects.
Definition: DBStore.cc:142
void update()
Updates all objects that are outside their interval of validity.
Definition: DBStore.cc:79
@ c_Output
Output Process.
@ c_Worker
Worker/Reconstruction Process.
@ c_Monitor
Monitoring Process.
@ c_Input
Input Process.
@ c_Init
Before the forks, the process is in init state.
@ c_sub
Multicast subscribe socket.
@ c_control
Multicast control socket.
@ c_pub
Multicast publish socket.
double getClock()
Return current value of the real-time clock.
Definition: Utils.cc:66
Abstract base class for different kinds of events.
STL namespace.