Belle II Software development
ZMQEventProcessor.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <framework/pcore/ProcHelper.h>
10#include <framework/pcore/GlobalProcHandler.h>
11#include <framework/pcore/zmq/messages/ZMQDefinitions.h>
12#include <framework/pcore/zmq/utils/ZMQAddressUtils.h>
13#include <framework/pcore/zmq/messages/ZMQMessageFactory.h>
14#include <framework/pcore/PathUtils.h>
15
16#include <framework/pcore/ZMQEventProcessor.h>
17#include <framework/pcore/DataStoreStreamer.h>
18#include <framework/pcore/RbTuple.h>
19
20#include <framework/core/Environment.h>
21#include <framework/logging/LogSystem.h>
22
23#include <framework/database/DBStore.h>
24#include <framework/core/RandomNumbers.h>
25#include <framework/core/MetadataService.h>
26#include <framework/gearbox/Unit.h>
27#include <framework/utilities/Utils.h>
28
29#include <TROOT.h>
30
31#include <sys/stat.h>
32
33#include <csignal>
34
35using namespace std;
36using namespace Belle2;
37
38namespace {
46 static int g_signalReceived = 0;
47
49 static ZMQEventProcessor* g_eventProcessorForSignalHandling = nullptr;
50
51 static void cleanupAndRaiseSignal(int signalNumber)
52 {
53 if (g_eventProcessorForSignalHandling) {
54 g_eventProcessorForSignalHandling->cleanup();
55 }
56 // uninstall current handler and call default one.
57 signal(signalNumber, SIG_DFL);
58 raise(signalNumber);
59 }
60
61 static void storeSignal(int signalNumber)
62 {
63 if (signalNumber == SIGINT) {
64 EventProcessor::writeToStdErr("\nStopping basf2 gracefully...\n");
65 }
66
67 // We do not want to remove the first signal
68 if (g_signalReceived == 0) {
69 g_signalReceived = signalNumber;
70 }
71 }
72
74 std::string g_socketAddress = "";
75
76 void deleteSocketFiles()
77 {
78 if (not GlobalProcHandler::isProcess(ProcType::c_Monitor) and not GlobalProcHandler::isProcess(ProcType::c_Init)) {
79 return;
80 }
81
82 const std::vector<ZMQAddressType> socketAddressList = {ZMQAddressType::c_input, ZMQAddressType::c_output, ZMQAddressType::c_pub, ZMQAddressType::c_sub, ZMQAddressType::c_control};
83 const auto seperatorPos = g_socketAddress.find("://");
84
85 if (seperatorPos == std::string::npos or seperatorPos + 3 >= g_socketAddress.size()) {
86 return;
87 }
88
89 const std::string filename(g_socketAddress.substr(seperatorPos + 3));
90
91 struct stat buffer;
92 for (const auto socketAdressType : socketAddressList) {
93 const std::string socketAddress(ZMQAddressUtils::getSocketAddress(filename, socketAdressType));
94 if (stat(socketAddress.c_str(), &buffer) == 0) {
95 remove(socketAddress.c_str());
96 }
97 }
98 }
99} // namespace
100
102{
103 B2ASSERT("You are having two instances of the ZMQEventProcessor running! This is not possible",
104 not g_eventProcessorForSignalHandling);
105 g_eventProcessorForSignalHandling = this;
106
107 // Make sure to remove the sockets
108 g_socketAddress = Environment::Instance().getZMQSocketAddress();
109 std::atexit(deleteSocketFiles);
110}
111
113{
114 cleanup();
115 g_eventProcessorForSignalHandling = nullptr;
116}
117
// Entry point of the parallel processing.
// Splits `path` into an input, main and output part, initialises the modules,
// forks and runs the processes (forkAndRun) and finally terminates and cleans up.
// Returns right away for an empty path, aborts with B2FATAL if the configured
// number of processes is 0, and falls back to EventProcessor::process() when
// there is no (non-empty) main path.
//   path     - the full module path to process
//   maxEvent - maximal number of events, handed on unchanged
118void ZMQEventProcessor::process(const PathPtr& path, long maxEvent)
119{
120 // Concerning signal handling:
121 // * During the initialization, we just raise the signal without doing any cleanup etc.
122 // * During the event execution, we will not allow for any signal in all processes except the parent process.
123 // Here, we catch sigint and clean up the processes AND WHAT DO WE DO IN THE OTHER CASES?
124 // * During cleanup, we will just ignore sigint, but the rest will be raised
125
126 if (path->isEmpty()) {
127 return;
128 }
129
130 const int numProcesses = Environment::Instance().getNumberProcesses();
131 if (numProcesses == 0) {
132 B2FATAL("ZMQEventProcessor::process() called for serial processing! Most likely a bug in Framework.");
133 }
134
135 // Split the path into input, main and output. A nullptr means, the path should not be used
136 PathPtr inputPath, mainPath, outputPath;
137 std::tie(inputPath, mainPath, outputPath) = PathUtils::splitPath(path);
138 const ModulePtr& histogramManager = PathUtils::getHistogramManager(inputPath);
139
140 // Check for existence of HLTZMQ2Ds module in input path to set DAQ environment
    // NOTE(review): inputPath is dereferenced here without a null check, although the
    // comment above says a nullptr path is possible — confirm splitPath() never returns
    // a null input path.
141 for (const ModulePtr& module : inputPath->getModules()) {
142 if (module->getName() == "HLTZMQ2Ds") {
    // NOTE(review): listing line 143 is not visible here; presumably it switches the DAQ
    // environment on (the log message below says so) — confirm against the repository.
144 B2INFO("ZMQEventProcessor : DAQ environment set");
145 break;
146 }
147 }
148
149 if (not mainPath or mainPath->isEmpty()) {
150 B2WARNING("Cannot run any modules in parallel (no c_ParallelProcessingCertified flag), falling back to single-core mode.");
151 EventProcessor::process(path, maxEvent);
152 return;
153 }
154
155 // inserts Rx/Tx modules into path (sets up IPC structures)
156 const ModulePtrList& moduleList = PathUtils::preparePaths(inputPath, mainPath, outputPath);
157
158 // Run the initialization of the modules and the histogram manager
159 initialize(moduleList, histogramManager);
160
161 // The main part: fork into the different processes and run!
162 const ModulePtrList& terminateGlobally = PathUtils::getTerminateGloballyModules(moduleList);
163 forkAndRun(maxEvent, inputPath, mainPath, outputPath, terminateGlobally);
164
    // Install the clean-up-and-re-raise handler for the termination phase.
165 installMainSignalHandlers(cleanupAndRaiseSignal);
166 // Run the final termination and cleanup with error check
167 terminateAndCleanup(histogramManager);
168}
169
// First step of the processing: initialise the histogram manager (if one was
// found) and all modules, then refuse to start the event loop when something
// went wrong.
//   moduleList       - modules handed to processInitialize()
//   histogramManager - may be null; initialised first when set
170void ZMQEventProcessor::initialize(const ModulePtrList& moduleList, const ModulePtr& histogramManager)
171{
172 if (histogramManager) {
173 histogramManager->initialize();
174 }
175 // from now on the datastore is available
176 processInitialize(moduleList, true);
177
178 B2INFO("ZMQEventProcessor : processInitialize done");
179
180 // Don't start processing in case of no master module
    // Only an error is logged at this point; no return or abort follows directly.
181 if (!m_master) {
182 B2ERROR("There is no module that provides event and run numbers. You must either add the EventInfoSetter module to your path, or, if using an input module, read EventMetaData objects from file.");
183 }
184
185 // Check if errors appeared. If yes, don't start the event processing.
    // NOTE(review): the declaration of numLogError (listing line 186) is not visible here;
    // presumably it is the number of error messages logged so far — confirm.
187 if (numLogError != 0) {
188 B2FATAL(numLogError << " ERROR(S) occurred! The processing of events will not be started.");
189 }
190
191 // TODO: I do not really understand what is going on here...
    // NOTE(review): listing lines 192-196 are not visible here.
197 // disable ROOT's management of TFiles
198 // clear list, but don't actually delete the objects
199 gROOT->GetListOfFiles()->Clear("nodelete");
200}
201
203{
204 cleanup();
205
206 if (histogramManager) {
207 B2INFO("HistoManager:: adding histogram files");
209 }
210
211 // did anything bad happen?
212 if (g_signalReceived) {
213 if (g_signalReceived == SIGINT) {
214 B2RESULT("Processing aborted via signal " << g_signalReceived <<
215 ", terminating. Output files have been closed safely and should be readable.");
216 } else {
217 B2ERROR("Processing aborted via signal " << g_signalReceived <<
218 ", terminating. Output files have been closed safely and should be readable.");
219 }
220 // re-raise the signal
221 installSignalHandler(g_signalReceived, SIG_DFL);
222 raise(g_signalReceived);
223 }
224}
225
// Fork out the input process.
// Returns without doing anything when there is no (non-empty) input path.
// The code path that reaches processPath() never returns to the caller: it
// ends with exit(0).
//   inputPath         - path processed in the input process
//   terminateGlobally - handed on to processPath()
//   maxEvent          - handed on to processPath()
226void ZMQEventProcessor::runInput(const PathPtr& inputPath, const ModulePtrList& terminateGlobally, long maxEvent)
227{
228 if (not inputPath or inputPath->isEmpty()) {
229 return;
230 }
231
    // NOTE(review): the condition guarding the early return below (listing line 232) and
    // the statement at line 234 are not visible here; presumably the guard is the result
    // of starting the input process — confirm against the repository.
233 // This is not the input process, clean up datastore to not contain the first event
235 return;
236 }
237
238 // The default will be to not do anything on signals...
    // NOTE(review): the statements that belong to this comment (listing lines 239-242)
    // are not visible here.
240
243
244 processPath(inputPath, terminateGlobally, maxEvent);
245 B2DEBUG(30, "Finished an input process");
246 exit(0);
247}
248
// Fork out the output process.
// Returns without doing anything when there is no (non-empty) output path.
// The code path that reaches processPath() publishes one statistics message
// afterwards and then ends with exit(0); it never returns to the caller.
//   outputPath        - path processed in the output process
//   terminateGlobally - handed on to processPath()
//   maxEvent          - handed on to processPath()
249void ZMQEventProcessor::runOutput(const PathPtr& outputPath, const ModulePtrList& terminateGlobally, long maxEvent)
250{
251 const auto& socketAddress = Environment::Instance().getZMQSocketAddress();
252 const auto pubSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_pub));
253 const auto subSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_sub));
254
255 if (not outputPath or outputPath->isEmpty()) {
256 return;
257 }
258
    // NOTE(review): the condition guarding the early return below (listing line 259) is
    // not visible here; presumably it is the result of starting the output process —
    // confirm against the repository.
260 return;
261 }
262
263 // The default will be to not do anything on signals...
    // NOTE(review): the statements that belong to this comment (listing lines 264-266)
    // are not visible here.
265
267
268 // Set the rx module as main module
    // The first module of the output path becomes m_master.
269 m_master = outputPath->getModules().begin()->get();
270
271 processPath(outputPath, terminateGlobally, maxEvent);
272
273 // Send the statistics to the process monitor
274 StreamHelper streamer;
275 ZMQClient zmqClient;
276
277 // TODO: true?
    // Arguments are (compressionLevel = 0, handleMergeable = true).
278 streamer.initialize(0, true);
279 zmqClient.initialize(pubSocketAddress, subSocketAddress);
280
281 // TODO: make sure to only send statistics!
282 const auto& evtMessage = streamer.stream();
283 auto message = ZMQMessageFactory::createMessage(EMessageTypes::c_statisticMessage, evtMessage);
284 zmqClient.publish(std::move(message));
285
286 B2DEBUG(30, "Finished an output process");
287 exit(0);
288}
// Fork out the requested number of worker processes.
// Returns without doing anything for numProcesses == 0. When
// startWorkerProcesses() returns false, the caller waits for a running worker
// and returns. Otherwise the main path is processed and the process ends with
// exit(0).
//   numProcesses      - number of workers to start
//   inputPath         - only used to decide whether m_master is re-assigned
//   mainPath          - path processed in the worker
//   terminateGlobally - handed on to processPath()
//   maxEvent          - handed on to processPath()
289void ZMQEventProcessor::runWorker(unsigned int numProcesses, const PathPtr& inputPath, const PathPtr& mainPath,
290 const ModulePtrList& terminateGlobally, long maxEvent)
291{
292 if (numProcesses == 0) {
293 return;
294 }
295
    // NOTE(review): a false return value presumably means "this is not a worker process"
    // (the forking side) — confirm against GlobalProcHandler.
296 if (not GlobalProcHandler::startWorkerProcesses(numProcesses)) {
297 // Make sure the worker process is running until we go on
298 m_processMonitor.waitForRunningWorker(Environment::Instance().getZMQMaximalWaitingTime());
299 return;
300 }
301
302 // The default will be to not do anything on signals...
    // NOTE(review): the statement that belongs to this comment (listing line 303) is not
    // visible here.
304
305 if (inputPath and not inputPath->isEmpty()) {
306 // set Rx as master
    // With an input path present, the first module of the main path becomes m_master.
307 m_master = mainPath->getModules().begin()->get();
308 }
309
    // NOTE(review): listing lines 310-311 are not visible here.
312
313 processPath(mainPath, terminateGlobally, maxEvent);
314 B2DEBUG(30, "Finished a worker process");
315 exit(0);
316}
317
// Basic function run in every process: process the event loop of the given
// path and terminate its modules afterwards.
//   localPath         - path whose modules are run by processCore()
//   terminateGlobally - modules prepended (if not already present) before processTerminate()
//   maxEvent          - passed through getMaximumEventNumber() before use
318void ZMQEventProcessor::processPath(const PathPtr& localPath, const ModulePtrList& terminateGlobally, long maxEvent)
319{
320 ModulePtrList localModules = localPath->buildModulePathList();
321 maxEvent = getMaximumEventNumber(maxEvent);
322 // we are not using the default signal handler, so the processCore can not throw any exception because if sigint...
    // NOTE(review): the remaining arguments of this call (listing lines 324-325) are not
    // visible here; presumably the worker/output process flags, matching the processCore()
    // parameter list — confirm against the repository.
323 processCore(localPath, localModules, maxEvent, GlobalProcHandler::isProcess(ProcType::c_Input),
326
327 B2DEBUG(30, "terminate process...");
328 PathUtils::prependModulesIfNotPresent(&localModules, terminateGlobally);
329 processTerminate(localModules);
330}
331
332
// Start the monitoring (without forking).
// Initialises the process monitor, installs storeSignal as signal handler,
// starts the initially needed workers and then loops: signals are checked and
// missing workers are restarted, or the loop is left, depending on the
// restart / fail-on-failure settings of the environment.
//   inputPath, mainPath, terminateGlobally, maxEvent - handed on to runWorker()
333void ZMQEventProcessor::runMonitoring(const PathPtr& inputPath, const PathPtr& mainPath, const ModulePtrList& terminateGlobally,
334 long maxEvent)
335{
    // NOTE(review): the condition guarding this early return (listing line 336) is not
    // visible here.
337 return;
338 }
339
340 const auto& environment = Environment::Instance();
341
342 B2DEBUG(30, "Will now start process monitor...");
343 const int numProcesses = environment.getNumberProcesses();
344 m_processMonitor.initialize(numProcesses);
345
346 // Make sure the input process is running until we go on
    // NOTE(review): the wait call and the condition of the following early return
    // (listing lines 347-348) are not visible here.
349 return;
350 }
351 // Make sure the output process is running until we go on
    // NOTE(review): the wait call and the condition of the following early return
    // (listing lines 352-353) are not visible here.
354 return;
355 }
356
    // From here on signals are only recorded in g_signalReceived; the loop below reacts to them.
357 installMainSignalHandlers(storeSignal);
358
359 // at least start the number of workers requested
360 runWorker(m_processMonitor.needMoreWorkers(), inputPath, mainPath, terminateGlobally, maxEvent);
361
362 const auto& restartFailedWorkers = environment.getZMQRestartFailedWorkers();
363 const auto& failOnFailedWorkers = environment.getZMQFailOnFailedWorkers();
364
365 B2DEBUG(30, "Will now start main loop...");
366 while (true) {
367 // check multicast for messages and kill workers if requested
    // NOTE(review): the call that belongs to the comment above (listing line 368) is not visible here.
369 // check the child processes, if one has died
    // NOTE(review): the call that belongs to the comment above (listing line 370) is not visible here.
371 // check if we have received any signal from the user or OS. Kill the processes if not SIGINT.
372 m_processMonitor.checkSignals(g_signalReceived);
373
374 // If we have received a SIGINT signal or the last process is gone, we can end smoothly
    // NOTE(review): the condition of this break (listing line 375) is not visible here.
376 break;
377 }
378
379 // Test if we need more workers
380 const unsigned int neededWorkers = m_processMonitor.needMoreWorkers();
381 if (neededWorkers > 0) {
382 B2DEBUG(30, "restartFailedWorkers = " << restartFailedWorkers);
    // Priority: restart if requested; otherwise stop if failures are fatal; otherwise
    // stop only once no worker is left.
383 if (restartFailedWorkers) {
384 B2DEBUG(30, ".... Restarting a new worker");
385 B2ERROR(".... Restarting a new worker process");
386 runWorker(neededWorkers, inputPath, mainPath, terminateGlobally, maxEvent);
387 } else if (failOnFailedWorkers) {
388 B2ERROR("A worker failed. Will try to end the process smoothly now.");
389 break;
390 } else if (not m_processMonitor.hasWorkers()) {
391 B2WARNING("All workers have died and you did not request to restart them. Going down now.");
392 break;
393 }
394 }
395 }
396
397 B2DEBUG(30, "Finished the monitoring process");
398}
399
400void ZMQEventProcessor::forkAndRun(long maxEvent, const PathPtr& inputPath, const PathPtr& mainPath, const PathPtr& outputPath,
401 const ModulePtrList& terminateGlobally)
402{
403 const int numProcesses = Environment::Instance().getNumberProcesses();
404 GlobalProcHandler::initialize(numProcesses);
405
406 const auto& socketAddress = Environment::Instance().getZMQSocketAddress();
407
408 const auto pubSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_pub));
409 const auto subSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_sub));
410 const auto controlSocketAddress(ZMQAddressUtils::getSocketAddress(socketAddress, ZMQAddressType::c_control));
411
412 // We catch all signals and store them into a variable. This is used during the main loop then.
413 // From now on, we have to make sure to clean up behind us
414 installMainSignalHandlers(cleanupAndRaiseSignal);
415 m_processMonitor.subscribe(pubSocketAddress, subSocketAddress, controlSocketAddress);
416
417 runInput(inputPath, terminateGlobally, maxEvent);
418 runOutput(outputPath, terminateGlobally, maxEvent);
419 runMonitoring(inputPath, mainPath, terminateGlobally, maxEvent);
420}
421
423{
425 B2DEBUG(30, "Not running cleanup, as I am in process type " << GlobalProcHandler::getProcessName());
426 return;
427 }
430
431 deleteSocketFiles();
432}
433
// Process the modules of a path event by event.
// Loops until processEvent() reports the end of processing or maxEvent events
// have been handled (maxEvent <= 0 means no limit from this check).
//   startPath       - path every event starts iterating from
//   modulePathList  - stored in m_moduleList
//   maxEvent        - upper limit for the number of loop iterations, if > 0
//   isInputProcess / isWorkerProcess / isOutputProcess
//                   - select how processEvent() is called; checked in this order
434void ZMQEventProcessor::processCore(const PathPtr& startPath, const ModulePtrList& modulePathList, long maxEvent,
435 bool isInputProcess, bool isWorkerProcess, bool isOutputProcess)
436{
    // NOTE(review): listing line 437 is not visible here.
438 m_moduleList = modulePathList;
439
440 //Remember the previous event meta data, and identify end of data meta data
441 m_previousEventMetaData.setEndOfData(); //invalid start state
442
443 const bool collectStats = !Environment::Instance().getNoStats();
444
445 //Loop over the events
446 long currEvent = 0;
447 bool endProcess = false;
448 while (!endProcess) {
449 if (collectStats)
450 m_processStatisticsPtr->startGlobal();
451
452 PathIterator moduleIter(startPath);
453
    // Input process: the master module is skipped for the very first event only.
    // Worker / output process: the third / fourth argument is set for the very first
    // event only, and only in the ZMQ DAQ environment.
454 if (isInputProcess) {
455 endProcess = ZMQEventProcessor::processEvent(moduleIter, isInputProcess && currEvent == 0);
456 } else if (isWorkerProcess) {
457 endProcess = ZMQEventProcessor::processEvent(moduleIter, false,
458 isWorkerProcess && currEvent == 0 && Environment::Instance().getZMQDAQEnvironment());
459 } else if (isOutputProcess) {
460 endProcess = ZMQEventProcessor::processEvent(moduleIter, false, false,
461 isOutputProcess && currEvent == 0 && Environment::Instance().getZMQDAQEnvironment());
462 } else {
    // NOTE(review): this returns from inside the loop after startGlobal() may have been
    // called above, without the end-of-loop handling — confirm this is intended.
463 B2INFO("processCore : should not come here. Specified path is invalid");
464 return;
465 }
466
467 //Delete event related data in DataStore
    // NOTE(review): the statement that belongs to the comment above (listing line 468) is
    // not visible here.
469
470 currEvent++;
471 if ((maxEvent > 0) && (currEvent >= maxEvent)) endProcess = true;
472 if (collectStats)
    // NOTE(review): the body of this if (listing line 473) is not visible here.
474 } //end event loop
475
476 //End last run
477 m_eventMetaDataPtr.create();
478 B2INFO("processCore : End Last Run. calling processEndRun()");
    // NOTE(review): listing line 479 is not visible here; per the log message above it is
    // presumably the processEndRun() call — confirm.
480}
481
482
// Run one event through the modules reachable from moduleIter.
// Returns true when the end of data was seen (the caller should stop
// processing) and false when only this event is finished or skipped.
// Throws StoppedBySignalException when a signal has been recorded in
// g_signalReceived.
//   moduleIter       - iterator over the path; replaced when a module condition switches paths
//   skipMasterModule - do not call event() on m_master for this event
//   WorkerPath       - enables the "ZMQDAQ first event" handling below
//   OutputPath       - together with WorkerPath disables the end-of-run handling and the
//                      check for a second master module
483bool ZMQEventProcessor::processEvent(PathIterator moduleIter, bool skipMasterModule, bool WorkerPath, bool OutputPath)
484{
485 double time = Utils::getClock() / Unit::s;
    // NOTE(review): the condition around the status update (listing line 486) and line 488
    // are not visible here; presumably the update is rate-limited via the metadata update
    // interval — confirm against the repository.
487 MetadataService::Instance().addBasf2Status("running event loop");
489 }
490
491 const bool collectStats = !Environment::Instance().getNoStats();
492
493 while (!moduleIter.isDone()) {
494 Module* module = moduleIter.get();
495
496 // run the module ... unless we don't want to
497 if (module != m_master) {
498 callEvent(module);
499 } else if (!skipMasterModule) {
500 callEvent(module);
501 } else
502 B2INFO("Skipping execution of module " << module->getName());
503
504 if (!m_eventMetaDataPtr) {
505 return false;
506 }
507
508 //Check for end of data
509 if (m_eventMetaDataPtr->isEndOfData()) {
510 // Immediately leave the loop and terminate (true)
511 B2INFO("isEndOfData. Return");
512 return true;
513 }
514
515 //Handle EventMetaData changes by master module
516 if (module == m_master && !skipMasterModule) {
517
518 //initialize random number state for the event
    // NOTE(review): the statement that belongs to the comment above (listing line 519) is
    // not visible here.
520
521 // Worker Path
522 if (WorkerPath) {
523 B2INFO("Worker Path and First Event!");
524 if (Environment::Instance().isZMQDAQFirstEvent(m_eventMetaDataPtr->getExperiment(), m_eventMetaDataPtr->getRun())) {
525 B2INFO("Worker path processing for ZMQDAQ first event.....Skip to the end of path");
526 B2INFO(" --> exp = " << m_eventMetaDataPtr->getExperiment() << " run = " << m_eventMetaDataPtr->getRun());
    // NOTE(review): this loop has no isDone() check; it relies on a module named
    // "ZMQTxWorker" being present further down the path — confirm.
527 while (true) {
528 module = moduleIter.get();
529 if (module->getName() == "ZMQTxWorker") break;
530 moduleIter.next();
531 }
532 continue;
533 }
534 }
535
536 // Check for EndOfRun
537 if (!WorkerPath && !OutputPath) {
538 if (m_eventMetaDataPtr->isEndOfRun()) {
539 B2INFO("===> EndOfRun : calling processEndRun(); isEndOfRun = " << m_eventMetaDataPtr->isEndOfRun());
    // NOTE(review): listing lines 540 and 542 are not visible here.
541 // Store the current event meta data for the next round
543 // Leave this event, but not the full processing (false)
544 return false;
    // NOTE(review): the conditions introducing the next branch (listing lines 545-546) are
    // not visible here.
547 B2INFO("===> EndOfData : ----> Run change request to the same run!!! Skip this event.");
548 return false;
549 }
550 B2INFO("===> EndOfData : calling processBeginRun(); isEndOfData = " << m_previousEventMetaData.isEndOfData() <<
551 " isEndOfRun = " << m_previousEventMetaData.isEndOfRun());
552 B2INFO("--> cur run = " << m_eventMetaDataPtr->getRun() << " <- prev run = " << m_previousEventMetaData.getRun());
553 B2INFO("--> cur evt = " << m_eventMetaDataPtr->getEvent() << " <- prev evt = " << m_previousEventMetaData.getEvent());
554 // The run number should not be 0
555 if (m_eventMetaDataPtr->getRun() != 0) {
    // NOTE(review): the body of this branch (listing lines 556-557) is not visible here.
558 } else {
559 return false;
560 }
561 }
562
    // NOTE(review): the second halves of the two expressions below (listing lines 564 and
    // 566) are not visible here.
563 const bool runChanged = ((m_eventMetaDataPtr->getExperiment() != m_previousEventMetaData.getExperiment()) or
565 const bool runChangedWithoutNotice = runChanged and not m_previousEventMetaData.isEndOfData()
567 // if (runChangedWithoutNotice && !g_first_round) {
568 if (runChangedWithoutNotice) {
569 if (collectStats)
570 m_processStatisticsPtr->suspendGlobal();
571
572 B2INFO("===> Run Change (possibly offline) : calling processEndRun() and processBeginRun()");
573 B2INFO("--> cur run = " << m_eventMetaDataPtr->getRun() << " <- prev run = " << m_previousEventMetaData.getRun());
574 B2INFO("--> cur evt = " << m_eventMetaDataPtr->getEvent() << " <- prev evt = " << m_previousEventMetaData.getEvent());
575 B2INFO("--> runChanged = " << runChanged << " runChangedWithoutNotice = " << runChangedWithoutNotice);
576
    // NOTE(review): listing lines 577-578 are not visible here; per the log message above
    // presumably the processEndRun() / processBeginRun() calls — confirm.
579
580 if (collectStats)
581 m_processStatisticsPtr->resumeGlobal();
582 }
584 } else
585 B2INFO("Skipping begin/end run processing");
586
587 //make sure we use the event dependent generator again
    // NOTE(review): listing lines 588 and 590 are not visible here.
589
591
592 } else if (!WorkerPath && !OutputPath) {
593 //Check for a second master module. Cannot do this if we skipped the
594 //master module as the EventMetaData is probably set before we call this
595 //function
596 if (!skipMasterModule && m_eventMetaDataPtr &&
    // NOTE(review): the rest of this condition (listing line 597) is not visible here.
598 B2FATAL("Two modules setting EventMetaData were discovered: " << m_master->getName() << " and " << module->getName());
599 }
600 }
601
    // A recorded signal aborts the event loop by exception.
602 if (g_signalReceived != 0) {
603 throw StoppedBySignalException(g_signalReceived);
604 }
605
606 //Check for the module conditions, evaluate them and if one is true switch to the new path
607 if (module->evalCondition()) {
608 PathPtr condPath = module->getConditionPath();
609 //continue with parent Path after condition path is executed?
610 if (module->getAfterConditionPath() == Module::EAfterConditionPath::c_Continue) {
611 moduleIter = PathIterator(condPath, moduleIter);
612 } else {
613 moduleIter = PathIterator(condPath);
614 }
615 } else {
616 moduleIter.next();
617 }
618 } //end module loop
619 return false;
620}
621
623{
624 MetadataService::Instance().addBasf2Status("beginning run");
625
626 m_inRun = true;
627
628 LogSystem& logSystem = LogSystem::Instance();
629 m_processStatisticsPtr->startGlobal();
630
631 if (!skipDB) DBStore::Instance().update();
632
633 // initialize random generator for end run
635
636 for (const ModulePtr& modPtr : m_moduleList) {
637 Module* module = modPtr.get();
638
639 //Set the module dependent log level
640 logSystem.updateModule(&(module->getLogConfig()), module->getName());
641
642 //Do beginRun() call
643 m_processStatisticsPtr->startModule();
644 module->beginRun();
646
647 //Set the global log level
648 logSystem.updateModule(nullptr);
649 }
650
652}
653
654
656{
658
659 if (!m_inRun)
660 return;
661 m_inRun = false;
662
663 LogSystem& logSystem = LogSystem::Instance();
664 m_processStatisticsPtr->startGlobal();
665
666 const EventMetaData newEventMetaData = *m_eventMetaDataPtr;
667
668 //initialize random generator for end run
670
671 for (const ModulePtr& modPtr : m_moduleList) {
672 Module* module = modPtr.get();
673
674 //Set the module dependent log level
675 logSystem.updateModule(&(module->getLogConfig()), module->getName());
676
677 //Do endRun() call
678 m_processStatisticsPtr->startModule();
679 module->endRun();
681
682 //Set the global log level
683 logSystem.updateModule(nullptr);
684 }
685 *m_eventMetaDataPtr = newEventMetaData;
686
688}
static void removeSideEffects()
call clear() and removeSideEffects() for all Mergeable objects in datastore (for c_Persistent durabil...
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
Definition: DataStore.h:59
static DataStore & Instance()
Instance of singleton Store.
Definition: DataStore.cc:53
void setInitializeActive(bool active)
Setter for m_initializeActive.
Definition: DataStore.cc:93
void invalidateData(EDurability durability)
Clears all registered StoreEntry objects of a specified durability, invalidating all objects.
Definition: DataStore.cc:714
const std::string & getZMQSocketAddress() const
Socket address to use in ZMQ.
Definition: Environment.h:261
int getNumberProcesses() const
Returns the number of worker processes which should be used for the parallel processing.
Definition: Environment.h:158
bool getNoStats() const
Disable collection of statistics during event processing.
Definition: Environment.h:200
void setZMQDAQEnvironment(bool zmqDAQ)
Set DAQ environment.
Definition: Environment.h:352
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Store event, run, and experiment numbers.
Definition: EventMetaData.h:33
void setEndOfData()
Marks the end of the data processing.
int getRun() const
Run Getter.
unsigned int getEvent() const
Event Getter.
bool isEndOfRun() const
is end-of-run set? (see setEndOfRun()).
int getExperiment() const
Experiment Getter.
bool isEndOfData() const
is end-of-data set? (see setEndOfData()).
Exception thrown when execution is stopped by a signal.
void processInitialize(const ModulePtrList &modulePathList, bool setEventInfo=true)
Initializes the modules.
bool m_inRun
Are we currently in a run? If yes, processEndRun() needs to do something.
double m_lastMetadataUpdate
Time in seconds of last call for metadata update in event loop.
static void installSignalHandler(int sig, void(*fn)(int))
Install a signal handler 'fn' for given signal.
StoreObjPtr< ProcessStatistics > m_processStatisticsPtr
Also used in a number of places.
void process(const PathPtr &startPath, long maxEvent=0)
Processes the full module chain, starting with the first module in the given path.
void callEvent(Module *module)
Calls event() on one single module, setting up logging and statistics as needed.
void processTerminate(const ModulePtrList &modulePathList)
Terminates the modules.
static void writeToStdErr(const char msg[])
async-safe method to write something to STDERR.
const Module * m_master
The master module that determines the experiment/run/event number.
StoreObjPtr< EventMetaData > m_eventMetaDataPtr
EventMetaData is used by processEvent()/processCore().
ModulePtrList m_moduleList
List of all modules in order initialized.
long getMaximumEventNumber(long maxEvent) const
Calculate the maximum event number out of the argument from command line and the environment.
double m_metadataUpdateInterval
Minimal time difference in seconds for metadata updates in event loop.
static void installMainSignalHandlers(void(*fn)(int)=nullptr)
Install signal handler for INT, TERM and QUIT signals.
static bool startOutputProcess(bool local=false)
Fork and initialize an output process.
static bool startInputProcess()
Fork and initialize an input process.
static void initialize(unsigned int nWorkerProc)
Create a new process handler, which will handle nWorkerProc processes.
static bool startMonitoringProcess()
Fork and initialize a monitoring process.
static bool startWorkerProcesses(unsigned int numProcesses)
Fork and initialize as many worker processes as requested.
static bool isProcess(ProcType procType)
Return true if the process is of type procType.
static std::string getProcessName()
Get a human readable name for this process. (input, event, output...).
@ c_Error
Error: for things that went wrong and have to be fixed.
Definition: LogConfig.h:30
Class for logging debug, info and error messages.
Definition: LogSystem.h:46
void updateModule(const LogConfig *moduleLogConfig=nullptr, const std::string &moduleName="")
Sets the log configuration to the given module log configuration and sets the module name This method...
Definition: LogSystem.h:191
int getMessageCounter(LogConfig::ELogLevel logLevel) const
Returns the number of logging calls per log level.
Definition: LogSystem.cc:158
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:28
void addBasf2Status(const std::string &message="")
Add metadata of basf2 status.
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
@ c_EndRun
Counting time/calls in endRun()
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
Base class for Modules.
Definition: Module.h:72
const std::string & getName() const
Returns the name of the module.
Definition: Module.h:186
std::list< ModulePtr > getModules() const override
no submodules, return empty list
Definition: Module.h:505
Iterator over a Path (returning Module pointers).
Definition: PathIterator.h:26
void next()
increment.
Definition: PathIterator.h:49
bool isDone() const
Are we finished iterating?
Definition: PathIterator.h:72
Module * get() const
dereference.
Definition: PathIterator.h:75
static ModulePtrList getTerminateGloballyModules(const ModulePtrList &modules)
Return only modules which have the TerminateGlobally Module flag set.
Definition: PathUtils.cc:196
static ModulePtrList preparePaths(PathPtr &inputPath, PathPtr &mainPath, PathPtr &outputPath)
Adds internal zmq modules to the paths.
Definition: PathUtils.cc:112
static std::tuple< PathPtr, PathPtr, PathPtr > splitPath(const PathPtr &path)
Split the given part into the input, main and output path (in this order) by looking onto the paralle...
Definition: PathUtils.cc:16
static ModulePtr getHistogramManager(PathPtr &inputPath)
Find the histogram manager in the paths and return it.
Definition: PathUtils.cc:97
static void prependModulesIfNotPresent(ModulePtrList *modules, const ModulePtrList &prependModules)
Prepend given 'prependModules' to 'modules', if they're not already present.
Definition: PathUtils.cc:206
void checkMulticast(int timeout=0)
check multicast for messages and kill workers if requested
void waitForRunningWorker(int timeout)
Block until either the worker process is running or the timeout (in seconds) is raised.
bool hasEnded() const
If we have received a SIGINT signal or the last process is gone, we can end smoothly.
void checkSignals(int g_signalReceived)
check if we have received any signal from the user or OS. Kill the processes if not SIGINT.
void waitForRunningInput(int timeout)
Block until either the input process is running or the timeout (in seconds) is raised.
void checkChildProcesses()
check the child processes, if one has died
void terminate()
Terminate the processing.
void waitForRunningOutput(int timeout)
Block until either the output process is running or the timeout (in seconds) is raised.
void subscribe(const std::string &pubSocketAddress, const std::string &subSocketAddress, const std::string &controlSocketAddress)
Start listening for process management information on the given address.
bool hasWorkers() const
Check if there is at least one running worker.
void killProcesses(unsigned int timeout)
Ask all processes to terminate. If not, kill them after timeout seconds.
void initialize(unsigned int requestedNumberOfWorkers)
Init the processing with that many workers.
unsigned int needMoreWorkers() const
Compare our current list of workers of how many we want to have.
void reset()
Reset the internal state.
static void initializeEndRun()
Initialize run independent random generator for end run.
static void initializeBeginRun()
Initialize run independent random generator for begin run.
static void useEventDependent()
Set Event dependent Random Generator as current one.
static void initializeEvent(bool force=false)
Initialize event information.
static RbTupleManager & Instance()
Access to singleton.
Definition: RbTuple.cc:38
int hadd(bool deleteflag=true)
Functions to add up all histogram files.
Definition: RbTuple.cc:136
Helper class for data store serialization.
Definition: StreamHelper.h:23
std::unique_ptr< EvtMessage > stream(bool addPersistentDurability=true, bool streamTransientObjects=true)
Stream the data store into an event message.
Definition: StreamHelper.cc:29
void initialize(int compressionLevel, bool handleMergeable)
Initialize this class. Call this e.g. in the first event.
Definition: StreamHelper.cc:18
static const double s
[second]
Definition: Unit.h:95
static std::string getSocketAddress(const std::string &socketAddress, ZMQAddressType socketPart)
Create a full socket address for the given type from a random socket address, by adding a suffix.
A helper class for communicating over ZMQ. Includes a multicast and (if needed) also a data socket.
Definition: ZMQClient.h:22
void publish(AZMQMessage message) const
Publish the message to the multicast.
Definition: ZMQClient.h:53
void initialize(const std::string &pubSocketAddress, const std::string &subSocketAddress, const std::string &socketAddress, bool bind)
Initialize the multicast and a data socket of the given type.
Definition: ZMQClient.cc:53
This class provides the core event processing loop for parallel processing with ZMQ.
ProcessMonitor m_processMonitor
Instance of the process monitor.
void processEndRun()
Calls EndRun function.
void runMonitoring(const PathPtr &inputPath, const PathPtr &mainPath, const ModulePtrList &terminateGlobally, long maxEvent)
Start the monitoring (without forking)
void processBeginRun(bool skipDB=false)
Calls BeginRun function.
void cleanup()
clean up IPC resources (should only be called in one process).
void process(const PathPtr &spath, long maxEvent)
Processes the full module chain using parallel processing, starting with the first module in the give...
void terminateAndCleanup(const ModulePtr &histogramManager)
Last step in the process: run the termination and cleanup (kill all remaining processes)
void runWorker(unsigned int numProcesses, const PathPtr &inputPath, const PathPtr &mainPath, const ModulePtrList &terminateGlobally, long maxEvent)
Fork out the N worker process.
void initialize(const ModulePtrList &moduleList, const ModulePtr &histogramManager)
First step in the process: init the module in the list.
bool processEvent(PathIterator moduleIter, bool skipMasterModule, bool Worker=false, bool output=false)
Calls Event function.
void processCore(const PathPtr &startPath, const ModulePtrList &modulePathList, long maxEvent=0, bool isInputProcess=true, bool isWorkerProcess=false, bool isOutputProcess=false)
Process modules in the path.
void runInput(const PathPtr &inputPath, const ModulePtrList &terminateGlobally, long maxEvent)
Fork out the input process.
void runOutput(const PathPtr &outputPath, const ModulePtrList &terminateGlobally, long maxEvent)
Fork out the output process.
void forkAndRun(long maxEvent, const PathPtr &inputPath, const PathPtr &mainPath, const PathPtr &outputPath, const ModulePtrList &terminateGlobally)
Second step in the process: fork out the processes we need to have and call the event loop.
virtual ~ZMQEventProcessor()
Make sure we remove all sockets cleanly.
EventMetaData m_previousEventMetaData
Stores previous eventMetaData.
void processPath(const PathPtr &localPath, const ModulePtrList &terminateGlobally, long maxEvent)
Basic function run in every process: process the event loop of the given path.
ZMQEventProcessor()
Init the socket cleaning at exit.
static auto createMessage(const std::string &msgIdentity, const EMessageTypes msgType, const std::unique_ptr< EvtMessage > &eventMessage)
Create an ID Message out of an identity, the type and an event message.
std::shared_ptr< Path > PathPtr
Defines a pointer to a path object as a boost shared pointer.
Definition: Path.h:35
static DBStore & Instance()
Instance of a singleton DBStore.
Definition: DBStore.cc:26
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a boost shared pointer.
Definition: Module.h:43
std::list< ModulePtr > ModulePtrList
Defines a std::list of shared module pointers.
Definition: Module.h:583
void updateEvent()
Updates all intra-run dependent objects.
Definition: DBStore.cc:140
void update()
Updates all objects that are outside their interval of validity.
Definition: DBStore.cc:77
@ c_Output
Output Process.
@ c_Worker
Worker/Reconstruction Process.
@ c_Monitor
Monitoring Process.
@ c_Input
Input Process.
@ c_Init
Before the forks, the process is in init state.
@ c_sub
Multicast publish socket.
@ c_control
Multicast subscribe socket.
@ c_pub
Output socket.
double getClock()
Return current value of the real-time clock.
Definition: Utils.cc:66
Abstract base class for different kinds of events.
STL namespace.