Belle II Software light-2406-ragdoll
Gearbox.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8#include <framework/gearbox/Gearbox.h>
9#include <framework/gearbox/GearDir.h>
10#include <framework/core/MRUCache.h>
11#include <framework/logging/Logger.h>
12#include <framework/utilities/Stream.h>
13
14#include <libxml/parser.h>
15#include <libxml/xinclude.h>
16#include <libxml/xmlIO.h>
17#include <cstring>
18#include <boost/algorithm/string.hpp>
19
20#include <TObject.h>
21
22using namespace std;
23
24namespace Belle2 {
30 namespace gearbox {
/**
 * libxml2 "match" callback: decide whether the gearbox input callbacks
 * should be used for the given uri.
 *
 * Uris including the file protocol are declined. They are only used if
 * loading of a resource failed, then libxml will try to look in
 * /etc/xml/catalog if it can find the file there.
 *
 * @param uri resource name requested by libxml
 * @return 0 if uri is null or starts with "file:/", 1 otherwise
 */
static int matchXmlUri(const char* uri)
{
  static const char fileScheme[] = "file:/";
  //Nothing can be opened for a null uri: decline it instead of dereferencing it
  if (uri == nullptr) return 0;
  //sizeof() - 1 is the prefix length without the terminating zero
  if (std::strncmp(uri, fileScheme, sizeof(fileScheme) - 1) == 0) return 0;
  return 1;
}
40
42 void* openXmlUri(const char* uri)
43 {
44 B2DEBUG(200, "Request to open " << uri);
45 InputContext* context = Gearbox::getInstance().openXmlUri(uri);
46 return (void*) context;
47 }
48
50 static int readXmlData(void* context, char* buffer, int buffsize)
51 {
52 //B2DEBUG(200,"Calling read to get " << buffsize << " bytes");
53 auto* gearContext = static_cast<InputContext*>(context);
54 return gearContext->readXmlData(buffer, buffsize);
55 }
56
58 static int closeXmlContext(void* context)
59 {
60 B2DEBUG(200, "Closing context");
61 auto* gearContext = static_cast<InputContext*>(context);
62 delete gearContext;
63 return 0;
64 }
65 }
66
67 Gearbox::Gearbox(): m_xmlDocument(nullptr), m_xpathContext(nullptr),
68 m_parameterCache(new MRUCache<std::string, PathValue>(c_DefaultCacheSize))
69 {
70 xmlInitParser();
71 LIBXML_TEST_VERSION;
72 }
73
75 {
76 close();
78 delete m_parameterCache;
79 }
80
82 {
83 static Gearbox instance;
84 return instance;
85 }
86
88 {
89 //Check input handlers one by one
90 for (gearbox::InputHandler* handler : m_handlers) {
91 //try to create context for uri, return if success
92 gearbox::InputContext* context = handler->open(uri);
93 if (context) return context;
94 }
95 B2ERROR("Could not find data for uri '" << uri << "'");
96 return nullptr;
97 }
98
99 void Gearbox::setBackends(const vector<string>& backends)
100 {
102 for (const string& backend : backends) {
103 B2DEBUG(300, "Adding InputHandler for '" << backend << "'");
104 //Find correct InputHandler, assuming file backend by default if there is no colon in the
105 //uri
106 string prefix("file");
107 string accessinfo(backend);
108 size_t colon = backend.find(':');
109 if (colon != string::npos) {
110 prefix = backend.substr(0, colon);
111 accessinfo = backend.substr(colon + 1);
112 }
113 auto it = m_registeredHandlers.find(prefix);
114 if (it == m_registeredHandlers.end()) {
115 B2ERROR("Could not find input handler to handle '" << backend << "', ignoring");
116 continue;
117 }
118 gearbox::InputHandler* handler = it->second(accessinfo);
119 if (handler) {
120 m_handlers.push_back(handler);
121 } else {
122 B2ERROR("Problem creating input handler to handle '" << backend << "', ignoring");
123 }
124 }
125 }
126
128 {
129 for (gearbox::InputHandler* handler : m_handlers) delete handler;
130 m_handlers.clear();
131 }
132
133 void Gearbox::open(const std::string& name, size_t cacheSize)
134 {
135 //Check if we have an open connection and close first if so
136 if (m_xmlDocument) close();
137 //Check if we have at least one backend
138 if (m_handlers.empty())
139 B2FATAL("No backends defined, please use Gearbox::setBackends() first to specify how to access XML files.");
140
141 // register input callbacks for opening the files
142 xmlRegisterInputCallbacks(gearbox::matchXmlUri, gearbox::openXmlUri,
143 gearbox::readXmlData, gearbox::closeXmlContext);
144
145 //Open document
146 m_xmlDocument = xmlParseFile(name.c_str());
147 //libxml >= 2.7.0 introduced some limits on node size etc. which breaks reading VXDTF files
148 xmlXIncludeProcessFlags(m_xmlDocument, XML_PARSE_HUGE);
149
150 // reset input callbacks
151 xmlPopInputCallbacks();
152
153 if (!m_xmlDocument) B2FATAL("Could not connect gearbox to " << name);
154 m_xpathContext = xmlXPathNewContext(m_xmlDocument);
155 if (!m_xpathContext) B2FATAL("Could not create XPath context");
156
157 //Apply overrides
158 for (const auto& poverride : m_overrides) {
159 overridePathValue(poverride);
160 }
161
162 //Speeds up XPath computation on static documents.
163 xmlXPathOrderDocElems(m_xmlDocument);
164
165 //Set cachesize
166 m_parameterCache->setMaxSize(cacheSize);
167 }
168
170 {
171 if (m_xpathContext) xmlXPathFreeContext(m_xpathContext);
172 if (m_xmlDocument) xmlFreeDoc(m_xmlDocument);
173 m_xpathContext = nullptr;
174 m_xmlDocument = nullptr;
175
176 for (auto& entry : m_ownedObjects) {
177 delete entry.second;
178 }
179 m_ownedObjects.clear();
180
181 m_parameterCache->clear();
182 }
183
185 {
186 if (m_xpathContext == nullptr) B2FATAL("Gearbox is not connected");
187 //Make sure it ends with a slash
188 string query = ensureNode(poverride.path);
189 //Ok, lets search for the path
190 B2INFO("Override '" << poverride.path << "' with '" << poverride.value
191 << "' (unit: '" << poverride.unit << "')");
192 xmlXPathObjectPtr result = xmlXPathEvalExpression((xmlChar*) query.c_str(), m_xpathContext);
193 if (result != nullptr && result->type == XPATH_NODESET && !xmlXPathNodeSetIsEmpty(result->nodesetval)) {
194 //Found it, so let's replace the content
195 int numNodes = xmlXPathNodeSetGetLength(result->nodesetval);
196 if (!poverride.multiple && numNodes > 1) {
197 B2ERROR("Cannot override '" << poverride.path << "': more than one node found");
198 return;
199 } else {
200 B2DEBUG(200, "Found " << numNodes << " nodes, overriding them all");
201 }
202 for (int i = numNodes - 1; i >= 0; --i) {
203 xmlNodePtr node = result->nodesetval->nodeTab[i];
204 //Check if children are only TEXT nodes
205 bool textOnly(true);
206 for (xmlNodePtr child = node->children; child; child = child->next) {
207 textOnly &= child->type == XML_TEXT_NODE;
208 if (!textOnly) break;
209 }
210 if (!textOnly) {
211 B2ERROR("Cannot override '" << poverride.path << "': not just text content");
212 continue;
213 }
214 xmlNodeSetContent(node, BAD_CAST poverride.value.c_str());
215
216 //Is the path an element? if so replace the unit, otherwise warn
217 if (node->type != XML_ELEMENT_NODE) {
218 if (!poverride.unit.empty())
219 B2WARNING("Cannot set unit '" << poverride.unit << "' on '"
220 << poverride.path << "': not an element");
221 } else {
222 xmlSetProp(node, BAD_CAST "unit", BAD_CAST poverride.unit.c_str());
223 }
224 //From libxml example xpath2.c:
225 //All the elements returned by an XPath query are pointers to
226 //elements from the tree *except* namespace nodes where the XPath
227 //semantic is different from the implementation in libxml2 tree.
228 //As a result when a returned node set is freed when
229 //xmlXPathFreeObject() is called, that routine must check the
230 //element type. But node from the returned set may have been removed
231 //by xmlNodeSetContent() resulting in access to freed data.
232 //There is 2 ways around it:
233 // - make a copy of the pointers to the nodes from the result set
234 // then call xmlXPathFreeObject() and then modify the nodes
235 //or
236 // - remove the reference to the modified nodes from the node set
237 // as they are processed, if they are not namespace nodes.
238 if (node->type != XML_NAMESPACE_DECL)
239 result->nodesetval->nodeTab[i] = nullptr;
240 }
241 } else {
242 B2ERROR("Cannot override '" << poverride.path << "': not found");
243 }
244 xmlXPathFreeObject(result);
245 }
246
247 Gearbox::PathValue Gearbox::getPathValue(const std::string& path) const
248 {
249 PathValue value;
250 if (m_xpathContext == nullptr) B2FATAL("Gearbox is not connected");
251 //Get from cache if possible
252 if (m_parameterCache->retrieve(path, value)) {
253 return value;
254 }
255 //Nothing in cache, query xml
256 string query = ensureNode(path);
257 B2DEBUG(1000, "Gearbox XPath query: " << query);
258 xmlXPathObjectPtr result = xmlXPathEvalExpression((xmlChar*) query.c_str(), m_xpathContext);
259 if (result != nullptr && result->type == XPATH_NODESET && !xmlXPathNodeSetIsEmpty(result->nodesetval)) {
260 value.numNodes = xmlXPathNodeSetGetLength(result->nodesetval);
261 xmlNodePtr node = result->nodesetval->nodeTab[0];
262 //Example: <foo><bar/></foo>
263 // - bar has no children, so node->children is 0
264 // - foo has not text children, so node->children->content should be 0
265 // but xmlXPathOrderDocElems assigns them an index<0 to speed up XPath
266 // so we have to cast to a long integer and check if it is positive
267 if (node->children && (long)node->children->content > 0) {
268 xmlChar* valueString = xmlNodeListGetString(m_xmlDocument, node->children, 1);
269 value.value = (char*)valueString;
270 xmlFree(valueString);
271 }
272 //See if we have a unit attribute and add it
273 xmlAttrPtr attribute = node->properties;
274 while (attribute) {
275 B2DEBUG(1001, "Checking attribute " << attribute->name);
276 if (!strcmp((char*)attribute->name, "unit")) {
277 B2DEBUG(1001, "found Unit " << attribute->children->content);
278 value.unit = (char*)attribute->children->content;
279 break;
280 }
281 attribute = attribute->next;
282 }
283 //Remove leading and trailing whitespaces
284 boost::trim(value.value);
285 boost::trim(value.unit);
286 }
287 //Add to cache, empty or not: results won't change
288 m_parameterCache->insert(path, value);
289 B2DEBUG(1000, "Gearbox XPath result: " << value.numNodes << ", " << value.value << ", " << value.unit);
290
291 xmlXPathFreeObject(result);
292 return value;
293 }
294
295 const TObject* Gearbox::getTObject(const std::string& path) const noexcept(false)
296 {
297 //do we already have an object for this path?
298 auto it = m_ownedObjects.find(path);
299 if (it != m_ownedObjects.end())
300 return it->second;
301
302 const string& value = getString(path);
303 //assume base64-encoded raw data.
304 TObject* object = Stream::deserializeEncodedRawData(value);
305 if (!object)
306 throw gearbox::TObjectConversionError() << path;
307
308 m_ownedObjects[path] = object;
309
310 return object;
311 }
312
313
314 GearDir Gearbox::getDetectorComponent(const string& component)
315 {
316 return GearDir("/Detector/DetectorComponent[@name='" + component + "']/Content");
317 }
319}
GearDir is the basic class used for accessing the parameter store.
Definition: GearDir.h:31
Singleton class responsible for loading detector parameters from an XML file.
Definition: Gearbox.h:34
std::map< std::string, TObject * > m_ownedObjects
Map of queried objects (path -> TObject*).
Definition: Gearbox.h:215
std::map< std::string, gearbox::InputHandler::Factory * > m_registeredHandlers
Map of registered InputHandlers.
Definition: Gearbox.h:220
xmlDocPtr m_xmlDocument
Pointer to the libxml Document structure.
Definition: Gearbox.h:209
friend void * gearbox::openXmlUri(const char *)
friend to internal c-like function to interface libxml2 callback
MRUCache< std::string, PathValue > * m_parameterCache
Cache for already queried paths.
Definition: Gearbox.h:213
std::vector< PathOverride > m_overrides
the existing overrides
Definition: Gearbox.h:223
std::vector< gearbox::InputHandler * > m_handlers
List of input handlers which will be used to find resources.
Definition: Gearbox.h:218
xmlXPathContextPtr m_xpathContext
Pointer to the libxml XPath context.
Definition: Gearbox.h:211
Class implementing a generic Most Recently Used cache.
Definition: MRUCache.h:48
Class representing a resource context for gearbox.
Definition: InputHandler.h:25
Class to provide an InputContext for a given XML resource name.
Definition: InputHandler.h:47
std::string ensureNode(const std::string &path) const
make sure the path really corresponds to an XPath node expression by removing trailing slashes
Definition: Interface.cc:156
static Gearbox & getInstance()
Return reference to the Gearbox instance.
Definition: Gearbox.cc:81
virtual const TObject * getTObject(const std::string &path) const noexcept(false) override
Get the parameter path as a TObject.
Definition: Gearbox.cc:295
~Gearbox()
Free structures on destruction.
Definition: Gearbox.cc:74
Gearbox()
Singleton: private constructor.
Definition: Gearbox.cc:67
void overridePathValue(const PathOverride &poverride)
Change the value of a given path expression.
Definition: Gearbox.cc:184
PathValue getPathValue(const std::string &path) const
Return the (cached) value of a given path.
Definition: Gearbox.cc:247
void clearBackends()
Clear list of backends.
Definition: Gearbox.cc:127
void close()
Free internal structures of previously parsed tree and clear cache.
Definition: Gearbox.cc:169
void setBackends(const std::vector< std::string > &backends)
Select the backends to use to find resources.
Definition: Gearbox.cc:99
gearbox::InputContext * openXmlUri(const std::string &uri) const
Function to be called when libxml requests a new input uri to be opened.
Definition: Gearbox.cc:87
void open(const std::string &name="Belle2.xml", size_t cacheSize=c_DefaultCacheSize)
Open connection to backend and parse tree.
Definition: Gearbox.cc:133
GearDir getDetectorComponent(const std::string &component)
Return GearDir representing a given DetectorComponent.
Definition: Gearbox.cc:314
TObject * deserializeEncodedRawData(const std::string &base64Data)
Convert given serialized raw data back into TObject.
Definition: Stream.cc:72
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:24
STL namespace.
Struct to override a path in the XML file with a custom value.
Definition: Gearbox.h:51
std::string unit
new Unit
Definition: Gearbox.h:57
bool multiple
if true, override all nodes when more than one node matches the XPath expression, bail otherwise
Definition: Gearbox.h:60
std::string path
XPath expression of the path to override.
Definition: Gearbox.h:53
std::string value
New value.
Definition: Gearbox.h:55
Struct for caching results from the xml file.
Definition: Gearbox.h:40