Belle II Software  release-08-01-10
Gearbox.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 #include <framework/gearbox/Gearbox.h>
9 #include <framework/gearbox/GearDir.h>
10 #include <framework/core/MRUCache.h>
11 #include <framework/logging/Logger.h>
12 #include <framework/utilities/Stream.h>
13 
14 #include <libxml/parser.h>
15 #include <libxml/xinclude.h>
16 #include <libxml/xmlIO.h>
17 #include <cstring>
18 #include <boost/algorithm/string.hpp>
19 
20 #include <TObject.h>
21 
22 using namespace std;
23 
24 namespace Belle2 {
30  namespace gearbox {
/**
 * libxml2 "match" input callback: decide whether the gearbox handlers
 * should be asked to open the given uri.
 *
 * Uris using the file protocol are ignored. They are only used if loading
 * of a resource failed, then libxml will try to look in /etc/xml/catalog
 * if it can find the file there.
 *
 * @param uri resource name requested by libxml2, may be a null pointer
 * @return 0 if the uri is null or starts with "file:/", 1 otherwise
 */
static int matchXmlUri(const char* uri)
{
  // Nothing we could open: let libxml2 fall through to its other handlers
  if (uri == nullptr) return 0;
  static const char fileScheme[] = "file:/";
  // sizeof includes the terminating NUL, which is not part of the prefix
  const bool isFileUri = std::strncmp(uri, fileScheme, sizeof(fileScheme) - 1) == 0;
  return isFileUri ? 0 : 1;
}
40 
42  void* openXmlUri(const char* uri)
43  {
44  B2DEBUG(200, "Request to open " << uri);
45  InputContext* context = Gearbox::getInstance().openXmlUri(uri);
46  return (void*) context;
47  }
48 
50  static int readXmlData(void* context, char* buffer, int buffsize)
51  {
52  //B2DEBUG(200,"Calling read to get " << buffsize << " bytes");
53  auto* gearContext = static_cast<InputContext*>(context);
54  return gearContext->readXmlData(buffer, buffsize);
55  }
56 
58  static int closeXmlContext(void* context)
59  {
60  B2DEBUG(200, "Closing context");
61  auto* gearContext = static_cast<InputContext*>(context);
62  delete gearContext;
63  return 0;
64  }
65  }
66 
67  Gearbox::Gearbox(): m_xmlDocument(nullptr), m_xpathContext(nullptr),
68  m_parameterCache(new MRUCache<std::string, PathValue>(c_DefaultCacheSize))
69  {
70  xmlInitParser();
71  LIBXML_TEST_VERSION;
72  }
73 
75  {
76  close();
77  clearBackends();
78  delete m_parameterCache;
79  }
80 
82  {
83  static Gearbox instance;
84  return instance;
85  }
86 
87  gearbox::InputContext* Gearbox::openXmlUri(const string& uri) const
88  {
89  //Check input handlers one by one
90  for (gearbox::InputHandler* handler : m_handlers) {
91  //try to create context for uri, return if success
92  gearbox::InputContext* context = handler->open(uri);
93  if (context) return context;
94  }
95  B2ERROR("Could not find data for uri '" << uri << "'");
96  return nullptr;
97  }
98 
99  void Gearbox::setBackends(const vector<string>& backends)
100  {
101  clearBackends();
102  for (const string& backend : backends) {
103  B2DEBUG(300, "Adding InputHandler for '" << backend << "'");
104  //Find correct InputHandler, assuming file backend by default if there is no colon in the
105  //uri
106  string prefix("file");
107  string accessinfo(backend);
108  size_t colon = backend.find(':');
109  if (colon != string::npos) {
110  prefix = backend.substr(0, colon);
111  accessinfo = backend.substr(colon + 1);
112  }
113  auto it = m_registeredHandlers.find(prefix);
114  if (it == m_registeredHandlers.end()) {
115  B2ERROR("Could not find input handler to handle '" << backend << "', ignoring");
116  continue;
117  }
118  gearbox::InputHandler* handler = it->second(accessinfo);
119  if (handler) {
120  m_handlers.push_back(handler);
121  } else {
122  B2ERROR("Problem creating input handler to handle '" << backend << "', ignoring");
123  }
124  }
125  }
126 
128  {
129  for (gearbox::InputHandler* handler : m_handlers) delete handler;
130  m_handlers.clear();
131  }
132 
133  void Gearbox::open(const std::string& name, size_t cacheSize)
134  {
135  //Check if we have an open connection and close first if so
136  if (m_xmlDocument) close();
137  //Check if we have at least one backend
138  if (m_handlers.empty())
139  B2FATAL("No backends defined, please use Gearbox::setBackends() first to specify how to access XML files.");
140 
141  // register input callbacks for opening the files
142  xmlRegisterInputCallbacks(gearbox::matchXmlUri, gearbox::openXmlUri,
143  gearbox::readXmlData, gearbox::closeXmlContext);
144 
145  //Open document
146  m_xmlDocument = xmlParseFile(name.c_str());
147  //libxml >= 2.7.0 introduced some limits on node size etc. which breaks reading VXDTF files
148  xmlXIncludeProcessFlags(m_xmlDocument, XML_PARSE_HUGE);
149 
150  // reset input callbacks
151  xmlPopInputCallbacks();
152 
153  if (!m_xmlDocument) B2FATAL("Could not connect gearbox to " << name);
154  m_xpathContext = xmlXPathNewContext(m_xmlDocument);
155  if (!m_xpathContext) B2FATAL("Could not create XPath context");
156 
157  //Apply overrides
158  for (const auto& poverride : m_overrides) {
159  overridePathValue(poverride);
160  }
161 
162  //Speeds up XPath computation on static documents.
163  xmlXPathOrderDocElems(m_xmlDocument);
164 
165  //Set cachesize
166  m_parameterCache->setMaxSize(cacheSize);
167  }
168 
170  {
171  if (m_xpathContext) xmlXPathFreeContext(m_xpathContext);
172  if (m_xmlDocument) xmlFreeDoc(m_xmlDocument);
173  m_xpathContext = nullptr;
174  m_xmlDocument = nullptr;
175 
176  for (auto& entry : m_ownedObjects) {
177  delete entry.second;
178  }
179  m_ownedObjects.clear();
180 
181  m_parameterCache->clear();
182  }
183 
185  {
186  if (m_xpathContext == nullptr) B2FATAL("Gearbox is not connected");
187  //Make sure it ends with a slash
188  string query = ensureNode(poverride.path);
189  //Ok, lets search for the path
190  B2INFO("Override '" << poverride.path << "' with '" << poverride.value
191  << "' (unit: '" << poverride.unit << "')");
192  xmlXPathObjectPtr result = xmlXPathEvalExpression((xmlChar*) query.c_str(), m_xpathContext);
193  if (result != nullptr && result->type == XPATH_NODESET && !xmlXPathNodeSetIsEmpty(result->nodesetval)) {
194  //Found it, so let's replace the content
195  int numNodes = xmlXPathNodeSetGetLength(result->nodesetval);
196  if (!poverride.multiple && numNodes > 1) {
197  B2ERROR("Cannot override '" << poverride.path << "': more than one node found");
198  return;
199  } else {
200  B2DEBUG(200, "Found " << numNodes << " nodes, overriding them all");
201  }
202  for (int i = numNodes - 1; i >= 0; --i) {
203  xmlNodePtr node = result->nodesetval->nodeTab[i];
204  //Check if children are only TEXT nodes
205  bool textOnly(true);
206  for (xmlNodePtr child = node->children; child; child = child->next) {
207  textOnly &= child->type == XML_TEXT_NODE;
208  if (!textOnly) break;
209  }
210  if (!textOnly) {
211  B2ERROR("Cannot override '" << poverride.path << "': not just text content");
212  continue;
213  }
214  xmlNodeSetContent(node, BAD_CAST poverride.value.c_str());
215 
216  //Is the path an element? if so replace the unit, otherwise warn
217  if (node->type != XML_ELEMENT_NODE) {
218  if (!poverride.unit.empty())
219  B2WARNING("Cannot set unit '" << poverride.unit << "' on '"
220  << poverride.path << "': not an element");
221  } else {
222  xmlSetProp(node, BAD_CAST "unit", BAD_CAST poverride.unit.c_str());
223  }
224  //From libxml example xpath2.c:
225  //All the elements returned by an XPath query are pointers to
226  //elements from the tree *except* namespace nodes where the XPath
227  //semantic is different from the implementation in libxml2 tree.
228  //As a result when a returned node set is freed when
229  //xmlXPathFreeObject() is called, that routine must check the
230  //element type. But node from the returned set may have been removed
231  //by xmlNodeSetContent() resulting in access to freed data.
232  //There is 2 ways around it:
233  // - make a copy of the pointers to the nodes from the result set
234  // then call xmlXPathFreeObject() and then modify the nodes
235  //or
236  // - remove the reference to the modified nodes from the node set
237  // as they are processed, if they are not namespace nodes.
238  if (node->type != XML_NAMESPACE_DECL)
239  result->nodesetval->nodeTab[i] = nullptr;
240  }
241  } else {
242  B2ERROR("Cannot override '" << poverride.path << "': not found");
243  }
244  xmlXPathFreeObject(result);
245  }
246 
247  Gearbox::PathValue Gearbox::getPathValue(const std::string& path) const
248  {
249  PathValue value;
250  if (m_xpathContext == nullptr) B2FATAL("Gearbox is not connected");
251  //Get from cache if possible
252  if (m_parameterCache->retrieve(path, value)) {
253  return value;
254  }
255  //Nothing in cache, query xml
256  string query = ensureNode(path);
257  B2DEBUG(1000, "Gearbox XPath query: " << query);
258  xmlXPathObjectPtr result = xmlXPathEvalExpression((xmlChar*) query.c_str(), m_xpathContext);
259  if (result != nullptr && result->type == XPATH_NODESET && !xmlXPathNodeSetIsEmpty(result->nodesetval)) {
260  value.numNodes = xmlXPathNodeSetGetLength(result->nodesetval);
261  xmlNodePtr node = result->nodesetval->nodeTab[0];
262  //Example: <foo><bar/></foo>
263  // - bar has no children, so node->children is 0
264  // - foo has not text children, so node->children->content should be 0
265  // but xmlXPathOrderDocElems assigns them an index<0 to speed up XPath
266  // so we have to cast to a long integer and check if it is positive
267  if (node->children && (long)node->children->content > 0) {
268  xmlChar* valueString = xmlNodeListGetString(m_xmlDocument, node->children, 1);
269  value.value = (char*)valueString;
270  xmlFree(valueString);
271  }
272  //See if we have a unit attribute and add it
273  xmlAttrPtr attribute = node->properties;
274  while (attribute) {
275  B2DEBUG(1001, "Checking attribute " << attribute->name);
276  if (!strcmp((char*)attribute->name, "unit")) {
277  B2DEBUG(1001, "found Unit " << attribute->children->content);
278  value.unit = (char*)attribute->children->content;
279  break;
280  }
281  attribute = attribute->next;
282  }
283  //Remove leading and trailing whitespaces
284  boost::trim(value.value);
285  boost::trim(value.unit);
286  }
287  //Add to cache, empty or not: results won't change
288  m_parameterCache->insert(path, value);
289  B2DEBUG(1000, "Gearbox XPath result: " << value.numNodes << ", " << value.value << ", " << value.unit);
290 
291  xmlXPathFreeObject(result);
292  return value;
293  }
294 
295  const TObject* Gearbox::getTObject(const std::string& path) const noexcept(false)
296  {
297  //do we already have an object for this path?
298  auto it = m_ownedObjects.find(path);
299  if (it != m_ownedObjects.end())
300  return it->second;
301 
302  const string& value = getString(path);
303  //assume base64-encoded raw data.
304  TObject* object = Stream::deserializeEncodedRawData(value);
305  if (!object)
306  throw gearbox::TObjectConversionError() << path;
307 
308  m_ownedObjects[path] = object;
309 
310  return object;
311  }
312 
313 
314  GearDir Gearbox::getDetectorComponent(const string& component)
315  {
316  return GearDir("/Detector/DetectorComponent[@name='" + component + "']/Content");
317  }
319 }
GearDir is the basic class used for accessing the parameter store.
Definition: GearDir.h:31
Singleton class responsible for loading detector parameters from an XML file.
Definition: Gearbox.h:34
std::map< std::string, TObject * > m_ownedObjects
Map of queried objects (path -> TObject*).
Definition: Gearbox.h:215
std::map< std::string, gearbox::InputHandler::Factory * > m_registeredHandlers
Map of registered InputHandlers.
Definition: Gearbox.h:220
xmlDocPtr m_xmlDocument
Pointer to the libxml Document structure.
Definition: Gearbox.h:209
MRUCache< std::string, PathValue > * m_parameterCache
Cache for already queried paths.
Definition: Gearbox.h:213
friend void * gearbox::openXmlUri(const char *)
Friend declaration for the internal C-style function that serves as the libxml2 open callback.
std::vector< PathOverride > m_overrides
the existing overrides
Definition: Gearbox.h:223
std::vector< gearbox::InputHandler * > m_handlers
List of input handlers which will be used to find resources.
Definition: Gearbox.h:218
xmlXPathContextPtr m_xpathContext
Pointer to the libxml XPath context.
Definition: Gearbox.h:211
Class implementing a generic Most Recently Used cache.
Definition: MRUCache.h:48
Class representing a resource context for gearbox.
Definition: InputHandler.h:25
Class to provide an InputContext for a given XML resource name.
Definition: InputHandler.h:47
std::string ensureNode(const std::string &path) const
Make sure the path really corresponds to an XPath node expression by removing trailing slashes.
Definition: Interface.cc:156
static Gearbox & getInstance()
Return reference to the Gearbox instance.
Definition: Gearbox.cc:81
virtual const TObject * getTObject(const std::string &path) const noexcept(false) override
Get the parameter path as a TObject.
Definition: Gearbox.cc:295
~Gearbox()
Free structures on destruction.
Definition: Gearbox.cc:74
void overridePathValue(const PathOverride &poverride)
Change the value of a given path expression.
Definition: Gearbox.cc:184
PathValue getPathValue(const std::string &path) const
Return the (cached) value of a given path.
Definition: Gearbox.cc:247
void clearBackends()
Clear list of backends.
Definition: Gearbox.cc:127
void close()
Free internal structures of previously parsed tree and clear cache.
Definition: Gearbox.cc:169
void setBackends(const std::vector< std::string > &backends)
Select the backends to use to find resources.
Definition: Gearbox.cc:99
gearbox::InputContext * openXmlUri(const std::string &uri) const
Function to be called when libxml requests a new input uri to be opened.
Definition: Gearbox.cc:87
void open(const std::string &name="Belle2.xml", size_t cacheSize=c_DefaultCacheSize)
Open connection to backend and parse tree.
Definition: Gearbox.cc:133
GearDir getDetectorComponent(const std::string &component)
Return GearDir representing a given DetectorComponent.
Definition: Gearbox.cc:314
TObject * deserializeEncodedRawData(const std::string &base64Data)
Convert given serialized raw data back into TObject.
Definition: Stream.cc:72
Abstract base class for different kinds of events.
Struct to override a path in the XML file with a custom value.
Definition: Gearbox.h:51
std::string unit
new Unit
Definition: Gearbox.h:57
bool multiple
if true, override all nodes when more than one node matches the XPath expression, bail otherwise
Definition: Gearbox.h:60
std::string path
XPath expression of the path to override.
Definition: Gearbox.h:53
std::string value
New value.
Definition: Gearbox.h:55
Struct for caching results from the xml file.
Definition: Gearbox.h:40