xmlwrapp
Lightweight C++ XML parsing library
document.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
3  * Copyright (C) 2013 Vaclav Slavik <vslavik@gmail.com>
4  * All Rights Reserved
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  * 3. Neither the name of the Author nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR
24  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /**
35  @file
36 
37  This file contains the definition of the xml::document class.
38  */
39 
40 #ifndef _xmlwrapp_document_h_
41 #define _xmlwrapp_document_h_
42 
43 // xmlwrapp includes
44 #include "xmlwrapp/init.h"
45 #include "xmlwrapp/node.h"
46 #include "xmlwrapp/export.h"
47 #include "xmlwrapp/errors.h"
48 
49 // standard includes
50 #include <iosfwd>
51 #include <string>
52 #include <cstddef>
53 
54 // forward declarations of the XSLT types that xml::document refers to by name only
55 namespace xslt
56 {
57 
58 class stylesheet; // declared a friend of xml::document
59 namespace impl
60 {
61 class result; // pointer parameter type of xml::document::set_doc_data_from_xslt()
62 }
63 
64 } // end xslt namespace
65 
66 namespace xml
67 {
68 
69 // forward declarations of the classes that xml::document declares as friends
70 class relaxng;
71 class schema;
72 class tree_parser;
73 
74 namespace impl
75 {
76 struct doc_impl; // pointee type of xml::document::pimpl_; only declared here, not defined
77 struct xpath_context_impl; // declared a friend of xml::document
78 }
79 
80 /**
81  The xml::document class is used to hold the XML tree and various bits of
82  information about it.
83  */
84 class XMLWRAPP_API document
85 {
86 public:
87  /// Unsigned type used for sizes and counts (alias of std::size_t).
88  typedef std::size_t size_type;
89 
90  /**
91  Create a new XML document with the default settings. The new document
92  will contain a root node with a name of "blank".
93  */
94  document();
95 
96  /**
97  Create a new XML document and set the name of the root element to the
98  given text.
99 
100  @param root_name What to set the name of the root element to.
101 
102  @deprecated Use `xml::document(xml::node(root_name))` constructor instead.
103  */
104  XMLWRAPP_DEPRECATED("use xml::document(xml::node(root_name)) instead")
105  explicit document(const char *root_name);
106 
107  /**
108  Create a new XML document and set the root node.
109 
110  @param n The node to use as the root node. n will be copied.
111  */
112  explicit document(const node& n);
113 
114  /**
115  Load XML document from given file.
116 
117  Errors are handled by @a on_error handler; if you pass
118  xml::throw_on_error, xml::exception is thrown on errors. If there's a
119  fatal error that prevents the document from being loaded and the error
120  handler doesn't throw an exception, the constructor will throw
121  xml::exception anyway.
122 
123  @param filename The name of the file to parse.
124  @param on_error Handler called to process errors and warnings.
125 
126  @since 0.7.0
127  */
128  explicit document(const char *filename, error_handler& on_error);
129 
130  /**
131  Load XML document from given data.
132 
133  Errors are handled by @a on_error handler; by default, xml::exception
134  is thrown on errors. If there's a fatal error that prevents the document
135  from being loaded and the error handler doesn't throw an exception, the
136  constructor will throw xml::exception anyway.
137 
138  @param data The XML data to parse.
139  @param size The size of the XML data to parse.
140  @param on_error Handler called to process errors and warnings.
141 
142  @since 0.7.0
143  */
144  explicit document(const char *data, size_type size, error_handler& on_error = throw_on_error);
145 
146  /**
147  Copy construct a new XML document. The new document will be an exact
148  copy of the original.
149 
150  @param other The other document object to copy from.
151  */
152  document(const document& other);
153 
154  /**
155  Copy another document object into this one using the assignment
156  operator. This document object will be an exact copy of the other
157  document after the assignment.
158 
159  @param other The document to copy from.
160  @return *this.
161  */
162  document& operator=(const document& other);
163 
164  /**
165  Swap one xml::document object for another.
166 
167  @param other The other document to swap with.
168  */
169  void swap(document& other);
170 
171  /**
172  Clean up after an XML document object.
173  */
174  ~document();
175 
176  /**
177  Get a reference to the root node of this document. If no root node
178  has been set, the returned node will be a blank node. You should take
179  caution to use a reference so that you don't copy the whole node
180  tree!
181 
182  @return A const reference to the root node.
183  */
184  const node& get_root_node() const;
185 
186  /**
187  Get a reference to the root node of this document. If no root node
188  has been set, the returned node will be a blank node. You should take
189  caution to use a reference so that you don't copy the whole node
190  tree!
191 
192  @return A reference to the root node.
193  */
194  node& get_root_node();
195 
196  /**
197  Set the root node to the given node. A full copy is made and stored
198  in the document object.
199 
200  @param n The new root node to use.
201  */
202  void set_root_node(const node& n);
203 
204  /**
205  Get the XML version for this document. For generated documents, the
206  version will be the default. For parsed documents, this will be the
207  version from the XML declaration.
208 
209  @return The XML version string for this document.
210  */
211  const std::string& get_version() const;
212 
213  /**
214  Set the XML version number for this document. This version string
215  will be used when generating the XML output.
216 
217  @param version The version string to use, like "1.0".
218  */
219  void set_version(const char *version);
220 
221  /**
222  Get the XML encoding for this document. The default encoding is
223  UTF-8.
224 
225  @return The encoding string.
226  */
227  std::string get_encoding() const;
228 
229  /**
230  Set the XML encoding string. If you don't set this, it will default
231  to UTF-8.
232 
233  Note that all strings in the XML document must be encoded in the
234  document encoding.
235 
236  @param encoding The XML encoding to use.
237  */
238  void set_encoding(const char *encoding);
239 
240  /**
241  Find out if the current document is a standalone document. For
242  generated documents, this will be the default. For parsed documents
243  this will be set based on the XML declaration.
244 
245  @return True if this document is standalone.
246  @return False if this document is not standalone.
247  */
248  bool get_is_standalone() const;
249 
250  /**
251  Set the standalone flag. This will show up in the XML output in the
252  XML declaration.
253 
254  @param sa What to set the standalone flag to.
255  */
256  void set_is_standalone(bool sa);
257 
258  /**
259  Walk through the document and expand <xi:include> elements. For more
260  information, please see the W3C recommendation for XInclude:
261  http://www.w3.org/2001/XInclude
262 
263  The return value of this function may change to int after a bug has
264  been fixed in libxml2 (xmlXIncludeDoProcess).
265 
266  @return False if there was an error with substitutions.
267  @return True if there were no errors (with or without substitutions).
268  */
269  bool process_xinclude();
270 
271  /**
272  Test to see if this document has an internal subset. That is, DTD
273  data that is declared within the XML document itself.
274 
275  @return True if this document has an internal subset.
276  @return False otherwise.
277  */
278  bool has_internal_subset() const;
279 
280  /**
281  Test to see if this document has an external subset. That is, it
282  references a DTD from an external source, such as a file or URL.
283 
284  @return True if this document has an external subset.
285  @return False otherwise.
286  */
287  bool has_external_subset() const;
288 
289  /**
290  Validate this document against the DTD that has been attached to it.
291  This would happen at parse time if there was a !DOCTYPE definition.
292  If the DTD is valid, and the document is valid, this member function
293  will return true.
294 
295  If it returns false, you may want to send the document through
296  xmllint to get the actual error messages.
297 
298  @return True if the document is valid.
299  @return False if there was a problem with the DTD or XML doc.
300  */
301  bool validate();
302 
303  /**
304  Parse the given DTD and try to validate this document against it. If
305  the DTD is valid, and the document is valid, this member function
306  will return true.
307 
308  If it returns false, you may want to send the document through
309  xmllint to get the actual error messages.
310 
311  This member function will add the parsed DTD to this document as the
312  external subset after the validation. If there is already an external
313  DTD attached to this document it will be removed and deleted.
314 
315  @param dtdname A filename or URL for the DTD to use.
316  @return True if the document is valid.
317  @return False if there was a problem with the DTD or XML doc.
318  */
319  bool validate(const char *dtdname);
320 
321  /**
322  Returns the number of child nodes of this document. This will always
323  be at least one, since all xmlwrapp documents must have a root node.
324  This member function is useful to find out how many document children
325  there are, including processing instructions, comments, etc.
326 
327  @return The number of child nodes that this document has.
328  */
329  size_type size() const;
330 
331  /**
332  Get an iterator to the first child node of this document. If what you
333  really wanted was the root node (the first element) you should use
334  the get_root_node() member function instead.
335 
336  @return An xml::node::iterator that points to the first child node.
337  @return An end iterator if there are no children in this document.
338  */
339  node::iterator begin();
340 
341  /**
342  Get a const_iterator to the first child node of this document. If
343  what you really wanted was the root node (the first element) you
344  should use the get_root_node() member function instead.
345 
346  @return An xml::node::const_iterator that points to the first child node.
347  @return An end const_iterator if there are no children in this document.
348  */
349  node::const_iterator begin() const;
350 
351  /**
352  Get an iterator that points one past the last child node for this
353  document.
354 
355  @return An end xml::node::iterator.
356  */
357  node::iterator end();
358 
359  /**
360  Get a const_iterator that points one past the last child node for
361  this document.
362 
363  @return An end xml::node::const_iterator.
364  */
365  node::const_iterator end() const;
366 
367  /**
368  Add a child xml::node to this document. You should not add an element
369  type node, since there can only be one root node. This member
370  function is only useful for adding processing instructions, comments,
371  etc. If you do try to add a node of type element, an exception will
372  be thrown.
373 
374  @param child The child xml::node to add.
375  */
376  void push_back (const node &child);
377 
378  /**
379  Insert a new child node. The new node will be inserted at the end of
380  the child list. This is similar to the xml::node::push_back member
381  function except that an iterator to the inserted node is returned.
382 
383  The rules from the push_back member function apply here. Don't add a
384  node of type element.
385 
386  @param n The node to insert as a child of this document.
387  @return An iterator that points to the newly inserted node.
388  @see xml::document::push_back
389  */
390  node::iterator insert (const node &n);
391 
392  /**
393  Insert a new child node. The new node will be inserted before the
394  node pointed to by the given iterator.
395 
396  The rules from the push_back member function apply here. Don't add a
397  node of type element.
398 
399  @param position An iterator that points to the location where the new node should be inserted (before it).
400  @param n The node to insert as a child of this document.
401  @return An iterator that points to the newly inserted node.
402  @see xml::document::push_back
403  */
404  node::iterator insert(node::iterator position, const node &n);
405 
406  /**
407  Replace the node pointed to by the given iterator with another node.
408  The old node will be removed, including all its children, and
409  replaced with the new node. This will invalidate any iterators that
410  point to the node to be replaced, or any pointers or references to
411  that node.
412 
413  Do not replace the root node with this member function. The same
414  rules that apply to push_back apply here. If you try to replace a
415  node of type element, an exception will be thrown.
416 
417  @param old_node An iterator that points to the node that should be removed.
418  @param new_node The node to put in old_node's place.
419  @return An iterator that points to the new node.
420  @see xml::document::push_back
421  */
422  node::iterator replace(node::iterator old_node, const node& new_node);
423 
424  /**
425  Erase the node that is pointed to by the given iterator. The node
426  and all its children will be removed from this document. This will
427  invalidate any iterators that point to the node to be erased, or any
428  pointers or references to that node.
429 
430  Do not remove the root node using this member function. The same
431  rules that apply to push_back apply here. If you try to erase the
432  root node, an exception will be thrown.
433 
434  @param to_erase An iterator that points to the node to be erased.
435  @return An iterator that points to the node after the one being erased.
436  @see xml::document::push_back
437  */
438  node::iterator erase(node::iterator to_erase);
439 
440  /**
441  Erase all nodes in the given range, from first to last. This will
442  invalidate any iterators that point to the nodes to be erased, or any
443  pointers or references to those nodes.
444 
445  Do not remove the root node using this member function. The same
446  rules that apply to push_back apply here. If you try to erase the
447  root node, an exception will be thrown.
448 
449  @param first The first node in the range to be removed.
450  @param last An iterator that points one past the last node to erase. Think xml::node::end().
451  @return An iterator that points to the node after the last one being erased.
452  @see xml::document::push_back
453  */
454  node::iterator erase(node::iterator first, node::iterator last);
455 
456  /**
457  Convert the XML document tree into XML text data and place it into
458  the given string.
459 
460  Any errors occurring while converting the document to string are passed
461  to @a on_error handler. By default, an exception will be thrown if
462  anything goes wrong.
463 
464  @param s The string to place the XML text data into.
465  @param on_error Handler called to process errors and warnings (new
466  since 0.8.0).
467  */
468  void save_to_string(std::string& s, error_handler& on_error = throw_on_error) const;
469 
470  /**
471  Convert the XML document tree into XML text data and place it into
472  the given filename.
473 
474  By default this function throws an exception if saving fails for any
475  reason; this behaviour can be customized by passing a non-default
476  @a on_error handler.
477 
478  @param filename The name of the file to place the XML text data into.
479  @param compression_level 0 is no compression, 1-9 allowed, where 1 is
480  for better speed, and 9 is for smaller size.
481  @param on_error Handler called to process errors and warnings (new
482  since 0.8.0).
483  @return True if the data was saved successfully.
484  @return False otherwise (notice that this is only possible if a custom
485  error handler not throwing on error is specified).
486  */
487  bool save_to_file(const char *filename,
488  int compression_level = 0,
489  error_handler& on_error = throw_on_error) const;
490 
491  /**
492  Convert the XML document tree into XML text data and then insert it
493  into the given stream.
494 
495  @param stream The stream to insert the XML into.
496  @param doc The document to insert.
497  @return The stream from the first parameter.
498  */
499  friend XMLWRAPP_API std::ostream& operator<< (std::ostream &stream, const document &doc);
500 
501 private:
502  impl::doc_impl *pimpl_; // pointer to the implementation struct; doc_impl is only forward-declared in this header
503 // Private helpers exchanging the document data as an untyped void*; callable only by members and the friends listed below.
504  void set_doc_data (void *data);
505  void set_doc_data_from_xslt (void *data, xslt::impl::result *xr);
506  void* get_doc_data();
507  void* get_doc_data_read_only() const; // NOTE(review): presumably the non-modifying counterpart of get_doc_data() -- confirm in the implementation
508  void* release_doc_data(); // NOTE(review): name suggests ownership of the data passes to the caller -- confirm in the implementation
509 // Classes (and the xpath_context_impl struct) granted access to the private helpers above.
510  friend class tree_parser;
511  friend class relaxng;
512  friend class schema;
513  friend class xslt::stylesheet;
514  friend struct impl::xpath_context_impl;
515 };
516 
517 } // namespace xml
518 
519 #endif // _xmlwrapp_document_h_
The xml::tree_parser class is used to parse an XML document and generate a tree like structure of xml...
Definition: tree_parser.h:73
This file contains error-handling classes: xml::exception and xml::error_handler and derived classes...
The xml::error_handler class is used to handle libxml2 errors and warnings emitted during parsing...
Definition: errors.h:84
This file contains the definition of the xml::init class.
This file contains the definition of the xml::node class.
XSLT library namespace.
Definition: document.h:55
error_handler_throw_on_error throw_on_error
Error handler object that throws on any error.
STL namespace.
The xslt::stylesheet class is used to hold information about an XSLT stylesheet.
Definition: stylesheet.h:61
XML validator using RelaxNG.
Definition: relaxng.h:67
The xml::document class is used to hold the XML tree and various bits of information about it...
Definition: document.h:84
XML library namespace.
Definition: attributes.h:51
std::size_t size_type
size type
Definition: document.h:88
The xml::node class is used to hold information about one XML node.
Definition: node.h:88
XML Schema.
Definition: schema.h:66