xmlwrapp
Lightweight C++ XML parsing library
Loading...
Searching...
No Matches
document.h
Go to the documentation of this file.
/*
 * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
 * Copyright (C) 2013 Vaclav Slavik <vslavik@gmail.com>
 * All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of the Author nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33
/**
    @file

    This file contains the definition of the xml::document class.
 */
39
40#ifndef _xmlwrapp_document_h_
41#define _xmlwrapp_document_h_
42
43// xmlwrapp includes
44#include "xmlwrapp/init.h"
45#include "xmlwrapp/node.h"
46#include "xmlwrapp/export.h"
47#include "xmlwrapp/errors.h"
48
49// standard includes
50#include <iosfwd>
51#include <memory>
52#include <string>
53#include <cstddef>
54
55XMLWRAPP_MSVC_SUPPRESS_DLL_MEMBER_WARN
56
// Forward declarations of the XSLT types that xml::document names as friends
// or uses in its private interface, so this header does not need to include
// the XSLT headers.
namespace xslt
{

class stylesheet;

namespace impl
{
class result;
}

} // end xslt namespace
68
69namespace xml
70{
71
72// forward declarations
73class relaxng;
74class schema;
75class tree_parser;
76
77namespace impl
78{
79struct doc_impl;
80struct xpath_context_impl;
81}
82
83/**
84 The xml::document class is used to hold the XML tree and various bits of
85 information about it.
86 */
87class XMLWRAPP_API document
88{
89public:
90 /// size type
91 using size_type = std::size_t;
92
93 /**
94 Create a new XML document with the default settings. The new document
95 will contain a root node with a name of "blank".
96 */
98
99 /**
100 Create a new XML document and set the name of the root element to the
101 given text.
102
103 @param root_name What to set the name of the root element to.
104
105 @deprecated Use `xml::document(xml::node(root_name))` constructor instead.
106 */
107 XMLWRAPP_DEPRECATED("use xml::document(xml::node(root_name)) instead")
108 explicit document(const char *root_name);
109
110 /**
111 Create a new XML document and set the root node.
112
113 @param n The node to use as the root node. n will be copied.
114 */
115 explicit document(const node& n);
116
117 /**
118 Load XML document from given file.
119
120 Errors are handled by @a on_error handler; if you pass
121 xml::throw_on_error, xml::exception is thrown on errors. If there's a
122 fatal error that prevents the document from being loaded and the error
123 handler doesn't throw an exception, the constructor will throw
124 xml::exception anyway.
125
126 @param filename The name of the file to parse.
127 @param on_error Handler called to process errors and warnings.
128
129 @since 0.7.0
130 */
131 explicit document(const char *filename, error_handler& on_error);
132
133 /**
134 Load XML document from given data.
135
136 Errors are handled by @a on_error handler; by default, xml::exception
137 is thrown on errors. If there's a fatal error that prevents the document
138 from being loaded and the error handler doesn't throw an exception, the
139 constructor will throw xml::exception anyway.
140
141 @param data The XML data to parse.
142 @param len The length of the XML data to parse.
143 @param on_error Handler called to process errors and warnings.
144
145 @since 0.7.0
146 */
147 explicit document(const char *data, size_type len, error_handler& on_error = throw_on_error);
148
149 /**
150 Copy construct a new XML document. The new document will be an exact
151 copy of the original.
152
153 @param other The other document object to copy from.
154 */
155 document(const document& other);
156
157 /**
158 Copy another document object into this one using the assignment
159 operator. This document object will be an exact copy of the other
160 document after the assignment.
161
162 @param other The document to copy from.
163 @return *this.
164 */
165 document& operator=(const document& other);
166
167 /**
168 Swap one xml::document object for another.
169
170 @param other The other document to swap
171 */
172 void swap(document& other);
173
174 /**
175 Clean up after an XML document object.
176 */
178
179 /**
180 Get a reference to the root node of this document. If no root node
181 has been set, the returned node will be a blank node. You should take
182 caution to use a reference so that you don't copy the whole node
183 tree!
184
185 @return A const reference to the root node.
186 */
187 const node& get_root_node() const;
188
189 /**
190 Get a reference to the root node of this document. If no root node
191 has been set, the returned node will be a blank node. You should take
192 caution to use a reference so that you don't copy the whole node
193 tree!
194
195 @return A reference to the root node.
196 */
197 node& get_root_node();
198
199 /**
200 Set the root node to the given node. A full copy is made and stored
201 in the document object.
202
203 @param n The new root node to use.
204 */
205 void set_root_node(const node& n);
206
207 /**
208 Get the XML version for this document. For generated documents, the
209 version will be the default. For parsed documents, this will be the
210 version from the XML processing instruction.
211
212 @return The XML version string for this document.
213 */
214 const std::string& get_version() const;
215
216 /**
217 Set the XML version number for this document. This version string
218 will be used when generating the XML output.
219
220 @param version The version string to use, like "1.0".
221 */
222 void set_version(const char *version);
223
224 /**
225 Get the XML encoding for this document. The default encoding is
226 UTF-8.
227
228 @return The encoding string.
229 */
230 std::string get_encoding() const;
231
232 /**
233 Set the XML encoding string. If you don't set this, it will default
234 to UTF-8.
235
236 Note that all strings in the XML document must be encoded in the
237 document encoding.
238
239 @param encoding The XML encoding to use.
240 */
241 void set_encoding(const char *encoding);
242
243 /**
244 Find out if the current document is a standalone document. For
245 generated documents, this will be the default. For parsed documents
246 this will be set based on the XML processing instruction.
247
248 @return True if this document is standalone.
249 @return False if this document is not standalone.
250 */
251 bool get_is_standalone() const;
252
253 /**
254 Set the standalone flag. This will show up in the XML output in the
255 correct processing instruction.
256
257 @param sa What to set the standalone flag to.
258 */
259 void set_is_standalone(bool sa);
260
261 /**
262 Walk through the document and expand <xi:include> elements. For more
263 information, please see the w3c recommendation for XInclude.
264 http://www.w3.org/2001/XInclude.
265
266 The return value of this function may change to int after a bug has
267 been fixed in libxml2 (xmlXIncludeDoProcess).
268
269 @return False if there was an error with substitutions.
270 @return True if there were no errors (with or without substitutions).
271 */
272 bool process_xinclude();
273
274 /**
275 Test to see if this document has an internal subset. That is, DTD
276 data that is declared within the XML document itself.
277
278 @return True if this document has an internal subset.
279 @return False otherwise.
280 */
281 bool has_internal_subset() const;
282
283 /**
284 Test to see if this document has an external subset. That is, it
285 references a DTD from an external source, such as a file or URL.
286
287 @return True if this document has an external subset.
288 @return False otherwise.
289 */
290 bool has_external_subset() const;
291
292 /**
293 Validate this document against the DTD that has been attached to it.
294 This would happen at parse time if there was a !DOCTYPE definition.
295 If the DTD is valid, and the document is valid, this member function
296 will return true.
297
298 If it returns false, you may want to send the document through
299 xmllint to get the actual error messages.
300
301 @return True if the document is valid.
302 @return False if there was a problem with the DTD or XML doc.
303 */
304 bool validate();
305
306 /**
307 Parse the given DTD and try to validate this document against it. If
308 the DTD is valid, and the document is valid, this member function
309 will return true.
310
311 If it returns false, you may want to send the document through
312 xmllint to get the actual error messages.
313
314 This member function will add the parsed DTD to this document as the
315 external subset after the validation. If there is already an external
316 DTD attached to this document it will be removed and deleted.
317
318 @param dtdname A filename or URL for the DTD to use.
319 @return True if the document is valid.
320 @return False if there was a problem with the DTD or XML doc.
321 */
322 bool validate(const char *dtdname);
323
324 /**
325 Returns the number of child nodes of this document. This will always
326 be at least one, since all xmlwrapp documents must have a root node.
327 This member function is useful to find out how many document children
328 there are, including processing instructions, comments, etc.
329
330 @return The number of children nodes that this document has.
331 */
332 size_type size() const;
333
334 /**
335 Get an iterator to the first child node of this document. If what you
336 really wanted was the root node (the first element) you should use
337 the get_root_node() member function instead.
338
339 @return A xml::node::iterator that points to the first child node.
340 @return An end iterator if there are no children in this document
341 */
342 node::iterator begin();
343
344 /**
345 Get a const_iterator to the first child node of this document. If
346 what you really wanted was the root node (the first element) you
347 should use the get_root_node() member function instead.
348
349 @return A xml::node::const_iterator that points to the first child node.
350 @return An end const_iterator if there are no children in this document.
351 */
352 node::const_iterator begin() const;
353
354 /**
355 Get an iterator that points one past the last child node for this
356 document.
357
358 @return An end xml::node::iterator.
359 */
360 node::iterator end();
361
362 /**
363 Get a const_iterator that points one past the last child node for
364 this document.
365
366 @return An end xml::node::const_iterator.
367 */
368 node::const_iterator end() const;
369
370 /**
371 Add a child xml::node to this document. You should not add a element
372 type node, since there can only be one root node. This member
373 function is only useful for adding processing instructions, comments,
374 etc.. If you do try to add a node of type element, an exception will
375 be thrown.
376
377 @param child The child xml::node to add.
378 */
379 void push_back (const node &child);
380
381 /**
382 Insert a new child node. The new node will be inserted at the end of
383 the child list. This is similar to the xml::node::push_back member
384 function except that an iterator to the inserted node is returned.
385
386 The rules from the push_back member function apply here. Don't add a
387 node of type element.
388
389 @param n The node to insert as a child of this document.
390 @return An iterator that points to the newly inserted node.
391 @see xml::document::push_back
392 */
393 node::iterator insert (const node &n);
394
395 /**
396 Insert a new child node. The new node will be inserted before the
397 node pointed to by the given iterator.
398
399 The rules from the push_back member function apply here. Don't add a
400 node of type element.
401
402 @param position An iterator that points to the location where the new node should be inserted (before it).
403 @param n The node to insert as a child of this document.
404 @return An iterator that points to the newly inserted node.
405 @see xml::document::push_back
406 */
407 node::iterator insert(node::iterator position, const node &n);
408
409 /**
410 Replace the node pointed to by the given iterator with another node.
411 The old node will be removed, including all its children, and
412 replaced with the new node. This will invalidate any iterators that
413 point to the node to be replaced, or any pointers or references to
414 that node.
415
416 Do not replace this root node with this member function. The same
417 rules that apply to push_back apply here. If you try to replace a
418 node of type element, an exception will be thrown.
419
420 @param old_node An iterator that points to the node that should be removed.
421 @param new_node The node to put in old_node's place.
422 @return An iterator that points to the new node.
423 @see xml::document::push_back
424 */
425 node::iterator replace(node::iterator old_node, const node& new_node);
426
427 /**
428 Erase the node that is pointed to by the given iterator. The node
429 and all its children will be removed from this node. This will
430 invalidate any iterators that point to the node to be erased, or any
431 pointers or references to that node.
432
433 Do not remove the root node using this member function. The same
434 rules that apply to push_back apply here. If you try to erase the
435 root node, an exception will be thrown.
436
437 @param to_erase An iterator that points to the node to be erased.
438 @return An iterator that points to the node after the one being erased.
439 @see xml::document::push_back
440 */
441 node::iterator erase(node::iterator to_erase);
442
443 /**
444 Erase all nodes in the given range, from first to last. This will
445 invalidate any iterators that point to the nodes to be erased, or any
446 pointers or references to those nodes.
447
448 Do not remove the root node using this member function. The same
449 rules that apply to push_back apply here. If you try to erase the
450 root node, an exception will be thrown.
451
452 @param first The first node in the range to be removed.
453 @param last An iterator that points one past the last node to erase. Think xml::node::end().
454 @return An iterator that points to the node after the last one being erased.
455 @see xml::document::push_back
456 */
457 node::iterator erase(node::iterator first, node::iterator last);
458
459 /**
460 Convert the XML document tree into XML text data and place it into
461 the given string.
462
463 Any errors occurring while converting the document to string are passed
464 to @a on_error handler. By default, an exception will be thrown if
465 anything goes wrong.
466
467 @param s The string to place the XML text data.
468 @param on_error Handler called to process errors and warnings (new
469 since 0.8.0).
470 */
471 void save_to_string(std::string& s, error_handler& on_error = throw_on_error) const;
472
473 /**
474 Convert the XML document tree into XML text data and place it into
475 the given filename.
476
477 This function throws an exception if saving fails for any reason by
478 default and allows to customize this behaviour by passing a non-default
479 @a on_error handler.
480
481 @param filename The name of the file to place the XML text data into.
482 @param on_error Handler called to process errors and warnings (new
483 since 0.8.0).
484 @param compression_level 0 is no compression, 1-9 allowed, where 1 is
485 for better speed, and 9 is for smaller size
486 @return True if the data was saved successfully.
487 @return False otherwise (notice that this is only possible if a custom
488 error handler not throwing on error is specified).
489 */
490 bool save_to_file(const char *filename,
491 int compression_level = 0,
492 error_handler& on_error = throw_on_error) const;
493
494 /**
495 Convert the XML document tree into XML text data and then insert it
496 into the given stream.
497
498 @param stream The stream to insert the XML into.
499 @param doc The document to insert.
500 @return The stream from the first parameter.
501 */
502 friend XMLWRAPP_API std::ostream& operator<< (std::ostream &stream, const document &doc);
503
504private:
505 std::unique_ptr<impl::doc_impl> pimpl_;
506
507 void set_doc_data (void *data);
508 void set_doc_data_from_xslt (void *data, xslt::impl::result *xr);
509 void* get_doc_data();
510 void* get_doc_data_read_only() const;
511 void* release_doc_data();
512
513 friend class tree_parser;
514 friend class relaxng;
515 friend class schema;
516 friend class xslt::stylesheet;
517 friend struct impl::xpath_context_impl;
518};
519
520} // namespace xml
521
522XMLWRAPP_MSVC_RESTORE_DLL_MEMBER_WARN
523
524#endif // _xmlwrapp_document_h_
The xml::document class is used to hold the XML tree and various bits of information about it.
Definition document.h:88
std::size_t size_type
size type
Definition document.h:91
document()
Create a new XML document with the default settings.
The xml::error_handler class is used to handle libxml2 errors and warnings emitted during parsing,...
Definition errors.h:89
The xml::node class is used to hold information about one XML node.
Definition node.h:92
XML validator using RelaxNG.
Definition relaxng.h:72
XML Schema.
Definition schema.h:71
The xml::tree_parser class is used to parse an XML document and generate a tree like structure of xml...
Definition tree_parser.h:77
The xslt::stylesheet class is used to hold information about an XSLT stylesheet.
Definition stylesheet.h:65
This file contains errors-handling classes: xml::exception and xml::error_handler and derived classes...
XML library namespace.
Definition attributes.h:55
XSLT library namespace.
Definition document.h:59
This file contains the definition of the xml::node class.
This file contains the definition of the xml::init class.