TDME2  1.9.200
tinyxmlparser.cpp
Go to the documentation of this file.
1 /*
2 www.sourceforge.net/projects/tinyxml
3 Original code by Lee Thomason (www.grinninglizard.com)
4 
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8 
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12 
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17 
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20 
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24 
25 #include <ctype.h>
26 #include <stddef.h>
27 
28 #include "tinyxml.h"
29 
30 //#define DEBUG_PARSER
31 #if defined( DEBUG_PARSER )
32  #define TIXML_LOG printf
33 #endif
34 
35 using namespace tinyxml;
36 
37 // Note that "PutString" hardcodes the same list. This
38 // is less flexible than it appears. Changing the entries
39 // or order will break putstring.
// Table of the predefined XML entities. Each row holds the entity text, the
// length of that text, and the single character it stands for; GetEntity()
// matches input against these rows.
// NOTE(review): the embedded listing numbering jumps from 39 to 41, so the
// declaration line that names this array is absent from this extract.
41 {
42  { "&amp;", 5, '&' },
43  { "&lt;", 4, '<' },
44  { "&gt;", 4, '>' },
45  { "&quot;", 6, '\"' },
46  { "&apos;", 6, '\'' }
47 };
48 
49 // Bunch of unicode info at:
50 // http://www.unicode.org/faq/utf_bom.html
51 // Including the basic of this table, which determines the #bytes in the
52 // sequence from the lead byte. 1 placed for invalid sequences --
53 // although the result will be junk, pass it through as much as possible.
54 // Beware of the non-characters in UTF-8:
55 // ef bb bf (Microsoft "lead bytes")
56 // ef bf be
57 // ef bf bf
58 
// The three bytes (ef bb bf) of the Microsoft UTF-8 "lead bytes" sequence.
// They are compared byte by byte when skipping white space and when
// detecting the document encoding.
59 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
60 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
61 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
62 
// Indexed by the first byte of a UTF-8 sequence; yields the number of bytes in
// that sequence. Bytes that cannot start a sequence map to 1 so the parser
// still advances over them.
63 const int TiXmlBase::utf8ByteTable[256] =
64 {
65  // 0 1 2 3 4 5 6 7 8 9 a b c d e f
66  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
67  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
68  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
69  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
70  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
71  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
72  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
73  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
74  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
75  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
76  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
77  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
78  1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
79  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
80  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
81  4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
82 };
83 
84 
85 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
86 {
87  const unsigned long BYTE_MASK = 0xBF;
88  const unsigned long BYTE_MARK = 0x80;
89  const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
90 
91  if (input < 0x80)
92  *length = 1;
93  else if ( input < 0x800 )
94  *length = 2;
95  else if ( input < 0x10000 )
96  *length = 3;
97  else if ( input < 0x200000 )
98  *length = 4;
99  else
100  { *length = 0; return; } // This code won't covert this correctly anyway.
101 
102  output += *length;
103 
104  // Scary scary fall throughs.
105  switch (*length)
106  {
107  case 4:
108  --output;
109  *output = (char)((input | BYTE_MARK) & BYTE_MASK);
110  input >>= 6;
111  case 3:
112  --output;
113  *output = (char)((input | BYTE_MARK) & BYTE_MASK);
114  input >>= 6;
115  case 2:
116  --output;
117  *output = (char)((input | BYTE_MARK) & BYTE_MASK);
118  input >>= 6;
119  case 1:
120  --output;
121  *output = (char)(input | FIRST_BYTE_MARK[*length]);
122  }
123 }
124 
125 
126 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
127 {
128  // This will only work for low-ascii, everything else is assumed to be a valid
129  // letter. I'm not sure this is the best approach, but it is quite tricky trying
130  // to figure out alhabetical vs. not across encoding. So take a very
131  // conservative approach.
132 
133 // if ( encoding == TIXML_ENCODING_UTF8 )
134 // {
135  if ( anyByte < 127 )
136  return isalpha( anyByte );
137  else
138  return 1; // What else to do? The unicode set is huge...get the english ones right.
139 // }
140 // else
141 // {
142 // return isalpha( anyByte );
143 // }
144 }
145 
146 
147 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
148 {
149  // This will only work for low-ascii, everything else is assumed to be a valid
150  // letter. I'm not sure this is the best approach, but it is quite tricky trying
151  // to figure out alhabetical vs. not across encoding. So take a very
152  // conservative approach.
153 
154 // if ( encoding == TIXML_ENCODING_UTF8 )
155 // {
156  if ( anyByte < 127 )
157  return isalnum( anyByte );
158  else
159  return 1; // What else to do? The unicode set is huge...get the english ones right.
160 // }
161 // else
162 // {
163 // return isalnum( anyByte );
164 // }
165 }
166 
167 namespace tinyxml {
168 
// Keeps a row/column cursor together with the buffer position ("stamp") that
// the cursor corresponds to. Stamp() moves both forward to a later position;
// Cursor() exposes the current row/column.
// NOTE(review): the embedded listing numbering skips 169 and 188, so the class
// header line and one member declaration are absent from this extract (the
// member 'cursor' is used below but not declared in the visible lines).
170  {
171  friend class TiXmlDocument;
172  public:
// Advances the cursor from the previous stamp up to 'now'.
173  void Stamp( const char* now, TiXmlEncoding encoding );
174 
// Returns the row/column recorded by the most recent Stamp() (or the
// constructor if Stamp() has not run).
175  const TiXmlCursor& Cursor() const { return cursor; }
176 
177  private:
178  // Only used by the document!
// 'start' is the initial stamp and must be non-null; 'row' and 'col' seed the
// cursor; '_tabsize' is the tab width used for column computation.
179  TiXmlParsingData( const char* start, int _tabsize, int row, int col )
180  {
181  assert( start );
182  stamp = start;
183  tabsize = _tabsize;
184  cursor.row = row;
185  cursor.col = col;
186  }
187 
// Buffer position that 'cursor' currently describes.
189  const char* stamp;
// Tab width; Stamp() does nothing when this is below 1.
190  int tabsize;
191  };
192 
193 };
194 
195 
196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
197 {
198  assert( now );
199 
200  // Do nothing if the tabsize is 0.
201  if ( tabsize < 1 )
202  {
203  return;
204  }
205 
206  // Get the current row, column.
207  int row = cursor.row;
208  int col = cursor.col;
209  const char* p = stamp;
210  assert( p );
211 
212  while ( p < now )
213  {
214  // Treat p as unsigned, so we have a happy compiler.
215  const unsigned char* pU = (const unsigned char*)p;
216 
217  // Code contributed by Fletcher Dunn: (modified by lee)
218  switch (*pU) {
219  case 0:
220  // We *should* never get here, but in case we do, don't
221  // advance past the terminating null character, ever
222  return;
223 
224  case '\r':
225  // bump down to the next line
226  ++row;
227  col = 0;
228  // Eat the character
229  ++p;
230 
231  // Check for \r\n sequence, and treat this as a single character
232  if (*p == '\n') {
233  ++p;
234  }
235  break;
236 
237  case '\n':
238  // bump down to the next line
239  ++row;
240  col = 0;
241 
242  // Eat the character
243  ++p;
244 
245  // Check for \n\r sequence, and treat this as a single
246  // character. (Yes, this bizarre thing does occur still
247  // on some arcane platforms...)
248  if (*p == '\r') {
249  ++p;
250  }
251  break;
252 
253  case '\t':
254  // Eat the character
255  ++p;
256 
257  // Skip to next tab stop
258  col = (col / tabsize + 1) * tabsize;
259  break;
260 
261  case TIXML_UTF_LEAD_0:
262  if ( encoding == TIXML_ENCODING_UTF8 )
263  {
264  if ( *(p+1) && *(p+2) )
265  {
266  // In these cases, don't advance the column. These are
267  // 0-width spaces.
268  if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
269  p += 3;
270  else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
271  p += 3;
272  else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
273  p += 3;
274  else
275  { p +=3; ++col; } // A normal character.
276  }
277  }
278  else
279  {
280  ++p;
281  ++col;
282  }
283  break;
284 
285  default:
286  if ( encoding == TIXML_ENCODING_UTF8 )
287  {
288  // Eat the 1 to 4 byte utf8 character.
289  int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
290  if ( step == 0 )
291  step = 1; // Error case from bad encoding, but handle gracefully.
292  p += step;
293 
294  // Just advance one column, of course.
295  ++col;
296  }
297  else
298  {
299  ++p;
300  ++col;
301  }
302  break;
303  }
304  }
305  cursor.row = row;
306  cursor.col = col;
307  assert( cursor.row >= -1 );
308  assert( cursor.col >= -1 );
309  stamp = p;
310  assert( stamp );
311 }
312 
313 
314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
315 {
316  if ( !p || !*p )
317  {
318  return 0;
319  }
320  if ( encoding == TIXML_ENCODING_UTF8 )
321  {
322  while ( *p )
323  {
324  const unsigned char* pU = (const unsigned char*)p;
325 
326  // Skip the stupid Microsoft UTF-8 Byte order marks
327  if ( *(pU+0)==TIXML_UTF_LEAD_0
328  && *(pU+1)==TIXML_UTF_LEAD_1
329  && *(pU+2)==TIXML_UTF_LEAD_2 )
330  {
331  p += 3;
332  continue;
333  }
334  else if(*(pU+0)==TIXML_UTF_LEAD_0
335  && *(pU+1)==0xbfU
336  && *(pU+2)==0xbeU )
337  {
338  p += 3;
339  continue;
340  }
341  else if(*(pU+0)==TIXML_UTF_LEAD_0
342  && *(pU+1)==0xbfU
343  && *(pU+2)==0xbfU )
344  {
345  p += 3;
346  continue;
347  }
348 
349  if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
350  ++p;
351  else
352  break;
353  }
354  }
355  else
356  {
357  while ( *p && IsWhiteSpace( *p ) )
358  ++p;
359  }
360 
361  return p;
362 }
363 
364 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
365 {
366  for( ;; )
367  {
368  if ( !in->good() ) return false;
369 
370  int c = in->peek();
371  // At this scope, we can't get to a document. So fail silently.
372  if ( !IsWhiteSpace( c ) || c <= 0 )
373  return true;
374 
375  *tag += (char) in->get();
376  }
377 }
378 
379 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
380 {
381  //assert( character > 0 && character < 128 ); // else it won't work in utf-8
382  while ( in->good() )
383  {
384  int c = in->peek();
385  if ( c == character )
386  return true;
387  if ( c <= 0 ) // Silent failure: can't get document at this scope
388  return false;
389 
390  in->get();
391  *tag += (char) c;
392  }
393  return false;
394 }
395 
396 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
397 // "assign" optimization removes over 10% of the execution time.
398 //
399 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
400 {
401  // Oddly, not supported on some comilers,
402  //name->clear();
403  // So use this:
404  *name = "";
405  assert( p );
406 
407  // Names start with letters or underscores.
408  // Of course, in unicode, tinyxml has no idea what a letter *is*. The
409  // algorithm is generous.
410  //
411  // After that, they can be letters, underscores, numbers,
412  // hyphens, or colons. (Colons are valid ony for namespaces,
413  // but tinyxml can't tell namespaces from names.)
414  if ( p && *p
415  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
416  {
417  const char* start = p;
418  while( p && *p
419  && ( IsAlphaNum( (unsigned char ) *p, encoding )
420  || *p == '_'
421  || *p == '-'
422  || *p == '.'
423  || *p == ':' ) )
424  {
425  //(*name) += *p; // expensive
426  ++p;
427  }
428  if ( p-start > 0 ) {
429  name->assign( start, p-start );
430  }
431  return p;
432  }
433  return 0;
434 }
435 
436 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
437 {
438  // Presume an entity, and pull it out.
439  TIXML_STRING ent;
440  int i;
441  *length = 0;
442 
443  if ( *(p+1) && *(p+1) == '#' && *(p+2) )
444  {
445  unsigned long ucs = 0;
446  ptrdiff_t delta = 0;
447  unsigned mult = 1;
448 
449  if ( *(p+2) == 'x' )
450  {
451  // Hexadecimal.
452  if ( !*(p+3) ) return 0;
453 
454  const char* q = p+3;
455  q = strchr( q, ';' );
456 
457  if ( !q || !*q ) return 0;
458 
459  delta = q-p;
460  --q;
461 
462  while ( *q != 'x' )
463  {
464  if ( *q >= '0' && *q <= '9' )
465  ucs += mult * (*q - '0');
466  else if ( *q >= 'a' && *q <= 'f' )
467  ucs += mult * (*q - 'a' + 10);
468  else if ( *q >= 'A' && *q <= 'F' )
469  ucs += mult * (*q - 'A' + 10 );
470  else
471  return 0;
472  mult *= 16;
473  --q;
474  }
475  }
476  else
477  {
478  // Decimal.
479  if ( !*(p+2) ) return 0;
480 
481  const char* q = p+2;
482  q = strchr( q, ';' );
483 
484  if ( !q || !*q ) return 0;
485 
486  delta = q-p;
487  --q;
488 
489  while ( *q != '#' )
490  {
491  if ( *q >= '0' && *q <= '9' )
492  ucs += mult * (*q - '0');
493  else
494  return 0;
495  mult *= 10;
496  --q;
497  }
498  }
499  if ( encoding == TIXML_ENCODING_UTF8 )
500  {
501  // convert the UCS to UTF-8
502  ConvertUTF32ToUTF8( ucs, value, length );
503  }
504  else
505  {
506  *value = (char)ucs;
507  *length = 1;
508  }
509  return p + delta + 1;
510  }
511 
512  // Now try to match it.
513  for( i=0; i<NUM_ENTITY; ++i )
514  {
515  if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
516  {
517  assert( strlen( entity[i].str ) == entity[i].strLength );
518  *value = entity[i].chr;
519  *length = 1;
520  return ( p + entity[i].strLength );
521  }
522  }
523 
524  // So it wasn't an entity, its unrecognized, or something like that.
525  *value = *p; // Don't put back the last one, since we return it!
526  //*length = 1; // Leave unrecognized entities - this doesn't really work.
527  // Just writes strange XML.
528  return p+1;
529 }
530 
531 
532 bool TiXmlBase::StringEqual( const char* p,
533  const char* tag,
534  bool ignoreCase,
535  TiXmlEncoding encoding )
536 {
537  assert( p );
538  assert( tag );
539  if ( !p || !*p )
540  {
541  assert( 0 );
542  return false;
543  }
544 
545  const char* q = p;
546 
547  if ( ignoreCase )
548  {
549  while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
550  {
551  ++q;
552  ++tag;
553  }
554 
555  if ( *tag == 0 )
556  return true;
557  }
558  else
559  {
560  while ( *q && *tag && *q == *tag )
561  {
562  ++q;
563  ++tag;
564  }
565 
566  if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
567  return true;
568  }
569  return false;
570 }
571 
572 const char* TiXmlBase::ReadText( const char* p,
573  TIXML_STRING * text,
574  bool trimWhiteSpace,
575  const char* endTag,
576  bool caseInsensitive,
577  TiXmlEncoding encoding )
578 {
579  *text = "";
580  if ( !trimWhiteSpace // certain tags always keep whitespace
581  || !condenseWhiteSpace ) // if true, whitespace is always kept
582  {
583  // Keep all the white space.
584  while ( p && *p
585  && !StringEqual( p, endTag, caseInsensitive, encoding )
586  )
587  {
588  int len;
589  char cArr[4] = { 0, 0, 0, 0 };
590  p = GetChar( p, cArr, &len, encoding );
591  text->append( cArr, len );
592  }
593  }
594  else
595  {
596  bool whitespace = false;
597 
598  // Remove leading white space:
599  p = SkipWhiteSpace( p, encoding );
600  while ( p && *p
601  && !StringEqual( p, endTag, caseInsensitive, encoding ) )
602  {
603  if ( *p == '\r' || *p == '\n' )
604  {
605  whitespace = true;
606  ++p;
607  }
608  else if ( IsWhiteSpace( *p ) )
609  {
610  whitespace = true;
611  ++p;
612  }
613  else
614  {
615  // If we've found whitespace, add it before the
616  // new character. Any whitespace just becomes a space.
617  if ( whitespace )
618  {
619  (*text) += ' ';
620  whitespace = false;
621  }
622  int len;
623  char cArr[4] = { 0, 0, 0, 0 };
624  p = GetChar( p, cArr, &len, encoding );
625  if ( len == 1 )
626  (*text) += cArr[0]; // more efficient
627  else
628  text->append( cArr, len );
629  }
630  }
631  }
632  if ( p && *p )
633  p += strlen( endTag );
634  return ( p && *p ) ? p : 0;
635 }
636 
637 
// Streams document-level content from 'in' into 'tag'. Reads up to each '>',
// asks Identify() what kind of node the accumulated text starts, lets that
// node stream the rest of itself into 'tag', and returns once an element
// (the root) has been streamed or a node cannot be identified.
// NOTE(review): the embedded listing numbering skips 649, 661, 690 and 696,
// so statements are absent from this extract at those points (presumably
// error reporting - confirm against the complete file).
638 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
639 {
640  // The basic issue with a document is that we don't know what we're
641  // streaming. Read something presumed to be a tag (and hope), then
642  // identify it, and call the appropriate stream method on the tag.
643  //
644  // This "pre-streaming" will never read the closing ">" so the
645  // sub-tag can orient itself.
646 
647  if ( !StreamTo( in, '<', tag ) )
648  {
650  return;
651  }
652 
653  while ( in->good() )
654  {
// Remember where this node's text starts inside 'tag'.
655  int tagIndex = (int) tag->length();
656  while ( in->good() && in->peek() != '>' )
657  {
658  int c = in->get();
659  if ( c <= 0 )
660  {
662  break;
663  }
664  (*tag) += (char) c;
665  }
666 
667  if ( in->good() )
668  {
669  // We now have something we presume to be a node of
670  // some sort. Identify it, and call the node to
671  // continue streaming.
672  TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
673 
674  if ( node )
675  {
// The node is only used to drive streaming and is deleted right after.
676  node->StreamIn( in, tag );
677  bool isElement = node->ToElement() != 0;
678  delete node;
679  node = 0;
680 
681  // If this is the root element, we're done. Parsing will be
682  // done by the >> operator.
683  if ( isElement )
684  {
685  return;
686  }
687  }
688  else
689  {
691  return;
692  }
693  }
694  }
695  // We should have returned sooner.
697 }
698 
699 
// Parses a whole document from the buffer at 'p': repeatedly identifies the
// next node, lets it parse itself, and links it as a child. The encoding is
// refined from the lead bytes or from the first declaration when it starts
// out as TIXML_ENCODING_UNKNOWN.
// Returns the position where parsing stopped, or 0 when 'p' is null/empty,
// holds only white space, or no child node was produced.
// NOTE(review): the embedded listing numbering skips 709, 727 and 746, so
// statements are absent from this extract at those points. In particular
// 'data' is used below but its declaration is not visible here.
700 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
701 {
702  ClearError();
703 
704  // Parse away, at the document level. Since a document
705  // contains nothing but other tags, most of what happens
706  // here is skipping white space.
707  if ( !p || !*p )
708  {
710  return 0;
711  }
712 
713  // Note that, for a document, this needs to come
714  // before the while space skip, so that parsing
715  // starts from the pointer we are given.
716  location.Clear();
717  if ( prevData )
718  {
719  location.row = prevData->cursor.row;
720  location.col = prevData->cursor.col;
721  }
722  else
723  {
724  location.row = 0;
725  location.col = 0;
726  }
728  location = data.Cursor();
729 
730  if ( encoding == TIXML_ENCODING_UNKNOWN )
731  {
732  // Check for the Microsoft UTF-8 lead bytes.
733  const unsigned char* pU = (const unsigned char*)p;
734  if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
735  && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
736  && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
737  {
738  encoding = TIXML_ENCODING_UTF8;
739  useMicrosoftBOM = true;
740  }
741  }
742 
743  p = SkipWhiteSpace( p, encoding );
744  if ( !p )
745  {
747  return 0;
748  }
749 
750  while ( p && *p )
751  {
752  TiXmlNode* node = Identify( p, encoding );
753  if ( node )
754  {
755  p = node->Parse( p, &data, encoding );
756  LinkEndChild( node );
757  }
758  else
759  {
760  break;
761  }
762 
763  // Did we get encoding info?
764  if ( encoding == TIXML_ENCODING_UNKNOWN
765  && node->ToDeclaration() )
766  {
767  TiXmlDeclaration* dec = node->ToDeclaration();
768  const char* enc = dec->Encoding();
769  assert( enc );
770 
// An empty encoding string, "UTF-8" and "UTF8" all select UTF-8; anything
// else selects the legacy encoding.
771  if ( *enc == 0 )
772  encoding = TIXML_ENCODING_UTF8;
773  else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
774  encoding = TIXML_ENCODING_UTF8;
775  else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
776  encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
777  else
778  encoding = TIXML_ENCODING_LEGACY;
779  }
780 
781  p = SkipWhiteSpace( p, encoding );
782  }
783 
784  // Was this empty?
785  if ( !firstChild ) {
786  SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
787  return 0;
788  }
789 
790  // All is well.
791  return p;
792 }
793 
// Records error 'err' on the document unless an error is already recorded.
// When both 'pError' and 'data' are given, 'data' is stamped at 'pError' and
// the resulting cursor is stored as the error location.
// 'err' must be greater than 0 and below TIXML_ERROR_STRING_COUNT.
// NOTE(review): the embedded listing numbering skips 803 and 805, so two
// statements are absent from this extract between the assignment of errorId
// and the location handling.
794 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
795 {
796  // The first error in a chain is more accurate - don't set again!
797  if ( error )
798  return;
799 
800  assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
801  error = true;
802  errorId = err;
804 
806  if ( pError && data )
807  {
808  data->Stamp( pError, encoding );
809  errorLocation = data->Cursor();
810  }
811 }
812 
813 
814 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
815 {
816  TiXmlNode* returnNode = 0;
817 
818  p = SkipWhiteSpace( p, encoding );
819  if( !p || !*p || *p != '<' )
820  {
821  return 0;
822  }
823 
824  p = SkipWhiteSpace( p, encoding );
825 
826  if ( !p || !*p )
827  {
828  return 0;
829  }
830 
831  // What is this thing?
832  // - Elements start with a letter or underscore, but xml is reserved.
833  // - Comments: <!--
834  // - Decleration: <?xml
835  // - Everthing else is unknown to tinyxml.
836  //
837 
838  const char* xmlHeader = { "<?xml" };
839  const char* commentHeader = { "<!--" };
840  const char* dtdHeader = { "<!" };
841  const char* cdataHeader = { "<![CDATA[" };
842 
843  if ( StringEqual( p, xmlHeader, true, encoding ) )
844  {
845  #ifdef DEBUG_PARSER
846  TIXML_LOG( "XML parsing Declaration\n" );
847  #endif
848  returnNode = new TiXmlDeclaration();
849  }
850  else if ( StringEqual( p, commentHeader, false, encoding ) )
851  {
852  #ifdef DEBUG_PARSER
853  TIXML_LOG( "XML parsing Comment\n" );
854  #endif
855  returnNode = new TiXmlComment();
856  }
857  else if ( StringEqual( p, cdataHeader, false, encoding ) )
858  {
859  #ifdef DEBUG_PARSER
860  TIXML_LOG( "XML parsing CDATA\n" );
861  #endif
862  TiXmlText* text = new TiXmlText( "" );
863  text->SetCDATA( true );
864  returnNode = text;
865  }
866  else if ( StringEqual( p, dtdHeader, false, encoding ) )
867  {
868  #ifdef DEBUG_PARSER
869  TIXML_LOG( "XML parsing Unknown(1)\n" );
870  #endif
871  returnNode = new TiXmlUnknown();
872  }
873  else if ( IsAlpha( *(p+1), encoding )
874  || *(p+1) == '_' )
875  {
876  #ifdef DEBUG_PARSER
877  TIXML_LOG( "XML parsing Element\n" );
878  #endif
879  returnNode = new TiXmlElement( "" );
880  }
881  else
882  {
883  #ifdef DEBUG_PARSER
884  TIXML_LOG( "XML parsing Unknown(2)\n" );
885  #endif
886  returnNode = new TiXmlUnknown();
887  }
888 
889  if ( returnNode )
890  {
891  // Set the parent, so it can report errors
892  returnNode->parent = this;
893  }
894  return returnNode;
895 }
896 
897 
// Streams one complete element from 'in' onto the end of 'tag': the rest of
// the opening tag, then - unless it is a "/>" tag - any text and child nodes,
// up to and including this element's closing tag.
// NOTE(review): the embedded listing numbering skips 909, 971 and 1011. Each
// gap directly follows an "if ( document )", so a statement is absent from
// this extract there (presumably an error report - confirm against the
// complete file). As extracted, the following "return" reads as the body of
// that "if".
898 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
899 {
900  // We're called with some amount of pre-parsing. That is, some of "this"
901  // element is in "tag". Go ahead and stream to the closing ">"
902  while( in->good() )
903  {
904  int c = in->get();
905  if ( c <= 0 )
906  {
907  TiXmlDocument* document = GetDocument();
908  if ( document )
910  return;
911  }
912  (*tag) += (char) c ;
913 
914  if ( c == '>' )
915  break;
916  }
917 
// Fewer than three characters cannot hold a complete tag; the at() calls
// below also rely on at least two characters being present.
918  if ( tag->length() < 3 ) return;
919 
920  // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
921  // If not, identify and stream.
922 
923  if ( tag->at( tag->length() - 1 ) == '>'
924  && tag->at( tag->length() - 2 ) == '/' )
925  {
926  // All good!
927  return;
928  }
929  else if ( tag->at( tag->length() - 1 ) == '>' )
930  {
931  // There is more. Could be:
932  // text
933  // cdata text (which looks like another node)
934  // closing tag
935  // another node.
936  for ( ;; )
937  {
938  StreamWhiteSpace( in, tag );
939 
940  // Do we have text?
941  if ( in->good() && in->peek() != '<' )
942  {
943  // Yep, text.
944  TiXmlText text( "" );
945  text.StreamIn( in, tag );
946 
947  // What follows text is a closing tag or another node.
948  // Go around again and figure it out.
949  continue;
950  }
951 
952  // We now have either a closing tag...or another node.
953  // We should be at a "<", regardless.
954  if ( !in->good() ) return;
955  assert( in->peek() == '<' );
// Start of the upcoming tag inside 'tag', used later by Identify().
956  int tagIndex = (int) tag->length();
957 
958  bool closingTag = false;
959  bool firstCharFound = false;
960 
// Copy characters up to (not including) the next '>', noting whether the
// first non-'<', non-white-space character is '/'.
961  for( ;; )
962  {
963  if ( !in->good() )
964  return;
965 
966  int c = in->peek();
967  if ( c <= 0 )
968  {
969  TiXmlDocument* document = GetDocument();
970  if ( document )
972  return;
973  }
974 
975  if ( c == '>' )
976  break;
977 
978  *tag += (char) c;
979  in->get();
980 
981  // Early out if we find the CDATA id.
982  if ( c == '[' && tag->size() >= 9 )
983  {
984  size_t len = tag->size();
985  const char* start = tag->c_str() + len - 9;
986  if ( strcmp( start, "<![CDATA[" ) == 0 ) {
987  assert( !closingTag );
988  break;
989  }
990  }
991 
992  if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
993  {
994  firstCharFound = true;
995  if ( c == '/' )
996  closingTag = true;
997  }
998  }
999  // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1000  // If it was not, the streaming will be done by the tag.
1001  if ( closingTag )
1002  {
1003  if ( !in->good() )
1004  return;
1005 
1006  int c = in->get();
1007  if ( c <= 0 )
1008  {
1009  TiXmlDocument* document = GetDocument();
1010  if ( document )
1012  return;
1013  }
1014  assert( c == '>' );
1015  *tag += (char) c;
1016 
1017  // We are done, once we've found our closing tag.
1018  return;
1019  }
1020  else
1021  {
1022  // If not a closing tag, id it, and stream.
1023  const char* tagloc = tag->c_str() + tagIndex;
1024  TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1025  if ( !node )
1026  return;
// The node only drives streaming of its own text and is deleted right after.
1027  node->StreamIn( in, tag );
1028  delete node;
1029  node = 0;
1030 
1031  // No return: go around from the beginning: text, closing tag, or node.
1032  }
1033  }
1034  }
1035 }
1036 
1037 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1038 {
1039  p = SkipWhiteSpace( p, encoding );
1040  TiXmlDocument* document = GetDocument();
1041 
1042  if ( !p || !*p )
1043  {
1044  if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1045  return 0;
1046  }
1047 
1048  if ( data )
1049  {
1050  data->Stamp( p, encoding );
1051  location = data->Cursor();
1052  }
1053 
1054  if ( *p != '<' )
1055  {
1056  if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1057  return 0;
1058  }
1059 
1060  p = SkipWhiteSpace( p+1, encoding );
1061 
1062  // Read the name.
1063  const char* pErr = p;
1064 
1065  p = ReadName( p, &value, encoding );
1066  if ( !p || !*p )
1067  {
1068  if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1069  return 0;
1070  }
1071 
1072  TIXML_STRING endTag ("</");
1073  endTag += value;
1074 
1075  // Check for and read attributes. Also look for an empty
1076  // tag or an end tag.
1077  while ( p && *p )
1078  {
1079  pErr = p;
1080  p = SkipWhiteSpace( p, encoding );
1081  if ( !p || !*p )
1082  {
1083  if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1084  return 0;
1085  }
1086  if ( *p == '/' )
1087  {
1088  ++p;
1089  // Empty tag.
1090  if ( *p != '>' )
1091  {
1092  if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1093  return 0;
1094  }
1095  return (p+1);
1096  }
1097  else if ( *p == '>' )
1098  {
1099  // Done with attributes (if there were any.)
1100  // Read the value -- which can include other
1101  // elements -- read the end tag, and return.
1102  ++p;
1103  p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1104  if ( !p || !*p ) {
1105  // We were looking for the end tag, but found nothing.
1106  // Fix for [ 1663758 ] Failure to report error on bad XML
1107  if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1108  return 0;
1109  }
1110 
1111  // We should find the end tag now
1112  // note that:
1113  // </foo > and
1114  // </foo>
1115  // are both valid end tags.
1116  if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1117  {
1118  p += endTag.length();
1119  p = SkipWhiteSpace( p, encoding );
1120  if ( p && *p && *p == '>' ) {
1121  ++p;
1122  return p;
1123  }
1124  if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1125  return 0;
1126  }
1127  else
1128  {
1129  if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1130  return 0;
1131  }
1132  }
1133  else
1134  {
1135  // Try to read an attribute:
1136  TiXmlAttribute* attrib = new TiXmlAttribute();
1137  if ( !attrib )
1138  {
1139  return 0;
1140  }
1141 
1142  attrib->SetDocument( document );
1143  pErr = p;
1144  p = attrib->Parse( p, data, encoding );
1145 
1146  if ( !p || !*p )
1147  {
1148  if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1149  delete attrib;
1150  return 0;
1151  }
1152 
1153  // Handle the strange case of double attributes:
1154  TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1155 
1156  if ( node )
1157  {
1158  if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1159  delete attrib;
1160  return 0;
1161  }
1162 
1163  attributeSet.Add( attrib );
1164  }
1165  }
1166  return p;
1167 }
1168 
1169 
// Reads the content of an element: text and child nodes in any order, linking
// each as a child, until an end tag ("</") or the end of input is reached.
// Returns the position of the "</" that stopped the read, the position of the
// terminator if input ran out, or 0 on failure.
// NOTE(review): the embedded listing numbering skips 1190, so the condition
// that selects between the two Parse calls below is absent from this extract;
// only its two branches are visible.
1170 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1171 {
1172  TiXmlDocument* document = GetDocument();
1173 
1174  // Read in text and elements in any order.
// Position before white space was skipped, kept so text can be parsed with its
// leading white space intact.
1175  const char* pWithWhiteSpace = p;
1176  p = SkipWhiteSpace( p, encoding );
1177 
1178  while ( p && *p )
1179  {
1180  if ( *p != '<' )
1181  {
1182  // Take what we have, make a text element.
1183  TiXmlText* textNode = new TiXmlText( "" );
1184 
1185  if ( !textNode )
1186  {
1187  return 0;
1188  }
1189 
1191  {
1192  p = textNode->Parse( p, data, encoding );
1193  }
1194  else
1195  {
1196  // Special case: we want to keep the white space
1197  // so that leading spaces aren't removed.
1198  p = textNode->Parse( pWithWhiteSpace, data, encoding );
1199  }
1200 
// Blank text nodes are discarded rather than linked.
1201  if ( !textNode->Blank() )
1202  LinkEndChild( textNode );
1203  else
1204  delete textNode;
1205  }
1206  else
1207  {
1208  // We hit a '<'
1209  // Have we hit a new element or an end tag? This could also be
1210  // a TiXmlText in the "CDATA" style.
1211  if ( StringEqual( p, "</", false, encoding ) )
1212  {
1213  return p;
1214  }
1215  else
1216  {
1217  TiXmlNode* node = Identify( p, encoding );
1218  if ( node )
1219  {
1220  p = node->Parse( p, data, encoding );
1221  LinkEndChild( node );
1222  }
1223  else
1224  {
1225  return 0;
1226  }
1227  }
1228  }
1229  pWithWhiteSpace = p;
1230  p = SkipWhiteSpace( p, encoding );
1231  }
1232 
1233  if ( !p )
1234  {
1235  if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1236  }
1237  return p;
1238 }
1239 
1240 
// Copies characters from 'in' onto the end of 'tag' up to and including the
// next '>'.
// NOTE(review): the embedded listing numbering skips 1250, directly after
// "if ( document )", so a statement is absent from this extract there
// (presumably an error report - confirm against the complete file). As
// extracted, the following "return" reads as the body of that "if".
1241 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1242 {
1243  while ( in->good() )
1244  {
1245  int c = in->get();
1246  if ( c <= 0 )
1247  {
1248  TiXmlDocument* document = GetDocument();
1249  if ( document )
1251  return;
1252  }
1253  (*tag) += (char) c;
1254 
1255  if ( c == '>' )
1256  {
1257  // All is well.
1258  return;
1259  }
1260  }
1261 }
1262 
1263 
1264 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1265 {
1266  TiXmlDocument* document = GetDocument();
1267  p = SkipWhiteSpace( p, encoding );
1268 
1269  if ( data )
1270  {
1271  data->Stamp( p, encoding );
1272  location = data->Cursor();
1273  }
1274  if ( !p || !*p || *p != '<' )
1275  {
1276  if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1277  return 0;
1278  }
1279  ++p;
1280  value = "";
1281 
1282  while ( p && *p && *p != '>' )
1283  {
1284  value += *p;
1285  ++p;
1286  }
1287 
1288  if ( !p )
1289  {
1290  if ( document )
1291  document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1292  }
1293  if ( p && *p == '>' )
1294  return p+1;
1295  return p;
1296 }
1297 
1298 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1299 {
1300  while ( in->good() )
1301  {
1302  int c = in->get();
1303  if ( c <= 0 )
1304  {
1305  TiXmlDocument* document = GetDocument();
1306  if ( document )
1308  return;
1309  }
1310 
1311  (*tag) += (char) c;
1312 
1313  if ( c == '>'
1314  && tag->at( tag->length() - 2 ) == '-'
1315  && tag->at( tag->length() - 3 ) == '-' )
1316  {
1317  // All is well.
1318  return;
1319  }
1320  }
1321 }
1322 
1323 
1324 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1325 {
1326  TiXmlDocument* document = GetDocument();
1327  value = "";
1328 
1329  p = SkipWhiteSpace( p, encoding );
1330 
1331  if ( data )
1332  {
1333  data->Stamp( p, encoding );
1334  location = data->Cursor();
1335  }
1336  const char* startTag = "<!--";
1337  const char* endTag = "-->";
1338 
1339  if ( !StringEqual( p, startTag, false, encoding ) )
1340  {
1341  if ( document )
1342  document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1343  return 0;
1344  }
1345  p += strlen( startTag );
1346 
1347  // [ 1475201 ] TinyXML parses entities in comments
1348  // Oops - ReadText doesn't work, because we don't want to parse the entities.
1349  // p = ReadText( p, &value, false, endTag, false, encoding );
1350  //
1351  // from the XML spec:
1352  /*
1353  [Definition: Comments may appear anywhere in a document outside other markup; in addition,
1354  they may appear within the document type declaration at places allowed by the grammar.
1355  They are not part of the document's character data; an XML processor MAY, but need not,
1356  make it possible for an application to retrieve the text of comments. For compatibility,
1357  the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1358  references MUST NOT be recognized within comments.
1359 
1360  An example of a comment:
1361 
1362  <!-- declarations for <head> & <body> -->
1363  */
1364 
1365  value = "";
1366  // Keep all the white space.
1367  while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
1368  {
1369  value.append( p, 1 );
1370  ++p;
1371  }
1372  if ( p && *p )
1373  p += strlen( endTag );
1374 
1375  return p;
1376 }
1377 
1378 
1379 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1380 {
1381  p = SkipWhiteSpace( p, encoding );
1382  if ( !p || !*p ) return 0;
1383 
1384  if ( data )
1385  {
1386  data->Stamp( p, encoding );
1387  location = data->Cursor();
1388  }
1389  // Read the name, the '=' and the value.
1390  const char* pErr = p;
1391  p = ReadName( p, &name, encoding );
1392  if ( !p || !*p )
1393  {
1394  if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1395  return 0;
1396  }
1397  p = SkipWhiteSpace( p, encoding );
1398  if ( !p || !*p || *p != '=' )
1399  {
1400  if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1401  return 0;
1402  }
1403 
1404  ++p; // skip '='
1405  p = SkipWhiteSpace( p, encoding );
1406  if ( !p || !*p )
1407  {
1408  if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1409  return 0;
1410  }
1411 
1412  const char* end;
1413  const char SINGLE_QUOTE = '\'';
1414  const char DOUBLE_QUOTE = '\"';
1415 
1416  if ( *p == SINGLE_QUOTE )
1417  {
1418  ++p;
1419  end = "\'"; // single quote in string
1420  p = ReadText( p, &value, false, end, false, encoding );
1421  }
1422  else if ( *p == DOUBLE_QUOTE )
1423  {
1424  ++p;
1425  end = "\""; // double quote in string
1426  p = ReadText( p, &value, false, end, false, encoding );
1427  }
1428  else
1429  {
1430  // All attribute values should be in single or double quotes.
1431  // But this is such a common error that the parser will try
1432  // its best, even without them.
1433  value = "";
1434  while ( p && *p // existence
1435  && !IsWhiteSpace( *p ) // whitespace
1436  && *p != '/' && *p != '>' ) // tag end
1437  {
1438  if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1439  // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1440  // We did not have an opening quote but seem to have a
1441  // closing one. Give up and throw an error.
1442  if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1443  return 0;
1444  }
1445  value += *p;
1446  ++p;
1447  }
1448  }
1449  return p;
1450 }
1451 
1452 
1453 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1454 {
1455  while ( in->good() )
1456  {
1457  int c = in->peek();
1458  if ( !cdata && (c == '<' ) )
1459  {
1460  return;
1461  }
1462  if ( c <= 0 )
1463  {
1464  TiXmlDocument* document = GetDocument();
1465  if ( document )
1467  return;
1468  }
1469 
1470  (*tag) += (char) c;
1471  in->get(); // "commits" the peek made above
1472 
1473  if ( cdata && c == '>' && tag->size() >= 3 ) {
1474  size_t len = tag->size();
1475  if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1476  // terminator of cdata.
1477  return;
1478  }
1479  }
1480  }
1481 }
1482 
1483 
1484 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1485 {
1486  value = "";
1487  TiXmlDocument* document = GetDocument();
1488 
1489  if ( data )
1490  {
1491  data->Stamp( p, encoding );
1492  location = data->Cursor();
1493  }
1494 
1495  const char* const startTag = "<![CDATA[";
1496  const char* const endTag = "]]>";
1497 
1498  if ( cdata || StringEqual( p, startTag, false, encoding ) )
1499  {
1500  cdata = true;
1501 
1502  if ( !StringEqual( p, startTag, false, encoding ) )
1503  {
1504  if ( document )
1505  document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1506  return 0;
1507  }
1508  p += strlen( startTag );
1509 
1510  // Keep all the white space, ignore the encoding, etc.
1511  while ( p && *p
1512  && !StringEqual( p, endTag, false, encoding )
1513  )
1514  {
1515  value += *p;
1516  ++p;
1517  }
1518 
1519  TIXML_STRING dummy;
1520  p = ReadText( p, &dummy, false, endTag, false, encoding );
1521  return p;
1522  }
1523  else
1524  {
1525  bool ignoreWhite = true;
1526 
1527  const char* end = "<";
1528  p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1529  if ( p && *p )
1530  return p-1; // don't truncate the '<'
1531  return 0;
1532  }
1533 }
1534 
1535 
1536 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1537 {
1538  while ( in->good() )
1539  {
1540  int c = in->get();
1541  if ( c <= 0 )
1542  {
1543  TiXmlDocument* document = GetDocument();
1544  if ( document )
1546  return;
1547  }
1548  (*tag) += (char) c;
1549 
1550  if ( c == '>' )
1551  {
1552  // All is well.
1553  return;
1554  }
1555  }
1556 }
1557 
1558 
1559 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1560 {
1561  p = SkipWhiteSpace( p, _encoding );
1562  // Find the beginning, find the end, and look for
1563  // the stuff in-between.
1564  TiXmlDocument* document = GetDocument();
1565  if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1566  {
1567  if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1568  return 0;
1569  }
1570  if ( data )
1571  {
1572  data->Stamp( p, _encoding );
1573  location = data->Cursor();
1574  }
1575  p += 5;
1576 
1577  version = "";
1578  encoding = "";
1579  standalone = "";
1580 
1581  while ( p && *p )
1582  {
1583  if ( *p == '>' )
1584  {
1585  ++p;
1586  return p;
1587  }
1588 
1589  p = SkipWhiteSpace( p, _encoding );
1590  if ( StringEqual( p, "version", true, _encoding ) )
1591  {
1592  TiXmlAttribute attrib;
1593  p = attrib.Parse( p, data, _encoding );
1594  version = attrib.Value();
1595  }
1596  else if ( StringEqual( p, "encoding", true, _encoding ) )
1597  {
1598  TiXmlAttribute attrib;
1599  p = attrib.Parse( p, data, _encoding );
1600  encoding = attrib.Value();
1601  }
1602  else if ( StringEqual( p, "standalone", true, _encoding ) )
1603  {
1604  TiXmlAttribute attrib;
1605  p = attrib.Parse( p, data, _encoding );
1606  standalone = attrib.Value();
1607  }
1608  else
1609  {
1610  // Read over whatever it is.
1611  while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1612  ++p;
1613  }
1614  }
1615  return 0;
1616 }
1617 
1618 bool TiXmlText::Blank() const
1619 {
1620  for ( unsigned i=0; i<value.length(); i++ )
1621  if ( !IsWhiteSpace( value[i] ) )
1622  return false;
1623  return true;
1624 }
TiXmlAttribute * Find(const char *_name) const
Definition: tinyxml.cpp:1532
void Add(TiXmlAttribute *attribute)
Definition: tinyxml.cpp:1480
An attribute is a name-value pair.
Definition: tinyxml.h:734
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
const char * Value() const
Return the value of this attribute.
Definition: tinyxml.h:764
TIXML_STRING value
Definition: tinyxml.h:833
void SetDocument(TiXmlDocument *doc)
Definition: tinyxml.h:825
const TIXML_STRING & NameTStr() const
Definition: tinyxml.h:770
TiXmlDocument * document
Definition: tinyxml.h:831
TIXML_STRING name
Definition: tinyxml.h:832
static bool StreamTo(std::istream *in, int character, TIXML_STRING *tag)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)=0
static bool condenseWhiteSpace
Definition: tinyxml.h:386
static const char * ReadText(const char *in, TIXML_STRING *text, bool ignoreWhiteSpace, const char *endTag, bool ignoreCase, TiXmlEncoding encoding)
static int IsAlphaNum(unsigned char anyByte, TiXmlEncoding encoding)
static const char * GetEntity(const char *in, char *value, int *length, TiXmlEncoding encoding)
static bool IsWhiteSpaceCondensed()
Return the current white space setting.
Definition: tinyxml.h:199
static bool StringEqual(const char *p, const char *endTag, bool ignoreCase, TiXmlEncoding encoding)
static bool IsWhiteSpace(char c)
Definition: tinyxml.h:265
static void ConvertUTF32ToUTF8(unsigned long input, char *output, int *length)
TiXmlCursor location
Definition: tinyxml.h:346
static const int utf8ByteTable[256]
Definition: tinyxml.h:228
static Entity entity[NUM_ENTITY]
Definition: tinyxml.h:385
static int IsAlpha(unsigned char anyByte, TiXmlEncoding encoding)
static const char * GetChar(const char *p, char *_value, int *length, TiXmlEncoding encoding)
Definition: tinyxml.h:300
static bool StreamWhiteSpace(std::istream *in, TIXML_STRING *tag)
static const char * errorString[TIXML_ERROR_STRING_COUNT]
Definition: tinyxml.h:344
@ TIXML_ERROR_DOCUMENT_EMPTY
Definition: tinyxml.h:253
@ TIXML_ERROR_PARSING_ELEMENT
Definition: tinyxml.h:244
@ TIXML_ERROR_READING_END_TAG
Definition: tinyxml.h:249
@ TIXML_ERROR_STRING_COUNT
Definition: tinyxml.h:258
@ TIXML_ERROR_EMBEDDED_NULL
Definition: tinyxml.h:254
@ TIXML_ERROR_PARSING_EMPTY
Definition: tinyxml.h:248
@ TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME
Definition: tinyxml.h:245
@ TIXML_ERROR_PARSING_COMMENT
Definition: tinyxml.h:251
@ TIXML_ERROR_READING_ATTRIBUTES
Definition: tinyxml.h:247
@ TIXML_ERROR_PARSING_UNKNOWN
Definition: tinyxml.h:250
@ TIXML_ERROR_PARSING_DECLARATION
Definition: tinyxml.h:252
@ TIXML_ERROR_READING_ELEMENT_VALUE
Definition: tinyxml.h:246
@ TIXML_ERROR_PARSING_CDATA
Definition: tinyxml.h:255
static const char * ReadName(const char *p, TIXML_STRING *name, TiXmlEncoding encoding)
static const char * SkipWhiteSpace(const char *, TiXmlEncoding encoding)
static int ToLower(int v, TiXmlEncoding encoding)
Definition: tinyxml.h:355
An XML comment.
Definition: tinyxml.h:1099
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
In correct XML the declaration is the first entry in the file.
Definition: tinyxml.h:1216
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
TIXML_STRING standalone
Definition: tinyxml.h:1269
TIXML_STRING encoding
Definition: tinyxml.h:1268
const char * Encoding() const
Encoding. Will return an empty string if none was found.
Definition: tinyxml.h:1239
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
TIXML_STRING version
Definition: tinyxml.h:1267
Always the top level node.
Definition: tinyxml.h:1317
virtual const char * Parse(const char *p, TiXmlParsingData *data=0, TiXmlEncoding encoding=TIXML_DEFAULT_ENCODING)
Parse the given null terminated block of xml data.
void SetError(int err, const char *errorLocation, TiXmlParsingData *prevData, TiXmlEncoding encoding)
int TabSize() const
Definition: tinyxml.h:1425
void ClearError()
If you have handled the error, it can be reset with this call.
Definition: tinyxml.h:1430
TiXmlCursor errorLocation
Definition: tinyxml.h:1470
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
TIXML_STRING errorDesc
Definition: tinyxml.h:1468
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
const char * ReadValue(const char *in, TiXmlParsingData *prevData, TiXmlEncoding encoding)
TiXmlAttributeSet attributeSet
Definition: tinyxml.h:1092
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
The parent class for everything in the Document Object Model.
Definition: tinyxml.h:397
TiXmlNode * LinkEndChild(TiXmlNode *addThis)
Add a new node related to this.
Definition: tinyxml.cpp:178
TIXML_STRING value
Definition: tinyxml.h:715
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)=0
TiXmlNode * parent
Definition: tinyxml.h:709
TiXmlNode * firstChild
Definition: tinyxml.h:712
virtual const TiXmlElement * ToElement() const
Cast to a more defined type. Will return null if not of the requested type.
Definition: tinyxml.h:654
const TiXmlDocument * GetDocument() const
Return a pointer to the Document this node lives in.
Definition: tinyxml.cpp:501
friend class TiXmlElement
Definition: tinyxml.h:399
virtual const TiXmlDeclaration * ToDeclaration() const
Cast to a more defined type. Will return null if not of the requested type.
Definition: tinyxml.h:658
TiXmlNode * Identify(const char *start, TiXmlEncoding encoding)
const TiXmlCursor & Cursor() const
void Stamp(const char *now, TiXmlEncoding encoding)
TiXmlParsingData(const char *start, int _tabsize, int row, int col)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
void SetCDATA(bool _cdata)
Turns on or off a CDATA representation of text.
Definition: tinyxml.h:1177
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
Any tag that tinyXml doesn't recognize is saved as an unknown.
Definition: tinyxml.h:1281
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
TiXmlEncoding
Definition: tinyxml.h:139
@ TIXML_ENCODING_UNKNOWN
Definition: tinyxml.h:140
@ TIXML_ENCODING_LEGACY
Definition: tinyxml.h:142
@ TIXML_ENCODING_UTF8
Definition: tinyxml.h:141
const TiXmlEncoding TIXML_DEFAULT_ENCODING
Definition: tinyxml.h:145
#define TIXML_STRING
Definition: tinyxml.h:36
const unsigned char TIXML_UTF_LEAD_0
const unsigned char TIXML_UTF_LEAD_1
const unsigned char TIXML_UTF_LEAD_2