Panda3D
 All Classes Functions Variables Enumerations
tinyxmlparser.cpp
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied 
00006 warranty. In no event will the authors be held liable for any 
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any 
00010 purpose, including commercial applications, and to alter it and 
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must 
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and 
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source 
00022 distribution.
00023 */
00024 
00025 #include <ctype.h>
00026 #include <stddef.h>
00027 
00028 #include "tinyxml.h"
00029 
00030 //#define DEBUG_PARSER
00031 #if defined( DEBUG_PARSER )
00032 # if defined( DEBUG ) && defined( _MSC_VER )
00033 #   include <windows.h>
00034 #   define TIXML_LOG OutputDebugString
00035 # else
00036 #   define TIXML_LOG printf
00037 # endif
00038 #endif
00039 
00040 // Note that "PutString" hardcodes the same list. This
00041 // is less flexible than it appears. Changing the entries
00042 // or their order will break PutString.
00043 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
00044 {
00045   { "&amp;",  5, '&' },
00046   { "&lt;",   4, '<' },
00047   { "&gt;",   4, '>' },
00048   { "&quot;", 6, '\"' },
00049   { "&apos;", 6, '\'' }
00050 };
00051 
00052 // Bunch of unicode info at:
00053 //    http://www.unicode.org/faq/utf_bom.html
00054 // Including the basic of this table, which determines the #bytes in the
00055 // sequence from the lead byte. 1 placed for invalid sequences --
00056 // although the result will be junk, pass it through as much as possible.
00057 // Beware of the non-characters in UTF-8: 
00058 //        ef bb bf (Microsoft "lead bytes")
00059 //        ef bf be
00060 //        ef bf bf 
00061 
// The three bytes EF BB BF (the Microsoft "lead bytes" for UTF-8), in order.
// The first one is also the lead byte of the EF BF BE / EF BF BF sequences
// the parser treats specially.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
00065 
00066 const int TiXmlBase::utf8ByteTable[256] = 
00067 {
00068   //  0 1 2 3 4 5 6 7 8 9 a b c d e f
00069     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x00
00070     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x10
00071     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x20
00072     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x30
00073     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x40
00074     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x50
00075     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x60
00076     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x70 End of ASCII range
00077     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x80 0x80 to 0xc1 invalid
00078     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x90 
00079     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xa0 
00080     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xb0 
00081     1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xc0 0xc2 to 0xdf 2 byte
00082     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xd0
00083     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  // 0xe0 0xe0 to 0xef 3 byte
00084     4,  4,  4,  4,  4,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00085 };
00086 
00087 
00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00089 {
00090   const unsigned long BYTE_MASK = 0xBF;
00091   const unsigned long BYTE_MARK = 0x80;
00092   const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00093 
00094   if (input < 0x80) 
00095     *length = 1;
00096   else if ( input < 0x800 )
00097     *length = 2;
00098   else if ( input < 0x10000 )
00099     *length = 3;
00100   else if ( input < 0x200000 )
00101     *length = 4;
00102   else
00103     { *length = 0; return; }  // This code won't covert this correctly anyway.
00104 
00105   output += *length;
00106 
00107   // Scary scary fall throughs.
00108   switch (*length) 
00109   {
00110     case 4:
00111       --output; 
00112       *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00113       input >>= 6;
00114     case 3:
00115       --output; 
00116       *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00117       input >>= 6;
00118     case 2:
00119       --output; 
00120       *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00121       input >>= 6;
00122     case 1:
00123       --output; 
00124       *output = (char)(input | FIRST_BYTE_MARK[*length]);
00125   }
00126 }
00127 
00128 
00129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00130 {
00131   // This will only work for low-ascii, everything else is assumed to be a valid
00132   // letter. I'm not sure this is the best approach, but it is quite tricky trying
00133   // to figure out alhabetical vs. not across encoding. So take a very 
00134   // conservative approach.
00135 
00136 //  if ( encoding == TIXML_ENCODING_UTF8 )
00137 //  {
00138     if ( anyByte < 127 )
00139       return isalpha( anyByte );
00140     else
00141       return 1; // What else to do? The unicode set is huge...get the english ones right.
00142 //  }
00143 //  else
00144 //  {
00145 //    return isalpha( anyByte );
00146 //  }
00147 }
00148 
00149 
00150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00151 {
00152   // This will only work for low-ascii, everything else is assumed to be a valid
00153   // letter. I'm not sure this is the best approach, but it is quite tricky trying
00154   // to figure out alhabetical vs. not across encoding. So take a very 
00155   // conservative approach.
00156 
00157 //  if ( encoding == TIXML_ENCODING_UTF8 )
00158 //  {
00159     if ( anyByte < 127 )
00160       return isalnum( anyByte );
00161     else
00162       return 1; // What else to do? The unicode set is huge...get the english ones right.
00163 //  }
00164 //  else
00165 //  {
00166 //    return isalnum( anyByte );
00167 //  }
00168 }
00169 
00170 
00171 class TiXmlParsingData
00172 {
00173   friend class TiXmlDocument;
00174   public:
00175   void Stamp( const char* now, TiXmlEncoding encoding );
00176 
00177   const TiXmlCursor& Cursor() { return cursor; }
00178 
00179   private:
00180   // Only used by the document!
00181   TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00182   {
00183     assert( start );
00184     stamp = start;
00185     tabsize = _tabsize;
00186     cursor.row = row;
00187     cursor.col = col;
00188   }
00189 
00190   TiXmlCursor   cursor;
00191   const char*   stamp;
00192   int       tabsize;
00193 };
00194 
00195 
00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00197 {
00198   assert( now );
00199 
00200   // Do nothing if the tabsize is 0.
00201   if ( tabsize < 1 )
00202   {
00203     return;
00204   }
00205 
00206   // Get the current row, column.
00207   int row = cursor.row;
00208   int col = cursor.col;
00209   const char* p = stamp;
00210   assert( p );
00211 
00212   while ( p < now )
00213   {
00214     // Treat p as unsigned, so we have a happy compiler.
00215     const unsigned char* pU = (const unsigned char*)p;
00216 
00217     // Code contributed by Fletcher Dunn: (modified by lee)
00218     switch (*pU) {
00219       case 0:
00220         // We *should* never get here, but in case we do, don't
00221         // advance past the terminating null character, ever
00222         return;
00223 
00224       case '\r':
00225         // bump down to the next line
00226         ++row;
00227         col = 0;        
00228         // Eat the character
00229         ++p;
00230 
00231         // Check for \r\n sequence, and treat this as a single character
00232         if (*p == '\n') {
00233           ++p;
00234         }
00235         break;
00236 
00237       case '\n':
00238         // bump down to the next line
00239         ++row;
00240         col = 0;
00241 
00242         // Eat the character
00243         ++p;
00244 
00245         // Check for \n\r sequence, and treat this as a single
00246         // character.  (Yes, this bizarre thing does occur still
00247         // on some arcane platforms...)
00248         if (*p == '\r') {
00249           ++p;
00250         }
00251         break;
00252 
00253       case '\t':
00254         // Eat the character
00255         ++p;
00256 
00257         // Skip to next tab stop
00258         col = (col / tabsize + 1) * tabsize;
00259         break;
00260 
00261       case TIXML_UTF_LEAD_0:
00262         if ( encoding == TIXML_ENCODING_UTF8 )
00263         {
00264           if ( *(p+1) && *(p+2) )
00265           {
00266             // In these cases, don't advance the column. These are
00267             // 0-width spaces.
00268             if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00269               p += 3; 
00270             else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00271               p += 3; 
00272             else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00273               p += 3; 
00274             else
00275               { p +=3; ++col; } // A normal character.
00276           }
00277         }
00278         else
00279         {
00280           ++p;
00281           ++col;
00282         }
00283         break;
00284 
00285       default:
00286         if ( encoding == TIXML_ENCODING_UTF8 )
00287         {
00288           // Eat the 1 to 4 byte utf8 character.
00289           int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00290           if ( step == 0 )
00291             step = 1;   // Error case from bad encoding, but handle gracefully.
00292           p += step;
00293 
00294           // Just advance one column, of course.
00295           ++col;
00296         }
00297         else
00298         {
00299           ++p;
00300           ++col;
00301         }
00302         break;
00303     }
00304   }
00305   cursor.row = row;
00306   cursor.col = col;
00307   assert( cursor.row >= -1 );
00308   assert( cursor.col >= -1 );
00309   stamp = p;
00310   assert( stamp );
00311 }
00312 
00313 
00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00315 {
00316   if ( !p || !*p )
00317   {
00318     return 0;
00319   }
00320   if ( encoding == TIXML_ENCODING_UTF8 )
00321   {
00322     while ( *p )
00323     {
00324       const unsigned char* pU = (const unsigned char*)p;
00325       
00326       // Skip the stupid Microsoft UTF-8 Byte order marks
00327       if (  *(pU+0)==TIXML_UTF_LEAD_0
00328          && *(pU+1)==TIXML_UTF_LEAD_1 
00329          && *(pU+2)==TIXML_UTF_LEAD_2 )
00330       {
00331         p += 3;
00332         continue;
00333       }
00334       else if(*(pU+0)==TIXML_UTF_LEAD_0
00335          && *(pU+1)==0xbfU
00336          && *(pU+2)==0xbeU )
00337       {
00338         p += 3;
00339         continue;
00340       }
00341       else if(*(pU+0)==TIXML_UTF_LEAD_0
00342          && *(pU+1)==0xbfU
00343          && *(pU+2)==0xbfU )
00344       {
00345         p += 3;
00346         continue;
00347       }
00348 
00349       if ( IsWhiteSpace( *p ) )   // Still using old rules for white space.
00350         ++p;
00351       else
00352         break;
00353     }
00354   }
00355   else
00356   {
00357     while ( *p && IsWhiteSpace( *p ) )
00358       ++p;
00359   }
00360 
00361   return p;
00362 }
00363 
00364 #ifdef TIXML_USE_STL
00365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00366 {
00367   for( ;; )
00368   {
00369     if ( !in->good() ) return false;
00370 
00371     int c = in->peek();
00372     // At this scope, we can't get to a document. So fail silently.
00373     if ( !IsWhiteSpace( c ) || c <= 0 )
00374       return true;
00375 
00376     *tag += (char) in->get();
00377   }
00378 }
00379 
00380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00381 {
00382   //assert( character > 0 && character < 128 ); // else it won't work in utf-8
00383   while ( in->good() )
00384   {
00385     int c = in->peek();
00386     if ( c == character )
00387       return true;
00388     if ( c <= 0 )   // Silent failure: can't get document at this scope
00389       return false;
00390 
00391     in->get();
00392     *tag += (char) c;
00393   }
00394   return false;
00395 }
00396 #endif
00397 
00398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00399 // "assign" optimization removes over 10% of the execution time.
00400 //
00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00402 {
00403   // Oddly, not supported on some comilers,
00404   //name->clear();
00405   // So use this:
00406   *name = "";
00407   assert( p );
00408 
00409   // Names start with letters or underscores.
00410   // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00411   // algorithm is generous.
00412   //
00413   // After that, they can be letters, underscores, numbers,
00414   // hyphens, or colons. (Colons are valid ony for namespaces,
00415   // but tinyxml can't tell namespaces from names.)
00416   if (    p && *p 
00417      && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00418   {
00419     const char* start = p;
00420     while(    p && *p
00421         &&  (   IsAlphaNum( (unsigned char ) *p, encoding ) 
00422              || *p == '_'
00423              || *p == '-'
00424              || *p == '.'
00425              || *p == ':' ) )
00426     {
00427       //(*name) += *p; // expensive
00428       ++p;
00429     }
00430     if ( p-start > 0 ) {
00431       name->assign( start, p-start );
00432     }
00433     return p;
00434   }
00435   return 0;
00436 }
00437 
00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00439 {
00440   // Presume an entity, and pull it out.
00441     TIXML_STRING ent;
00442   int i;
00443   *length = 0;
00444 
00445   if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00446   {
00447     unsigned long ucs = 0;
00448     ptrdiff_t delta = 0;
00449     unsigned mult = 1;
00450 
00451     if ( *(p+2) == 'x' )
00452     {
00453       // Hexadecimal.
00454       if ( !*(p+3) ) return 0;
00455 
00456       const char* q = p+3;
00457       q = strchr( q, ';' );
00458 
00459       if ( !q || !*q ) return 0;
00460 
00461       delta = q-p;
00462       --q;
00463 
00464       while ( *q != 'x' )
00465       {
00466         if ( *q >= '0' && *q <= '9' )
00467           ucs += mult * (*q - '0');
00468         else if ( *q >= 'a' && *q <= 'f' )
00469           ucs += mult * (*q - 'a' + 10);
00470         else if ( *q >= 'A' && *q <= 'F' )
00471           ucs += mult * (*q - 'A' + 10 );
00472         else 
00473           return 0;
00474         mult *= 16;
00475         --q;
00476       }
00477     }
00478     else
00479     {
00480       // Decimal.
00481       if ( !*(p+2) ) return 0;
00482 
00483       const char* q = p+2;
00484       q = strchr( q, ';' );
00485 
00486       if ( !q || !*q ) return 0;
00487 
00488       delta = q-p;
00489       --q;
00490 
00491       while ( *q != '#' )
00492       {
00493         if ( *q >= '0' && *q <= '9' )
00494           ucs += mult * (*q - '0');
00495         else 
00496           return 0;
00497         mult *= 10;
00498         --q;
00499       }
00500     }
00501     if ( encoding == TIXML_ENCODING_UTF8 )
00502     {
00503       // convert the UCS to UTF-8
00504       ConvertUTF32ToUTF8( ucs, value, length );
00505     }
00506     else
00507     {
00508       *value = (char)ucs;
00509       *length = 1;
00510     }
00511     return p + delta + 1;
00512   }
00513 
00514   // Now try to match it.
00515   for( i=0; i<NUM_ENTITY; ++i )
00516   {
00517     if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00518     {
00519       assert( strlen( entity[i].str ) == entity[i].strLength );
00520       *value = entity[i].chr;
00521       *length = 1;
00522       return ( p + entity[i].strLength );
00523     }
00524   }
00525 
00526   // So it wasn't an entity, its unrecognized, or something like that.
00527   *value = *p;  // Don't put back the last one, since we return it!
00528   //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00529           // Just writes strange XML.
00530   return p+1;
00531 }
00532 
00533 
00534 bool TiXmlBase::StringEqual( const char* p,
00535                const char* tag,
00536                bool ignoreCase,
00537                TiXmlEncoding encoding )
00538 {
00539   assert( p );
00540   assert( tag );
00541   if ( !p || !*p )
00542   {
00543     assert( 0 );
00544     return false;
00545   }
00546 
00547   const char* q = p;
00548 
00549   if ( ignoreCase )
00550   {
00551     while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00552     {
00553       ++q;
00554       ++tag;
00555     }
00556 
00557     if ( *tag == 0 )
00558       return true;
00559   }
00560   else
00561   {
00562     while ( *q && *tag && *q == *tag )
00563     {
00564       ++q;
00565       ++tag;
00566     }
00567 
00568     if ( *tag == 0 )    // Have we found the end of the tag, and everything equal?
00569       return true;
00570   }
00571   return false;
00572 }
00573 
00574 const char* TiXmlBase::ReadText(  const char* p, 
00575                   TIXML_STRING * text, 
00576                   bool trimWhiteSpace, 
00577                   const char* endTag, 
00578                   bool caseInsensitive,
00579                   TiXmlEncoding encoding )
00580 {
00581     *text = "";
00582   if (    !trimWhiteSpace     // certain tags always keep whitespace
00583      || !condenseWhiteSpace ) // if true, whitespace is always kept
00584   {
00585     // Keep all the white space.
00586     while (    p && *p
00587         && !StringEqual( p, endTag, caseInsensitive, encoding )
00588         )
00589     {
00590       int len;
00591       char cArr[4] = { 0, 0, 0, 0 };
00592       p = GetChar( p, cArr, &len, encoding );
00593       text->append( cArr, len );
00594     }
00595   }
00596   else
00597   {
00598     bool whitespace = false;
00599 
00600     // Remove leading white space:
00601     p = SkipWhiteSpace( p, encoding );
00602     while (    p && *p
00603         && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00604     {
00605       if ( *p == '\r' || *p == '\n' )
00606       {
00607         whitespace = true;
00608         ++p;
00609       }
00610       else if ( IsWhiteSpace( *p ) )
00611       {
00612         whitespace = true;
00613         ++p;
00614       }
00615       else
00616       {
00617         // If we've found whitespace, add it before the
00618         // new character. Any whitespace just becomes a space.
00619         if ( whitespace )
00620         {
00621           (*text) += ' ';
00622           whitespace = false;
00623         }
00624         int len;
00625         char cArr[4] = { 0, 0, 0, 0 };
00626         p = GetChar( p, cArr, &len, encoding );
00627         if ( len == 1 )
00628           (*text) += cArr[0]; // more efficient
00629         else
00630           text->append( cArr, len );
00631       }
00632     }
00633   }
00634   if ( p && *p ) 
00635     p += strlen( endTag );
00636   return p;
00637 }
00638 
00639 #ifdef TIXML_USE_STL
00640 
00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00642 {
00643   // The basic issue with a document is that we don't know what we're
00644   // streaming. Read something presumed to be a tag (and hope), then
00645   // identify it, and call the appropriate stream method on the tag.
00646   //
00647   // This "pre-streaming" will never read the closing ">" so the
00648   // sub-tag can orient itself.
00649 
00650   if ( !StreamTo( in, '<', tag ) ) 
00651   {
00652     SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00653     return;
00654   }
00655 
00656   while ( in->good() )
00657   {
00658     int tagIndex = (int) tag->length();
00659     while ( in->good() && in->peek() != '>' )
00660     {
00661       int c = in->get();
00662       if ( c <= 0 )
00663       {
00664         SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00665         break;
00666       }
00667       (*tag) += (char) c;
00668     }
00669 
00670     if ( in->good() )
00671     {
00672       // We now have something we presume to be a node of 
00673       // some sort. Identify it, and call the node to
00674       // continue streaming.
00675       TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00676 
00677       if ( node )
00678       {
00679         node->StreamIn( in, tag );
00680         bool isElement = node->ToElement() != 0;
00681         delete node;
00682         node = 0;
00683 
00684         // If this is the root element, we're done. Parsing will be
00685         // done by the >> operator.
00686         if ( isElement )
00687         {
00688           return;
00689         }
00690       }
00691       else
00692       {
00693         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00694         return;
00695       }
00696     }
00697   }
00698   // We should have returned sooner.
00699   SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00700 }
00701 
00702 #endif
00703 
00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00705 {
00706   ClearError();
00707 
00708   // Parse away, at the document level. Since a document
00709   // contains nothing but other tags, most of what happens
00710   // here is skipping white space.
00711   if ( !p || !*p )
00712   {
00713     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00714     return 0;
00715   }
00716 
00717   // Note that, for a document, this needs to come
00718   // before the while space skip, so that parsing
00719   // starts from the pointer we are given.
00720   location.Clear();
00721   if ( prevData )
00722   {
00723     location.row = prevData->cursor.row;
00724     location.col = prevData->cursor.col;
00725   }
00726   else
00727   {
00728     location.row = 0;
00729     location.col = 0;
00730   }
00731   TiXmlParsingData data( p, TabSize(), location.row, location.col );
00732   location = data.Cursor();
00733 
00734   if ( encoding == TIXML_ENCODING_UNKNOWN )
00735   {
00736     // Check for the Microsoft UTF-8 lead bytes.
00737     const unsigned char* pU = (const unsigned char*)p;
00738     if (  *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00739        && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00740        && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00741     {
00742       encoding = TIXML_ENCODING_UTF8;
00743       useMicrosoftBOM = true;
00744     }
00745   }
00746 
00747     p = SkipWhiteSpace( p, encoding );
00748   if ( !p )
00749   {
00750     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00751     return 0;
00752   }
00753 
00754   while ( p && *p )
00755   {
00756     TiXmlNode* node = Identify( p, encoding );
00757     if ( node )
00758     {
00759       p = node->Parse( p, &data, encoding );
00760       LinkEndChild( node );
00761     }
00762     else
00763     {
00764       break;
00765     }
00766 
00767     // Did we get encoding info?
00768     if (    encoding == TIXML_ENCODING_UNKNOWN
00769        && node->ToDeclaration() )
00770     {
00771       TiXmlDeclaration* dec = node->ToDeclaration();
00772       const char* enc = dec->Encoding();
00773       assert( enc );
00774 
00775       if ( *enc == 0 )
00776         encoding = TIXML_ENCODING_UTF8;
00777       else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00778         encoding = TIXML_ENCODING_UTF8;
00779       else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00780         encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00781       else 
00782         encoding = TIXML_ENCODING_LEGACY;
00783     }
00784 
00785     p = SkipWhiteSpace( p, encoding );
00786   }
00787 
00788   // Was this empty?
00789   if ( !firstChild ) {
00790     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00791     return 0;
00792   }
00793 
00794   // All is well.
00795   return p;
00796 }
00797 
00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00799 { 
00800   // The first error in a chain is more accurate - don't set again!
00801   if ( error )
00802     return;
00803 
00804   assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00805   error   = true;
00806   errorId = err;
00807   errorDesc = errorString[ errorId ];
00808 
00809   errorLocation.Clear();
00810   if ( pError && data )
00811   {
00812     data->Stamp( pError, encoding );
00813     errorLocation = data->Cursor();
00814   }
00815 }
00816 
00817 
00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00819 {
00820   TiXmlNode* returnNode = 0;
00821 
00822   p = SkipWhiteSpace( p, encoding );
00823   if( !p || !*p || *p != '<' )
00824   {
00825     return 0;
00826   }
00827 
00828   p = SkipWhiteSpace( p, encoding );
00829 
00830   if ( !p || !*p )
00831   {
00832     return 0;
00833   }
00834 
00835   // What is this thing? 
00836   // - Elements start with a letter or underscore, but xml is reserved.
00837   // - Comments: <!--
00838   // - Decleration: <?xml
00839   // - Everthing else is unknown to tinyxml.
00840   //
00841 
00842   const char* xmlHeader = { "<?xml" };
00843   const char* commentHeader = { "<!--" };
00844   const char* dtdHeader = { "<!" };
00845   const char* cdataHeader = { "<![CDATA[" };
00846 
00847   if ( StringEqual( p, xmlHeader, true, encoding ) )
00848   {
00849     #ifdef DEBUG_PARSER
00850       TIXML_LOG( "XML parsing Declaration\n" );
00851     #endif
00852     returnNode = new TiXmlDeclaration();
00853   }
00854   else if ( StringEqual( p, commentHeader, false, encoding ) )
00855   {
00856     #ifdef DEBUG_PARSER
00857       TIXML_LOG( "XML parsing Comment\n" );
00858     #endif
00859     returnNode = new TiXmlComment();
00860   }
00861   else if ( StringEqual( p, cdataHeader, false, encoding ) )
00862   {
00863     #ifdef DEBUG_PARSER
00864       TIXML_LOG( "XML parsing CDATA\n" );
00865     #endif
00866     TiXmlText* text = new TiXmlText( "" );
00867     text->SetCDATA( true );
00868     returnNode = text;
00869   }
00870   else if ( StringEqual( p, dtdHeader, false, encoding ) )
00871   {
00872     #ifdef DEBUG_PARSER
00873       TIXML_LOG( "XML parsing Unknown(1)\n" );
00874     #endif
00875     returnNode = new TiXmlUnknown();
00876   }
00877   else if (    IsAlpha( *(p+1), encoding )
00878         || *(p+1) == '_' )
00879   {
00880     #ifdef DEBUG_PARSER
00881       TIXML_LOG( "XML parsing Element\n" );
00882     #endif
00883     returnNode = new TiXmlElement( "" );
00884   }
00885   else
00886   {
00887     #ifdef DEBUG_PARSER
00888       TIXML_LOG( "XML parsing Unknown(2)\n" );
00889     #endif
00890     returnNode = new TiXmlUnknown();
00891   }
00892 
00893   if ( returnNode )
00894   {
00895     // Set the parent, so it can report errors
00896     returnNode->parent = this;
00897   }
00898   return returnNode;
00899 }
00900 
00901 #ifdef TIXML_USE_STL
00902 
00903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00904 {
00905   // We're called with some amount of pre-parsing. That is, some of "this"
00906   // element is in "tag". Go ahead and stream to the closing ">"
00907   while( in->good() )
00908   {
00909     int c = in->get();
00910     if ( c <= 0 )
00911     {
00912       TiXmlDocument* document = GetDocument();
00913       if ( document )
00914         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00915       return;
00916     }
00917     (*tag) += (char) c ;
00918     
00919     if ( c == '>' )
00920       break;
00921   }
00922 
00923   if ( tag->length() < 3 ) return;
00924 
00925   // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00926   // If not, identify and stream.
00927 
00928   if (    tag->at( tag->length() - 1 ) == '>' 
00929      && tag->at( tag->length() - 2 ) == '/' )
00930   {
00931     // All good!
00932     return;
00933   }
00934   else if ( tag->at( tag->length() - 1 ) == '>' )
00935   {
00936     // There is more. Could be:
00937     //    text
00938     //    cdata text (which looks like another node)
00939     //    closing tag
00940     //    another node.
00941     for ( ;; )
00942     {
00943       StreamWhiteSpace( in, tag );
00944 
00945       // Do we have text?
00946       if ( in->good() && in->peek() != '<' ) 
00947       {
00948         // Yep, text.
00949         TiXmlText text( "" );
00950         text.StreamIn( in, tag );
00951 
00952         // What follows text is a closing tag or another node.
00953         // Go around again and figure it out.
00954         continue;
00955       }
00956 
00957       // We now have either a closing tag...or another node.
00958       // We should be at a "<", regardless.
00959       if ( !in->good() ) return;
00960       assert( in->peek() == '<' );
00961       int tagIndex = (int) tag->length();
00962 
00963       bool closingTag = false;
00964       bool firstCharFound = false;
00965 
00966       for( ;; )
00967       {
00968         if ( !in->good() )
00969           return;
00970 
00971         int c = in->peek();
00972         if ( c <= 0 )
00973         {
00974           TiXmlDocument* document = GetDocument();
00975           if ( document )
00976             document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00977           return;
00978         }
00979         
00980         if ( c == '>' )
00981           break;
00982 
00983         *tag += (char) c;
00984         in->get();
00985 
00986         // Early out if we find the CDATA id.
00987         if ( c == '[' && tag->size() >= 9 )
00988         {
00989           size_t len = tag->size();
00990           const char* start = tag->c_str() + len - 9;
00991           if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00992             assert( !closingTag );
00993             break;
00994           }
00995         }
00996 
00997         if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00998         {
00999           firstCharFound = true;
01000           if ( c == '/' )
01001             closingTag = true;
01002         }
01003       }
01004       // If it was a closing tag, then read in the closing '>' to clean up the input stream.
01005       // If it was not, the streaming will be done by the tag.
01006       if ( closingTag )
01007       {
01008         if ( !in->good() )
01009           return;
01010 
01011         int c = in->get();
01012         if ( c <= 0 )
01013         {
01014           TiXmlDocument* document = GetDocument();
01015           if ( document )
01016             document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01017           return;
01018         }
01019         assert( c == '>' );
01020         *tag += (char) c;
01021 
01022         // We are done, once we've found our closing tag.
01023         return;
01024       }
01025       else
01026       {
01027         // If not a closing tag, id it, and stream.
01028         const char* tagloc = tag->c_str() + tagIndex;
01029         TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01030         if ( !node )
01031           return;
01032         node->StreamIn( in, tag );
01033         delete node;
01034         node = 0;
01035 
01036         // No return: go around from the beginning: text, closing tag, or node.
01037       }
01038     }
01039   }
01040 }
01041 #endif
01042 
01043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01044 {
01045   p = SkipWhiteSpace( p, encoding );
01046   TiXmlDocument* document = GetDocument();
01047 
01048   if ( !p || !*p )
01049   {
01050     if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01051     return 0;
01052   }
01053 
01054   if ( data )
01055   {
01056     data->Stamp( p, encoding );
01057     location = data->Cursor();
01058   }
01059 
01060   if ( *p != '<' )
01061   {
01062     if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01063     return 0;
01064   }
01065 
01066   p = SkipWhiteSpace( p+1, encoding );
01067 
01068   // Read the name.
01069   const char* pErr = p;
01070 
01071     p = ReadName( p, &value, encoding );
01072   if ( !p || !*p )
01073   {
01074     if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01075     return 0;
01076   }
01077 
01078     TIXML_STRING endTag ("</");
01079   endTag += value;
01080 
01081   // Check for and read attributes. Also look for an empty
01082   // tag or an end tag.
01083   while ( p && *p )
01084   {
01085     pErr = p;
01086     p = SkipWhiteSpace( p, encoding );
01087     if ( !p || !*p )
01088     {
01089       if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01090       return 0;
01091     }
01092     if ( *p == '/' )
01093     {
01094       ++p;
01095       // Empty tag.
01096       if ( *p  != '>' )
01097       {
01098         if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );   
01099         return 0;
01100       }
01101       return (p+1);
01102     }
01103     else if ( *p == '>' )
01104     {
01105       // Done with attributes (if there were any.)
01106       // Read the value -- which can include other
01107       // elements -- read the end tag, and return.
01108       ++p;
01109       p = ReadValue( p, data, encoding );   // Note this is an Element method, and will set the error if one happens.
01110       if ( !p || !*p ) {
01111         // We were looking for the end tag, but found nothing.
01112         // Fix for [ 1663758 ] Failure to report error on bad XML
01113         if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01114         return 0;
01115       }
01116 
01117       // We should find the end tag now
01118       // note that:
01119       // </foo > and
01120       // </foo> 
01121       // are both valid end tags.
01122       if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01123       {
01124         p += endTag.length();
01125         p = SkipWhiteSpace( p, encoding );
01126         if ( p && *p && *p == '>' ) {
01127           ++p;
01128           return p;
01129         }
01130         if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01131         return 0;
01132       }
01133       else
01134       {
01135         if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01136         return 0;
01137       }
01138     }
01139     else
01140     {
01141       // Try to read an attribute:
01142       TiXmlAttribute* attrib = new TiXmlAttribute();
01143       if ( !attrib )
01144       {
01145         return 0;
01146       }
01147 
01148       attrib->SetDocument( document );
01149       pErr = p;
01150       p = attrib->Parse( p, data, encoding );
01151 
01152       if ( !p || !*p )
01153       {
01154         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01155         delete attrib;
01156         return 0;
01157       }
01158 
01159       // Handle the strange case of double attributes:
01160       #ifdef TIXML_USE_STL
01161       TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01162       #else
01163       TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01164       #endif
01165       if ( node )
01166       {
01167         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01168         delete attrib;
01169         return 0;
01170       }
01171 
01172       attributeSet.Add( attrib );
01173     }
01174   }
01175   return p;
01176 }
01177 
01178 
01179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01180 {
01181   TiXmlDocument* document = GetDocument();
01182 
01183   // Read in text and elements in any order.
01184   const char* pWithWhiteSpace = p;
01185   p = SkipWhiteSpace( p, encoding );
01186 
01187   while ( p && *p )
01188   {
01189     if ( *p != '<' )
01190     {
01191       // Take what we have, make a text element.
01192       TiXmlText* textNode = new TiXmlText( "" );
01193 
01194       if ( !textNode )
01195       {
01196           return 0;
01197       }
01198 
01199       if ( TiXmlBase::IsWhiteSpaceCondensed() )
01200       {
01201         p = textNode->Parse( p, data, encoding );
01202       }
01203       else
01204       {
01205         // Special case: we want to keep the white space
01206         // so that leading spaces aren't removed.
01207         p = textNode->Parse( pWithWhiteSpace, data, encoding );
01208       }
01209 
01210       if ( !textNode->Blank() )
01211         LinkEndChild( textNode );
01212       else
01213         delete textNode;
01214     } 
01215     else 
01216     {
01217       // We hit a '<'
01218       // Have we hit a new element or an end tag? This could also be
01219       // a TiXmlText in the "CDATA" style.
01220       if ( StringEqual( p, "</", false, encoding ) )
01221       {
01222         return p;
01223       }
01224       else
01225       {
01226         TiXmlNode* node = Identify( p, encoding );
01227         if ( node )
01228         {
01229           p = node->Parse( p, data, encoding );
01230           LinkEndChild( node );
01231         }       
01232         else
01233         {
01234           return 0;
01235         }
01236       }
01237     }
01238     pWithWhiteSpace = p;
01239     p = SkipWhiteSpace( p, encoding );
01240   }
01241 
01242   if ( !p )
01243   {
01244     if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01245   } 
01246   return p;
01247 }
01248 
01249 
#ifdef TIXML_USE_STL
// Copies characters from `in` onto the end of `tag`, stopping after the
// first '>' has been appended or when the stream is no longer good().
// A get() result <= 0 reports TIXML_ERROR_EMBEDDED_NULL to the document
// (when there is one) and stops without appending.
void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
{
  for ( ;; )
  {
    if ( !in->good() )
      return;

    const int ch = in->get();
    if ( ch <= 0 )
    {
      TiXmlDocument* const doc = GetDocument();
      if ( doc )
        doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
      return;
    }

    (*tag) += (char) ch;

    // The closing bracket ends the unknown tag.
    if ( ch == '>' )
      return;
  }
}
#endif
01273 
01274 
01275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01276 {
01277   TiXmlDocument* document = GetDocument();
01278   p = SkipWhiteSpace( p, encoding );
01279 
01280   if ( data )
01281   {
01282     data->Stamp( p, encoding );
01283     location = data->Cursor();
01284   }
01285   if ( !p || !*p || *p != '<' )
01286   {
01287     if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01288     return 0;
01289   }
01290   ++p;
01291     value = "";
01292 
01293   while ( p && *p && *p != '>' )
01294   {
01295     value += *p;
01296     ++p;
01297   }
01298 
01299   if ( !p )
01300   {
01301     if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01302   }
01303   if ( *p == '>' )
01304     return p+1;
01305   return p;
01306 }
01307 
#ifdef TIXML_USE_STL
// Copies characters from `in` onto the end of `tag` until the text ends in
// "-->" or the stream is no longer good(). A get() result <= 0 reports
// TIXML_ERROR_EMBEDDED_NULL to the document (when there is one) and stops.
void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
{
  while ( in->good() )
  {
    int c = in->get();
    if ( c <= 0 )
    {
      TiXmlDocument* document = GetDocument();
      if ( document )
        document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
      return;
    }

    (*tag) += (char) c;

    // The terminator test looks two and three characters back from the
    // end, so it is only meaningful once the tag holds at least three
    // characters. Without this guard a shorter tag makes the unsigned
    // index computation wrap around.
    const size_t len = tag->length();
    if ( c == '>'
       && len >= 3
       && (*tag)[len-2] == '-'
       && (*tag)[len-3] == '-' )
    {
      // All is well.
      return;
    }
  }
}
#endif
01334 
01335 
01336 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01337 {
01338   TiXmlDocument* document = GetDocument();
01339   value = "";
01340 
01341   p = SkipWhiteSpace( p, encoding );
01342 
01343   if ( data )
01344   {
01345     data->Stamp( p, encoding );
01346     location = data->Cursor();
01347   }
01348   const char* startTag = "<!--";
01349   const char* endTag   = "-->";
01350 
01351   if ( !StringEqual( p, startTag, false, encoding ) )
01352   {
01353     document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01354     return 0;
01355   }
01356   p += strlen( startTag );
01357 
01358   // [ 1475201 ] TinyXML parses entities in comments
01359   // Oops - ReadText doesn't work, because we don't want to parse the entities.
01360   // p = ReadText( p, &value, false, endTag, false, encoding );
01361   //
01362   // from the XML spec:
01363   /*
01364    [Definition: Comments may appear anywhere in a document outside other markup; in addition, 
01365                 they may appear within the document type declaration at places allowed by the grammar. 
01366           They are not part of the document's character data; an XML processor MAY, but need not, 
01367           make it possible for an application to retrieve the text of comments. For compatibility, 
01368           the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 
01369           references MUST NOT be recognized within comments.
01370 
01371           An example of a comment:
01372 
01373           <!-- declarations for <head> & <body> -->
01374   */
01375 
01376     value = "";
01377   // Keep all the white space.
01378   while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01379   {
01380     value.append( p, 1 );
01381     ++p;
01382   }
01383   if ( p && *p ) 
01384     p += strlen( endTag );
01385 
01386   return p;
01387 }
01388 
01389 
01390 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01391 {
01392   p = SkipWhiteSpace( p, encoding );
01393   if ( !p || !*p ) return 0;
01394 
01395   if ( data )
01396   {
01397     data->Stamp( p, encoding );
01398     location = data->Cursor();
01399   }
01400   // Read the name, the '=' and the value.
01401   const char* pErr = p;
01402   p = ReadName( p, &name, encoding );
01403   if ( !p || !*p )
01404   {
01405     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01406     return 0;
01407   }
01408   p = SkipWhiteSpace( p, encoding );
01409   if ( !p || !*p || *p != '=' )
01410   {
01411     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01412     return 0;
01413   }
01414 
01415   ++p;  // skip '='
01416   p = SkipWhiteSpace( p, encoding );
01417   if ( !p || !*p )
01418   {
01419     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01420     return 0;
01421   }
01422   
01423   const char* end;
01424   const char SINGLE_QUOTE = '\'';
01425   const char DOUBLE_QUOTE = '\"';
01426 
01427   if ( *p == SINGLE_QUOTE )
01428   {
01429     ++p;
01430     end = "\'";   // single quote in string
01431     p = ReadText( p, &value, false, end, false, encoding );
01432   }
01433   else if ( *p == DOUBLE_QUOTE )
01434   {
01435     ++p;
01436     end = "\"";   // double quote in string
01437     p = ReadText( p, &value, false, end, false, encoding );
01438   }
01439   else
01440   {
01441     // All attribute values should be in single or double quotes.
01442     // But this is such a common error that the parser will try
01443     // its best, even without them.
01444     value = "";
01445     while (    p && *p                      // existence
01446         && !IsWhiteSpace( *p )                // whitespace
01447         && *p != '/' && *p != '>' )             // tag end
01448     {
01449       if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01450         // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01451         // We did not have an opening quote but seem to have a 
01452         // closing one. Give up and throw an error.
01453         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01454         return 0;
01455       }
01456       value += *p;
01457       ++p;
01458     }
01459   }
01460   return p;
01461 }
01462 
#ifdef TIXML_USE_STL
// Copies text from `in` onto the end of `tag`.
//  - Ordinary text stops in front of the next '<', which stays in the stream.
//  - CDATA text stops after a '>' that directly follows "]]".
// A peek() result <= 0 reports TIXML_ERROR_EMBEDDED_NULL to the document
// (when there is one) and stops.
void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
{
  for ( ; in->good(); )
  {
    const int next = in->peek();

    // Start of markup ends ordinary text; the '<' is not consumed.
    if ( next == '<' && !cdata )
      return;

    if ( next <= 0 )
    {
      TiXmlDocument* const doc = GetDocument();
      if ( doc )
        doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
      return;
    }

    (*tag) += (char) next;
    in->get();  // consume the character that was just peeked

    if ( !cdata || next != '>' )
      continue;

    // In CDATA, a '>' preceded by "]]" is the terminator.
    const size_t total = tag->size();
    if ( total >= 3 && (*tag)[total-2] == ']' && (*tag)[total-3] == ']' )
      return;
  }
}
#endif
01494 
01495 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01496 {
01497   value = "";
01498   TiXmlDocument* document = GetDocument();
01499 
01500   if ( data )
01501   {
01502     data->Stamp( p, encoding );
01503     location = data->Cursor();
01504   }
01505 
01506   const char* const startTag = "<![CDATA[";
01507   const char* const endTag   = "]]>";
01508 
01509   if ( cdata || StringEqual( p, startTag, false, encoding ) )
01510   {
01511     cdata = true;
01512 
01513     if ( !StringEqual( p, startTag, false, encoding ) )
01514     {
01515       document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01516       return 0;
01517     }
01518     p += strlen( startTag );
01519 
01520     // Keep all the white space, ignore the encoding, etc.
01521     while (    p && *p
01522         && !StringEqual( p, endTag, false, encoding )
01523         )
01524     {
01525       value += *p;
01526       ++p;
01527     }
01528 
01529     TIXML_STRING dummy; 
01530     p = ReadText( p, &dummy, false, endTag, false, encoding );
01531     return p;
01532   }
01533   else
01534   {
01535     bool ignoreWhite = true;
01536 
01537     const char* end = "<";
01538     p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01539     if ( p )
01540       return p-1; // don't truncate the '<'
01541     return 0;
01542   }
01543 }
01544 
#ifdef TIXML_USE_STL
// Copies characters from `in` onto the end of `tag`, up to and including
// the first '>'. Stops early when the stream is no longer good(), or when
// get() yields a value <= 0, in which case TIXML_ERROR_EMBEDDED_NULL is
// reported to the document (when there is one).
void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
{
  bool closed = false;
  while ( !closed && in->good() )
  {
    const int ch = in->get();
    if ( ch <= 0 )
    {
      TiXmlDocument* const doc = GetDocument();
      if ( doc )
        doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
      return;
    }

    (*tag) += (char) ch;

    // The closing bracket ends the declaration.
    closed = ( ch == '>' );
  }
}
#endif
01568 
01569 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01570 {
01571   p = SkipWhiteSpace( p, _encoding );
01572   // Find the beginning, find the end, and look for
01573   // the stuff in-between.
01574   TiXmlDocument* document = GetDocument();
01575   if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01576   {
01577     if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01578     return 0;
01579   }
01580   if ( data )
01581   {
01582     data->Stamp( p, _encoding );
01583     location = data->Cursor();
01584   }
01585   p += 5;
01586 
01587   version = "";
01588   encoding = "";
01589   standalone = "";
01590 
01591   while ( p && *p )
01592   {
01593     if ( *p == '>' )
01594     {
01595       ++p;
01596       return p;
01597     }
01598 
01599     p = SkipWhiteSpace( p, _encoding );
01600     if ( StringEqual( p, "version", true, _encoding ) )
01601     {
01602       TiXmlAttribute attrib;
01603       p = attrib.Parse( p, data, _encoding );   
01604       version = attrib.Value();
01605     }
01606     else if ( StringEqual( p, "encoding", true, _encoding ) )
01607     {
01608       TiXmlAttribute attrib;
01609       p = attrib.Parse( p, data, _encoding );   
01610       encoding = attrib.Value();
01611     }
01612     else if ( StringEqual( p, "standalone", true, _encoding ) )
01613     {
01614       TiXmlAttribute attrib;
01615       p = attrib.Parse( p, data, _encoding );   
01616       standalone = attrib.Value();
01617     }
01618     else
01619     {
01620       // Read over whatever it is.
01621       while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01622         ++p;
01623     }
01624   }
01625   return 0;
01626 }
01627 
01628 bool TiXmlText::Blank() const
01629 {
01630   for ( unsigned i=0; i<value.length(); i++ )
01631     if ( !IsWhiteSpace( value[i] ) )
01632       return false;
01633   return true;
01634 }
01635 
 All Classes Functions Variables Enumerations