Panda3D
|
00001 // Filename: textEncoder.I 00002 // Created by: drose (26Mar03) 00003 // 00004 //////////////////////////////////////////////////////////////////// 00005 // 00006 // PANDA 3D SOFTWARE 00007 // Copyright (c) Carnegie Mellon University. All rights reserved. 00008 // 00009 // All use of this software is subject to the terms of the revised BSD 00010 // license. You should have received a copy of this license along 00011 // with this source code in a file named "LICENSE." 00012 // 00013 //////////////////////////////////////////////////////////////////// 00014 00015 00016 //////////////////////////////////////////////////////////////////// 00017 // Function: TextEncoder::Constructor 00018 // Access: Published 00019 // Description: 00020 //////////////////////////////////////////////////////////////////// 00021 INLINE TextEncoder:: 00022 TextEncoder() { 00023 _encoding = _default_encoding; 00024 00025 // Initially, since the text string is empty, we know that both 00026 // _text and _wtext accurately reflect the empty state; so we "got" 00027 // both of them. 00028 _flags = (F_got_text | F_got_wtext); 00029 } 00030 00031 //////////////////////////////////////////////////////////////////// 00032 // Function: TextEncoder::Copy Constructor 00033 // Access: Published 00034 // Description: 00035 //////////////////////////////////////////////////////////////////// 00036 INLINE TextEncoder:: 00037 TextEncoder(const TextEncoder ©) : 00038 _flags(copy._flags), 00039 _encoding(copy._encoding), 00040 _text(copy._text), 00041 _wtext(copy._wtext) 00042 { 00043 } 00044 00045 //////////////////////////////////////////////////////////////////// 00046 // Function: TextEncoder::set_encoding 00047 // Access: Published 00048 // Description: Specifies how the string set via set_text() is to be 00049 // interpreted. The default, E_iso8859, means a 00050 // standard string with one-byte characters 00051 // (i.e. ASCII). Other encodings are possible to take 00052 // advantage of character sets with more than 256 00053 // characters. 00054 // 00055 // This affects only future calls to set_text(); it does 00056 // not change text that was set previously. 00057 //////////////////////////////////////////////////////////////////// 00058 INLINE void TextEncoder:: 00059 set_encoding(TextEncoder::Encoding encoding) { 00060 // Force the previously-set strings to be encoded or decoded now. 00061 get_text(); 00062 get_wtext(); 00063 _encoding = encoding; 00064 } 00065 00066 //////////////////////////////////////////////////////////////////// 00067 // Function: TextEncoder::get_encoding 00068 // Access: Published 00069 // Description: Returns the encoding by which the string set via 00070 // set_text() is to be interpreted. See set_encoding(). 00071 //////////////////////////////////////////////////////////////////// 00072 INLINE TextEncoder::Encoding TextEncoder:: 00073 get_encoding() const { 00074 return _encoding; 00075 } 00076 00077 //////////////////////////////////////////////////////////////////// 00078 // Function: TextEncoder::set_default_encoding 00079 // Access: Published, Static 00080 // Description: Specifies the default encoding to be used for all 00081 // subsequently created TextEncoder objects. See 00082 // set_encoding(). 00083 //////////////////////////////////////////////////////////////////// 00084 INLINE void TextEncoder:: 00085 set_default_encoding(TextEncoder::Encoding encoding) { 00086 _default_encoding = encoding; 00087 } 00088 00089 //////////////////////////////////////////////////////////////////// 00090 // Function: TextEncoder::get_default_encoding 00091 // Access: Published, Static 00092 // Description: Specifies the default encoding to be used for all 00093 // subsequently created TextEncoder objects. See 00094 // set_encoding(). 00095 //////////////////////////////////////////////////////////////////// 00096 INLINE TextEncoder::Encoding TextEncoder:: 00097 get_default_encoding() { 00098 return _default_encoding; 00099 } 00100 00101 //////////////////////////////////////////////////////////////////// 00102 // Function: TextEncoder::set_text 00103 // Access: Published 00104 // Description: Changes the text that is stored in the encoder. The 00105 // text should be encoded according to the method 00106 // indicated by set_encoding(). Subsequent calls to 00107 // get_text() will return this same string, while 00108 // get_wtext() will return the decoded version of the 00109 // string. 00110 //////////////////////////////////////////////////////////////////// 00111 INLINE void TextEncoder:: 00112 set_text(const string &text) { 00113 if (!has_text() || _text != text) { 00114 _text = text; 00115 _flags = (_flags | F_got_text) & ~F_got_wtext; 00116 } 00117 } 00118 00119 //////////////////////////////////////////////////////////////////// 00120 // Function: TextEncoder::set_text 00121 // Access: Published 00122 // Description: The two-parameter version of set_text() accepts an 00123 // explicit encoding; the text is immediately decoded 00124 // and stored as a wide-character string. Subsequent 00125 // calls to get_text() will return the same text 00126 // re-encoded using whichever encoding is specified by 00127 // set_encoding(). 00128 //////////////////////////////////////////////////////////////////// 00129 INLINE void TextEncoder:: 00130 set_text(const string &text, TextEncoder::Encoding encoding) { 00131 set_wtext(decode_text(text, encoding)); 00132 } 00133 00134 //////////////////////////////////////////////////////////////////// 00135 // Function: TextEncoder::clear_text 00136 // Access: Published 00137 // Description: Removes the text from the TextEncoder. 00138 //////////////////////////////////////////////////////////////////// 00139 INLINE void TextEncoder:: 00140 clear_text() { 00141 _text = string(); 00142 _wtext = wstring(); 00143 _flags |= (F_got_text | F_got_wtext); 00144 } 00145 00146 //////////////////////////////////////////////////////////////////// 00147 // Function: TextEncoder::has_text 00148 // Access: Published 00149 // Description: 00150 //////////////////////////////////////////////////////////////////// 00151 INLINE bool TextEncoder:: 00152 has_text() const { 00153 if (_flags & F_got_wtext) { 00154 return !_wtext.empty(); 00155 } else { 00156 return !_text.empty(); 00157 } 00158 } 00159 00160 //////////////////////////////////////////////////////////////////// 00161 // Function: TextEncoder::get_text 00162 // Access: Published 00163 // Description: Returns the current text, as encoded via the current 00164 // encoding system. 00165 //////////////////////////////////////////////////////////////////// 00166 INLINE string TextEncoder:: 00167 get_text() const { 00168 if ((_flags & F_got_text) == 0) { 00169 ((TextEncoder *)this)->_text = encode_wtext(_wtext); 00170 ((TextEncoder *)this)->_flags |= F_got_text; 00171 } 00172 return _text; 00173 } 00174 00175 //////////////////////////////////////////////////////////////////// 00176 // Function: TextEncoder::get_text 00177 // Access: Published 00178 // Description: Returns the current text, as encoded via the indicated 00179 // encoding system. 00180 //////////////////////////////////////////////////////////////////// 00181 INLINE string TextEncoder:: 00182 get_text(TextEncoder::Encoding encoding) const { 00183 return encode_wtext(get_wtext(), encoding); 00184 } 00185 00186 //////////////////////////////////////////////////////////////////// 00187 // Function: TextEncoder::append_text 00188 // Access: Published 00189 // Description: Appends the indicates string to the end of the stored 00190 // text. 00191 //////////////////////////////////////////////////////////////////// 00192 INLINE void TextEncoder:: 00193 append_text(const string &text) { 00194 _text = get_text() + text; 00195 _flags = (_flags | F_got_text) & ~F_got_wtext; 00196 } 00197 00198 //////////////////////////////////////////////////////////////////// 00199 // Function: TextEncoder::append_unicode_char 00200 // Access: Published 00201 // Description: Appends a single character to the end of the stored 00202 // text. This may be a wide character, up to 16 bits in 00203 // Unicode. 00204 //////////////////////////////////////////////////////////////////// 00205 INLINE void TextEncoder:: 00206 append_unicode_char(int character) { 00207 _wtext = get_wtext() + wstring(1, (wchar_t)character); 00208 _flags = (_flags | F_got_wtext) & ~F_got_text; 00209 } 00210 00211 //////////////////////////////////////////////////////////////////// 00212 // Function: TextEncoder::get_num_chars 00213 // Access: Published 00214 // Description: Returns the number of characters in the stored text. 00215 // This is a count of wide characters, after the string 00216 // has been decoded according to set_encoding(). 00217 //////////////////////////////////////////////////////////////////// 00218 INLINE int TextEncoder:: 00219 get_num_chars() const { 00220 return get_wtext().length(); 00221 } 00222 00223 //////////////////////////////////////////////////////////////////// 00224 // Function: TextEncoder::get_unicode_char 00225 // Access: Published 00226 // Description: Returns the Unicode value of the nth character in the 00227 // stored text. This may be a wide character (greater 00228 // than 255), after the string has been decoded 00229 // according to set_encoding(). 00230 //////////////////////////////////////////////////////////////////// 00231 INLINE int TextEncoder:: 00232 get_unicode_char(int index) const { 00233 get_wtext(); 00234 nassertr(index >= 0 && index < (int)_wtext.length(), 0); 00235 return _wtext[index]; 00236 } 00237 00238 //////////////////////////////////////////////////////////////////// 00239 // Function: TextEncoder::set_unicode_char 00240 // Access: Published 00241 // Description: Sets the Unicode value of the nth character in the 00242 // stored text. This may be a wide character (greater 00243 // than 255), after the string has been decoded 00244 // according to set_encoding(). 00245 //////////////////////////////////////////////////////////////////// 00246 INLINE void TextEncoder:: 00247 set_unicode_char(int index, int character) { 00248 get_wtext(); 00249 nassertv(index >= 0 && index < (int)_wtext.length()); 00250 _wtext[index] = character; 00251 _flags &= ~F_got_text; 00252 } 00253 00254 //////////////////////////////////////////////////////////////////// 00255 // Function: TextEncoder::get_encoded_char 00256 // Access: Published 00257 // Description: Returns the nth char of the stored text, as a one-, 00258 // two-, or three-byte encoded string. 00259 //////////////////////////////////////////////////////////////////// 00260 INLINE string TextEncoder:: 00261 get_encoded_char(int index) const { 00262 return get_encoded_char(index, get_encoding()); 00263 } 00264 00265 //////////////////////////////////////////////////////////////////// 00266 // Function: TextEncoder::get_encoded_char 00267 // Access: Published 00268 // Description: Returns the nth char of the stored text, as a one-, 00269 // two-, or three-byte encoded string. 00270 //////////////////////////////////////////////////////////////////// 00271 INLINE string TextEncoder:: 00272 get_encoded_char(int index, TextEncoder::Encoding encoding) const { 00273 wstring wch(1, (wchar_t)get_unicode_char(index)); 00274 return encode_wtext(wch, encoding); 00275 } 00276 00277 //////////////////////////////////////////////////////////////////// 00278 // Function: TextEncoder::get_text_as_ascii 00279 // Access: Published 00280 // Description: Returns the text associated with the node, converted 00281 // as nearly as possible to a fully-ASCII 00282 // representation. This means replacing accented 00283 // letters with their unaccented ASCII equivalents. 00284 // 00285 // It is possible that some characters in the string 00286 // cannot be converted to ASCII. (The string may 00287 // involve symbols like the copyright symbol, for 00288 // instance, or it might involve letters in some other 00289 // alphabet such as Greek or Cyrillic, or even Latin 00290 // letters like thorn or eth that are not part of the 00291 // ASCII character set.) In this case, as much of the 00292 // string as possible will be converted to ASCII, and 00293 // the nonconvertible characters will remain encoded in 00294 // the encoding specified by set_encoding(). 00295 //////////////////////////////////////////////////////////////////// 00296 INLINE string TextEncoder:: 00297 get_text_as_ascii() const { 00298 return encode_wtext(get_wtext_as_ascii()); 00299 } 00300 00301 //////////////////////////////////////////////////////////////////// 00302 // Function: TextEncoder::reencode_text 00303 // Access: Published, Static 00304 // Description: Given the indicated text string, which is assumed to 00305 // be encoded via the encoding "from", decodes it and 00306 // then reencodes it into the encoding "to", and returns 00307 // the newly encoded string. This does not change or 00308 // affect any properties on the TextEncoder itself. 00309 //////////////////////////////////////////////////////////////////// 00310 INLINE string TextEncoder:: 00311 reencode_text(const string &text, TextEncoder::Encoding from, 00312 TextEncoder::Encoding to) { 00313 return encode_wtext(decode_text(text, from), to); 00314 } 00315 00316 //////////////////////////////////////////////////////////////////// 00317 // Function: TextEncoder::unicode_isalpha 00318 // Access: Published, Static 00319 // Description: Returns true if the indicated character is an 00320 // alphabetic letter, false otherwise. This is akin to 00321 // ctype's isalpha(), extended to Unicode. 00322 //////////////////////////////////////////////////////////////////// 00323 INLINE bool TextEncoder:: 00324 unicode_isalpha(int character) { 00325 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00326 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00327 return false; 00328 } 00329 return entry->_char_type == UnicodeLatinMap::CT_upper || 00330 entry->_char_type == UnicodeLatinMap::CT_lower; 00331 } 00332 00333 //////////////////////////////////////////////////////////////////// 00334 // Function: TextEncoder::unicode_isdigit 00335 // Access: Published, Static 00336 // Description: Returns true if the indicated character is a 00337 // numeric digit, false otherwise. This is akin to 00338 // ctype's isdigit(), extended to Unicode. 00339 //////////////////////////////////////////////////////////////////// 00340 INLINE bool TextEncoder:: 00341 unicode_isdigit(int character) { 00342 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00343 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00344 // The digits aren't actually listed in the map. 00345 return (character >= '0' && character <= '9'); 00346 } 00347 // This silly test (!= 0) is necessary to prevent a VC++ warning. 00348 return (isdigit(entry->_ascii_equiv) != 0); 00349 } 00350 00351 //////////////////////////////////////////////////////////////////// 00352 // Function: TextEncoder::unicode_ispunct 00353 // Access: Published, Static 00354 // Description: Returns true if the indicated character is a 00355 // punctuation mark, false otherwise. This is akin to 00356 // ctype's ispunct(), extended to Unicode. 00357 //////////////////////////////////////////////////////////////////// 00358 INLINE bool TextEncoder:: 00359 unicode_ispunct(int character) { 00360 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00361 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00362 // Some punctuation marks aren't listed in the map. 00363 return (character >= 0 && character < 128 && ispunct(character)); 00364 } 00365 return entry->_char_type == UnicodeLatinMap::CT_punct; 00366 } 00367 00368 //////////////////////////////////////////////////////////////////// 00369 // Function: TextEncoder::unicode_isupper 00370 // Access: Published, Static 00371 // Description: Returns true if the indicated character is an 00372 // uppercase letter, false otherwise. This is akin to 00373 // ctype's isupper(), extended to Unicode. 00374 //////////////////////////////////////////////////////////////////// 00375 INLINE bool TextEncoder:: 00376 unicode_isupper(int character) { 00377 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00378 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00379 return false; 00380 } 00381 return entry->_char_type == UnicodeLatinMap::CT_upper; 00382 } 00383 00384 //////////////////////////////////////////////////////////////////// 00385 // Function: TextEncoder::unicode_isspace 00386 // Access: Published, Static 00387 // Description: Returns true if the indicated character is a 00388 // whitespace letter, false otherwise. This is akin to 00389 // ctype's isspace(), extended to Unicode. 00390 //////////////////////////////////////////////////////////////////// 00391 INLINE bool TextEncoder:: 00392 unicode_isspace(int character) { 00393 switch (character) { 00394 case ' ': 00395 case '\t': 00396 case '\n': 00397 return true; 00398 00399 default: 00400 return false; 00401 } 00402 } 00403 00404 //////////////////////////////////////////////////////////////////// 00405 // Function: TextEncoder::unicode_islower 00406 // Access: Published, Static 00407 // Description: Returns true if the indicated character is a 00408 // lowercase letter, false otherwise. This is akin to 00409 // ctype's islower(), extended to Unicode. 00410 //////////////////////////////////////////////////////////////////// 00411 INLINE bool TextEncoder:: 00412 unicode_islower(int character) { 00413 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00414 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00415 return false; 00416 } 00417 return entry->_char_type == UnicodeLatinMap::CT_lower; 00418 } 00419 00420 //////////////////////////////////////////////////////////////////// 00421 // Function: TextEncoder::unicode_toupper 00422 // Access: Published, Static 00423 // Description: Returns the uppercase equivalent of the given Unicode 00424 // character. This is akin to ctype's toupper(), 00425 // extended to Unicode. 00426 //////////////////////////////////////////////////////////////////// 00427 INLINE int TextEncoder:: 00428 unicode_toupper(int character) { 00429 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00430 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00431 return character; 00432 } 00433 return entry->_toupper_character; 00434 } 00435 00436 //////////////////////////////////////////////////////////////////// 00437 // Function: TextEncoder::unicode_tolower 00438 // Access: Published, Static 00439 // Description: Returns the uppercase equivalent of the given Unicode 00440 // character. This is akin to ctype's tolower(), 00441 // extended to Unicode. 00442 //////////////////////////////////////////////////////////////////// 00443 INLINE int TextEncoder:: 00444 unicode_tolower(int character) { 00445 const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); 00446 if (entry == (const UnicodeLatinMap::Entry *)NULL) { 00447 return character; 00448 } 00449 return entry->_tolower_character; 00450 } 00451 00452 //////////////////////////////////////////////////////////////////// 00453 // Function: TextEncoder::upper 00454 // Access: Published, Static 00455 // Description: Converts the string to uppercase, assuming the string 00456 // is encoded in the default encoding. 00457 //////////////////////////////////////////////////////////////////// 00458 INLINE string TextEncoder:: 00459 upper(const string &source) { 00460 return upper(source, get_default_encoding()); 00461 } 00462 00463 //////////////////////////////////////////////////////////////////// 00464 // Function: TextEncoder::upper 00465 // Access: Published, Static 00466 // Description: Converts the string to uppercase, assuming the string 00467 // is encoded in the indicated encoding. 00468 //////////////////////////////////////////////////////////////////// 00469 INLINE string TextEncoder:: 00470 upper(const string &source, TextEncoder::Encoding encoding) { 00471 TextEncoder encoder; 00472 encoder.set_encoding(encoding); 00473 encoder.set_text(source); 00474 encoder.make_upper(); 00475 return encoder.get_text(); 00476 } 00477 00478 //////////////////////////////////////////////////////////////////// 00479 // Function: TextEncoder::lower 00480 // Access: Published, Static 00481 // Description: Converts the string to lowercase, assuming the string 00482 // is encoded in the default encoding. 00483 //////////////////////////////////////////////////////////////////// 00484 INLINE string TextEncoder:: 00485 lower(const string &source) { 00486 return lower(source, get_default_encoding()); 00487 } 00488 00489 //////////////////////////////////////////////////////////////////// 00490 // Function: TextEncoder::lower 00491 // Access: Published, Static 00492 // Description: Converts the string to lowercase, assuming the string 00493 // is encoded in the indicated encoding. 00494 //////////////////////////////////////////////////////////////////// 00495 INLINE string TextEncoder:: 00496 lower(const string &source, TextEncoder::Encoding encoding) { 00497 TextEncoder encoder; 00498 encoder.set_encoding(encoding); 00499 encoder.set_text(source); 00500 encoder.make_lower(); 00501 return encoder.get_text(); 00502 } 00503 00504 //////////////////////////////////////////////////////////////////// 00505 // Function: TextEncoder::set_wtext 00506 // Access: Published 00507 // Description: Changes the text that is stored in the encoder. 00508 // Subsequent calls to get_wtext() will return this same 00509 // string, while get_text() will return the encoded 00510 // version of the string. 00511 //////////////////////////////////////////////////////////////////// 00512 INLINE void TextEncoder:: 00513 set_wtext(const wstring &wtext) { 00514 if (!has_text() || _wtext != wtext) { 00515 _wtext = wtext; 00516 _flags = (_flags | F_got_wtext) & ~F_got_text; 00517 } 00518 } 00519 00520 //////////////////////////////////////////////////////////////////// 00521 // Function: TextEncoder::get_wtext 00522 // Access: Published 00523 // Description: Returns the text associated with the TextEncoder, as 00524 // a wide-character string. 00525 //////////////////////////////////////////////////////////////////// 00526 INLINE const wstring &TextEncoder:: 00527 get_wtext() const { 00528 if ((_flags & F_got_wtext) == 0) { 00529 ((TextEncoder *)this)->_wtext = decode_text(_text); 00530 ((TextEncoder *)this)->_flags |= F_got_wtext; 00531 } 00532 return _wtext; 00533 } 00534 00535 //////////////////////////////////////////////////////////////////// 00536 // Function: TextEncoder::append_wtext 00537 // Access: Published 00538 // Description: Appends the indicates string to the end of the stored 00539 // wide-character text. 00540 //////////////////////////////////////////////////////////////////// 00541 INLINE void TextEncoder:: 00542 append_wtext(const wstring &wtext) { 00543 _wtext = get_wtext() + wtext; 00544 _flags = (_flags | F_got_wtext) & ~F_got_text; 00545 } 00546 00547 //////////////////////////////////////////////////////////////////// 00548 // Function: TextEncoder::encode_wtext 00549 // Access: Published 00550 // Description: Encodes a wide-text string into a single-char string, 00551 // according to the current encoding. 00552 //////////////////////////////////////////////////////////////////// 00553 INLINE string TextEncoder:: 00554 encode_wtext(const wstring &wtext) const { 00555 return encode_wtext(wtext, _encoding); 00556 } 00557 00558 //////////////////////////////////////////////////////////////////// 00559 // Function: TextEncoder::decode_text 00560 // Access: Published 00561 // Description: Returns the given wstring decoded to a single-byte 00562 // string, via the current encoding system. 00563 //////////////////////////////////////////////////////////////////// 00564 INLINE wstring TextEncoder:: 00565 decode_text(const string &text) const { 00566 return decode_text(text, _encoding); 00567 }