24 TextEncoder::Encoding TextEncoder::_default_encoding = TextEncoder::E_utf8;
34 for (si = _wtext.begin(); si != _wtext.end(); ++si) {
37 _flags &= ~F_got_text;
49 for (si = _wtext.begin(); si != _wtext.end(); ++si) {
52 _flags &= ~F_got_text;
73 wstring::const_iterator si;
74 for (si = _wtext.begin(); si != _wtext.end(); ++si) {
75 wchar_t character = (*si);
79 if (map_entry !=
nullptr && map_entry->_ascii_equiv != 0) {
80 result += (wchar_t)map_entry->_ascii_equiv;
81 if (map_entry->_ascii_additional != 0) {
82 result += (wchar_t)map_entry->_ascii_additional;
101 wstring::const_iterator ti;
102 for (ti = _wtext.begin(); ti != _wtext.end(); ++ti) {
103 if (((*ti) & ~0x7f) != 0) {
116 encode_wchar(char32_t ch, TextEncoder::Encoding encoding) {
119 if ((ch & ~0xff) == 0) {
120 return string(1, (
char)ch);
127 if (map_entry !=
nullptr && map_entry->_ascii_equiv != 0) {
129 if (map_entry->_ascii_additional != 0) {
132 string(1, map_entry->_ascii_equiv) +
133 string(1, map_entry->_ascii_additional);
135 return string(1, map_entry->_ascii_equiv);
142 if ((ch & ~0x7f) == 0) {
143 return string(1, (
char)ch);
144 }
else if ((ch & ~0x7ff) == 0) {
146 string(1, (
char)((ch >> 6) | 0xc0)) +
147 string(1, (
char)((ch & 0x3f) | 0x80));
148 }
else if ((ch & ~0xffff) == 0) {
150 string(1, (
char)((ch >> 12) | 0xe0)) +
151 string(1, (
char)(((ch >> 6) & 0x3f) | 0x80)) +
152 string(1, (
char)((ch & 0x3f) | 0x80));
155 string(1, (
char)((ch >> 18) | 0xf0)) +
156 string(1, (
char)(((ch >> 12) & 0x3f) | 0x80)) +
157 string(1, (
char)(((ch >> 6) & 0x3f) | 0x80)) +
158 string(1, (
char)((ch & 0x3f) | 0x80));
162 if ((ch & ~0xffff) == 0) {
165 string(1, (
char)(ch >> 8)) +
166 string(1, (
char)(ch & 0xff));
169 uint32_t v = (uint32_t)ch - 0x10000u;
170 uint16_t hi = (v >> 10u) | 0xd800u;
171 uint16_t lo = (v & 0x3ffu) | 0xdc00u;
178 return string(encoded, 4);
190 encode_wtext(
const wstring &wtext, TextEncoder::Encoding encoding) {
193 for (
size_t i = 0; i < wtext.size(); ++i) {
194 wchar_t ch = wtext[i];
197 #if WCHAR_MAX < 0x10FFFF
198 if (ch >= 0xd800 && ch < 0xdc00 && (i + 1) < wtext.size()) {
200 wchar_t ch2 = wtext[i + 1];
201 if (ch2 >= 0xdc00 && ch2 < 0xe000) {
203 char32_t code_point = 0x10000 + ((ch - 0xd800) << 10) + (ch2 - 0xdc00);
222 decode_text(
const string &text, TextEncoder::Encoding encoding) {
227 return decode_text_impl(decoder);
233 return decode_text_impl(decoder);
240 return decode_text_impl(decoder);
249 wstring TextEncoder::
255 while (!decoder.
is_eof()) {
262 if (character <= WCHAR_MAX) {
266 uint32_t v = (uint32_t)character - 0x10000u;
267 result += (wchar_t)((v >> 10u) | 0xd800u);
268 result += (wchar_t)((v & 0x3ffu) | 0xdc00u);
375 operator << (ostream &out, TextEncoder::Encoding encoding) {
377 case TextEncoder::E_iso8859:
378 return out <<
"iso8859";
380 case TextEncoder::E_utf8:
381 return out <<
"utf8";
383 case TextEncoder::E_utf16be:
384 return out <<
"utf16be";
387 return out <<
"**invalid TextEncoder::Encoding(" << (int)encoding <<
")**";
394 operator >> (istream &in, TextEncoder::Encoding &encoding) {
398 if (word ==
"iso8859") {
399 encoding = TextEncoder::E_iso8859;
400 }
else if (word ==
"utf8" || word ==
"utf-8") {
401 encoding = TextEncoder::E_utf8;
402 }
else if (word ==
"unicode" || word ==
"utf16be" || word ==
"utf-16be" ||
403 word ==
"utf16-be" || word ==
"utf-16-be") {
404 encoding = TextEncoder::E_utf16be;
407 if (notify_ptr !=
nullptr) {
409 <<
"Invalid TextEncoder::Encoding: " << word <<
"\n";
411 encoding = TextEncoder::E_iso8859;
The base class to a family of classes that decode various kinds of encoded byte streams.
bool is_eof()
Returns true if the decoder has returned the last character in the string, false if there are more to come.
virtual char32_t get_next_character()
Returns the next character in sequence.
static std::ostream * get_notify_ptr()
Returns the ostream that is used to write error messages to.
This decoder extracts characters two at a time to get a plain wide character sequence.
This decoder extracts utf-8 sequences.
std::wstring decode_text(const std::string &text) const
Returns the given encoded string decoded to a wide-character string, via the current encoding system.
static std::string encode_wchar(char32_t ch, Encoding encoding)
Encodes a single Unicode character into a one-, two-, three-, or four-byte string, according to the given encoding system.
static int unicode_tolower(char32_t character)
Returns the lowercase equivalent of the given Unicode character.
bool is_wtext() const
Returns true if any of the characters in the string returned by get_wtext() are out of the range of an ASCII character.
static int unicode_toupper(char32_t character)
Returns the uppercase equivalent of the given Unicode character.
std::wstring get_wtext_as_ascii() const
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII representation.
const std::wstring & get_wtext() const
Returns the text associated with the TextEncoder, as a wide-character string.
void make_lower()
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctly).
void make_upper()
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctly).
std::string encode_wtext(const std::wstring &wtext) const
Encodes a wide-text string into a single-char string, according to the current encoding.
static const Entry * look_up(char32_t character)
Returns the Entry associated with the indicated character, if there is one.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.