Panda3D
|
This class can be used to convert text between multiple representations, e.g. utf-8 to Unicode. More...
#include "textEncoder.h"
Public Types | |
enum | Encoding { E_iso8859, E_utf8, E_unicode } |
Public Member Functions | |
TextEncoder (const TextEncoder &copy) | |
void | append_text (const string &text) |
Appends the indicated string to the end of the stored text. | |
void | append_unicode_char (int character) |
Appends a single character to the end of the stored text. | |
void | append_wtext (const wstring &text) |
Appends the indicated string to the end of the stored wide-character text. | |
void | clear_text () |
Removes the text from the TextEncoder. | |
wstring | decode_text (const string &text) const |
Returns the given single-byte string decoded to a wide-character string, via the current encoding system. | |
string | encode_wtext (const wstring &wtext) const |
Encodes a wide-text string into a single-char string, according to the current encoding. | |
string | get_encoded_char (int index) const |
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string. | |
string | get_encoded_char (int index, Encoding encoding) const |
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string. | |
Encoding | get_encoding () const |
Returns the encoding by which the string set via set_text() is to be interpreted. | |
int | get_num_chars () const |
Returns the number of characters in the stored text. | |
string | get_text () const |
Returns the current text, as encoded via the current encoding system. | |
string | get_text (Encoding encoding) const |
Returns the current text, as encoded via the indicated encoding system. | |
string | get_text_as_ascii () const |
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII representation. | |
int | get_unicode_char (int index) const |
Returns the Unicode value of the nth character in the stored text. | |
const wstring & | get_wtext () const |
Returns the text associated with the TextEncoder, as a wide-character string. | |
wstring | get_wtext_as_ascii () const |
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII representation. | |
bool | has_text () const |
bool | is_wtext () const |
Returns true if any of the characters in the string returned by get_wtext() are out of the range of an ASCII character (and, therefore, get_wtext() should be called in preference to get_text()). | |
void | make_lower () |
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctly). | |
void | make_upper () |
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctly). | |
void | set_encoding (Encoding encoding) |
Specifies how the string set via set_text() is to be interpreted. | |
void | set_text (const string &text) |
Changes the text that is stored in the encoder. | |
void | set_text (const string &text, Encoding encoding) |
The two-parameter version of set_text() accepts an explicit encoding; the text is immediately decoded and stored as a wide-character string. | |
void | set_unicode_char (int index, int character) |
Sets the Unicode value of the nth character in the stored text. | |
void | set_wtext (const wstring &wtext) |
Changes the text that is stored in the encoder. | |
Static Public Member Functions | |
static wstring | decode_text (const string &text, Encoding encoding) |
Returns the given single-byte string decoded to a wide-character string, via the given encoding system. | |
static string | encode_wchar (wchar_t ch, Encoding encoding) |
Encodes a single wide char into a one-, two-, or three-byte string, according to the given encoding system. | |
static string | encode_wtext (const wstring &wtext, Encoding encoding) |
Encodes a wide-text string into a single-char string, according to the given encoding. | |
static TypeHandle | get_class_type () |
static Encoding | get_default_encoding () |
Returns the default encoding to be used for all subsequently created TextEncoder objects. | |
static void | init_type () |
static string | lower (const string &source) |
Converts the string to lowercase, assuming the string is encoded in the default encoding. | |
static string | lower (const string &source, Encoding encoding) |
Converts the string to lowercase, assuming the string is encoded in the indicated encoding. | |
static string | reencode_text (const string &text, Encoding from, Encoding to) |
Given the indicated text string, which is assumed to be encoded via the encoding "from", decodes it and then reencodes it into the encoding "to", and returns the newly encoded string. | |
static void | set_default_encoding (Encoding encoding) |
Specifies the default encoding to be used for all subsequently created TextEncoder objects. | |
static bool | unicode_isalpha (int character) |
Returns true if the indicated character is an alphabetic letter, false otherwise. | |
static bool | unicode_isdigit (int character) |
Returns true if the indicated character is a numeric digit, false otherwise. | |
static bool | unicode_islower (int character) |
Returns true if the indicated character is a lowercase letter, false otherwise. | |
static bool | unicode_ispunct (int character) |
Returns true if the indicated character is a punctuation mark, false otherwise. | |
static bool | unicode_isspace (int character) |
Returns true if the indicated character is a whitespace character, false otherwise. | |
static bool | unicode_isupper (int character) |
Returns true if the indicated character is an uppercase letter, false otherwise. | |
static int | unicode_tolower (int character) |
Returns the lowercase equivalent of the given Unicode character. | |
static int | unicode_toupper (int character) |
Returns the uppercase equivalent of the given Unicode character. | |
static string | upper (const string &source, Encoding encoding) |
Converts the string to uppercase, assuming the string is encoded in the indicated encoding. | |
static string | upper (const string &source) |
Converts the string to uppercase, assuming the string is encoded in the default encoding. |
This class can be used to convert text between multiple representations, e.g.
utf-8 to Unicode. You may use it as a static class object, passing the encoding each time, or you may create an instance and use that object, which will record the current encoding and retain the current string.
This class is also a base class of TextNode, which inherits this functionality.
Definition at line 37 of file textEncoder.h.
void TextEncoder::append_text | ( | const string & | text | ) | [inline] |
Appends the indicated string to the end of the stored text.
Reimplemented in TextNode.
Definition at line 193 of file textEncoder.I.
References get_text().
void TextEncoder::append_unicode_char | ( | int | character | ) | [inline] |
Appends a single character to the end of the stored text.
This may be a wide character, up to 16 bits in Unicode.
Definition at line 206 of file textEncoder.I.
References get_wtext().
void TextEncoder::append_wtext | ( | const wstring & | text | ) | [inline] |
Appends the indicated string to the end of the stored wide-character text.
Reimplemented in TextNode.
Definition at line 542 of file textEncoder.I.
References get_wtext().
void TextEncoder::clear_text | ( | ) | [inline] |
Removes the text from the TextEncoder.
Reimplemented in TextNode.
Definition at line 140 of file textEncoder.I.
wstring TextEncoder::decode_text | ( | const string & | text | ) | const [inline] |
Returns the given single-byte string decoded to a wide-character string, via the current encoding system.
Definition at line 565 of file textEncoder.I.
Referenced by TextNode::calc_width(), get_wtext(), ButtonEvent::read_datagram(), reencode_text(), PGEntry::set_text(), and set_text().
wstring TextEncoder::decode_text | ( | const string & | text, |
TextEncoder::Encoding | encoding | ||
) | [static] |
Returns the given single-byte string decoded to a wide-character string, via the given encoding system.
Definition at line 204 of file textEncoder.cxx.
string TextEncoder::encode_wchar | ( | wchar_t | ch, |
TextEncoder::Encoding | encoding | ||
) | [static] |
Encodes a single wide char into a one-, two-, or three-byte string, according to the given encoding system.
Definition at line 132 of file textEncoder.cxx.
References UnicodeLatinMap::look_up().
Referenced by encode_wtext().
string TextEncoder::encode_wtext | ( | const wstring & | wtext | ) | const [inline] |
Encodes a wide-text string into a single-char string, according to the current encoding.
Definition at line 554 of file textEncoder.I.
Referenced by MouseWatcherParameter::get_candidate_string_encoded(), get_encoded_char(), PGEntry::get_plain_text(), PGEntry::get_text(), get_text(), get_text_as_ascii(), TextNode::get_wordwrapped_text(), reencode_text(), and ButtonEvent::write_datagram().
string TextEncoder::encode_wtext | ( | const wstring & | wtext, |
TextEncoder::Encoding | encoding | ||
) | [static] |
Encodes a wide-text string into a single-char string, according to the given encoding.
Definition at line 187 of file textEncoder.cxx.
References encode_wchar().
TextEncoder::Encoding TextEncoder::get_default_encoding | ( | ) | [inline, static] |
Returns the default encoding to be used for all subsequently created TextEncoder objects.
See set_encoding().
Definition at line 97 of file textEncoder.I.
Referenced by MouseWatcherParameter::get_candidate_string_encoded(), lower(), ButtonEvent::read_datagram(), upper(), and ButtonEvent::write_datagram().
string TextEncoder::get_encoded_char | ( | int | index | ) | const [inline] |
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string.
Definition at line 261 of file textEncoder.I.
References get_encoding().
string TextEncoder::get_encoded_char | ( | int | index, |
TextEncoder::Encoding | encoding | ||
) | const [inline] |
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string.
Definition at line 272 of file textEncoder.I.
References encode_wtext(), and get_unicode_char().
TextEncoder::Encoding TextEncoder::get_encoding | ( | ) | const [inline] |
Returns the encoding by which the string set via set_text() is to be interpreted.
See set_encoding().
Definition at line 73 of file textEncoder.I.
Referenced by get_encoded_char().
int TextEncoder::get_num_chars | ( | ) | const [inline] |
Returns the number of characters in the stored text.
This is a count of wide characters, after the string has been decoded according to set_encoding().
Definition at line 219 of file textEncoder.I.
References get_wtext().
string TextEncoder::get_text | ( | ) | const [inline] |
Returns the current text, as encoded via the current encoding system.
Definition at line 167 of file textEncoder.I.
References encode_wtext().
Referenced by append_text(), lower(), set_encoding(), and upper().
string TextEncoder::get_text | ( | TextEncoder::Encoding | encoding | ) | const [inline] |
Returns the current text, as encoded via the indicated encoding system.
Definition at line 182 of file textEncoder.I.
References encode_wtext(), and get_wtext().
string TextEncoder::get_text_as_ascii | ( | ) | const [inline] |
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII representation.
This means replacing accented letters with their unaccented ASCII equivalents.
It is possible that some characters in the string cannot be converted to ASCII. (The string may involve symbols like the copyright symbol, for instance, or it might involve letters in some other alphabet such as Greek or Cyrillic, or even Latin letters like thorn or eth that are not part of the ASCII character set.) In this case, as much of the string as possible will be converted to ASCII, and the nonconvertible characters will remain encoded in the encoding specified by set_encoding().
Definition at line 297 of file textEncoder.I.
References encode_wtext(), and get_wtext_as_ascii().
int TextEncoder::get_unicode_char | ( | int | index | ) | const [inline] |
Returns the Unicode value of the nth character in the stored text.
This may be a wide character (greater than 255), after the string has been decoded according to set_encoding().
Definition at line 232 of file textEncoder.I.
References get_wtext().
Referenced by get_encoded_char().
const wstring & TextEncoder::get_wtext | ( | ) | const [inline] |
Returns the text associated with the TextEncoder, as a wide-character string.
Definition at line 527 of file textEncoder.I.
References decode_text().
Referenced by append_unicode_char(), append_wtext(), PNMTextMaker::calc_width(), PNMTextMaker::generate_into(), get_num_chars(), get_text(), get_unicode_char(), get_wtext_as_ascii(), is_wtext(), make_lower(), make_upper(), set_encoding(), and set_unicode_char().
wstring TextEncoder::get_wtext_as_ascii | ( | ) | const |
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII representation.
This means replacing accented letters with their unaccented ASCII equivalents.
It is possible that some characters in the string cannot be converted to ASCII. (The string may involve symbols like the copyright symbol, for instance, or it might involve letters in some other alphabet such as Greek or Cyrillic, or even Latin letters like thorn or eth that are not part of the ASCII character set.) In this case, as much of the string as possible will be converted to ASCII, and the nonconvertible characters will remain in their original form.
Definition at line 80 of file textEncoder.cxx.
References get_wtext(), and UnicodeLatinMap::look_up().
Referenced by get_text_as_ascii().
bool TextEncoder::is_wtext | ( | ) | const |
Returns true if any of the characters in the string returned by get_wtext() are out of the range of an ASCII character (and, therefore, get_wtext() should be called in preference to get_text()).
Definition at line 112 of file textEncoder.cxx.
References get_wtext().
string TextEncoder::lower | ( | const string & | source | ) | [inline, static] |
Converts the string to lowercase, assuming the string is encoded in the default encoding.
Definition at line 485 of file textEncoder.I.
References get_default_encoding().
string TextEncoder::lower | ( | const string & | source, |
TextEncoder::Encoding | encoding | ||
) | [inline, static] |
Converts the string to lowercase, assuming the string is encoded in the indicated encoding.
Definition at line 496 of file textEncoder.I.
References get_text(), make_lower(), set_encoding(), and set_text().
void TextEncoder::make_lower | ( | ) |
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctly).
Definition at line 51 of file textEncoder.cxx.
References get_wtext(), and unicode_tolower().
Referenced by lower().
void TextEncoder::make_upper | ( | ) |
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctly).
Definition at line 34 of file textEncoder.cxx.
References get_wtext(), and unicode_toupper().
Referenced by upper().
string TextEncoder::reencode_text | ( | const string & | text, |
TextEncoder::Encoding | from, | ||
TextEncoder::Encoding | to | ||
) | [inline, static] |
Given the indicated text string, which is assumed to be encoded via the encoding "from", decodes it and then reencodes it into the encoding "to", and returns the newly encoded string.
This does not change or affect any properties on the TextEncoder itself.
Definition at line 311 of file textEncoder.I.
References decode_text(), and encode_wtext().
void TextEncoder::set_default_encoding | ( | TextEncoder::Encoding | encoding | ) | [inline, static] |
Specifies the default encoding to be used for all subsequently created TextEncoder objects.
See set_encoding().
Definition at line 85 of file textEncoder.I.
void TextEncoder::set_encoding | ( | TextEncoder::Encoding | encoding | ) | [inline] |
Specifies how the string set via set_text() is to be interpreted.
The default, E_iso8859, means a standard string with one-byte characters (i.e. ASCII). Other encodings are possible to take advantage of character sets with more than 256 characters.
This affects only future calls to set_text(); it does not change text that was set previously.
Definition at line 59 of file textEncoder.I.
References get_text(), and get_wtext().
void TextEncoder::set_text | ( | const string & | text | ) | [inline] |
Changes the text that is stored in the encoder.
The text should be encoded according to the method indicated by set_encoding(). Subsequent calls to get_text() will return this same string, while get_wtext() will return the decoded version of the string.
Reimplemented in TextNode.
Definition at line 112 of file textEncoder.I.
Referenced by PNMTextMaker::calc_width(), PNMTextMaker::generate_into(), lower(), and upper().
void TextEncoder::set_text | ( | const string & | text, |
TextEncoder::Encoding | encoding | ||
) | [inline] |
The two-parameter version of set_text() accepts an explicit encoding; the text is immediately decoded and stored as a wide-character string.
Subsequent calls to get_text() will return the same text re-encoded using whichever encoding is specified by set_encoding().
Reimplemented in TextNode.
Definition at line 130 of file textEncoder.I.
References decode_text(), and set_wtext().
void TextEncoder::set_unicode_char | ( | int | index, |
int | character | ||
) | [inline] |
Sets the Unicode value of the nth character in the stored text.
This may be a wide character (greater than 255), after the string has been decoded according to set_encoding().
Definition at line 247 of file textEncoder.I.
References get_wtext().
void TextEncoder::set_wtext | ( | const wstring & | wtext | ) | [inline] |
Changes the text that is stored in the encoder.
Subsequent calls to get_wtext() will return this same string, while get_text() will return the encoded version of the string.
Reimplemented in TextNode.
Definition at line 513 of file textEncoder.I.
Referenced by set_text().
bool TextEncoder::unicode_isalpha | ( | int | character | ) | [inline, static] |
Returns true if the indicated character is an alphabetic letter, false otherwise.
This is akin to ctype's isalpha(), extended to Unicode.
Definition at line 324 of file textEncoder.I.
References UnicodeLatinMap::look_up().
bool TextEncoder::unicode_isdigit | ( | int | character | ) | [inline, static] |
Returns true if the indicated character is a numeric digit, false otherwise.
This is akin to ctype's isdigit(), extended to Unicode.
Definition at line 341 of file textEncoder.I.
References UnicodeLatinMap::look_up().
bool TextEncoder::unicode_islower | ( | int | character | ) | [inline, static] |
Returns true if the indicated character is a lowercase letter, false otherwise.
This is akin to ctype's islower(), extended to Unicode.
Definition at line 412 of file textEncoder.I.
References UnicodeLatinMap::look_up().
bool TextEncoder::unicode_ispunct | ( | int | character | ) | [inline, static] |
Returns true if the indicated character is a punctuation mark, false otherwise.
This is akin to ctype's ispunct(), extended to Unicode.
Definition at line 359 of file textEncoder.I.
References UnicodeLatinMap::look_up().
bool TextEncoder::unicode_isspace | ( | int | character | ) | [inline, static] |
Returns true if the indicated character is a whitespace character, false otherwise.
This is akin to ctype's isspace(), extended to Unicode.
Definition at line 392 of file textEncoder.I.
bool TextEncoder::unicode_isupper | ( | int | character | ) | [inline, static] |
Returns true if the indicated character is an uppercase letter, false otherwise.
This is akin to ctype's isupper(), extended to Unicode.
Definition at line 376 of file textEncoder.I.
References UnicodeLatinMap::look_up().
int TextEncoder::unicode_tolower | ( | int | character | ) | [inline, static] |
Returns the lowercase equivalent of the given Unicode character.
This is akin to ctype's tolower(), extended to Unicode.
Definition at line 444 of file textEncoder.I.
References UnicodeLatinMap::look_up().
Referenced by make_lower().
int TextEncoder::unicode_toupper | ( | int | character | ) | [inline, static] |
Returns the uppercase equivalent of the given Unicode character.
This is akin to ctype's toupper(), extended to Unicode.
Definition at line 428 of file textEncoder.I.
References UnicodeLatinMap::look_up().
Referenced by make_upper().
string TextEncoder::upper | ( | const string & | source | ) | [inline, static] |
Converts the string to uppercase, assuming the string is encoded in the default encoding.
Definition at line 459 of file textEncoder.I.
References get_default_encoding().
string TextEncoder::upper | ( | const string & | source, |
TextEncoder::Encoding | encoding | ||
) | [inline, static] |
Converts the string to uppercase, assuming the string is encoded in the indicated encoding.
Definition at line 470 of file textEncoder.I.
References get_text(), make_upper(), set_encoding(), and set_text().