23 _encoding = _default_encoding;
28 _flags = (F_got_text | F_got_wtext);
39 _encoding(copy._encoding),
86 _default_encoding = encoding;
98 return _default_encoding;
113 if (!has_text() || _text != text) {
115 _flags = (_flags | F_got_text) & ~F_got_wtext;
130 set_text(
const string &text, TextEncoder::Encoding encoding) {
143 _flags |= (F_got_text | F_got_wtext);
151 INLINE
bool TextEncoder::
153 if (_flags & F_got_wtext) {
154 return !_wtext.empty();
156 return !_text.empty();
168 if ((_flags & F_got_text) == 0) {
195 _flags = (_flags | F_got_text) & ~F_got_wtext;
207 _wtext =
get_wtext() + wstring(1, (
wchar_t)character);
208 _flags = (_flags | F_got_wtext) & ~F_got_text;
234 if (index >= 0 && index < (
int)_wtext.length()) {
235 return _wtext[index];
251 if (index >= 0 && index < (
int)_wtext.length()) {
252 _wtext[index] = character;
253 _flags &= ~F_got_text;
315 TextEncoder::Encoding to) {
332 return entry->_char_type == UnicodeLatinMap::CT_upper ||
333 entry->_char_type == UnicodeLatinMap::CT_lower;
348 return (character >=
'0' && character <=
'9');
351 return (isdigit(entry->_ascii_equiv) != 0);
366 return (character >= 0 && character < 128 && ispunct(character));
368 return entry->_char_type == UnicodeLatinMap::CT_punct;
384 return entry->_char_type == UnicodeLatinMap::CT_upper;
420 return entry->_char_type == UnicodeLatinMap::CT_lower;
436 return entry->_toupper_character;
452 return entry->_tolower_character;
473 upper(
const string &source, TextEncoder::Encoding encoding) {
499 lower(
const string &source, TextEncoder::Encoding encoding) {
517 if (!has_text() || _wtext != wtext) {
519 _flags = (_flags | F_got_wtext) & ~F_got_text;
531 if ((_flags & F_got_wtext) == 0) {
547 _flags = (_flags | F_got_wtext) & ~F_got_text;
578 operator << (ostream &out,
const wstring &str) {
void append_wtext(const wstring &text)
Appends the indicated string to the end of the stored wide-character text.
const wstring & get_wtext() const
Returns the text associated with the TextEncoder, as a wide-character string.
void append_text(const string &text)
Appends the indicated string to the end of the stored text.
int get_num_chars() const
Returns the number of characters in the stored text.
static bool unicode_ispunct(int character)
Returns true if the indicated character is a punctuation mark, false otherwise.
This class can be used to convert text between multiple representations, e.g.
static bool unicode_isspace(int character)
Returns true if the indicated character is a whitespace character, false otherwise.
string encode_wtext(const wstring &wtext) const
Encodes a wide-text string into a single-char string, according to the current encoding.
static Encoding get_default_encoding()
Returns the default encoding to be used for all subsequently created TextEncoder objects...
static string upper(const string &source)
Converts the string to uppercase, assuming the string is encoded in the default encoding.
string get_text() const
Returns the current text, as encoded via the current encoding system.
void make_lower()
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctl...
static bool unicode_isdigit(int character)
Returns true if the indicated character is a numeric digit, false otherwise.
wstring get_wtext_as_ascii() const
Returns the text associated with the TextEncoder, converted as nearly as possible to a fully-ASCII represent...
void clear_text()
Removes the text from the TextEncoder.
static int unicode_tolower(int character)
Returns the lowercase equivalent of the given Unicode character.
static bool unicode_islower(int character)
Returns true if the indicated character is a lowercase letter, false otherwise.
string get_text_as_ascii() const
Returns the text associated with the TextEncoder, converted as nearly as possible to a fully-ASCII represent...
void append_unicode_char(int character)
Appends a single character to the end of the stored text.
int get_unicode_char(int index) const
Returns the Unicode value of the nth character in the stored text.
static bool unicode_isupper(int character)
Returns true if the indicated character is an uppercase letter, false otherwise.
static string reencode_text(const string &text, Encoding from, Encoding to)
Given the indicated text string, which is assumed to be encoded via the encoding "from", decodes it and then reencodes it into the encoding "to", and returns the newly encoded string.
wstring decode_text(const string &text) const
Returns the given single-byte string decoded to a wide-character string, via the current encoding system...
void set_unicode_char(int index, int character)
Sets the Unicode value of the nth character in the stored text.
void set_encoding(Encoding encoding)
Specifies how the string set via set_text() is to be interpreted.
void set_text(const string &text)
Changes the text that is stored in the encoder.
string get_encoded_char(int index) const
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string.
static const Entry * look_up(wchar_t character)
Returns the Entry associated with the indicated character, if there is one.
static string lower(const string &source)
Converts the string to lowercase, assuming the string is encoded in the default encoding.
Encoding get_encoding() const
Returns the encoding by which the string set via set_text() is to be interpreted. ...
void make_upper()
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctl...
void set_wtext(const wstring &wtext)
Changes the text that is stored in the encoder.
static bool unicode_isalpha(int character)
Returns true if the indicated character is an alphabetic letter, false otherwise. ...
static int unicode_toupper(int character)
Returns the uppercase equivalent of the given Unicode character.
static void set_default_encoding(Encoding encoding)
Specifies the default encoding to be used for all subsequently created TextEncoder objects...