19 _encoding = _default_encoding;
23 _flags = (F_got_text | F_got_wtext);
32 _encoding(copy._encoding),
70 _default_encoding = encoding;
77 INLINE TextEncoder::Encoding TextEncoder::
78 get_default_encoding() {
79 return _default_encoding;
90 if (!has_text() || _text != text) {
92 _flags = (_flags | F_got_text) & ~F_got_wtext;
104 set_text(
const std::string &text, TextEncoder::Encoding encoding) {
105 if (encoding == _encoding) {
117 _text = std::string();
118 _wtext = std::wstring();
119 _flags |= (F_got_text | F_got_wtext);
126 INLINE
bool TextEncoder::
128 if (_flags & F_got_wtext) {
129 return !_wtext.empty();
131 return !_text.empty();
138 INLINE std::string TextEncoder::
140 if ((_flags & F_got_text) == 0) {
150 INLINE std::string TextEncoder::
151 get_text(TextEncoder::Encoding encoding)
const {
162 _flags = (_flags | F_got_text) & ~F_got_wtext;
173 #if WCHAR_MAX >= 0x10FFFF 175 _wtext =
get_wtext() + std::wstring(1, (
wchar_t)character);
177 if ((character & ~0xffff) == 0) {
178 _wtext =
get_wtext() + std::wstring(1, (
wchar_t)character);
181 uint32_t v = (uint32_t)character - 0x10000u;
183 (wchar_t)((v >> 10u) | 0xd800u),
184 (
wchar_t)((v & 0x3ffu) | 0xdc00u),
186 _wtext =
get_wtext() + std::wstring(wstr, 2);
189 _flags = (_flags | F_got_wtext) & ~F_got_text;
211 if (index < _wtext.length()) {
212 return _wtext[index];
225 if (index < _wtext.length()) {
226 _wtext[index] = character;
227 _flags &= ~F_got_text;
277 TextEncoder::Encoding to) {
288 if (entry ==
nullptr) {
291 return entry->_char_type == UnicodeLatinMap::CT_upper ||
292 entry->_char_type == UnicodeLatinMap::CT_lower;
302 if (entry ==
nullptr) {
304 return (character >=
'0' && character <=
'9');
307 return (isdigit(entry->_ascii_equiv) != 0);
317 if (entry ==
nullptr) {
319 return (character < 128 && ispunct(character));
321 return entry->_char_type == UnicodeLatinMap::CT_punct;
331 if (entry ==
nullptr) {
334 return entry->_char_type == UnicodeLatinMap::CT_upper;
361 if (entry ==
nullptr) {
364 return entry->_char_type == UnicodeLatinMap::CT_lower;
374 if (entry ==
nullptr) {
377 return entry->_toupper_character;
387 if (entry ==
nullptr) {
390 return entry->_tolower_character;
407 upper(
const std::string &source, TextEncoder::Encoding encoding) {
429 lower(
const std::string &source, TextEncoder::Encoding encoding) {
444 if (!has_text() || _wtext != wtext) {
446 _flags = (_flags | F_got_wtext) & ~F_got_text;
457 if ((_flags & F_got_wtext) == 0) {
469 if (!wtext.empty()) {
471 _flags = (_flags | F_got_wtext) & ~F_got_text;
497 INLINE std::ostream &
void append_text(const std::string &text)
Appends the indicated string to the end of the stored text.
std::ostream & operator<<(std::ostream &out, const std::wstring &str)
Uses the current default encoding to output the wstring.
static std::string reencode_text(const std::string &text, Encoding from, Encoding to)
Given the indicated text string, which is assumed to be encoded via the encoding "from", decodes it and then reencodes it into the encoding "to", and returns the newly encoded string.
static int unicode_toupper(char32_t character)
Returns the uppercase equivalent of the given Unicode character.
int get_unicode_char(size_t index) const
Returns the Unicode value of the nth character in the stored text.
This class can be used to convert text between multiple representations, e.g.
static bool unicode_ispunct(char32_t character)
Returns true if the indicated character is a punctuation mark, false otherwise.
static std::string upper(const std::string &source)
Converts the string to uppercase, assuming the string is encoded in the default encoding.
set_default_encoding
Specifies the default encoding to be used for all subsequently created TextEncoder objects...
void make_lower()
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctl...
void clear_text()
Removes the text from the TextEncoder.
void append_wtext(const std::wstring &text)
Appends the indicated string to the end of the stored wide-character text.
set_text
Changes the text that is stored in the encoder.
std::string get_text_as_ascii() const
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII represent...
static const Entry * look_up(char32_t character)
Returns the Entry associated with the indicated character, if there is one.
void set_unicode_char(size_t index, char32_t character)
Sets the Unicode value of the nth character in the stored text.
get_default_encoding
Returns the default encoding to be used for all subsequently created TextEncoder objects...
static int unicode_tolower(char32_t character)
Returns the lowercase equivalent of the given Unicode character.
static bool unicode_isalpha(char32_t character)
Returns true if the indicated character is an alphabetic letter, false otherwise. ...
std::string get_encoded_char(size_t index) const
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string.
std::wstring get_wtext_as_ascii() const
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII represent...
get_text
Returns the current text, as encoded via the current encoding system.
std::string encode_wtext(const std::wstring &wtext) const
Encodes a wide-text string into a single-char string, according to the current encoding.
static bool unicode_isdigit(char32_t character)
Returns true if the indicated character is a numeric digit, false otherwise.
void set_encoding(Encoding encoding)
Specifies how the string set via set_text() is to be interpreted.
static std::string lower(const std::string &source)
Converts the string to lowercase, assuming the string is encoded in the default encoding.
const std::wstring & get_wtext() const
Returns the text associated with the TextEncoder, as a wide-character string.
Encoding get_encoding() const
Returns the encoding by which the string set via set_text() is to be interpreted. ...
void append_unicode_char(char32_t character)
Appends a single character to the end of the stored text.
std::wstring decode_text(const std::string &text) const
Returns the given single-byte string decoded to a wide-character string, via the current encoding system...
size_t get_num_chars() const
Returns the number of characters in the stored text.
void make_upper()
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctl...
static bool unicode_islower(char32_t character)
Returns true if the indicated character is a lowercase letter, false otherwise.
void set_wtext(const std::wstring &wtext)
Changes the text that is stored in the encoder.
static bool unicode_isupper(char32_t character)
Returns true if the indicated character is an uppercase letter, false otherwise.
static bool unicode_isspace(char32_t character)
Returns true if the indicated character is a whitespace character, false otherwise.