Panda3D
|
00001 // Filename: unicodeLatinMap.h 00002 // Created by: drose (01Feb03) 00003 // 00004 //////////////////////////////////////////////////////////////////// 00005 // 00006 // PANDA 3D SOFTWARE 00007 // Copyright (c) Carnegie Mellon University. All rights reserved. 00008 // 00009 // All use of this software is subject to the terms of the revised BSD 00010 // license. You should have received a copy of this license along 00011 // with this source code in a file named "LICENSE." 00012 // 00013 //////////////////////////////////////////////////////////////////// 00014 00015 #ifndef UNICODELATINMAP_H 00016 #define UNICODELATINMAP_H 00017 00018 #include "dtoolbase.h" 00019 #include "pmap.h" 00020 00021 //////////////////////////////////////////////////////////////////// 00022 // Class : UnicodeLatinMap 00023 // Description : This class mainly serves as a container for a largish 00024 // table of the subset of the Unicode character set that 00025 // corresponds to the Latin alphabet, with its various 00026 // accent marks and so on. Specifically, this table 00027 // indicates how to map between the Unicode accented 00028 // character and the corresponding ASCII equivalent 00029 // without the accent mark; as well as how to switch 00030 // case from upper to lower while retaining the Unicode 00031 // accent marks. 00032 //////////////////////////////////////////////////////////////////// 00033 class EXPCL_DTOOL UnicodeLatinMap { 00034 public: 00035 enum AccentType { 00036 AT_none, 00037 AT_acute, 00038 AT_acute_and_dot_above, 00039 AT_breve, 00040 AT_breve_and_acute, 00041 AT_breve_and_dot_below, 00042 AT_breve_and_grave, 00043 AT_breve_and_hook_above, 00044 AT_breve_and_tilde, 00045 AT_breve_below, 00046 AT_caron, 00047 AT_caron_and_dot_above, 00048 AT_cedilla, 00049 AT_cedilla_and_acute, 00050 AT_cedilla_and_breve, 00051 AT_circumflex, 00052 AT_circumflex_and_acute, 00053 AT_circumflex_and_dot_below, 00054 AT_circumflex_and_grave, 00055 AT_circumflex_and_hook_above, 00056 AT_circumflex_and_tilde, 00057 AT_circumflex_below, 00058 AT_comma_below, 00059 AT_curl, 00060 AT_diaeresis, 00061 AT_diaeresis_and_acute, 00062 AT_diaeresis_and_caron, 00063 AT_diaeresis_and_grave, 00064 AT_diaeresis_and_macron, 00065 AT_diaeresis_below, 00066 AT_dot_above, 00067 AT_dot_above_and_macron, 00068 AT_dot_below, 00069 AT_dot_below_and_dot_above, 00070 AT_dot_below_and_macron, 00071 AT_double_acute, 00072 AT_double_grave, 00073 AT_grave, 00074 AT_hook, 00075 AT_hook_above, 00076 AT_horn, 00077 AT_horn_and_acute, 00078 AT_horn_and_dot_below, 00079 AT_horn_and_grave, 00080 AT_horn_and_hook_above, 00081 AT_horn_and_tilde, 00082 AT_inverted_breve, 00083 AT_line_below, 00084 AT_macron, 00085 AT_macron_and_acute, 00086 AT_macron_and_diaeresis, 00087 AT_macron_and_grave, 00088 AT_ogonek, 00089 AT_ogonek_and_macron, 00090 AT_ring_above, 00091 AT_ring_above_and_acute, 00092 AT_ring_below, 00093 AT_stroke, 00094 AT_stroke_and_acute, 00095 AT_stroke_and_hook, 00096 AT_tilde, 00097 AT_tilde_and_acute, 00098 AT_tilde_and_diaeresis, 00099 AT_tilde_and_macron, 00100 AT_tilde_below, 00101 AT_topbar, 00102 }; 00103 00104 enum AdditionalFlags { 00105 AF_ligature = 0x0001, 00106 AF_turned = 0x0002, 00107 AF_reversed = 0x0004, 00108 AF_smallcap = 0x0008, 00109 AF_dotless = 0x0010, 00110 }; 00111 00112 enum CharType { 00113 CT_upper, 00114 CT_lower, 00115 CT_punct, 00116 }; 00117 00118 class Entry { 00119 public: 00120 wchar_t _character; 00121 CharType _char_type; 00122 char _ascii_equiv; 00123 char _ascii_additional; 00124 wchar_t _tolower_character; 00125 wchar_t _toupper_character; 00126 AccentType _accent_type; 00127 int _additional_flags; 00128 }; 00129 00130 static const Entry *look_up(wchar_t character); 00131 00132 private: 00133 static void init(); 00134 static bool _initialized; 00135 00136 typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter; 00137 static ByCharacter *_by_character; 00138 enum { max_direct_chars = 256 }; 00139 static const Entry *_direct_chars[max_direct_chars]; 00140 }; 00141 00142 #endif