Panda3D
|
00001 // Filename: string_utils.cxx 00002 // Created by: drose (18Jan99) 00003 // 00004 //////////////////////////////////////////////////////////////////// 00005 // 00006 // PANDA 3D SOFTWARE 00007 // Copyright (c) Carnegie Mellon University. All rights reserved. 00008 // 00009 // All use of this software is subject to the terms of the revised BSD 00010 // license. You should have received a copy of this license along 00011 // with this source code in a file named "LICENSE." 00012 // 00013 //////////////////////////////////////////////////////////////////// 00014 00015 #include "string_utils.h" 00016 #include "textEncoder.h" 00017 #include "pstrtod.h" 00018 00019 #include <ctype.h> 00020 00021 // Case-insensitive string comparison, from Stroustrup's C++ third edition. 00022 // Works like strcmp(). 00023 int 00024 cmp_nocase(const string &s, const string &s2) { 00025 string::const_iterator p = s.begin(); 00026 string::const_iterator p2 = s2.begin(); 00027 00028 while (p != s.end() && p2 != s2.end()) { 00029 if (toupper(*p) != toupper(*p2)) { 00030 return (toupper(*p) < toupper(*p2)) ? -1 : 1; 00031 } 00032 ++p; 00033 ++p2; 00034 } 00035 00036 return (s2.size() == s.size()) ? 0 : 00037 (s.size() < s2.size()) ? -1 : 1; // size is unsigned 00038 } 00039 00040 INLINE int 00041 toupper_uh(int ch) { 00042 return (ch == '_') ? '-' : toupper(ch); 00043 } 00044 00045 00046 int 00047 cmp_nocase_uh(const string &s, const string &s2) { 00048 string::const_iterator p = s.begin(); 00049 string::const_iterator p2 = s2.begin(); 00050 00051 while (p != s.end() && p2 != s2.end()) { 00052 if (toupper_uh(*p) != toupper_uh(*p2)) { 00053 return (toupper_uh(*p) < toupper_uh(*p2)) ? -1 : 1; 00054 } 00055 ++p; 00056 ++p2; 00057 } 00058 00059 return (s2.size() == s.size()) ? 0 : 00060 (s.size() < s2.size()) ? -1 : 1; // size is unsigned 00061 } 00062 00063 00064 00065 //////////////////////////////////////////////////////////////////// 00066 // Function: downcase 00067 // Description: Returns the input string with all uppercase letters 00068 // converted to lowercase. 00069 //////////////////////////////////////////////////////////////////// 00070 string 00071 downcase(const string &s) { 00072 string result; 00073 result.reserve(s.size()); 00074 string::const_iterator p; 00075 for (p = s.begin(); p != s.end(); ++p) { 00076 result += tolower(*p); 00077 } 00078 return result; 00079 } 00080 00081 //////////////////////////////////////////////////////////////////// 00082 // Function: upcase 00083 // Description: Returns the input string with all lowercase letters 00084 // converted to uppercase. 00085 //////////////////////////////////////////////////////////////////// 00086 string 00087 upcase(const string &s) { 00088 string result; 00089 result.reserve(s.size()); 00090 string::const_iterator p; 00091 for (p = s.begin(); p != s.end(); ++p) { 00092 result += toupper(*p); 00093 } 00094 return result; 00095 } 00096 00097 00098 //////////////////////////////////////////////////////////////////// 00099 // Function: extract_words 00100 // Description: Divides the string into a number of words according 00101 // to whitespace. The words vector should be cleared by 00102 // the user before calling; otherwise, the list of words 00103 // in the string will be appended to the end of whatever 00104 // was there before. 00105 // 00106 // The return value is the number of words extracted. 00107 //////////////////////////////////////////////////////////////////// 00108 int 00109 extract_words(const string &str, vector_string &words) { 00110 int num_words = 0; 00111 00112 size_t pos = 0; 00113 while (pos < str.length() && isspace((unsigned int)str[pos])) { 00114 pos++; 00115 } 00116 while (pos < str.length()) { 00117 size_t word_start = pos; 00118 while (pos < str.length() && !isspace((unsigned int)str[pos])) { 00119 pos++; 00120 } 00121 words.push_back(str.substr(word_start, pos - word_start)); 00122 num_words++; 00123 00124 while (pos < str.length() && isspace((unsigned int)str[pos])) { 00125 pos++; 00126 } 00127 } 00128 00129 return num_words; 00130 } 00131 00132 //////////////////////////////////////////////////////////////////// 00133 // Function: extract_words 00134 // Description: Divides the string into a number of words according 00135 // to whitespace. The words vector should be cleared by 00136 // the user before calling; otherwise, the list of words 00137 // in the string will be appended to the end of whatever 00138 // was there before. 00139 // 00140 // The return value is the number of words extracted. 00141 //////////////////////////////////////////////////////////////////// 00142 int 00143 extract_words(const wstring &str, pvector<wstring> &words) { 00144 int num_words = 0; 00145 00146 size_t pos = 0; 00147 while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) { 00148 pos++; 00149 } 00150 while (pos < str.length()) { 00151 size_t word_start = pos; 00152 while (pos < str.length() && !TextEncoder::unicode_isspace(str[pos])) { 00153 pos++; 00154 } 00155 words.push_back(str.substr(word_start, pos - word_start)); 00156 num_words++; 00157 00158 while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) { 00159 pos++; 00160 } 00161 } 00162 00163 return num_words; 00164 } 00165 00166 //////////////////////////////////////////////////////////////////// 00167 // Function: tokenize 00168 // Description: Chops the source string up into pieces delimited by 00169 // any of the characters specified in delimiters. 00170 // Repeated delimiter characters represent zero-length 00171 // tokens. 00172 // 00173 // It is the user's responsibility to ensure the output 00174 // vector is cleared before calling this function; the 00175 // results will simply be appended to the end of the 00176 // vector. 00177 //////////////////////////////////////////////////////////////////// 00178 void 00179 tokenize(const string &str, vector_string &words, const string &delimiters, 00180 bool discard_repeated_delimiters) { 00181 size_t p = 0; 00182 while (p < str.length()) { 00183 size_t q = str.find_first_of(delimiters, p); 00184 if (q == string::npos) { 00185 if (q - p || !discard_repeated_delimiters){ 00186 words.push_back(str.substr(p)); 00187 } 00188 return; 00189 } 00190 if (q - p || !discard_repeated_delimiters){ 00191 words.push_back(str.substr(p, q - p)); 00192 } 00193 p = q + 1; 00194 } 00195 words.push_back(string()); 00196 } 00197 00198 //////////////////////////////////////////////////////////////////// 00199 // Function: tokenize 00200 // Description: Chops the source string up into pieces delimited by 00201 // any of the characters specified in delimiters. 00202 // Repeated delimiter characters represent zero-length 00203 // tokens. 00204 // 00205 // It is the user's responsibility to ensure the output 00206 // vector is cleared before calling this function; the 00207 // results will simply be appended to the end of the 00208 // vector. 00209 //////////////////////////////////////////////////////////////////// 00210 void 00211 tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters, 00212 bool discard_repeated_delimiters) { 00213 size_t p = 0; 00214 while (p < str.length()) { 00215 size_t q = str.find_first_of(delimiters, p); 00216 if (q == string::npos) { 00217 if (q - p || !discard_repeated_delimiters){ 00218 words.push_back(str.substr(p)); 00219 } 00220 return; 00221 } 00222 if (q - p || !discard_repeated_delimiters){ 00223 words.push_back(str.substr(p, q - p)); 00224 } 00225 p = q + 1; 00226 } 00227 words.push_back(wstring()); 00228 } 00229 00230 //////////////////////////////////////////////////////////////////// 00231 // Function: trim_left 00232 // Description: Returns a new string representing the contents of the 00233 // given string with the leading whitespace removed. 00234 //////////////////////////////////////////////////////////////////// 00235 string 00236 trim_left(const string &str) { 00237 size_t begin = 0; 00238 while (begin < str.size() && isspace((unsigned int)str[begin])) { 00239 begin++; 00240 } 00241 00242 return str.substr(begin); 00243 } 00244 00245 //////////////////////////////////////////////////////////////////// 00246 // Function: trim_left 00247 // Description: Returns a new string representing the contents of the 00248 // given string with the leading whitespace removed. 00249 //////////////////////////////////////////////////////////////////// 00250 wstring 00251 trim_left(const wstring &str) { 00252 size_t begin = 0; 00253 while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) { 00254 begin++; 00255 } 00256 00257 return str.substr(begin); 00258 } 00259 00260 //////////////////////////////////////////////////////////////////// 00261 // Function: trim_right 00262 // Description: Returns a new string representing the contents of the 00263 // given string with the trailing whitespace removed. 00264 //////////////////////////////////////////////////////////////////// 00265 string 00266 trim_right(const string &str) { 00267 size_t begin = 0; 00268 size_t end = str.size(); 00269 while (end > begin && isspace((unsigned int)str[end - 1])) { 00270 end--; 00271 } 00272 00273 return str.substr(begin, end - begin); 00274 } 00275 00276 //////////////////////////////////////////////////////////////////// 00277 // Function: trim_right 00278 // Description: Returns a new string representing the contents of the 00279 // given string with the trailing whitespace removed. 00280 //////////////////////////////////////////////////////////////////// 00281 wstring 00282 trim_right(const wstring &str) { 00283 size_t begin = 0; 00284 size_t end = str.size(); 00285 while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) { 00286 end--; 00287 } 00288 00289 return str.substr(begin, end - begin); 00290 } 00291 00292 //////////////////////////////////////////////////////////////////// 00293 // Function: trim 00294 // Description: Returns a new string representing the contents of the 00295 // given string with both leading and trailing 00296 // whitespace removed. 00297 //////////////////////////////////////////////////////////////////// 00298 string 00299 trim(const string &str) { 00300 size_t begin = 0; 00301 while (begin < str.size() && isspace((unsigned int)str[begin])) { 00302 begin++; 00303 } 00304 00305 size_t end = str.size(); 00306 while (end > begin && isspace((unsigned int)str[end - 1])) { 00307 end--; 00308 } 00309 00310 return str.substr(begin, end - begin); 00311 } 00312 00313 //////////////////////////////////////////////////////////////////// 00314 // Function: trim 00315 // Description: Returns a new string representing the contents of the 00316 // given string with both leading and trailing 00317 // whitespace removed. 00318 //////////////////////////////////////////////////////////////////// 00319 wstring 00320 trim(const wstring &str) { 00321 size_t begin = 0; 00322 while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) { 00323 begin++; 00324 } 00325 00326 size_t end = str.size(); 00327 while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) { 00328 end--; 00329 } 00330 00331 return str.substr(begin, end - begin); 00332 } 00333 00334 //////////////////////////////////////////////////////////////////// 00335 // Function: string_to_int 00336 // Description: A string-interface wrapper around the C library 00337 // strtol(). This parses the ASCII representation of an 00338 // integer, and then sets tail to everything that 00339 // follows the first valid integer read. If, on exit, 00340 // str == tail, there was no valid integer in the 00341 // source string; if !tail.empty(), there was garbage 00342 // after the integer. 00343 // 00344 // It is legal if str and tail refer to the same string. 00345 //////////////////////////////////////////////////////////////////// 00346 int 00347 string_to_int(const string &str, string &tail) { 00348 const char *nptr = str.c_str(); 00349 char *endptr; 00350 int result = strtol(nptr, &endptr, 10); 00351 tail = endptr; 00352 return result; 00353 } 00354 00355 //////////////////////////////////////////////////////////////////// 00356 // Function: string_to_int 00357 // Description: Another flavor of string_to_int(), this one returns 00358 // true if the string is a perfectly valid integer (and 00359 // sets result to that value), or false otherwise. 00360 //////////////////////////////////////////////////////////////////// 00361 bool 00362 string_to_int(const string &str, int &result) { 00363 string tail; 00364 result = string_to_int(str, tail); 00365 return tail.empty(); 00366 } 00367 00368 //////////////////////////////////////////////////////////////////// 00369 // Function: string_to_double 00370 // Description: A string-interface wrapper around the C library 00371 // strtol(). This parses the ASCII representation of an 00372 // floating-point number, and then sets tail to 00373 // everything that follows the first valid integer read. 00374 // If, on exit, str == tail, there was no valid integer 00375 // in the source string; if !tail.empty(), there was 00376 // garbage after the number. 00377 // 00378 // It is legal if str and tail refer to the same string. 00379 //////////////////////////////////////////////////////////////////// 00380 double 00381 string_to_double(const string &str, string &tail) { 00382 const char *nptr = str.c_str(); 00383 char *endptr; 00384 double result = pstrtod(nptr, &endptr); 00385 tail = endptr; 00386 return result; 00387 } 00388 00389 //////////////////////////////////////////////////////////////////// 00390 // Function: string_to_double 00391 // Description: Another flavor of string_to_double(), this one 00392 // returns true if the string is a perfectly valid 00393 // number (and sets result to that value), or false 00394 // otherwise. 00395 //////////////////////////////////////////////////////////////////// 00396 bool 00397 string_to_double(const string &str, double &result) { 00398 string tail; 00399 result = string_to_double(str, tail); 00400 return tail.empty(); 00401 } 00402 00403 //////////////////////////////////////////////////////////////////// 00404 // Function: string_to_float 00405 // Description: Another flavor of string_to_float(), this one 00406 // returns true if the string is a perfectly valid 00407 // number (and sets result to that value), or false 00408 // otherwise. 00409 //////////////////////////////////////////////////////////////////// 00410 bool 00411 string_to_float(const string &str, PN_stdfloat &result) { 00412 string tail; 00413 result = (PN_stdfloat)string_to_double(str, tail); 00414 return tail.empty(); 00415 }