Panda3D

string_utils.cxx

00001 // Filename: string_utils.cxx
00002 // Created by:  drose (18Jan99)
00003 //
00004 ////////////////////////////////////////////////////////////////////
00005 //
00006 // PANDA 3D SOFTWARE
00007 // Copyright (c) Carnegie Mellon University.  All rights reserved.
00008 //
00009 // All use of this software is subject to the terms of the revised BSD
00010 // license.  You should have received a copy of this license along
00011 // with this source code in a file named "LICENSE."
00012 //
00013 ////////////////////////////////////////////////////////////////////
00014 
#include "string_utils.h"
#include "textEncoder.h"
#include "pstrtod.h"

#include <ctype.h>
#include <limits.h>
00020 
////////////////////////////////////////////////////////////////////
//     Function: cmp_nocase
//  Description: Case-insensitive string comparison, from Stroustrup's
//               C++ third edition.  Works like strcmp(): returns -1
//               if s sorts before s2, 1 if it sorts after, and 0 if
//               the two strings are equal without regard to case.
//
//               Characters are compared as unsigned char values, as
//               strcmp() does, so the result does not depend on
//               whether plain char is signed on this platform.
////////////////////////////////////////////////////////////////////
int
cmp_nocase(const string &s, const string &s2) {
  string::const_iterator p = s.begin();
  string::const_iterator p2 = s2.begin();

  while (p != s.end() && p2 != s2.end()) {
    // toupper() is only defined for EOF and for values representable
    // as unsigned char, so a plain (possibly negative) char must not
    // be passed to it directly.
    const int c = toupper(static_cast<unsigned char>(*p));
    const int c2 = toupper(static_cast<unsigned char>(*p2));
    if (c != c2) {
      return (c < c2) ? -1 : 1;
    }
    ++p;
    ++p2;
  }

  // One string is a prefix of the other; the shorter one sorts first.
  return (s2.size() == s.size()) ? 0 :
    (s.size() < s2.size()) ? -1 : 1;  // size is unsigned
}
00039 
// Uppercases ch via toupper(), except that an underscore is reported
// as a hyphen.  cmp_nocase_uh() uses this so that '_' and '-' compare
// as the same character.
INLINE int
toupper_uh(int ch) {
  if (ch == '_') {
    return '-';
  }
  return toupper(ch);
}
00044 
00045 
00046 int
00047 cmp_nocase_uh(const string &s, const string &s2) {
00048   string::const_iterator p = s.begin();
00049   string::const_iterator p2 = s2.begin();
00050 
00051   while (p != s.end() && p2 != s2.end()) {
00052     if (toupper_uh(*p) != toupper_uh(*p2)) {
00053       return (toupper_uh(*p) < toupper_uh(*p2)) ? -1 : 1;
00054     }
00055     ++p;
00056     ++p2;
00057   }
00058 
00059   return (s2.size() == s.size()) ? 0 :
00060     (s.size() < s2.size()) ? -1 : 1;  // size is unsigned
00061 }
00062 
00063 
00064 
////////////////////////////////////////////////////////////////////
//     Function: downcase
//  Description: Returns the input string with all uppercase letters
//               converted to lowercase, as determined by tolower().
//               All other characters are copied unchanged.
////////////////////////////////////////////////////////////////////
string
downcase(const string &s) {
  string result;
  result.reserve(s.size());
  string::const_iterator p;
  for (p = s.begin(); p != s.end(); ++p) {
    // tolower() requires a value representable as unsigned char (or
    // EOF); passing a negative plain char is undefined behavior.
    result += static_cast<char>(tolower(static_cast<unsigned char>(*p)));
  }
  return result;
}
00080 
////////////////////////////////////////////////////////////////////
//     Function: upcase
//  Description: Returns the input string with all lowercase letters
//               converted to uppercase, as determined by toupper().
//               All other characters are copied unchanged.
////////////////////////////////////////////////////////////////////
string
upcase(const string &s) {
  string result;
  result.reserve(s.size());
  string::const_iterator p;
  for (p = s.begin(); p != s.end(); ++p) {
    // toupper() requires a value representable as unsigned char (or
    // EOF); passing a negative plain char is undefined behavior.
    result += static_cast<char>(toupper(static_cast<unsigned char>(*p)));
  }
  return result;
}
00096 
00097 
00098 ////////////////////////////////////////////////////////////////////
00099 //     Function: extract_words
00100 //  Description: Divides the string into a number of words according
00101 //               to whitespace.  The words vector should be cleared by
00102 //               the user before calling; otherwise, the list of words
00103 //               in the string will be appended to the end of whatever
00104 //               was there before.
00105 //
00106 //               The return value is the number of words extracted.
00107 ////////////////////////////////////////////////////////////////////
00108 int
00109 extract_words(const string &str, vector_string &words) {
00110   int num_words = 0;
00111 
00112   size_t pos = 0;
00113   while (pos < str.length() && isspace((unsigned int)str[pos])) {
00114     pos++;
00115   }
00116   while (pos < str.length()) {
00117     size_t word_start = pos;
00118     while (pos < str.length() && !isspace((unsigned int)str[pos])) {
00119       pos++;
00120     }
00121     words.push_back(str.substr(word_start, pos - word_start));
00122     num_words++;
00123 
00124     while (pos < str.length() && isspace((unsigned int)str[pos])) {
00125       pos++;
00126     }
00127   }
00128 
00129   return num_words;
00130 }
00131 
00132 ////////////////////////////////////////////////////////////////////
00133 //     Function: extract_words
00134 //  Description: Divides the string into a number of words according
00135 //               to whitespace.  The words vector should be cleared by
00136 //               the user before calling; otherwise, the list of words
00137 //               in the string will be appended to the end of whatever
00138 //               was there before.
00139 //
00140 //               The return value is the number of words extracted.
00141 ////////////////////////////////////////////////////////////////////
00142 int
00143 extract_words(const wstring &str, pvector<wstring> &words) {
00144   int num_words = 0;
00145 
00146   size_t pos = 0;
00147   while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
00148     pos++;
00149   }
00150   while (pos < str.length()) {
00151     size_t word_start = pos;
00152     while (pos < str.length() && !TextEncoder::unicode_isspace(str[pos])) {
00153       pos++;
00154     }
00155     words.push_back(str.substr(word_start, pos - word_start));
00156     num_words++;
00157 
00158     while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
00159       pos++;
00160     }
00161   }
00162 
00163   return num_words;
00164 }
00165 
00166 ////////////////////////////////////////////////////////////////////
00167 //     Function: tokenize
00168 //  Description: Chops the source string up into pieces delimited by
00169 //               any of the characters specified in delimiters.
00170 //               Repeated delimiter characters represent zero-length
00171 //               tokens.
00172 //
00173 //               It is the user's responsibility to ensure the output
00174 //               vector is cleared before calling this function; the
00175 //               results will simply be appended to the end of the
00176 //               vector.
00177 ////////////////////////////////////////////////////////////////////
00178 void
00179 tokenize(const string &str, vector_string &words, const string &delimiters,
00180          bool discard_repeated_delimiters) {
00181   size_t p = 0;
00182   while (p < str.length()) {
00183     size_t q = str.find_first_of(delimiters, p);
00184     if (q == string::npos) {
00185       if (q - p || !discard_repeated_delimiters){
00186         words.push_back(str.substr(p));
00187       }
00188       return;
00189     }
00190     if (q - p || !discard_repeated_delimiters){
00191         words.push_back(str.substr(p, q - p));
00192     }
00193     p = q + 1;
00194   }
00195   words.push_back(string());
00196 }
00197 
00198 ////////////////////////////////////////////////////////////////////
00199 //     Function: tokenize
00200 //  Description: Chops the source string up into pieces delimited by
00201 //               any of the characters specified in delimiters.
00202 //               Repeated delimiter characters represent zero-length
00203 //               tokens.
00204 //
00205 //               It is the user's responsibility to ensure the output
00206 //               vector is cleared before calling this function; the
00207 //               results will simply be appended to the end of the
00208 //               vector.
00209 ////////////////////////////////////////////////////////////////////
00210 void
00211 tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters,
00212          bool discard_repeated_delimiters) {
00213   size_t p = 0;
00214   while (p < str.length()) {
00215     size_t q = str.find_first_of(delimiters, p);
00216     if (q == string::npos) {
00217       if (q - p || !discard_repeated_delimiters){
00218         words.push_back(str.substr(p));
00219       }
00220       return;
00221     }
00222     if (q - p || !discard_repeated_delimiters){
00223       words.push_back(str.substr(p, q - p));
00224     }
00225     p = q + 1;
00226   }
00227   words.push_back(wstring());
00228 }
00229 
////////////////////////////////////////////////////////////////////
//     Function: trim_left
//  Description: Returns a new string representing the contents of the
//               given string with the leading whitespace (as
//               determined by isspace()) removed.  If the string is
//               entirely whitespace, the result is empty.
////////////////////////////////////////////////////////////////////
string
trim_left(const string &str) {
  size_t begin = 0;
  // isspace() is only defined for EOF and for values representable as
  // unsigned char, so convert each character before testing it.
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    begin++;
  }

  return str.substr(begin);
}
00244 
00245 ////////////////////////////////////////////////////////////////////
00246 //     Function: trim_left
00247 //  Description: Returns a new string representing the contents of the
00248 //               given string with the leading whitespace removed.
00249 ////////////////////////////////////////////////////////////////////
00250 wstring
00251 trim_left(const wstring &str) {
00252   size_t begin = 0;
00253   while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
00254     begin++;
00255   }
00256 
00257   return str.substr(begin);
00258 }
00259 
////////////////////////////////////////////////////////////////////
//     Function: trim_right
//  Description: Returns a new string representing the contents of the
//               given string with the trailing whitespace (as
//               determined by isspace()) removed.  If the string is
//               entirely whitespace, the result is empty.
////////////////////////////////////////////////////////////////////
string
trim_right(const string &str) {
  size_t end = str.size();
  // isspace() is only defined for EOF and for values representable as
  // unsigned char, so convert each character before testing it.
  while (end > 0 && isspace(static_cast<unsigned char>(str[end - 1]))) {
    end--;
  }

  return str.substr(0, end);
}
00275 
00276 ////////////////////////////////////////////////////////////////////
00277 //     Function: trim_right
00278 //  Description: Returns a new string representing the contents of the
00279 //               given string with the trailing whitespace removed.
00280 ////////////////////////////////////////////////////////////////////
00281 wstring
00282 trim_right(const wstring &str) {
00283   size_t begin = 0;
00284   size_t end = str.size();
00285   while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
00286     end--;
00287   }
00288 
00289   return str.substr(begin, end - begin);
00290 }
00291 
////////////////////////////////////////////////////////////////////
//     Function: trim
//  Description: Returns a new string representing the contents of the
//               given string with both leading and trailing
//               whitespace (as determined by isspace()) removed.
//               Whitespace within the string is preserved.
////////////////////////////////////////////////////////////////////
string
trim(const string &str) {
  // isspace() is only defined for EOF and for values representable as
  // unsigned char, so convert each character before testing it.
  size_t begin = 0;
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    begin++;
  }

  // Stop at begin so that an all-whitespace string yields an empty
  // result rather than scanning the same characters twice.
  size_t end = str.size();
  while (end > begin && isspace(static_cast<unsigned char>(str[end - 1]))) {
    end--;
  }

  return str.substr(begin, end - begin);
}
00312 
00313 ////////////////////////////////////////////////////////////////////
00314 //     Function: trim
00315 //  Description: Returns a new string representing the contents of the
00316 //               given string with both leading and trailing
00317 //               whitespace removed.
00318 ////////////////////////////////////////////////////////////////////
00319 wstring
00320 trim(const wstring &str) {
00321   size_t begin = 0;
00322   while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
00323     begin++;
00324   }
00325 
00326   size_t end = str.size();
00327   while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
00328     end--;
00329   }
00330 
00331   return str.substr(begin, end - begin);
00332 }
00333 
////////////////////////////////////////////////////////////////////
//     Function: string_to_int
//  Description: A string-interface wrapper around the C library
//               strtol().  This parses the ASCII representation of a
//               base-10 integer, and then sets tail to everything
//               that follows the first valid integer read.  If, on
//               exit, str == tail, there was no valid integer in the
//               source string; if !tail.empty(), there was garbage
//               after the integer.
//
//               If the value read does not fit in an int, the result
//               is clamped to INT_MIN or INT_MAX rather than being
//               silently truncated.
//
//               It is legal if str and tail refer to the same string.
////////////////////////////////////////////////////////////////////
int
string_to_int(const string &str, string &tail) {
  const char *nptr = str.c_str();
  char *endptr;
  long value = strtol(nptr, &endptr, 10);
  // tail may be the same object as str, so it is only assigned once
  // parsing is complete.
  tail = endptr;

  // strtol() saturates at the limits of long, which may be wider than
  // int; saturate at the limits of int so that the result is the same
  // regardless of the size of long.
  if (value > INT_MAX) {
    return INT_MAX;
  }
  if (value < INT_MIN) {
    return INT_MIN;
  }
  return static_cast<int>(value);
}
00354 
00355 ////////////////////////////////////////////////////////////////////
00356 //     Function: string_to_int
00357 //  Description: Another flavor of string_to_int(), this one returns
00358 //               true if the string is a perfectly valid integer (and
00359 //               sets result to that value), or false otherwise.
00360 ////////////////////////////////////////////////////////////////////
00361 bool
00362 string_to_int(const string &str, int &result) {
00363   string tail;
00364   result = string_to_int(str, tail);
00365   return tail.empty();
00366 }
00367 
00368 ////////////////////////////////////////////////////////////////////
00369 //     Function: string_to_double
00370 //  Description: A string-interface wrapper around the C library
00371 //               strtol().  This parses the ASCII representation of an
00372 //               floating-point number, and then sets tail to
00373 //               everything that follows the first valid integer read.
00374 //               If, on exit, str == tail, there was no valid integer
00375 //               in the source string; if !tail.empty(), there was
00376 //               garbage after the number.
00377 //
00378 //               It is legal if str and tail refer to the same string.
00379 ////////////////////////////////////////////////////////////////////
00380 double
00381 string_to_double(const string &str, string &tail) {
00382   const char *nptr = str.c_str();
00383   char *endptr;
00384   double result = pstrtod(nptr, &endptr);
00385   tail = endptr;
00386   return result;
00387 }
00388 
00389 ////////////////////////////////////////////////////////////////////
00390 //     Function: string_to_double
00391 //  Description: Another flavor of string_to_double(), this one
00392 //               returns true if the string is a perfectly valid
00393 //               number (and sets result to that value), or false
00394 //               otherwise.
00395 ////////////////////////////////////////////////////////////////////
00396 bool
00397 string_to_double(const string &str, double &result) {
00398   string tail;
00399   result = string_to_double(str, tail);
00400   return tail.empty();
00401 }
00402 
00403 ////////////////////////////////////////////////////////////////////
00404 //     Function: string_to_float
00405 //  Description: Another flavor of string_to_float(), this one
00406 //               returns true if the string is a perfectly valid
00407 //               number (and sets result to that value), or false
00408 //               otherwise.
00409 ////////////////////////////////////////////////////////////////////
00410 bool
00411 string_to_float(const string &str, PN_stdfloat &result) {
00412   string tail;
00413   result = (PN_stdfloat)string_to_double(str, tail);
00414   return tail.empty();
00415 }
 All Classes Functions Variables Enumerations