Panda3D
string_utils.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file string_utils.cxx
10  * @author drose
11  * @date 1999-01-18
12  */
13 
#include "string_utils.h"
#include "textEncoder.h"
#include "pstrtod.h"

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
19 
20 using std::string;
21 using std::wstring;
22 
/**
 * Case-insensitive string comparison, after Stroustrup's C++ third edition.
 * Works like strcmp(): returns -1 if s sorts before s2, 0 if the two strings
 * are equal ignoring case, and 1 if s sorts after s2.
 *
 * Every character is converted to unsigned char before it is handed to
 * toupper().  Passing a negative value other than EOF to the <ctype.h>
 * functions is undefined, which is what a plain char holding a byte >= 0x80
 * becomes on platforms where char is signed.
 */
int
cmp_nocase(const std::string &s, const std::string &s2) {
  std::string::const_iterator p = s.begin();
  std::string::const_iterator p2 = s2.begin();

  while (p != s.end() && p2 != s2.end()) {
    const int c = toupper(static_cast<unsigned char>(*p));
    const int c2 = toupper(static_cast<unsigned char>(*p2));
    if (c != c2) {
      return (c < c2) ? -1 : 1;
    }
    ++p;
    ++p2;
  }

  // One string is a prefix of the other (or they are identical); the shorter
  // one sorts first.  size() is unsigned, so compare instead of subtracting.
  if (s.size() == s2.size()) {
    return 0;
  }
  return (s.size() < s2.size()) ? -1 : 1;
}
41 
42 INLINE int
43 toupper_uh(int ch) {
44  return (ch == '_') ? '-' : toupper(ch);
45 }
46 
47 
48 int
49 cmp_nocase_uh(const string &s, const string &s2) {
50  string::const_iterator p = s.begin();
51  string::const_iterator p2 = s2.begin();
52 
53  while (p != s.end() && p2 != s2.end()) {
54  if (toupper_uh(*p) != toupper_uh(*p2)) {
55  return (toupper_uh(*p) < toupper_uh(*p2)) ? -1 : 1;
56  }
57  ++p;
58  ++p2;
59  }
60 
61  return (s2.size() == s.size()) ? 0 :
62  (s.size() < s2.size()) ? -1 : 1; // size is unsigned
63 }
64 
65 
66 
/**
 * Returns the input string with all uppercase letters converted to lowercase.
 *
 * Characters are passed to tolower() as unsigned char values; a plain char
 * holding a byte >= 0x80 may be negative, which tolower() does not accept.
 */
std::string
downcase(const std::string &s) {
  std::string result;
  result.reserve(s.size());
  std::string::const_iterator p;
  for (p = s.begin(); p != s.end(); ++p) {
    result += static_cast<char>(tolower(static_cast<unsigned char>(*p)));
  }
  return result;
}
80 
/**
 * Returns the input string with all lowercase letters converted to uppercase.
 *
 * Characters are passed to toupper() as unsigned char values; a plain char
 * holding a byte >= 0x80 may be negative, which toupper() does not accept.
 */
std::string
upcase(const std::string &s) {
  std::string result;
  result.reserve(s.size());
  std::string::const_iterator p;
  for (p = s.begin(); p != s.end(); ++p) {
    result += static_cast<char>(toupper(static_cast<unsigned char>(*p)));
  }
  return result;
}
94 
95 
96 /**
97  * Divides the string into a number of words according to whitespace. The
98  * words vector should be cleared by the user before calling; otherwise, the
99  * list of words in the string will be appended to the end of whatever was
100  * there before.
101  *
102  * The return value is the number of words extracted.
103  */
104 int
105 extract_words(const string &str, vector_string &words) {
106  int num_words = 0;
107 
108  size_t pos = 0;
109  while (pos < str.length() && isspace((unsigned int)str[pos])) {
110  pos++;
111  }
112  while (pos < str.length()) {
113  size_t word_start = pos;
114  while (pos < str.length() && !isspace((unsigned int)str[pos])) {
115  pos++;
116  }
117  words.push_back(str.substr(word_start, pos - word_start));
118  num_words++;
119 
120  while (pos < str.length() && isspace((unsigned int)str[pos])) {
121  pos++;
122  }
123  }
124 
125  return num_words;
126 }
127 
128 /**
129  * Divides the string into a number of words according to whitespace. The
130  * words vector should be cleared by the user before calling; otherwise, the
131  * list of words in the string will be appended to the end of whatever was
132  * there before.
133  *
134  * The return value is the number of words extracted.
135  */
136 int
137 extract_words(const wstring &str, pvector<wstring> &words) {
138  int num_words = 0;
139 
140  size_t pos = 0;
141  while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
142  pos++;
143  }
144  while (pos < str.length()) {
145  size_t word_start = pos;
146  while (pos < str.length() && !TextEncoder::unicode_isspace(str[pos])) {
147  pos++;
148  }
149  words.push_back(str.substr(word_start, pos - word_start));
150  num_words++;
151 
152  while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
153  pos++;
154  }
155  }
156 
157  return num_words;
158 }
159 
160 /**
161  * Chops the source string up into pieces delimited by any of the characters
162  * specified in delimiters. Repeated delimiter characters represent zero-
163  * length tokens.
164  *
165  * It is the user's responsibility to ensure the output vector is cleared
166  * before calling this function; the results will simply be appended to the
167  * end of the vector.
168  */
169 void
170 tokenize(const string &str, vector_string &words, const string &delimiters,
171  bool discard_repeated_delimiters) {
172  size_t p = 0;
173  while (p < str.length()) {
174  size_t q = str.find_first_of(delimiters, p);
175  if (q == string::npos) {
176  if (q - p || !discard_repeated_delimiters){
177  words.push_back(str.substr(p));
178  }
179  return;
180  }
181  if (q - p || !discard_repeated_delimiters){
182  words.push_back(str.substr(p, q - p));
183  }
184  p = q + 1;
185  }
186  words.push_back(string());
187 }
188 
189 /**
190  * Chops the source string up into pieces delimited by any of the characters
191  * specified in delimiters. Repeated delimiter characters represent zero-
192  * length tokens.
193  *
194  * It is the user's responsibility to ensure the output vector is cleared
195  * before calling this function; the results will simply be appended to the
196  * end of the vector.
197  */
198 void
199 tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters,
200  bool discard_repeated_delimiters) {
201  size_t p = 0;
202  while (p < str.length()) {
203  size_t q = str.find_first_of(delimiters, p);
204  if (q == string::npos) {
205  if (q - p || !discard_repeated_delimiters){
206  words.push_back(str.substr(p));
207  }
208  return;
209  }
210  if (q - p || !discard_repeated_delimiters){
211  words.push_back(str.substr(p, q - p));
212  }
213  p = q + 1;
214  }
215  words.push_back(wstring());
216 }
217 
/**
 * Returns a new string representing the contents of the given string with the
 * leading whitespace removed.
 *
 * Whitespace is whatever isspace() reports.  Each character is converted to
 * unsigned char first, since a plain char holding a byte >= 0x80 may be
 * negative and is then not a valid argument for isspace().
 */
std::string
trim_left(const std::string &str) {
  size_t begin = 0;
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    ++begin;
  }

  return str.substr(begin);
}
231 
232 /**
233  * Returns a new string representing the contents of the given string with the
234  * leading whitespace removed.
235  */
236 wstring
237 trim_left(const wstring &str) {
238  size_t begin = 0;
239  while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
240  begin++;
241  }
242 
243  return str.substr(begin);
244 }
245 
/**
 * Returns a new string representing the contents of the given string with the
 * trailing whitespace removed.
 *
 * Whitespace is whatever isspace() reports.  Each character is converted to
 * unsigned char first, since a plain char holding a byte >= 0x80 may be
 * negative and is then not a valid argument for isspace().
 */
std::string
trim_right(const std::string &str) {
  size_t end = str.size();
  while (end > 0 && isspace(static_cast<unsigned char>(str[end - 1]))) {
    --end;
  }

  return str.substr(0, end);
}
260 
261 /**
262  * Returns a new string representing the contents of the given string with the
263  * trailing whitespace removed.
264  */
265 wstring
266 trim_right(const wstring &str) {
267  size_t begin = 0;
268  size_t end = str.size();
269  while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
270  end--;
271  }
272 
273  return str.substr(begin, end - begin);
274 }
275 
/**
 * Returns a new string representing the contents of the given string with
 * both leading and trailing whitespace removed.
 *
 * Whitespace is whatever isspace() reports.  Each character is converted to
 * unsigned char first, since a plain char holding a byte >= 0x80 may be
 * negative and is then not a valid argument for isspace().
 */
std::string
trim(const std::string &str) {
  size_t begin = 0;
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    ++begin;
  }

  // Stop at begin so that an all-whitespace string yields an empty result.
  size_t end = str.size();
  while (end > begin && isspace(static_cast<unsigned char>(str[end - 1]))) {
    --end;
  }

  return str.substr(begin, end - begin);
}
294 
295 /**
296  * Returns a new string representing the contents of the given string with
297  * both leading and trailing whitespace removed.
298  */
299 wstring
300 trim(const wstring &str) {
301  size_t begin = 0;
302  while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
303  begin++;
304  }
305 
306  size_t end = str.size();
307  while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
308  end--;
309  }
310 
311  return str.substr(begin, end - begin);
312 }
313 
/**
 * A string-interface wrapper around the C library strtol().  This parses the
 * ASCII representation of a base-10 integer, and then sets tail to everything
 * that follows the first valid integer read.  If, on exit, str == tail, there
 * was no valid integer in the source string; if !tail.empty(), there was
 * garbage after the integer.
 *
 * strtol() produces a long; a value that does not fit in an int is clamped to
 * INT_MIN or INT_MAX rather than being silently truncated.
 *
 * It is legal if str and tail refer to the same string.
 */
int
string_to_int(const std::string &str, std::string &tail) {
  const char *nptr = str.c_str();
  char *endptr;
  long value = strtol(nptr, &endptr, 10);

  // Where long is wider than int, narrowing an out-of-range value would wrap
  // around to an unrelated number; saturate instead.
  if (value > INT_MAX) {
    value = INT_MAX;
  } else if (value < INT_MIN) {
    value = INT_MIN;
  }

  // endptr points into str's own buffer.  Copy the remainder into a temporary
  // before touching tail, so this stays safe when tail and str are the same
  // object.
  std::string remainder(endptr);
  tail.swap(remainder);

  return static_cast<int>(value);
}
331 
332 /**
333  * Another flavor of string_to_int(), this one returns true if the string is a
334  * perfectly valid integer (and sets result to that value), or false
335  * otherwise.
336  */
337 bool
338 string_to_int(const string &str, int &result) {
339  string tail;
340  result = string_to_int(str, tail);
341  return tail.empty();
342 }
343 
344 /**
345  * A string-interface wrapper around the C library strtol(). This parses the
346  * ASCII representation of an floating-point number, and then sets tail to
347  * everything that follows the first valid integer read. If, on exit, str ==
348  * tail, there was no valid integer in the source string; if !tail.empty(),
349  * there was garbage after the number.
350  *
351  * It is legal if str and tail refer to the same string.
352  */
353 double
354 string_to_double(const string &str, string &tail) {
355  const char *nptr = str.c_str();
356  char *endptr;
357  double result = pstrtod(nptr, &endptr);
358  tail = endptr;
359  return result;
360 }
361 
362 /**
363  * Another flavor of string_to_double(), this one returns true if the string
364  * is a perfectly valid number (and sets result to that value), or false
365  * otherwise.
366  */
367 bool
368 string_to_double(const string &str, double &result) {
369  string tail;
370  result = string_to_double(str, tail);
371  return tail.empty();
372 }
373 
374 /**
375  *
376  */
377 bool
378 string_to_float(const string &str, float &result) {
379  string tail;
380  result = (float)string_to_double(str, tail);
381  return tail.empty();
382 }
383 
384 /**
385  *
386  */
387 bool
388 string_to_stdfloat(const string &str, PN_stdfloat &result) {
389  string tail;
390  result = (PN_stdfloat)string_to_double(str, tail);
391  return tail.empty();
392 }
int string_to_int(const string &str, string &tail)
A string-interface wrapper around the C library strtol().
double string_to_double(const string &str, string &tail)
A string-interface wrapper around pstrtod(), Panda's re-implementation of the C library strtod().
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
double pstrtod(const char *nptr, char **endptr)
This function re-implements strtod, to avoid the problems that occur when the LC_NUMERIC locale gets ...
Definition: pstrtod.cxx:31
string downcase(const string &s)
Returns the input string with all uppercase letters converted to lowercase.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
This is our own Panda specialization on the default STL vector.
Definition: pvector.h:42
int extract_words(const string &str, vector_string &words)
Divides the string into a number of words according to whitespace.
string trim(const string &str)
Returns a new string representing the contents of the given string with both leading and trailing whitespace removed.
void tokenize(const string &str, vector_string &words, const string &delimiters, bool discard_repeated_delimiters)
Chops the source string up into pieces delimited by any of the characters specified in delimiters.
string trim_left(const string &str)
Returns a new string representing the contents of the given string with the leading whitespace removed.
string upcase(const string &s)
Returns the input string with all lowercase letters converted to uppercase.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static bool unicode_isspace(char32_t character)
Returns true if the indicated character is a whitespace letter, false otherwise.
Definition: textEncoder.I:342
string trim_right(const string &str)
Returns a new string representing the contents of the given string with the trailing whitespace removed.