Panda3D
string_utils.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file string_utils.cxx
10  * @author drose
11  * @date 1999-01-18
12  */
13 
14 #include "string_utils.h"
15 #include "textEncoder.h"
16 #include "pstrtod.h"
17 
18 #include <ctype.h>
19 
20 using std::string;
21 using std::wstring;
22 
/**
 * Case-insensitive string comparison, after the version in Stroustrup's "The
 * C++ Programming Language", third edition.  Works like strcmp(): returns -1
 * if s sorts before s2, 0 if the two strings are equal when case is ignored,
 * and 1 if s sorts after s2.  When one string is a case-insensitive prefix
 * of the other, the shorter string sorts first.
 */
int
cmp_nocase(const std::string &s, const std::string &s2) {
  std::string::const_iterator p = s.begin();
  std::string::const_iterator p2 = s2.begin();

  while (p != s.end() && p2 != s2.end()) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF).  A plain char may be negative, so convert explicitly; this
    // also makes bytes >= 0x80 sort after ASCII, as they do in strcmp().
    const int c = toupper(static_cast<unsigned char>(*p));
    const int c2 = toupper(static_cast<unsigned char>(*p2));
    if (c != c2) {
      return (c < c2) ? -1 : 1;
    }
    ++p;
    ++p2;
  }

  // One string ran out: the common prefix matched, so the lengths decide.
  // size() is unsigned, hence the explicit comparison instead of subtraction.
  if (s.size() == s2.size()) {
    return 0;
  }
  return (s.size() < s2.size()) ? -1 : 1;
}
41 
42 INLINE int
43 toupper_uh(int ch) {
44  return (ch == '_') ? '-' : toupper(ch);
45 }
46 
47 
48 int
49 cmp_nocase_uh(const string &s, const string &s2) {
50  string::const_iterator p = s.begin();
51  string::const_iterator p2 = s2.begin();
52 
53  while (p != s.end() && p2 != s2.end()) {
54  if (toupper_uh(*p) != toupper_uh(*p2)) {
55  return (toupper_uh(*p) < toupper_uh(*p2)) ? -1 : 1;
56  }
57  ++p;
58  ++p2;
59  }
60 
61  return (s2.size() == s.size()) ? 0 :
62  (s.size() < s2.size()) ? -1 : 1; // size is unsigned
63 }
64 
65 
66 
/**
 * Returns a copy of the input string with all uppercase letters converted to
 * lowercase, as decided by tolower().  Characters that tolower() does not
 * map are copied through unchanged.
 */
std::string
downcase(const std::string &s) {
  std::string result;
  result.reserve(s.size());
  for (const char ch : s) {
    // tolower() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative, so convert explicitly.
    result += static_cast<char>(tolower(static_cast<unsigned char>(ch)));
  }
  return result;
}
80 
/**
 * Returns a copy of the input string with all lowercase letters converted to
 * uppercase, as decided by toupper().  Characters that toupper() does not
 * map are copied through unchanged.
 */
std::string
upcase(const std::string &s) {
  std::string result;
  result.reserve(s.size());
  for (const char ch : s) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative, so convert explicitly.
    result += static_cast<char>(toupper(static_cast<unsigned char>(ch)));
  }
  return result;
}
94 
95 
96 /**
97  * Divides the string into a number of words according to whitespace. The
98  * words vector should be cleared by the user before calling; otherwise, the
99  * list of words in the string will be appended to the end of whatever was
100  * there before.
101  *
102  * The return value is the number of words extracted.
103  */
104 int
105 extract_words(const string &str, vector_string &words) {
106  int num_words = 0;
107 
108  size_t pos = 0;
109  while (pos < str.length() && isspace((unsigned int)str[pos])) {
110  pos++;
111  }
112  while (pos < str.length()) {
113  size_t word_start = pos;
114  while (pos < str.length() && !isspace((unsigned int)str[pos])) {
115  pos++;
116  }
117  words.push_back(str.substr(word_start, pos - word_start));
118  num_words++;
119 
120  while (pos < str.length() && isspace((unsigned int)str[pos])) {
121  pos++;
122  }
123  }
124 
125  return num_words;
126 }
127 
128 /**
129  * Divides the string into a number of words according to whitespace. The
130  * words vector should be cleared by the user before calling; otherwise, the
131  * list of words in the string will be appended to the end of whatever was
132  * there before.
133  *
134  * The return value is the number of words extracted.
135  */
136 int
137 extract_words(const wstring &str, pvector<wstring> &words) {
138  int num_words = 0;
139 
140  size_t pos = 0;
141  while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
142  pos++;
143  }
144  while (pos < str.length()) {
145  size_t word_start = pos;
146  while (pos < str.length() && !TextEncoder::unicode_isspace(str[pos])) {
147  pos++;
148  }
149  words.push_back(str.substr(word_start, pos - word_start));
150  num_words++;
151 
152  while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
153  pos++;
154  }
155  }
156 
157  return num_words;
158 }
159 
160 /**
161  * Chops the source string up into pieces delimited by any of the characters
162  * specified in delimiters. Repeated delimiter characters represent zero-
163  * length tokens.
164  *
165  * It is the user's responsibility to ensure the output vector is cleared
166  * before calling this function; the results will simply be appended to the
167  * end of the vector.
168  */
169 void
170 tokenize(const string &str, vector_string &words, const string &delimiters,
171  bool discard_repeated_delimiters) {
172  size_t p = 0;
173  while (p < str.length()) {
174  size_t q = str.find_first_of(delimiters, p);
175  if (q == string::npos) {
176  if (q - p || !discard_repeated_delimiters){
177  words.push_back(str.substr(p));
178  }
179  return;
180  }
181  if (q - p || !discard_repeated_delimiters){
182  words.push_back(str.substr(p, q - p));
183  }
184  p = q + 1;
185  }
186  words.push_back(string());
187 }
188 
189 /**
190  * Chops the source string up into pieces delimited by any of the characters
191  * specified in delimiters. Repeated delimiter characters represent zero-
192  * length tokens.
193  *
194  * It is the user's responsibility to ensure the output vector is cleared
195  * before calling this function; the results will simply be appended to the
196  * end of the vector.
197  */
198 void
199 tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters,
200  bool discard_repeated_delimiters) {
201  size_t p = 0;
202  while (p < str.length()) {
203  size_t q = str.find_first_of(delimiters, p);
204  if (q == string::npos) {
205  if (q - p || !discard_repeated_delimiters){
206  words.push_back(str.substr(p));
207  }
208  return;
209  }
210  if (q - p || !discard_repeated_delimiters){
211  words.push_back(str.substr(p, q - p));
212  }
213  p = q + 1;
214  }
215  words.push_back(wstring());
216 }
217 
/**
 * Returns a new string representing the contents of the given string with the
 * leading whitespace, as classified by isspace(), removed.  A string that
 * consists only of whitespace yields the empty string.
 */
std::string
trim_left(const std::string &str) {
  size_t begin = 0;
  // isspace() is only defined for values representable as unsigned char (or
  // EOF).  Casting a negative char to unsigned int would merely sign-extend
  // it, so go through unsigned char instead.
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    ++begin;
  }

  return str.substr(begin);
}
231 
232 /**
233  * Returns a new string representing the contents of the given string with the
234  * leading whitespace removed.
235  */
236 wstring
237 trim_left(const wstring &str) {
238  size_t begin = 0;
239  while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
240  begin++;
241  }
242 
243  return str.substr(begin);
244 }
245 
/**
 * Returns a new string representing the contents of the given string with the
 * trailing whitespace, as classified by isspace(), removed.  A string that
 * consists only of whitespace yields the empty string.
 */
std::string
trim_right(const std::string &str) {
  size_t end = str.size();
  // isspace() is only defined for values representable as unsigned char (or
  // EOF).  Casting a negative char to unsigned int would merely sign-extend
  // it, so go through unsigned char instead.
  while (end > 0 && isspace(static_cast<unsigned char>(str[end - 1]))) {
    --end;
  }

  return str.substr(0, end);
}
260 
261 /**
262  * Returns a new string representing the contents of the given string with the
263  * trailing whitespace removed.
264  */
265 wstring
266 trim_right(const wstring &str) {
267  size_t begin = 0;
268  size_t end = str.size();
269  while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
270  end--;
271  }
272 
273  return str.substr(begin, end - begin);
274 }
275 
/**
 * Returns a new string representing the contents of the given string with
 * both leading and trailing whitespace, as classified by isspace(), removed.
 * A string that consists only of whitespace yields the empty string.
 */
std::string
trim(const std::string &str) {
  // isspace() is only defined for values representable as unsigned char (or
  // EOF).  Casting a negative char to unsigned int would merely sign-extend
  // it, so both loops go through unsigned char instead.
  size_t begin = 0;
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    ++begin;
  }

  // Never back up past begin, so an all-whitespace string ends up empty.
  size_t end = str.size();
  while (end > begin && isspace(static_cast<unsigned char>(str[end - 1]))) {
    --end;
  }

  return str.substr(begin, end - begin);
}
294 
295 /**
296  * Returns a new string representing the contents of the given string with
297  * both leading and trailing whitespace removed.
298  */
299 wstring
300 trim(const wstring &str) {
301  size_t begin = 0;
302  while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
303  begin++;
304  }
305 
306  size_t end = str.size();
307  while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
308  end--;
309  }
310 
311  return str.substr(begin, end - begin);
312 }
313 
/**
 * A string-interface wrapper around the C library strtol().  This parses the
 * ASCII representation of a base-10 integer, and then sets tail to everything
 * that follows the first valid integer read.  If, on exit, str == tail, there
 * was no valid integer in the source string; if !tail.empty(), there was
 * garbage after the integer.
 *
 * The value produced by strtol() is a long and is converted to int without a
 * range check, so input outside the range of int does not yield a meaningful
 * result.
 *
 * It is legal if str and tail refer to the same string.
 */
int
string_to_int(const std::string &str, std::string &tail) {
  const char *nptr = str.c_str();
  char *endptr = nullptr;
  const int result = static_cast<int>(strtol(nptr, &endptr, 10));
  // endptr points into str's own buffer; assign it last, since tail may be
  // the very same string object as str.
  tail = endptr;
  return result;
}

/**
 * Another flavor of string_to_int(), this one returns true if the string is a
 * perfectly valid integer (and sets result to that value), or false
 * otherwise.  An empty string is not a valid integer.  result is assigned in
 * either case.
 */
bool
string_to_int(const std::string &str, int &result) {
  std::string tail;
  result = string_to_int(str, tail);
  // tail.empty() alone would also accept an empty input string, for which
  // str == tail and therefore no integer was read at all.
  return !str.empty() && tail.empty();
}
343 
344 /**
345  * A string-interface wrapper around the C library strtol(). This parses the
346  * ASCII representation of an floating-point number, and then sets tail to
347  * everything that follows the first valid integer read. If, on exit, str ==
348  * tail, there was no valid integer in the source string; if !tail.empty(),
349  * there was garbage after the number.
350  *
351  * It is legal if str and tail refer to the same string.
352  */
353 double
354 string_to_double(const string &str, string &tail) {
355  const char *nptr = str.c_str();
356  char *endptr;
357  double result = pstrtod(nptr, &endptr);
358  tail = endptr;
359  return result;
360 }
361 
362 /**
363  * Another flavor of string_to_double(), this one returns true if the string
364  * is a perfectly valid number (and sets result to that value), or false
365  * otherwise.
366  */
367 bool
368 string_to_double(const string &str, double &result) {
369  string tail;
370  result = string_to_double(str, tail);
371  return tail.empty();
372 }
373 
374 /**
375  *
376  */
377 bool
378 string_to_float(const string &str, float &result) {
379  string tail;
380  result = (float)string_to_double(str, tail);
381  return tail.empty();
382 }
383 
384 /**
385  *
386  */
387 bool
388 string_to_stdfloat(const string &str, PN_stdfloat &result) {
389  string tail;
390  result = (PN_stdfloat)string_to_double(str, tail);
391  return tail.empty();
392 }
pstrtod.h
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
pvector
This is our own Panda specialization on the default STL vector.
Definition: pvector.h:42
tokenize
void tokenize(const string &str, vector_string &words, const string &delimiters, bool discard_repeated_delimiters)
Chops the source string up into pieces delimited by any of the characters specified in delimiters.
Definition: string_utils.cxx:170
string_utils.h
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
TextEncoder::unicode_isspace
static bool unicode_isspace(char32_t character)
Returns true if the indicated character is a whitespace character, false otherwise.
Definition: textEncoder.I:342
pstrtod
double pstrtod(const char *nptr, char **endptr)
This function re-implements strtod, to avoid the problems that occur when the LC_NUMERIC locale gets ...
Definition: pstrtod.cxx:31
downcase
string downcase(const string &s)
Returns the input string with all uppercase letters converted to lowercase.
Definition: string_utils.cxx:71
upcase
string upcase(const string &s)
Returns the input string with all lowercase letters converted to uppercase.
Definition: string_utils.cxx:85
trim_left
string trim_left(const string &str)
Returns a new string representing the contents of the given string with the leading whitespace remove...
Definition: string_utils.cxx:223
trim_right
string trim_right(const string &str)
Returns a new string representing the contents of the given string with the trailing whitespace remov...
Definition: string_utils.cxx:251
string_to_int
int string_to_int(const string &str, string &tail)
A string-interface wrapper around the C library strtol().
Definition: string_utils.cxx:324
string_to_double
double string_to_double(const string &str, string &tail)
A string-interface wrapper around pstrtod(), the replacement for the C library strtod().
Definition: string_utils.cxx:354
extract_words
int extract_words(const string &str, vector_string &words)
Divides the string into a number of words according to whitespace.
Definition: string_utils.cxx:105
trim
string trim(const string &str)
Returns a new string representing the contents of the given string with both leading and trailing whi...
Definition: string_utils.cxx:281
textEncoder.h
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.