Panda3D
 All Classes Functions Variables Enumerations
string_utils.cxx
1 // Filename: string_utils.cxx
2 // Created by: drose (18Jan99)
3 //
4 ////////////////////////////////////////////////////////////////////
5 //
6 // PANDA 3D SOFTWARE
7 // Copyright (c) Carnegie Mellon University. All rights reserved.
8 //
9 // All use of this software is subject to the terms of the revised BSD
10 // license. You should have received a copy of this license along
11 // with this source code in a file named "LICENSE."
12 //
13 ////////////////////////////////////////////////////////////////////
14 
15 #include "string_utils.h"
16 #include "textEncoder.h"
17 #include "pstrtod.h"
18 
19 #include <ctype.h>
20 
// Case-insensitive string comparison, from Stroustrup's C++ third edition.
// Works like strcmp(): returns -1, 0, or 1 according to whether s sorts
// before, equal to, or after s2 once case is ignored.
//
// Each character is converted to unsigned char before it is handed to
// toupper().  Passing a negative plain char to the <ctype.h> functions is
// undefined behavior, and strcmp() itself orders bytes as unsigned char.
int
cmp_nocase(const std::string &s, const std::string &s2) {
  std::string::const_iterator p = s.begin();
  std::string::const_iterator p2 = s2.begin();

  while (p != s.end() && p2 != s2.end()) {
    const int c = toupper(static_cast<unsigned char>(*p));
    const int c2 = toupper(static_cast<unsigned char>(*p2));
    if (c != c2) {
      return (c < c2) ? -1 : 1;
    }
    ++p;
    ++p2;
  }

  // The common prefix matched; the shorter string (if any) sorts first.
  // The sizes are unsigned, so compare them rather than subtracting.
  if (s.size() == s2.size()) {
    return 0;
  }
  return (s.size() < s2.size()) ? -1 : 1;
}
39 
40 INLINE int
41 toupper_uh(int ch) {
42  return (ch == '_') ? '-' : toupper(ch);
43 }
44 
45 
46 int
47 cmp_nocase_uh(const string &s, const string &s2) {
48  string::const_iterator p = s.begin();
49  string::const_iterator p2 = s2.begin();
50 
51  while (p != s.end() && p2 != s2.end()) {
52  if (toupper_uh(*p) != toupper_uh(*p2)) {
53  return (toupper_uh(*p) < toupper_uh(*p2)) ? -1 : 1;
54  }
55  ++p;
56  ++p2;
57  }
58 
59  return (s2.size() == s.size()) ? 0 :
60  (s.size() < s2.size()) ? -1 : 1; // size is unsigned
61 }
62 
63 
64 
////////////////////////////////////////////////////////////////////
//     Function: downcase
//  Description: Returns a copy of the input string with all uppercase
//               letters converted to lowercase, as determined by
//               tolower().  All other characters are copied through
//               unchanged.
////////////////////////////////////////////////////////////////////
std::string
downcase(const std::string &s) {
  std::string result;
  result.reserve(s.size());
  for (std::string::const_iterator p = s.begin(); p != s.end(); ++p) {
    // tolower() requires a value representable as unsigned char; a plain
    // char may be negative for characters outside the ASCII range.
    result += static_cast<char>(tolower(static_cast<unsigned char>(*p)));
  }
  return result;
}
80 
////////////////////////////////////////////////////////////////////
//     Function: upcase
//  Description: Returns a copy of the input string with all lowercase
//               letters converted to uppercase, as determined by
//               toupper().  All other characters are copied through
//               unchanged.
////////////////////////////////////////////////////////////////////
std::string
upcase(const std::string &s) {
  std::string result;
  result.reserve(s.size());
  for (std::string::const_iterator p = s.begin(); p != s.end(); ++p) {
    // toupper() requires a value representable as unsigned char; a plain
    // char may be negative for characters outside the ASCII range.
    result += static_cast<char>(toupper(static_cast<unsigned char>(*p)));
  }
  return result;
}
96 
97 
98 ////////////////////////////////////////////////////////////////////
99 // Function: extract_words
100 // Description: Divides the string into a number of words according
101 // to whitespace. The words vector should be cleared by
102 // the user before calling; otherwise, the list of words
103 // in the string will be appended to the end of whatever
104 // was there before.
105 //
106 // The return value is the number of words extracted.
107 ////////////////////////////////////////////////////////////////////
108 int
109 extract_words(const string &str, vector_string &words) {
110  int num_words = 0;
111 
112  size_t pos = 0;
113  while (pos < str.length() && isspace((unsigned int)str[pos])) {
114  pos++;
115  }
116  while (pos < str.length()) {
117  size_t word_start = pos;
118  while (pos < str.length() && !isspace((unsigned int)str[pos])) {
119  pos++;
120  }
121  words.push_back(str.substr(word_start, pos - word_start));
122  num_words++;
123 
124  while (pos < str.length() && isspace((unsigned int)str[pos])) {
125  pos++;
126  }
127  }
128 
129  return num_words;
130 }
131 
132 ////////////////////////////////////////////////////////////////////
133 // Function: extract_words
134 // Description: Divides the string into a number of words according
135 // to whitespace. The words vector should be cleared by
136 // the user before calling; otherwise, the list of words
137 // in the string will be appended to the end of whatever
138 // was there before.
139 //
140 // The return value is the number of words extracted.
141 ////////////////////////////////////////////////////////////////////
142 int
143 extract_words(const wstring &str, pvector<wstring> &words) {
144  int num_words = 0;
145 
146  size_t pos = 0;
147  while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
148  pos++;
149  }
150  while (pos < str.length()) {
151  size_t word_start = pos;
152  while (pos < str.length() && !TextEncoder::unicode_isspace(str[pos])) {
153  pos++;
154  }
155  words.push_back(str.substr(word_start, pos - word_start));
156  num_words++;
157 
158  while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) {
159  pos++;
160  }
161  }
162 
163  return num_words;
164 }
165 
166 ////////////////////////////////////////////////////////////////////
167 // Function: tokenize
168 // Description: Chops the source string up into pieces delimited by
169 // any of the characters specified in delimiters.
170 // Repeated delimiter characters represent zero-length
171 // tokens.
172 //
173 // It is the user's responsibility to ensure the output
174 // vector is cleared before calling this function; the
175 // results will simply be appended to the end of the
176 // vector.
177 ////////////////////////////////////////////////////////////////////
178 void
179 tokenize(const string &str, vector_string &words, const string &delimiters,
180  bool discard_repeated_delimiters) {
181  size_t p = 0;
182  while (p < str.length()) {
183  size_t q = str.find_first_of(delimiters, p);
184  if (q == string::npos) {
185  if (q - p || !discard_repeated_delimiters){
186  words.push_back(str.substr(p));
187  }
188  return;
189  }
190  if (q - p || !discard_repeated_delimiters){
191  words.push_back(str.substr(p, q - p));
192  }
193  p = q + 1;
194  }
195  words.push_back(string());
196 }
197 
198 ////////////////////////////////////////////////////////////////////
199 // Function: tokenize
200 // Description: Chops the source string up into pieces delimited by
201 // any of the characters specified in delimiters.
202 // Repeated delimiter characters represent zero-length
203 // tokens.
204 //
205 // It is the user's responsibility to ensure the output
206 // vector is cleared before calling this function; the
207 // results will simply be appended to the end of the
208 // vector.
209 ////////////////////////////////////////////////////////////////////
210 void
211 tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters,
212  bool discard_repeated_delimiters) {
213  size_t p = 0;
214  while (p < str.length()) {
215  size_t q = str.find_first_of(delimiters, p);
216  if (q == string::npos) {
217  if (q - p || !discard_repeated_delimiters){
218  words.push_back(str.substr(p));
219  }
220  return;
221  }
222  if (q - p || !discard_repeated_delimiters){
223  words.push_back(str.substr(p, q - p));
224  }
225  p = q + 1;
226  }
227  words.push_back(wstring());
228 }
229 
////////////////////////////////////////////////////////////////////
//     Function: trim_left
//  Description: Returns a new string representing the contents of the
//               given string with the leading whitespace (as reported
//               by isspace()) removed.
////////////////////////////////////////////////////////////////////
std::string
trim_left(const std::string &str) {
  size_t begin = 0;
  // The character is converted to unsigned char first: isspace() is only
  // defined for values in that range (or EOF), and a plain char may be
  // negative.
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    ++begin;
  }

  return str.substr(begin);
}
244 
245 ////////////////////////////////////////////////////////////////////
246 // Function: trim_left
247 // Description: Returns a new string representing the contents of the
248 // given string with the leading whitespace removed.
249 ////////////////////////////////////////////////////////////////////
250 wstring
251 trim_left(const wstring &str) {
252  size_t begin = 0;
253  while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
254  begin++;
255  }
256 
257  return str.substr(begin);
258 }
259 
////////////////////////////////////////////////////////////////////
//     Function: trim_right
//  Description: Returns a new string representing the contents of the
//               given string with the trailing whitespace (as
//               reported by isspace()) removed.
////////////////////////////////////////////////////////////////////
std::string
trim_right(const std::string &str) {
  size_t end = str.size();
  // The character is converted to unsigned char first: isspace() is only
  // defined for values in that range (or EOF), and a plain char may be
  // negative.
  while (end > 0 && isspace(static_cast<unsigned char>(str[end - 1]))) {
    --end;
  }

  return str.substr(0, end);
}
275 
276 ////////////////////////////////////////////////////////////////////
277 // Function: trim_right
278 // Description: Returns a new string representing the contents of the
279 // given string with the trailing whitespace removed.
280 ////////////////////////////////////////////////////////////////////
281 wstring
282 trim_right(const wstring &str) {
283  size_t begin = 0;
284  size_t end = str.size();
285  while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
286  end--;
287  }
288 
289  return str.substr(begin, end - begin);
290 }
291 
////////////////////////////////////////////////////////////////////
//     Function: trim
//  Description: Returns a new string representing the contents of the
//               given string with both leading and trailing
//               whitespace (as reported by isspace()) removed.
////////////////////////////////////////////////////////////////////
std::string
trim(const std::string &str) {
  // In both loops the character is converted to unsigned char first:
  // isspace() is only defined for values in that range (or EOF), and a
  // plain char may be negative.
  size_t begin = 0;
  while (begin < str.size() &&
         isspace(static_cast<unsigned char>(str[begin]))) {
    ++begin;
  }

  // Never back up past begin, so an all-whitespace string yields "".
  size_t end = str.size();
  while (end > begin && isspace(static_cast<unsigned char>(str[end - 1]))) {
    --end;
  }

  return str.substr(begin, end - begin);
}
312 
313 ////////////////////////////////////////////////////////////////////
314 // Function: trim
315 // Description: Returns a new string representing the contents of the
316 // given string with both leading and trailing
317 // whitespace removed.
318 ////////////////////////////////////////////////////////////////////
319 wstring
320 trim(const wstring &str) {
321  size_t begin = 0;
322  while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) {
323  begin++;
324  }
325 
326  size_t end = str.size();
327  while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) {
328  end--;
329  }
330 
331  return str.substr(begin, end - begin);
332 }
333 
////////////////////////////////////////////////////////////////////
//     Function: string_to_int
//  Description: A string-interface wrapper around the C library
//               strtol(), always parsing in base 10.  The return
//               value is the integer read from the front of str, and
//               tail is set to everything that follows it.  If, on
//               exit, str == tail, there was no valid integer in the
//               source string; if !tail.empty(), there was garbage
//               after the integer.
//
//               It is legal if str and tail refer to the same string.
////////////////////////////////////////////////////////////////////
int
string_to_int(const std::string &str, std::string &tail) {
  const char *const start = str.c_str();
  char *stop;
  const long parsed = strtol(start, &stop, 10);

  // strtol() leaves stop pointing at the first character it did not
  // consume; everything from there on becomes the tail.
  tail = stop;
  return static_cast<int>(parsed);
}
354 
355 ////////////////////////////////////////////////////////////////////
356 // Function: string_to_int
357 // Description: Another flavor of string_to_int(), this one returns
358 // true if the string is a perfectly valid integer (and
359 // sets result to that value), or false otherwise.
360 ////////////////////////////////////////////////////////////////////
361 bool
362 string_to_int(const string &str, int &result) {
363  string tail;
364  result = string_to_int(str, tail);
365  return tail.empty();
366 }
367 
368 ////////////////////////////////////////////////////////////////////
369 // Function: string_to_double
370 // Description: A string-interface wrapper around the C library
371 // strtol(). This parses the ASCII representation of an
372 // floating-point number, and then sets tail to
373 // everything that follows the first valid integer read.
374 // If, on exit, str == tail, there was no valid integer
375 // in the source string; if !tail.empty(), there was
376 // garbage after the number.
377 //
378 // It is legal if str and tail refer to the same string.
379 ////////////////////////////////////////////////////////////////////
380 double
381 string_to_double(const string &str, string &tail) {
382  const char *nptr = str.c_str();
383  char *endptr;
384  double result = pstrtod(nptr, &endptr);
385  tail = endptr;
386  return result;
387 }
388 
389 ////////////////////////////////////////////////////////////////////
390 // Function: string_to_double
391 // Description: Another flavor of string_to_double(), this one
392 // returns true if the string is a perfectly valid
393 // number (and sets result to that value), or false
394 // otherwise.
395 ////////////////////////////////////////////////////////////////////
396 bool
397 string_to_double(const string &str, double &result) {
398  string tail;
399  result = string_to_double(str, tail);
400  return tail.empty();
401 }
402 
403 ////////////////////////////////////////////////////////////////////
404 // Function: string_to_float
405 // Description:
406 ////////////////////////////////////////////////////////////////////
407 bool
408 string_to_float(const string &str, float &result) {
409  string tail;
410  result = (float)string_to_double(str, tail);
411  return tail.empty();
412 }
413 
414 ////////////////////////////////////////////////////////////////////
415 // Function: string_to_stdfloat
416 // Description:
417 ////////////////////////////////////////////////////////////////////
418 bool
419 string_to_stdfloat(const string &str, PN_stdfloat &result) {
420  string tail;
421  result = (PN_stdfloat)string_to_double(str, tail);
422  return tail.empty();
423 }
static bool unicode_isspace(int character)
Returns true if the indicated character is a whitespace character, false otherwise.
Definition: textEncoder.I:395
This is our own Panda specialization on the default STL vector.
Definition: pvector.h:39
iterator begin()
Returns an iterator that may be used to traverse the elements of the matrix, STL-style.
Definition: lvecBase3.h:657