Panda3D
globPattern.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file globPattern.cxx
10  * @author drose
11  * @date 2000-05-30
12  */
13 
14 #include "globPattern.h"
15 #include "string_utils.h"
16 #include <ctype.h>
17 
18 using std::string;
19 
20 /**
21  * Returns true if the pattern includes any special globbing characters, or
22  * false if it is just a literal string.
23  */
24 bool GlobPattern::
26  string::const_iterator pi;
27  pi = _pattern.begin();
28  while (pi != _pattern.end()) {
29  switch (*pi) {
30  case '*':
31  case '?':
32  case '[':
33  return true;
34 
35  case '\\':
36  ++pi;
37  if (pi == _pattern.end()) {
38  return false;
39  }
40  }
41  ++pi;
42  }
43  return false;
44 }
45 
46 /**
47  * Returns the initial part of the pattern before the first glob character.
48  * Since many glob patterns begin with a sequence of static characters and end
49  * with one or more glob characters, this can be used to optimized searches
50  * through sorted indices.
51  */
52 string GlobPattern::
54  string prefix;
55 
56  size_t p = 0; // current point
57  size_t q = 0; // starting point
58  while (p < _pattern.size()) {
59  switch (_pattern[p]) {
60  case '*':
61  case '?':
62  case '[':
63  return prefix + _pattern.substr(q, p - q);
64 
65  case '\\':
66  // Skip over the backslash.
67  prefix += _pattern.substr(q, p - q);
68  ++p;
69  q = p;
70  }
71  ++p;
72  }
73  return prefix += _pattern.substr(q, p - q);
74 }
75 
76 /**
77  * Treats the GlobPattern as a filename pattern, and returns a list of any
78  * actual files that match the pattern. This is the behavior of the standard
79  * Posix glob() function. Any part of the filename may contain glob
80  * characters, including intermediate directory names.
81  *
82  * If cwd is specified, it is the directory that relative filenames are taken
83  * to be relative to; otherwise, the actual current working directory is
84  * assumed.
85  *
86  * The return value is the number of files matched, which are added to the
87  * results vector.
88  */
89 int GlobPattern::
90 match_files(vector_string &results, const Filename &cwd) const {
91  string prefix, pattern, suffix;
92 
93  string source = _pattern;
94  if (!source.empty() && source[0] == '/') {
95  // If the first character is a slash, that becomes the prefix.
96  prefix = "/";
97  source = source.substr(1);
98  }
99 
100  size_t slash = source.find('/');
101  if (slash == string::npos) {
102  pattern = source;
103  } else {
104  pattern = source.substr(0, slash);
105  suffix = source.substr(slash);
106  }
107 
108  GlobPattern glob(pattern);
109  glob.set_case_sensitive(_case_sensitive);
110  return glob.r_match_files(prefix, suffix, results, cwd);
111 }
112 
113 /**
114  * The recursive implementation of match_files().
115  */
116 int GlobPattern::
117 r_match_files(const Filename &prefix, const string &suffix,
118  vector_string &results, const Filename &cwd) {
119  string next_pattern, next_suffix;
120 
121  size_t slash = suffix.find('/');
122  if (slash == string::npos) {
123  next_pattern = suffix;
124  } else if (slash + 1 == suffix.size()) {
125  // If the slash is at the end, we need to keep it, since it indicates that
126  // we only want to match directories.
127  next_pattern = suffix.substr(0, slash);
128  next_suffix = "/";
129  } else {
130  next_pattern = suffix.substr(0, slash);
131  next_suffix = suffix.substr(slash + 1);
132  }
133 
134  if (_pattern == "**" && next_pattern == "**") {
135  // Collapse consecutive globstar patterns.
136  return r_match_files(prefix, next_suffix, results, cwd);
137  }
138 
139  Filename parent_dir;
140  if (prefix.is_local() && !cwd.empty()) {
141  parent_dir = Filename(cwd, prefix);
142  } else {
143  parent_dir = prefix;
144  }
145 
146  GlobPattern next_glob(next_pattern);
147  next_glob.set_case_sensitive(_case_sensitive);
148 
149  if (!has_glob_characters()) {
150  // If there are no special characters in the pattern, it's a literal
151  // match.
152  Filename fn(parent_dir, _pattern);
153  if (suffix.empty()) {
154  // Time to stop.
155  if (fn.exists()) {
156  results.push_back(Filename(prefix, _pattern));
157  return 1;
158  }
159  return 0;
160  } else if (fn.is_directory()) {
161  // If the pattern ends with a slash, match a directory only.
162  if (suffix == "/") {
163  results.push_back(Filename(prefix, _pattern + "/"));
164  return 1;
165  } else {
166  return next_glob.r_match_files(Filename(prefix, _pattern),
167  next_suffix, results, cwd);
168  }
169  }
170  }
171 
172  // If there *are* special glob characters, we must attempt to match the
173  // pattern against the files in this directory.
174 
175  vector_string dir_files;
176  if (!parent_dir.scan_directory(dir_files)) {
177  // Not a directory, or unable to read directory; stop here.
178  return 0;
179  }
180 
181  // Now go through each file in the directory looking for one that matches
182  // the pattern.
183  int num_matched = 0;
184 
185  // A globstar pattern matches zero or more directories.
186  if (_pattern == "**") {
187  // Try to match this directory (as if the globstar wasn't there)
188  if (suffix.empty()) {
189  // This is a directory. Add it.
190  results.push_back(Filename(prefix));
191  num_matched++;
192  } else if (suffix == "/") {
193  // Keep the trailing slash, but be sure not to duplicate it.
194  results.push_back(Filename(prefix, ""));
195  num_matched++;
196  } else {
197  num_matched += next_glob.r_match_files(prefix, next_suffix, results, cwd);
198  }
199  next_suffix = suffix;
200  next_glob = *this;
201  }
202 
203  for (const string &local_file : dir_files) {
204  if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
205  if (matches(local_file)) {
206  // We have a match; continue.
207  if (Filename(parent_dir, local_file).is_directory()) {
208  if (suffix.empty() && _pattern != "**") {
209  results.push_back(Filename(prefix, local_file));
210  num_matched++;
211  } else if (suffix == "/" && _pattern != "**") {
212  results.push_back(Filename(prefix, local_file + "/"));
213  num_matched++;
214  } else {
215  num_matched += next_glob.r_match_files(Filename(prefix, local_file),
216  next_suffix, results, cwd);
217  }
218  } else if (suffix.empty()) {
219  results.push_back(Filename(prefix, local_file));
220  num_matched++;
221  }
222  }
223  }
224  }
225 
226  return num_matched;
227 }
228 
229 /**
230  * Treats the GlobPattern as a filename pattern, and returns true if the given
231  * filename matches the pattern. Unlike matches(), this will not match slash
232  * characters for single asterisk characters, and it will ignore path
233  * components that only contain a dot.
234  */
235 bool GlobPattern::
236 matches_file(Filename candidate) const {
237  if (_pattern.empty()) {
238  // Special case.
239  return candidate.empty();
240  }
241 
242  // Either both must be absolute, or both must be relative.
243  if ((_pattern[0] != '/') != candidate.is_local()) {
244  return false;
245  }
246 
247  return r_matches_file(_pattern, candidate);
248 }
249 
250 /**
251  * The recursive implementation of matches_file().
252  */
253 bool GlobPattern::
254 r_matches_file(const string &pattern, const Filename &candidate) const {
255  // Split off the next bit of pattern.
256  std::string next_pattern;
257  GlobPattern glob;
258  glob.set_case_sensitive(get_case_sensitive());
259  glob.set_nomatch_chars(get_nomatch_chars());
260 
261  bool pattern_end;
262  size_t slash = pattern.find('/');
263  if (slash == string::npos) {
264  glob.set_pattern(pattern);
265  pattern_end = true;
266  } else {
267  glob.set_pattern(pattern.substr(0, slash));
268  next_pattern = pattern.substr(slash + 1);
269  pattern_end = false;
270 
271  if (slash == 0 || (slash == 1 && pattern[0] == '.')) {
272  // Ignore // and /./ in patterns
273  return r_matches_file(next_pattern, candidate);
274  }
275  }
276 
277  // Also split off the next component in the candidate filename.
278  std::string part;
279  Filename next_candidate;
280 
281  bool candidate_end;
282  size_t fn_slash = ((const std::string &)candidate).find('/');
283  if (fn_slash == string::npos) {
284  part = candidate;
285  candidate_end = true;
286  } else {
287  part = candidate.substr(0, fn_slash);
288  next_candidate = candidate.substr(fn_slash + 1);
289  candidate_end = false;
290 
291  // Ignore // and /./ in filenames.
292  if (fn_slash == 0 || part == ".") {
293  return r_matches_file(pattern, next_candidate);
294  }
295  }
296 
297  // Now check if the current part matches the current pattern.
298  bool part_matches;
299  if (glob.get_pattern() == "**") {
300  // This matches any number of parts.
301  if (pattern_end) {
302  // We might as well stop checking here; it matches whatever might come.
303  return true;
304  }
305  // We branch out to three options: either we match nothing, we match this
306  // part only, or we match this part and maybe more.
307  return r_matches_file(next_pattern, candidate)
308  || (!candidate_end && r_matches_file(next_pattern, next_candidate))
309  || (!candidate_end && r_matches_file(pattern, next_candidate));
310  }
311  else if (glob.get_pattern() == "*" && _nomatch_chars.empty()) {
312  // Matches any part (faster version of below)
313  part_matches = true;
314  }
315  else if ((glob.get_pattern() == "." && part.empty())
316  || (glob.get_pattern().empty() && part == ".")) {
317  // So that /path/. matches /path/, and vice versa.
318  part_matches = true;
319  }
320  else if (glob.has_glob_characters()) {
321  part_matches = glob.matches(part);
322  }
323  else if (get_case_sensitive()) {
324  part_matches = (part == glob.get_pattern());
325  }
326  else {
327  part_matches = (cmp_nocase(part, glob.get_pattern()) == 0);
328  }
329 
330  if (!part_matches) {
331  // It doesn't match, so we end our search here.
332  return false;
333  }
334 
335  if (candidate_end && pattern_end) {
336  // We've reached the end of both candidate and pattern, so it matches.
337  return true;
338  }
339 
340  if (pattern_end != candidate_end) {
341  // One of them has ended, but the other hasn't, so it's not a match.
342  return false;
343  }
344 
345  // It matches; move on to the next part.
346  return r_matches_file(next_pattern, next_candidate);
347 }
348 
349 /**
350  * The recursive implementation of matches(). This returns true if the
351  * pattern substring [pi, pend) matches the candidate substring [ci, cend),
352  * false otherwise.
353  */
354 bool GlobPattern::
355 matches_substr(string::const_iterator pi, string::const_iterator pend,
356  string::const_iterator ci, string::const_iterator cend) const {
357  // If we run out of pattern or candidate string, it's a match only if they
358  // both ran out at the same time.
359  if (pi == pend || ci == cend) {
360  // A special exception: we allow ci to reach the end before pi, only if pi
361  // is one character before the end and that last character is '*'.
362  if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') {
363  return true;
364  }
365  return (pi == pend && ci == cend);
366  }
367 
368  switch (*pi) {
369 
370  case '*':
371  // A '*' in the pattern string means to match any sequence of zero or more
372  // characters in the candidate string. This means we have to recurse
373  // twice: either consume one character of the candidate string and
374  // continue to try matching the *, or stop trying to match the * here.
375  if (_nomatch_chars.find(*ci) == string::npos) {
376  return
377  matches_substr(pi, pend, ci + 1, cend) ||
378  matches_substr(pi + 1, pend, ci, cend);
379  } else {
380  // On the other hand, if this is one of the nomatch chars, we can only
381  // stop here.
382  return matches_substr(pi + 1, pend, ci, cend);
383  }
384 
385  case '?':
386  // A '?' in the pattern string means to match exactly one character in the
387  // candidate string. That's easy.
388  return matches_substr(pi + 1, pend, ci + 1, cend);
389 
390  case '[':
391  // An open square bracket begins a set.
392  ++pi;
393  if ((*pi) == '!') {
394  ++pi;
395  if (matches_set(pi, pend, *ci)) {
396  return false;
397  }
398  } else {
399  if (!matches_set(pi, pend, *ci)) {
400  return false;
401  }
402  }
403  if (pi == pend) {
404  // Oops, there wasn't a closing square bracket.
405  return false;
406  }
407  return matches_substr(pi + 1, pend, ci + 1, cend);
408 
409  case '\\':
410  // A backslash escapes the next special character.
411  ++pi;
412  if (pi == pend) {
413  return false;
414  }
415  // fall through.
416 
417  default:
418  // Anything else means to match exactly that.
419  if (_case_sensitive) {
420  if ((*pi) != (*ci)) {
421  return false;
422  }
423  } else {
424  if (tolower(*pi) != tolower(*ci)) {
425  return false;
426  }
427  }
428  return matches_substr(pi + 1, pend, ci + 1, cend);
429  }
430 }
431 
432 
433 /**
434  * Called when an unescaped open square bracked is scanned, this is called
435  * with pi positioned after the opening square bracket, scans the set
436  * sequence, leaving pi positioned on the closing square bracket, and returns
437  * true if the indicated character matches the set of characters indicated,
438  * false otherwise.
439  */
440 bool GlobPattern::
441 matches_set(string::const_iterator &pi, string::const_iterator pend,
442  char ch) const {
443  bool matched = false;
444 
445  while (pi != pend && (*pi) != ']') {
446  if ((*pi) == '\\') {
447  // Backslash escapes the next character.
448  ++pi;
449  if (pi == pend) {
450  return false;
451  }
452  }
453 
454  if (ch == (*pi)) {
455  matched = true;
456  }
457 
458  // Maybe it's an a-z style range?
459  char start = (*pi);
460  ++pi;
461  if (pi != pend && (*pi) == '-') {
462  ++pi;
463  if (pi != pend && (*pi) != ']') {
464  // Yes, we have a range: start-end.
465 
466  if ((*pi) == '\\') {
467  // Backslash escapes.
468  ++pi;
469  if (pi == pend) {
470  return false;
471  }
472  }
473 
474  char end = (*pi);
475  ++pi;
476 
477  if ((ch >= start && ch <= end) ||
478  (!_case_sensitive &&
479  ((tolower(ch) >= start && tolower(ch) <= end) ||
480  (toupper(ch) >= start && toupper(ch) <= end)))) {
481  matched = true;
482  }
483  } else {
484  // This was a - at the end of the string.
485  if (ch == '-') {
486  matched = true;
487  }
488  }
489  }
490  }
491 
492  return matched;
493 }
set_pattern
Changes the pattern string that the GlobPattern object matches.
Definition: globPattern.h:44
set_case_sensitive
Sets whether the match is case sensitive (true) or case insensitive (false).
Definition: globPattern.h:48
set_nomatch_chars
Specifies a set of characters that are not matched by * or ?.
Definition: globPattern.h:52
bool has_glob_characters() const
Returns true if the pattern includes any special globbing characters, or false if it is just a litera...
Definition: globPattern.cxx:25
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
bool matches(const std::string &candidate) const
Returns true if the candidate string matches the pattern, false otherwise.
Definition: globPattern.I:122
get_case_sensitive
Returns whether the match is case sensitive (true) or case insensitive (false).
Definition: globPattern.h:48
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:39
std::string get_const_prefix() const
Returns the initial part of the pattern before the first glob character.
Definition: globPattern.cxx:53
bool matches_file(Filename candidate) const
Treats the GlobPattern as a filename pattern, and returns true if the given filename matches the patt...
int match_files(vector_string &results, const Filename &cwd=Filename()) const
Treats the GlobPattern as a filename pattern, and returns a list of any actual files that match the p...
Definition: globPattern.cxx:90
bool is_local() const
Returns true if the filename is local, e.g.
Definition: filename.I:549
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
get_pattern
Returns the pattern string that the GlobPattern object matches.
Definition: globPattern.h:44
bool is_directory() const
Returns true if the filename exists and is a directory name, false otherwise.
Definition: filename.cxx:1359
bool scan_directory(vector_string &contents) const
Attempts to open the named filename as if it were a directory and looks for the non-hidden files with...
Definition: filename.cxx:1718
This class can be used to test for string matches against standard Unix- shell filename globbing conv...
Definition: globPattern.h:32