Panda3D
globPattern.cxx
1 // Filename: globPattern.cxx
2 // Created by: drose (30May00)
3 //
4 ////////////////////////////////////////////////////////////////////
5 //
6 // PANDA 3D SOFTWARE
7 // Copyright (c) Carnegie Mellon University. All rights reserved.
8 //
9 // All use of this software is subject to the terms of the revised BSD
10 // license. You should have received a copy of this license along
11 // with this source code in a file named "LICENSE."
12 //
13 ////////////////////////////////////////////////////////////////////
14 
15 #include "globPattern.h"
16 #include <ctype.h>
17 
18 ////////////////////////////////////////////////////////////////////
19 // Function: GlobPattern::has_glob_characters
20 // Access: Published
21 // Description: Returns true if the pattern includes any special
22 // globbing characters, or false if it is just a literal
23 // string.
24 ////////////////////////////////////////////////////////////////////
25 bool GlobPattern::
27  string::const_iterator pi;
28  pi = _pattern.begin();
29  while (pi != _pattern.end()) {
30  switch (*pi) {
31  case '*':
32  case '?':
33  case '[':
34  return true;
35 
36  case '\\':
37  ++pi;
38  if (pi == _pattern.end()) {
39  return false;
40  }
41  }
42  ++pi;
43  }
44  return false;
45 }
46 
47 ////////////////////////////////////////////////////////////////////
48 // Function: GlobPattern::get_const_prefix
49 // Access: Published
50 // Description: Returns the initial part of the pattern before the
51 // first glob character. Since many glob patterns begin
52 // with a sequence of static characters and end with one
53 // or more glob characters, this can be used to
54 // optimized searches through sorted indices.
55 ////////////////////////////////////////////////////////////////////
56 string GlobPattern::
58  string prefix;
59 
60  size_t p = 0; // current point
61  size_t q = 0; // starting point
62  while (p < _pattern.size()) {
63  switch (_pattern[p]) {
64  case '*':
65  case '?':
66  case '[':
67  return prefix + _pattern.substr(q, p - q);
68 
69  case '\\':
70  // Skip over the backslash.
71  prefix += _pattern.substr(q, p - q);
72  ++p;
73  q = p;
74  }
75  ++p;
76  }
77  return prefix += _pattern.substr(q, p - q);
78 }
79 
80 ////////////////////////////////////////////////////////////////////
81 // Function: GlobPattern::match_files
82 // Access: Published
83 // Description: Treats the GlobPattern as a filename pattern, and
84 // returns a list of any actual files that match the
85 // pattern. This is the behavior of the standard Posix
86 // glob() function. Any part of the filename may
87 // contain glob characters, including intermediate
88 // directory names.
89 //
90 // If cwd is specified, it is the directory that
91 // relative filenames are taken to be relative to;
92 // otherwise, the actual current working directory is
93 // assumed.
94 //
95 // The return value is the number of files matched,
96 // which are added to the results vector.
97 ////////////////////////////////////////////////////////////////////
98 int GlobPattern::
99 match_files(vector_string &results, const Filename &cwd) const {
100  string prefix, pattern, suffix;
101 
102  string source = _pattern;
103  if (!source.empty() && source[0] == '/') {
104  // If the first character is a slash, that becomes the prefix.
105  prefix = "/";
106  source = source.substr(1);
107  }
108 
109  size_t slash = source.find('/');
110  if (slash == string::npos) {
111  pattern = source;
112  } else {
113  pattern = source.substr(0, slash);
114  suffix = source.substr(slash + 1);
115  }
116 
117  GlobPattern glob(pattern);
118  glob.set_case_sensitive(_case_sensitive);
119  return glob.r_match_files(prefix, suffix, results, cwd);
120 }
121 
122 ////////////////////////////////////////////////////////////////////
123 // Function: GlobPattern::r_match_files
124 // Access: Private
125 // Description: The recursive implementation of match_files().
126 ////////////////////////////////////////////////////////////////////
127 int GlobPattern::
128 r_match_files(const Filename &prefix, const string &suffix,
129  vector_string &results, const Filename &cwd) {
130  string next_pattern, next_suffix;
131 
132  size_t slash = suffix.find('/');
133  if (slash == string::npos) {
134  next_pattern = suffix;
135  } else {
136  next_pattern = suffix.substr(0, slash);
137  next_suffix = suffix.substr(slash + 1);
138  }
139 
140  Filename parent_dir;
141  if (prefix.is_local() && !cwd.empty()) {
142  parent_dir = Filename(cwd, prefix);
143  } else {
144  parent_dir = prefix;
145  }
146 
147  GlobPattern next_glob(next_pattern);
148  next_glob.set_case_sensitive(_case_sensitive);
149 
150  if (!has_glob_characters()) {
151  // If there are no special characters in the pattern, it's a
152  // literal match.
153  if (suffix.empty()) {
154  // Time to stop.
155  Filename single_filename(parent_dir, _pattern);
156  if (single_filename.exists()) {
157  results.push_back(Filename(prefix, _pattern));
158  return 1;
159  }
160  return 0;
161  }
162 
163  return next_glob.r_match_files(Filename(prefix, _pattern),
164  next_suffix, results, cwd);
165 
166  }
167 
168  // If there *are* special glob characters, we must attempt to
169  // match the pattern against the files in this directory.
170 
171  vector_string dir_files;
172  if (!parent_dir.scan_directory(dir_files)) {
173  // Not a directory, or unable to read directory; stop here.
174  return 0;
175  }
176 
177  // Now go through each file in the directory looking for one that
178  // matches the pattern.
179  int num_matched = 0;
180 
181  vector_string::const_iterator fi;
182  for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) {
183  const string &local_file = (*fi);
184  if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
185  if (matches(local_file)) {
186  // We have a match; continue.
187  if (suffix.empty()) {
188  results.push_back(Filename(prefix, local_file));
189  num_matched++;
190  } else {
191  num_matched += next_glob.r_match_files(Filename(prefix, local_file),
192  next_suffix, results, cwd);
193  }
194  }
195  }
196  }
197 
198  return num_matched;
199 }
200 
201 ////////////////////////////////////////////////////////////////////
202 // Function: GlobPattern::matches_substr
203 // Access: Private
204 // Description: The recursive implementation of matches(). This
205 // returns true if the pattern substring [pi, pend)
206 // matches the candidate substring [ci, cend), false
207 // otherwise.
208 ////////////////////////////////////////////////////////////////////
209 bool GlobPattern::
210 matches_substr(string::const_iterator pi, string::const_iterator pend,
211  string::const_iterator ci, string::const_iterator cend) const {
212  // If we run out of pattern or candidate string, it's a match only
213  // if they both ran out at the same time.
214  if (pi == pend || ci == cend) {
215  // A special exception: we allow ci to reach the end before pi,
216  // only if pi is one character before the end and that last
217  // character is '*'.
218  if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') {
219  return true;
220  }
221  return (pi == pend && ci == cend);
222  }
223 
224  switch (*pi) {
225 
226  case '*':
227  // A '*' in the pattern string means to match any sequence of zero
228  // or more characters in the candidate string. This means we have
229  // to recurse twice: either consume one character of the candidate
230  // string and continue to try matching the *, or stop trying to
231  // match the * here.
232  if (_nomatch_chars.find(*ci) == string::npos) {
233  return
234  matches_substr(pi, pend, ci + 1, cend) ||
235  matches_substr(pi + 1, pend, ci, cend);
236  } else {
237  // On the other hand, if this is one of the nomatch chars, we
238  // can only stop here.
239  return matches_substr(pi + 1, pend, ci, cend);
240  }
241 
242  case '?':
243  // A '?' in the pattern string means to match exactly one
244  // character in the candidate string. That's easy.
245  return matches_substr(pi + 1, pend, ci + 1, cend);
246 
247  case '[':
248  // An open square bracket begins a set.
249  ++pi;
250  if ((*pi) == '!') {
251  ++pi;
252  if (matches_set(pi, pend, *ci)) {
253  return false;
254  }
255  } else {
256  if (!matches_set(pi, pend, *ci)) {
257  return false;
258  }
259  }
260  if (pi == pend) {
261  // Oops, there wasn't a closing square bracket.
262  return false;
263  }
264  return matches_substr(pi + 1, pend, ci + 1, cend);
265 
266  case '\\':
267  // A backslash escapes the next special character.
268  ++pi;
269  if (pi == pend) {
270  return false;
271  }
272  // fall through.
273 
274  default:
275  // Anything else means to match exactly that.
276  if (_case_sensitive) {
277  if ((*pi) != (*ci)) {
278  return false;
279  }
280  } else {
281  if (tolower(*pi) != tolower(*ci)) {
282  return false;
283  }
284  }
285  return matches_substr(pi + 1, pend, ci + 1, cend);
286  }
287 }
288 
289 
290 ////////////////////////////////////////////////////////////////////
291 // Function: GlobPattern::matches_set
292 // Access: Private
293 // Description: Called when an unescaped open square bracked is
294 // scanned, this is called with pi positioned after the
295 // opening square bracket, scans the set sequence,
296 // leaving pi positioned on the closing square bracket,
297 // and returns true if the indicated character matches
298 // the set of characters indicated, false otherwise.
299 ////////////////////////////////////////////////////////////////////
300 bool GlobPattern::
301 matches_set(string::const_iterator &pi, string::const_iterator pend,
302  char ch) const {
303  bool matched = false;
304 
305  while (pi != pend && (*pi) != ']') {
306  if ((*pi) == '\\') {
307  // Backslash escapes the next character.
308  ++pi;
309  if (pi == pend) {
310  return false;
311  }
312  }
313 
314  if (ch == (*pi)) {
315  matched = true;
316  }
317 
318  // Maybe it's an a-z style range?
319  char start = (*pi);
320  ++pi;
321  if (pi != pend && (*pi) == '-') {
322  ++pi;
323  if (pi != pend && (*pi) != ']') {
324  // Yes, we have a range: start-end.
325 
326  if ((*pi) == '\\') {
327  // Backslash escapes.
328  ++pi;
329  if (pi == pend) {
330  return false;
331  }
332  }
333 
334  char end = (*pi);
335  ++pi;
336 
337  if ((ch >= start && ch <= end) ||
338  (!_case_sensitive &&
339  ((tolower(ch) >= start && tolower(ch) <= end) ||
340  (toupper(ch) >= start && toupper(ch) <= end)))) {
341  matched = true;
342  }
343  } else {
344  // This was a - at the end of the string.
345  if (ch == '-') {
346  matched = true;
347  }
348  }
349  }
350  }
351 
352  return matched;
353 }
354 
355 
356 
bool matches(const string &candidate) const
Returns true if the candidate string matches the pattern, false otherwise.
Definition: globPattern.I:157
bool has_glob_characters() const
Returns true if the pattern includes any special globbing characters, or false if it is just a litera...
Definition: globPattern.cxx:26
void set_case_sensitive(bool case_sensitive)
Sets whether the match is case sensitive (true) or case insensitive (false).
Definition: globPattern.I:112
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:44
string get_const_prefix() const
Returns the initial part of the pattern before the first glob character.
Definition: globPattern.cxx:57
int match_files(vector_string &results, const Filename &cwd=Filename()) const
Treats the GlobPattern as a filename pattern, and returns a list of any actual files that match the p...
Definition: globPattern.cxx:99
bool is_local() const
Returns true if the filename is local, e.g.
Definition: filename.I:664
bool scan_directory(vector_string &contents) const
Attempts to open the named filename as if it were a directory and looks for the non-hidden files with...
Definition: filename.cxx:1854
bool exists() const
Returns true if the filename exists on the disk, false otherwise.
Definition: filename.cxx:1356
This class can be used to test for string matches against standard Unix-shell filename globbing conve...
Definition: globPattern.h:37