Panda3D
Loading...
Searching...
No Matches
globPattern.cxx
Go to the documentation of this file.
1/**
2 * PANDA 3D SOFTWARE
3 * Copyright (c) Carnegie Mellon University. All rights reserved.
4 *
5 * All use of this software is subject to the terms of the revised BSD
6 * license. You should have received a copy of this license along
7 * with this source code in a file named "LICENSE."
8 *
9 * @file globPattern.cxx
10 * @author drose
11 * @date 2000-05-30
12 */
13
14#include "globPattern.h"
15#include "string_utils.h"
16#include <ctype.h>
17
18using std::string;
19
20/**
21 * Returns true if the pattern includes any special globbing characters, or
22 * false if it is just a literal string.
23 */
25has_glob_characters() const {
26 string::const_iterator pi;
27 pi = _pattern.begin();
28 while (pi != _pattern.end()) {
29 switch (*pi) {
30 case '*':
31 case '?':
32 case '[':
33 return true;
34
35 case '\\':
36 ++pi;
37 if (pi == _pattern.end()) {
38 return false;
39 }
40 }
41 ++pi;
42 }
43 return false;
44}
45
46/**
47 * Returns the initial part of the pattern before the first glob character.
48 * Since many glob patterns begin with a sequence of static characters and end
49 * with one or more glob characters, this can be used to optimized searches
50 * through sorted indices.
51 */
53get_const_prefix() const {
54 string prefix;
55
56 size_t p = 0; // current point
57 size_t q = 0; // starting point
58 while (p < _pattern.size()) {
59 switch (_pattern[p]) {
60 case '*':
61 case '?':
62 case '[':
63 return prefix + _pattern.substr(q, p - q);
64
65 case '\\':
66 // Skip over the backslash.
67 prefix += _pattern.substr(q, p - q);
68 ++p;
69 q = p;
70 }
71 ++p;
72 }
73 return prefix += _pattern.substr(q, p - q);
74}
75
76/**
77 * Treats the GlobPattern as a filename pattern, and returns a list of any
78 * actual files that match the pattern. This is the behavior of the standard
79 * Posix glob() function. Any part of the filename may contain glob
80 * characters, including intermediate directory names.
81 *
82 * If cwd is specified, it is the directory that relative filenames are taken
83 * to be relative to; otherwise, the actual current working directory is
84 * assumed.
85 *
86 * The return value is the number of files matched, which are added to the
87 * results vector.
88 */
90match_files(vector_string &results, const Filename &cwd) const {
91 string prefix, pattern, suffix;
92
93 string source = _pattern;
94 if (!source.empty() && source[0] == '/') {
95 // If the first character is a slash, that becomes the prefix.
96 prefix = "/";
97 source = source.substr(1);
98 }
99
100 size_t slash = source.find('/');
101 if (slash == string::npos) {
102 pattern = source;
103 } else {
104 pattern = source.substr(0, slash);
105 suffix = source.substr(slash);
106 }
107
108 GlobPattern glob(pattern);
109 glob.set_case_sensitive(_case_sensitive);
110 return glob.r_match_files(prefix, suffix, results, cwd);
111}
112
113/**
114 * The recursive implementation of match_files().
115 */
116int GlobPattern::
117r_match_files(const Filename &prefix, const string &suffix,
118 vector_string &results, const Filename &cwd) {
119 string next_pattern, next_suffix;
120
121 size_t slash = suffix.find('/');
122 if (slash == string::npos) {
123 next_pattern = suffix;
124 } else if (slash + 1 == suffix.size()) {
125 // If the slash is at the end, we need to keep it, since it indicates that
126 // we only want to match directories.
127 next_pattern = suffix.substr(0, slash);
128 next_suffix = "/";
129 } else {
130 next_pattern = suffix.substr(0, slash);
131 next_suffix = suffix.substr(slash + 1);
132 }
133
134 if (_pattern == "**" && next_pattern == "**") {
135 // Collapse consecutive globstar patterns.
136 return r_match_files(prefix, next_suffix, results, cwd);
137 }
138
139 Filename parent_dir;
140 if (prefix.is_local() && !cwd.empty()) {
141 parent_dir = Filename(cwd, prefix);
142 } else {
143 parent_dir = prefix;
144 }
145
146 GlobPattern next_glob(next_pattern);
147 next_glob.set_case_sensitive(_case_sensitive);
148
149 if (!has_glob_characters()) {
150 // If there are no special characters in the pattern, it's a literal
151 // match.
152 Filename fn(parent_dir, _pattern);
153 if (suffix.empty()) {
154 // Time to stop.
155 if (fn.exists()) {
156 results.push_back(Filename(prefix, _pattern));
157 return 1;
158 }
159 return 0;
160 } else if (fn.is_directory()) {
161 // If the pattern ends with a slash, match a directory only.
162 if (suffix == "/") {
163 results.push_back(Filename(prefix, _pattern + "/"));
164 return 1;
165 } else {
166 return next_glob.r_match_files(Filename(prefix, _pattern),
167 next_suffix, results, cwd);
168 }
169 }
170 }
171
172 // If there *are* special glob characters, we must attempt to match the
173 // pattern against the files in this directory.
174
175 vector_string dir_files;
176 if (!parent_dir.scan_directory(dir_files)) {
177 // Not a directory, or unable to read directory; stop here.
178 return 0;
179 }
180
181 // Now go through each file in the directory looking for one that matches
182 // the pattern.
183 int num_matched = 0;
184
185 // A globstar pattern matches zero or more directories.
186 if (_pattern == "**") {
187 // Try to match this directory (as if the globstar wasn't there)
188 if (suffix.empty()) {
189 // This is a directory. Add it.
190 results.push_back(Filename(prefix));
191 num_matched++;
192 } else if (suffix == "/") {
193 // Keep the trailing slash, but be sure not to duplicate it.
194 results.push_back(Filename(prefix, ""));
195 num_matched++;
196 } else {
197 num_matched += next_glob.r_match_files(prefix, next_suffix, results, cwd);
198 }
199 next_suffix = suffix;
200 next_glob = *this;
201 }
202
203 for (const string &local_file : dir_files) {
204 if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
205 if (matches(local_file)) {
206 // We have a match; continue.
207 if (Filename(parent_dir, local_file).is_directory()) {
208 if (suffix.empty() && _pattern != "**") {
209 results.push_back(Filename(prefix, local_file));
210 num_matched++;
211 } else if (suffix == "/" && _pattern != "**") {
212 results.push_back(Filename(prefix, local_file + "/"));
213 num_matched++;
214 } else {
215 num_matched += next_glob.r_match_files(Filename(prefix, local_file),
216 next_suffix, results, cwd);
217 }
218 } else if (suffix.empty()) {
219 results.push_back(Filename(prefix, local_file));
220 num_matched++;
221 }
222 }
223 }
224 }
225
226 return num_matched;
227}
228
229/**
230 * Treats the GlobPattern as a filename pattern, and returns true if the given
231 * filename matches the pattern. Unlike matches(), this will not match slash
232 * characters for single asterisk characters, and it will ignore path
233 * components that only contain a dot.
234 */
236matches_file(Filename candidate) const {
237 if (_pattern.empty()) {
238 // Special case.
239 return candidate.empty();
240 }
241
242 // Either both must be absolute, or both must be relative.
243 if ((_pattern[0] != '/') != candidate.is_local()) {
244 return false;
245 }
246
247 return r_matches_file(_pattern, candidate);
248}
249
250/**
251 * The recursive implementation of matches_file().
252 */
253bool GlobPattern::
254r_matches_file(const string &pattern, const Filename &candidate) const {
255 // Split off the next bit of pattern.
256 std::string next_pattern;
257 GlobPattern glob;
260
261 bool pattern_end;
262 size_t slash = pattern.find('/');
263 if (slash == string::npos) {
264 glob.set_pattern(pattern);
265 pattern_end = true;
266 } else {
267 glob.set_pattern(pattern.substr(0, slash));
268 next_pattern = pattern.substr(slash + 1);
269 pattern_end = false;
270
271 if (slash == 0 || (slash == 1 && pattern[0] == '.')) {
272 // Ignore // and /./ in patterns
273 return r_matches_file(next_pattern, candidate);
274 }
275 }
276
277 // Also split off the next component in the candidate filename.
278 std::string part;
279 Filename next_candidate;
280
281 bool candidate_end;
282 size_t fn_slash = ((const std::string &)candidate).find('/');
283 if (fn_slash == string::npos) {
284 part = candidate;
285 candidate_end = true;
286 } else {
287 part = candidate.substr(0, fn_slash);
288 next_candidate = candidate.substr(fn_slash + 1);
289 candidate_end = false;
290
291 // Ignore // and /./ in filenames.
292 if (fn_slash == 0 || part == ".") {
293 return r_matches_file(pattern, next_candidate);
294 }
295 }
296
297 // Now check if the current part matches the current pattern.
298 bool part_matches;
299 if (glob.get_pattern() == "**") {
300 // This matches any number of parts.
301 if (pattern_end) {
302 // We might as well stop checking here; it matches whatever might come.
303 return true;
304 }
305 // We branch out to three options: either we match nothing, we match this
306 // part only, or we match this part and maybe more.
307 return r_matches_file(next_pattern, candidate)
308 || (!candidate_end && r_matches_file(next_pattern, next_candidate))
309 || (!candidate_end && r_matches_file(pattern, next_candidate));
310 }
311 else if (glob.get_pattern() == "*" && _nomatch_chars.empty()) {
312 // Matches any part (faster version of below)
313 part_matches = true;
314 }
315 else if ((glob.get_pattern() == "." && part.empty())
316 || (glob.get_pattern().empty() && part == ".")) {
317 // So that /path/. matches /path/, and vice versa.
318 part_matches = true;
319 }
320 else if (glob.has_glob_characters()) {
321 part_matches = glob.matches(part);
322 }
323 else if (get_case_sensitive()) {
324 part_matches = (part == glob.get_pattern());
325 }
326 else {
327 part_matches = (cmp_nocase(part, glob.get_pattern()) == 0);
328 }
329
330 if (!part_matches) {
331 // It doesn't match, so we end our search here.
332 return false;
333 }
334
335 if (candidate_end && pattern_end) {
336 // We've reached the end of both candidate and pattern, so it matches.
337 return true;
338 }
339
340 if (pattern_end != candidate_end) {
341 // One of them has ended, but the other hasn't, so it's not a match.
342 return false;
343 }
344
345 // It matches; move on to the next part.
346 return r_matches_file(next_pattern, next_candidate);
347}
348
349/**
350 * The recursive implementation of matches(). This returns true if the
351 * pattern substring [pi, pend) matches the candidate substring [ci, cend),
352 * false otherwise.
353 */
354bool GlobPattern::
355matches_substr(string::const_iterator pi, string::const_iterator pend,
356 string::const_iterator ci, string::const_iterator cend) const {
357 // If we run out of pattern or candidate string, it's a match only if they
358 // both ran out at the same time.
359 if (pi == pend || ci == cend) {
360 // A special exception: we allow ci to reach the end before pi, only if pi
361 // is one character before the end and that last character is '*'.
362 if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') {
363 return true;
364 }
365 return (pi == pend && ci == cend);
366 }
367
368 switch (*pi) {
369
370 case '*':
371 // A '*' in the pattern string means to match any sequence of zero or more
372 // characters in the candidate string. This means we have to recurse
373 // twice: either consume one character of the candidate string and
374 // continue to try matching the *, or stop trying to match the * here.
375 if (_nomatch_chars.find(*ci) == string::npos) {
376 return
377 matches_substr(pi, pend, ci + 1, cend) ||
378 matches_substr(pi + 1, pend, ci, cend);
379 } else {
380 // On the other hand, if this is one of the nomatch chars, we can only
381 // stop here.
382 return matches_substr(pi + 1, pend, ci, cend);
383 }
384
385 case '?':
386 // A '?' in the pattern string means to match exactly one character in the
387 // candidate string. That's easy.
388 return matches_substr(pi + 1, pend, ci + 1, cend);
389
390 case '[':
391 // An open square bracket begins a set.
392 ++pi;
393 if ((*pi) == '!') {
394 ++pi;
395 if (matches_set(pi, pend, *ci)) {
396 return false;
397 }
398 } else {
399 if (!matches_set(pi, pend, *ci)) {
400 return false;
401 }
402 }
403 if (pi == pend) {
404 // Oops, there wasn't a closing square bracket.
405 return false;
406 }
407 return matches_substr(pi + 1, pend, ci + 1, cend);
408
409 case '\\':
410 // A backslash escapes the next special character.
411 ++pi;
412 if (pi == pend) {
413 return false;
414 }
415 // fall through.
416
417 default:
418 // Anything else means to match exactly that.
419 if (_case_sensitive) {
420 if ((*pi) != (*ci)) {
421 return false;
422 }
423 } else {
424 if (tolower(*pi) != tolower(*ci)) {
425 return false;
426 }
427 }
428 return matches_substr(pi + 1, pend, ci + 1, cend);
429 }
430}
431
432
433/**
434 * Called when an unescaped open square bracked is scanned, this is called
435 * with pi positioned after the opening square bracket, scans the set
436 * sequence, leaving pi positioned on the closing square bracket, and returns
437 * true if the indicated character matches the set of characters indicated,
438 * false otherwise.
439 */
440bool GlobPattern::
441matches_set(string::const_iterator &pi, string::const_iterator pend,
442 char ch) const {
443 bool matched = false;
444
445 while (pi != pend && (*pi) != ']') {
446 if ((*pi) == '\\') {
447 // Backslash escapes the next character.
448 ++pi;
449 if (pi == pend) {
450 return false;
451 }
452 }
453
454 if (ch == (*pi)) {
455 matched = true;
456 }
457
458 // Maybe it's an a-z style range?
459 char start = (*pi);
460 ++pi;
461 if (pi != pend && (*pi) == '-') {
462 ++pi;
463 if (pi != pend && (*pi) != ']') {
464 // Yes, we have a range: start-end.
465
466 if ((*pi) == '\\') {
467 // Backslash escapes.
468 ++pi;
469 if (pi == pend) {
470 return false;
471 }
472 }
473
474 char end = (*pi);
475 ++pi;
476
477 if ((ch >= start && ch <= end) ||
478 (!_case_sensitive &&
479 ((tolower(ch) >= start && tolower(ch) <= end) ||
480 (toupper(ch) >= start && toupper(ch) <= end)))) {
481 matched = true;
482 }
483 } else {
484 // This was a - at the end of the string.
485 if (ch == '-') {
486 matched = true;
487 }
488 }
489 }
490 }
491
492 return matched;
493}
The name of a file, such as a texture file or an Egg file.
Definition filename.h:44
bool scan_directory(vector_string &contents) const
Attempts to open the named filename as if it were a directory and looks for the non-hidden files with...
bool is_local() const
Returns true if the filename is local, e.g.
Definition filename.I:549
This class can be used to test for string matches against standard Unix- shell filename globbing conv...
Definition globPattern.h:32
get_case_sensitive
Returns whether the match is case sensitive (true) or case insensitive (false).
Definition globPattern.h:48
set_pattern
Changes the pattern string that the GlobPattern object matches.
Definition globPattern.h:44
bool has_glob_characters() const
Returns true if the pattern includes any special globbing characters, or false if it is just a litera...
std::string get_const_prefix() const
Returns the initial part of the pattern before the first glob character.
get_pattern
Returns the pattern string that the GlobPattern object matches.
Definition globPattern.h:44
bool matches(const std::string &candidate) const
Returns true if the candidate string matches the pattern, false otherwise.
set_nomatch_chars
Specifies a set of characters that are not matched by * or ?.
Definition globPattern.h:52
get_nomatch_chars
Returns the set of characters that are not matched by * or ?.
Definition globPattern.h:52
bool matches_file(Filename candidate) const
Treats the GlobPattern as a filename pattern, and returns true if the given filename matches the patt...
set_case_sensitive
Sets whether the match is case sensitive (true) or case insensitive (false).
Definition globPattern.h:48
int match_files(vector_string &results, const Filename &cwd=Filename()) const
Treats the GlobPattern as a filename pattern, and returns a list of any actual files that match the p...
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.