Panda3D

globPattern.cxx

00001 // Filename: globPattern.cxx
00002 // Created by:  drose (30May00)
00003 //
00004 ////////////////////////////////////////////////////////////////////
00005 //
00006 // PANDA 3D SOFTWARE
00007 // Copyright (c) Carnegie Mellon University.  All rights reserved.
00008 //
00009 // All use of this software is subject to the terms of the revised BSD
00010 // license.  You should have received a copy of this license along
00011 // with this source code in a file named "LICENSE."
00012 //
00013 ////////////////////////////////////////////////////////////////////
00014 
00015 #include "globPattern.h"
00016 #include <ctype.h>
00017 
00018 ////////////////////////////////////////////////////////////////////
00019 //     Function: GlobPattern::has_glob_characters
00020 //       Access: Public
00021 //  Description: Returns true if the pattern includes any special
00022 //               globbing characters, or false if it is just a literal
00023 //               string.
00024 ////////////////////////////////////////////////////////////////////
00025 bool GlobPattern::
00026 has_glob_characters() const {
00027   string::const_iterator pi;
00028   pi = _pattern.begin();
00029   while (pi != _pattern.end()) {
00030     switch (*pi) {
00031     case '*':
00032     case '?':
00033     case '[':
00034       return true;
00035 
00036     case '\\':
00037       ++pi;
00038       if (pi == _pattern.end()) {
00039         return false;
00040       }
00041     }
00042     ++pi;
00043   }
00044   return false;
00045 }
00046 
00047 ////////////////////////////////////////////////////////////////////
00048 //     Function: GlobPattern::get_const_prefix
00049 //       Access: Public
00050 //  Description: Returns the initial part of the pattern before the
00051 //               first glob character.  Since many glob patterns begin
00052 //               with a sequence of static characters and end with one
00053 //               or more glob characters, this can be used to
00054 //               optimized searches through sorted indices.
00055 ////////////////////////////////////////////////////////////////////
00056 string GlobPattern::
00057 get_const_prefix() const {
00058   string prefix;
00059 
00060   size_t p = 0;  // current point
00061   size_t q = 0;  // starting point
00062   while (p < _pattern.size()) {
00063     switch (_pattern[p]) {
00064     case '*':
00065     case '?':
00066     case '[':
00067       return prefix + _pattern.substr(q, p - q);
00068 
00069     case '\\':
00070       // Skip over the backslash.
00071       prefix += _pattern.substr(q, p - q);
00072       ++p;
00073       q = p;
00074     }
00075     ++p;
00076   }
00077   return prefix += _pattern.substr(q, p - q);
00078 }
00079 
00080 ////////////////////////////////////////////////////////////////////
00081 //     Function: GlobPattern::match_files
00082 //       Access: Public
00083 //  Description: Treats the GlobPattern as a filename pattern, and
00084 //               returns a list of any actual files that match the
00085 //               pattern.  This is the behavior of the standard Posix
00086 //               glob() function.  Any part of the filename may
00087 //               contain glob characters, including intermediate
00088 //               directory names.
00089 //
00090 //               If cwd is specified, it is the directory that
00091 //               relative filenames are taken to be relative to;
00092 //               otherwise, the actual current working directory is
00093 //               assumed.
00094 //
00095 //               The return value is the number of files matched,
00096 //               which are added to the results vector.
00097 ////////////////////////////////////////////////////////////////////
00098 int GlobPattern::
00099 match_files(vector_string &results, const Filename &cwd) {
00100   string prefix, pattern, suffix;
00101 
00102   string source = _pattern;
00103   if (!source.empty() && source[0] == '/') {
00104     // If the first character is a slash, that becomes the prefix.
00105     prefix = "/";
00106     source = source.substr(1);
00107   }
00108 
00109   size_t slash = source.find('/');
00110   if (slash == string::npos) {
00111     pattern = source;
00112   } else {
00113     pattern = source.substr(0, slash);
00114     suffix = source.substr(slash + 1);
00115   }
00116   
00117   GlobPattern glob(pattern);
00118   return glob.r_match_files(prefix, suffix, results, cwd);
00119 }
00120 
00121 ////////////////////////////////////////////////////////////////////
00122 //     Function: GlobPattern::r_match_files
00123 //       Access: Private
00124 //  Description: The recursive implementation of match_files().
00125 ////////////////////////////////////////////////////////////////////
00126 int GlobPattern::
00127 r_match_files(const Filename &prefix, const string &suffix,
00128               vector_string &results, const Filename &cwd) {
00129   string next_pattern, next_suffix;
00130 
00131   size_t slash = suffix.find('/');
00132   if (slash == string::npos) {
00133     next_pattern = suffix;
00134   } else {
00135     next_pattern = suffix.substr(0, slash);
00136     next_suffix = suffix.substr(slash + 1);
00137   }
00138 
00139   Filename parent_dir;
00140   if (prefix.is_local() && !cwd.empty()) {
00141     parent_dir = Filename(cwd, prefix);
00142   } else {
00143     parent_dir = prefix;
00144   }
00145 
00146   GlobPattern next_glob(next_pattern);
00147 
00148   if (!has_glob_characters()) {
00149     // If there are no special characters in the pattern, it's a
00150     // literal match.
00151     if (suffix.empty()) {
00152       // Time to stop.
00153       Filename single_filename(parent_dir, _pattern);
00154       if (single_filename.exists()) {
00155         results.push_back(Filename(prefix, _pattern));
00156         return 1;
00157       }
00158       return 0;
00159     }
00160 
00161     return next_glob.r_match_files(Filename(prefix, _pattern),
00162                                    next_suffix, results, cwd);
00163 
00164   } 
00165 
00166   // If there *are* special glob characters, we must attempt to
00167   // match the pattern against the files in this directory.
00168   
00169   vector_string dir_files;
00170   if (!parent_dir.scan_directory(dir_files)) {
00171     // Not a directory, or unable to read directory; stop here.
00172     return 0;
00173   }
00174   
00175   // Now go through each file in the directory looking for one that
00176   // matches the pattern.
00177   int num_matched = 0;
00178   
00179   vector_string::const_iterator fi;
00180   for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) {
00181     const string &local_file = (*fi);
00182     if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
00183       if (matches(local_file)) {
00184         // We have a match; continue.
00185         if (suffix.empty()) {
00186           results.push_back(Filename(prefix, local_file));
00187           num_matched++; 
00188         } else {
00189           num_matched += next_glob.r_match_files(Filename(prefix, local_file),
00190                                                  next_suffix, results, cwd);
00191         }
00192       }
00193     }
00194   }
00195   
00196   return num_matched;
00197 }
00198 
00199 ////////////////////////////////////////////////////////////////////
00200 //     Function: GlobPattern::matches_substr
00201 //       Access: Private
00202 //  Description: The recursive implementation of matches().  This
00203 //               returns true if the pattern substring [pi, pend)
00204 //               matches the candidate substring [ci, cend), false
00205 //               otherwise.
00206 ////////////////////////////////////////////////////////////////////
00207 bool GlobPattern::
00208 matches_substr(string::const_iterator pi, string::const_iterator pend,
00209                string::const_iterator ci, string::const_iterator cend) const {
00210   // If we run out of pattern or candidate string, it's a match only
00211   // if they both ran out at the same time.
00212   if (pi == pend || ci == cend) {
00213     // A special exception: we allow ci to reach the end before pi,
00214     // only if pi is one character before the end and that last
00215     // character is '*'.
00216     if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') {
00217       return true;
00218     }
00219     return (pi == pend && ci == cend);
00220   }
00221 
00222   switch (*pi) {
00223 
00224   case '*':
00225     // A '*' in the pattern string means to match any sequence of zero
00226     // or more characters in the candidate string.  This means we have
00227     // to recurse twice: either consume one character of the candidate
00228     // string and continue to try matching the *, or stop trying to
00229     // match the * here.
00230     if (_nomatch_chars.find(*ci) == string::npos) {
00231       return
00232         matches_substr(pi, pend, ci + 1, cend) ||
00233         matches_substr(pi + 1, pend, ci, cend);
00234     } else {
00235       // On the other hand, if this is one of the nomatch chars, we
00236       // can only stop here.
00237       return matches_substr(pi + 1, pend, ci, cend);
00238     }
00239 
00240   case '?':
00241     // A '?' in the pattern string means to match exactly one
00242     // character in the candidate string.  That's easy.
00243     return matches_substr(pi + 1, pend, ci + 1, cend);
00244 
00245   case '[':
00246     // An open square bracket begins a set.
00247     ++pi;
00248     if ((*pi) == '!') {
00249       ++pi;
00250       if (matches_set(pi, pend, *ci)) {
00251         return false;
00252       }
00253     } else {
00254       if (!matches_set(pi, pend, *ci)) {
00255         return false;
00256       }
00257     }
00258     if (pi == pend) {
00259       // Oops, there wasn't a closing square bracket.
00260       return false;
00261     }
00262     return matches_substr(pi + 1, pend, ci + 1, cend);
00263 
00264   case '\\':
00265     // A backslash escapes the next special character.
00266     ++pi;
00267     if (pi == pend) {
00268       return false;
00269     }
00270     // fall through.
00271 
00272   default:
00273     // Anything else means to match exactly that.
00274     if (_case_sensitive) {
00275       if ((*pi) != (*ci)) {
00276         return false;
00277       }
00278     } else {
00279       if (tolower(*pi) != tolower(*ci)) {
00280         return false;
00281       }
00282     }
00283     return matches_substr(pi + 1, pend, ci + 1, cend);
00284   }
00285 }
00286 
00287 
00288 ////////////////////////////////////////////////////////////////////
00289 //     Function: GlobPattern::matches_set
00290 //       Access: Private
00291 //  Description: Called when an unescaped open square bracked is
00292 //               scanned, this is called with pi positioned after the
00293 //               opening square bracket, scans the set sequence,
00294 //               leaving pi positioned on the closing square bracket,
00295 //               and returns true if the indicated character matches
00296 //               the set of characters indicated, false otherwise.
00297 ////////////////////////////////////////////////////////////////////
00298 bool GlobPattern::
00299 matches_set(string::const_iterator &pi, string::const_iterator pend,
00300             char ch) const {
00301   bool matched = false;
00302 
00303   while (pi != pend && (*pi) != ']') {
00304     if ((*pi) == '\\') {
00305       // Backslash escapes the next character.
00306       ++pi;
00307       if (pi == pend) {
00308         return false;
00309       }
00310     }
00311 
00312     if (ch == (*pi)) {
00313       matched = true;
00314     }
00315 
00316     // Maybe it's an a-z style range?
00317     char start = (*pi);
00318     ++pi;
00319     if (pi != pend && (*pi) == '-') {
00320       ++pi;
00321       if (pi != pend && (*pi) != ']') {
00322         // Yes, we have a range: start-end.
00323 
00324         if ((*pi) == '\\') {
00325           // Backslash escapes.
00326           ++pi;
00327           if (pi == pend) {
00328             return false;
00329           }
00330         }
00331 
00332         char end = (*pi);
00333         ++pi;
00334 
00335         if (ch >= start && ch <= end || 
00336             (!_case_sensitive && 
00337              ((tolower(ch) >= start && tolower(ch) <= end) ||
00338               (toupper(ch) >= start && toupper(ch) <= end)))) {
00339           matched = true;
00340         }
00341       } else {
00342         // This was a - at the end of the string.
00343         if (ch == '-') {
00344           matched = true;
00345         }
00346       }
00347     }
00348   }
00349 
00350   return matched;
00351 }
00352 
00353 
00354 
 All Classes Functions Variables Enumerations