Panda3D
 All Classes Functions Variables Enumerations
globPattern.cxx
00001 // Filename: globPattern.cxx
00002 // Created by:  drose (30May00)
00003 //
00004 ////////////////////////////////////////////////////////////////////
00005 //
00006 // PANDA 3D SOFTWARE
00007 // Copyright (c) Carnegie Mellon University.  All rights reserved.
00008 //
00009 // All use of this software is subject to the terms of the revised BSD
00010 // license.  You should have received a copy of this license along
00011 // with this source code in a file named "LICENSE."
00012 //
00013 ////////////////////////////////////////////////////////////////////
00014 
00015 #include "globPattern.h"
00016 #include <ctype.h>
00017 
00018 ////////////////////////////////////////////////////////////////////
00019 //     Function: GlobPattern::has_glob_characters
00020 //       Access: Published
00021 //  Description: Returns true if the pattern includes any special
00022 //               globbing characters, or false if it is just a literal
00023 //               string.
00024 ////////////////////////////////////////////////////////////////////
00025 bool GlobPattern::
00026 has_glob_characters() const {
00027   string::const_iterator pi;
00028   pi = _pattern.begin();
00029   while (pi != _pattern.end()) {
00030     switch (*pi) {
00031     case '*':
00032     case '?':
00033     case '[':
00034       return true;
00035 
00036     case '\\':
00037       ++pi;
00038       if (pi == _pattern.end()) {
00039         return false;
00040       }
00041     }
00042     ++pi;
00043   }
00044   return false;
00045 }
00046 
00047 ////////////////////////////////////////////////////////////////////
00048 //     Function: GlobPattern::get_const_prefix
00049 //       Access: Published
00050 //  Description: Returns the initial part of the pattern before the
00051 //               first glob character.  Since many glob patterns begin
00052 //               with a sequence of static characters and end with one
00053 //               or more glob characters, this can be used to
00054 //               optimized searches through sorted indices.
00055 ////////////////////////////////////////////////////////////////////
00056 string GlobPattern::
00057 get_const_prefix() const {
00058   string prefix;
00059 
00060   size_t p = 0;  // current point
00061   size_t q = 0;  // starting point
00062   while (p < _pattern.size()) {
00063     switch (_pattern[p]) {
00064     case '*':
00065     case '?':
00066     case '[':
00067       return prefix + _pattern.substr(q, p - q);
00068 
00069     case '\\':
00070       // Skip over the backslash.
00071       prefix += _pattern.substr(q, p - q);
00072       ++p;
00073       q = p;
00074     }
00075     ++p;
00076   }
00077   return prefix += _pattern.substr(q, p - q);
00078 }
00079 
00080 ////////////////////////////////////////////////////////////////////
00081 //     Function: GlobPattern::match_files
00082 //       Access: Published
00083 //  Description: Treats the GlobPattern as a filename pattern, and
00084 //               returns a list of any actual files that match the
00085 //               pattern.  This is the behavior of the standard Posix
00086 //               glob() function.  Any part of the filename may
00087 //               contain glob characters, including intermediate
00088 //               directory names.
00089 //
00090 //               If cwd is specified, it is the directory that
00091 //               relative filenames are taken to be relative to;
00092 //               otherwise, the actual current working directory is
00093 //               assumed.
00094 //
00095 //               The return value is the number of files matched,
00096 //               which are added to the results vector.
00097 ////////////////////////////////////////////////////////////////////
00098 int GlobPattern::
00099 match_files(vector_string &results, const Filename &cwd) const {
00100   string prefix, pattern, suffix;
00101 
00102   string source = _pattern;
00103   if (!source.empty() && source[0] == '/') {
00104     // If the first character is a slash, that becomes the prefix.
00105     prefix = "/";
00106     source = source.substr(1);
00107   }
00108 
00109   size_t slash = source.find('/');
00110   if (slash == string::npos) {
00111     pattern = source;
00112   } else {
00113     pattern = source.substr(0, slash);
00114     suffix = source.substr(slash + 1);
00115   }
00116   
00117   GlobPattern glob(pattern);
00118   return glob.r_match_files(prefix, suffix, results, cwd);
00119 }
00120 
00121 #ifdef HAVE_PYTHON
00122 ////////////////////////////////////////////////////////////////////
00123 //     Function: GlobPattern::match_files
00124 //       Access: Published
00125 //  Description: This variant on match_files returns a Python list
00126 //               of strings.
00127 ////////////////////////////////////////////////////////////////////
00128 PyObject *GlobPattern::
00129 match_files(const Filename &cwd) const {
00130   vector_string contents;
00131   match_files(contents, cwd);
00132 
00133   PyObject *result = PyList_New(contents.size());
00134   for (size_t i = 0; i < contents.size(); ++i) {
00135     const string &filename = contents[i];
00136     PyObject *str = PyString_FromStringAndSize(filename.data(), filename.size());
00137     PyList_SET_ITEM(result, i, str);
00138   }
00139 
00140   return result;
00141 }
00142 #endif  // HAVE_PYTHON
00143 
00144 ////////////////////////////////////////////////////////////////////
00145 //     Function: GlobPattern::r_match_files
00146 //       Access: Private
00147 //  Description: The recursive implementation of match_files().
00148 ////////////////////////////////////////////////////////////////////
00149 int GlobPattern::
00150 r_match_files(const Filename &prefix, const string &suffix,
00151               vector_string &results, const Filename &cwd) {
00152   string next_pattern, next_suffix;
00153 
00154   size_t slash = suffix.find('/');
00155   if (slash == string::npos) {
00156     next_pattern = suffix;
00157   } else {
00158     next_pattern = suffix.substr(0, slash);
00159     next_suffix = suffix.substr(slash + 1);
00160   }
00161 
00162   Filename parent_dir;
00163   if (prefix.is_local() && !cwd.empty()) {
00164     parent_dir = Filename(cwd, prefix);
00165   } else {
00166     parent_dir = prefix;
00167   }
00168 
00169   GlobPattern next_glob(next_pattern);
00170 
00171   if (!has_glob_characters()) {
00172     // If there are no special characters in the pattern, it's a
00173     // literal match.
00174     if (suffix.empty()) {
00175       // Time to stop.
00176       Filename single_filename(parent_dir, _pattern);
00177       if (single_filename.exists()) {
00178         results.push_back(Filename(prefix, _pattern));
00179         return 1;
00180       }
00181       return 0;
00182     }
00183 
00184     return next_glob.r_match_files(Filename(prefix, _pattern),
00185                                    next_suffix, results, cwd);
00186 
00187   } 
00188 
00189   // If there *are* special glob characters, we must attempt to
00190   // match the pattern against the files in this directory.
00191   
00192   vector_string dir_files;
00193   if (!parent_dir.scan_directory(dir_files)) {
00194     // Not a directory, or unable to read directory; stop here.
00195     return 0;
00196   }
00197   
00198   // Now go through each file in the directory looking for one that
00199   // matches the pattern.
00200   int num_matched = 0;
00201   
00202   vector_string::const_iterator fi;
00203   for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) {
00204     const string &local_file = (*fi);
00205     if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
00206       if (matches(local_file)) {
00207         // We have a match; continue.
00208         if (suffix.empty()) {
00209           results.push_back(Filename(prefix, local_file));
00210           num_matched++; 
00211         } else {
00212           num_matched += next_glob.r_match_files(Filename(prefix, local_file),
00213                                                  next_suffix, results, cwd);
00214         }
00215       }
00216     }
00217   }
00218   
00219   return num_matched;
00220 }
00221 
00222 ////////////////////////////////////////////////////////////////////
00223 //     Function: GlobPattern::matches_substr
00224 //       Access: Private
00225 //  Description: The recursive implementation of matches().  This
00226 //               returns true if the pattern substring [pi, pend)
00227 //               matches the candidate substring [ci, cend), false
00228 //               otherwise.
00229 ////////////////////////////////////////////////////////////////////
00230 bool GlobPattern::
00231 matches_substr(string::const_iterator pi, string::const_iterator pend,
00232                string::const_iterator ci, string::const_iterator cend) const {
00233   // If we run out of pattern or candidate string, it's a match only
00234   // if they both ran out at the same time.
00235   if (pi == pend || ci == cend) {
00236     // A special exception: we allow ci to reach the end before pi,
00237     // only if pi is one character before the end and that last
00238     // character is '*'.
00239     if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') {
00240       return true;
00241     }
00242     return (pi == pend && ci == cend);
00243   }
00244 
00245   switch (*pi) {
00246 
00247   case '*':
00248     // A '*' in the pattern string means to match any sequence of zero
00249     // or more characters in the candidate string.  This means we have
00250     // to recurse twice: either consume one character of the candidate
00251     // string and continue to try matching the *, or stop trying to
00252     // match the * here.
00253     if (_nomatch_chars.find(*ci) == string::npos) {
00254       return
00255         matches_substr(pi, pend, ci + 1, cend) ||
00256         matches_substr(pi + 1, pend, ci, cend);
00257     } else {
00258       // On the other hand, if this is one of the nomatch chars, we
00259       // can only stop here.
00260       return matches_substr(pi + 1, pend, ci, cend);
00261     }
00262 
00263   case '?':
00264     // A '?' in the pattern string means to match exactly one
00265     // character in the candidate string.  That's easy.
00266     return matches_substr(pi + 1, pend, ci + 1, cend);
00267 
00268   case '[':
00269     // An open square bracket begins a set.
00270     ++pi;
00271     if ((*pi) == '!') {
00272       ++pi;
00273       if (matches_set(pi, pend, *ci)) {
00274         return false;
00275       }
00276     } else {
00277       if (!matches_set(pi, pend, *ci)) {
00278         return false;
00279       }
00280     }
00281     if (pi == pend) {
00282       // Oops, there wasn't a closing square bracket.
00283       return false;
00284     }
00285     return matches_substr(pi + 1, pend, ci + 1, cend);
00286 
00287   case '\\':
00288     // A backslash escapes the next special character.
00289     ++pi;
00290     if (pi == pend) {
00291       return false;
00292     }
00293     // fall through.
00294 
00295   default:
00296     // Anything else means to match exactly that.
00297     if (_case_sensitive) {
00298       if ((*pi) != (*ci)) {
00299         return false;
00300       }
00301     } else {
00302       if (tolower(*pi) != tolower(*ci)) {
00303         return false;
00304       }
00305     }
00306     return matches_substr(pi + 1, pend, ci + 1, cend);
00307   }
00308 }
00309 
00310 
00311 ////////////////////////////////////////////////////////////////////
00312 //     Function: GlobPattern::matches_set
00313 //       Access: Private
00314 //  Description: Called when an unescaped open square bracked is
00315 //               scanned, this is called with pi positioned after the
00316 //               opening square bracket, scans the set sequence,
00317 //               leaving pi positioned on the closing square bracket,
00318 //               and returns true if the indicated character matches
00319 //               the set of characters indicated, false otherwise.
00320 ////////////////////////////////////////////////////////////////////
00321 bool GlobPattern::
00322 matches_set(string::const_iterator &pi, string::const_iterator pend,
00323             char ch) const {
00324   bool matched = false;
00325 
00326   while (pi != pend && (*pi) != ']') {
00327     if ((*pi) == '\\') {
00328       // Backslash escapes the next character.
00329       ++pi;
00330       if (pi == pend) {
00331         return false;
00332       }
00333     }
00334 
00335     if (ch == (*pi)) {
00336       matched = true;
00337     }
00338 
00339     // Maybe it's an a-z style range?
00340     char start = (*pi);
00341     ++pi;
00342     if (pi != pend && (*pi) == '-') {
00343       ++pi;
00344       if (pi != pend && (*pi) != ']') {
00345         // Yes, we have a range: start-end.
00346 
00347         if ((*pi) == '\\') {
00348           // Backslash escapes.
00349           ++pi;
00350           if (pi == pend) {
00351             return false;
00352           }
00353         }
00354 
00355         char end = (*pi);
00356         ++pi;
00357 
00358         if (ch >= start && ch <= end || 
00359             (!_case_sensitive && 
00360              ((tolower(ch) >= start && tolower(ch) <= end) ||
00361               (toupper(ch) >= start && toupper(ch) <= end)))) {
00362           matched = true;
00363         }
00364       } else {
00365         // This was a - at the end of the string.
00366         if (ch == '-') {
00367           matched = true;
00368         }
00369       }
00370     }
00371   }
00372 
00373   return matched;
00374 }
00375 
00376 
00377 
 All Classes Functions Variables Enumerations