Panda3D
|
00001 // Filename: globPattern.cxx 00002 // Created by: drose (30May00) 00003 // 00004 //////////////////////////////////////////////////////////////////// 00005 // 00006 // PANDA 3D SOFTWARE 00007 // Copyright (c) Carnegie Mellon University. All rights reserved. 00008 // 00009 // All use of this software is subject to the terms of the revised BSD 00010 // license. You should have received a copy of this license along 00011 // with this source code in a file named "LICENSE." 00012 // 00013 //////////////////////////////////////////////////////////////////// 00014 00015 #include "globPattern.h" 00016 #include <ctype.h> 00017 00018 //////////////////////////////////////////////////////////////////// 00019 // Function: GlobPattern::has_glob_characters 00020 // Access: Published 00021 // Description: Returns true if the pattern includes any special 00022 // globbing characters, or false if it is just a literal 00023 // string. 00024 //////////////////////////////////////////////////////////////////// 00025 bool GlobPattern:: 00026 has_glob_characters() const { 00027 string::const_iterator pi; 00028 pi = _pattern.begin(); 00029 while (pi != _pattern.end()) { 00030 switch (*pi) { 00031 case '*': 00032 case '?': 00033 case '[': 00034 return true; 00035 00036 case '\\': 00037 ++pi; 00038 if (pi == _pattern.end()) { 00039 return false; 00040 } 00041 } 00042 ++pi; 00043 } 00044 return false; 00045 } 00046 00047 //////////////////////////////////////////////////////////////////// 00048 // Function: GlobPattern::get_const_prefix 00049 // Access: Published 00050 // Description: Returns the initial part of the pattern before the 00051 // first glob character. Since many glob patterns begin 00052 // with a sequence of static characters and end with one 00053 // or more glob characters, this can be used to 00054 // optimized searches through sorted indices. 00055 //////////////////////////////////////////////////////////////////// 00056 string GlobPattern:: 00057 get_const_prefix() const { 00058 string prefix; 00059 00060 size_t p = 0; // current point 00061 size_t q = 0; // starting point 00062 while (p < _pattern.size()) { 00063 switch (_pattern[p]) { 00064 case '*': 00065 case '?': 00066 case '[': 00067 return prefix + _pattern.substr(q, p - q); 00068 00069 case '\\': 00070 // Skip over the backslash. 00071 prefix += _pattern.substr(q, p - q); 00072 ++p; 00073 q = p; 00074 } 00075 ++p; 00076 } 00077 return prefix += _pattern.substr(q, p - q); 00078 } 00079 00080 //////////////////////////////////////////////////////////////////// 00081 // Function: GlobPattern::match_files 00082 // Access: Published 00083 // Description: Treats the GlobPattern as a filename pattern, and 00084 // returns a list of any actual files that match the 00085 // pattern. This is the behavior of the standard Posix 00086 // glob() function. Any part of the filename may 00087 // contain glob characters, including intermediate 00088 // directory names. 00089 // 00090 // If cwd is specified, it is the directory that 00091 // relative filenames are taken to be relative to; 00092 // otherwise, the actual current working directory is 00093 // assumed. 00094 // 00095 // The return value is the number of files matched, 00096 // which are added to the results vector. 00097 //////////////////////////////////////////////////////////////////// 00098 int GlobPattern:: 00099 match_files(vector_string &results, const Filename &cwd) const { 00100 string prefix, pattern, suffix; 00101 00102 string source = _pattern; 00103 if (!source.empty() && source[0] == '/') { 00104 // If the first character is a slash, that becomes the prefix. 00105 prefix = "/"; 00106 source = source.substr(1); 00107 } 00108 00109 size_t slash = source.find('/'); 00110 if (slash == string::npos) { 00111 pattern = source; 00112 } else { 00113 pattern = source.substr(0, slash); 00114 suffix = source.substr(slash + 1); 00115 } 00116 00117 GlobPattern glob(pattern); 00118 return glob.r_match_files(prefix, suffix, results, cwd); 00119 } 00120 00121 #ifdef HAVE_PYTHON 00122 //////////////////////////////////////////////////////////////////// 00123 // Function: GlobPattern::match_files 00124 // Access: Published 00125 // Description: This variant on match_files returns a Python list 00126 // of strings. 00127 //////////////////////////////////////////////////////////////////// 00128 PyObject *GlobPattern:: 00129 match_files(const Filename &cwd) const { 00130 vector_string contents; 00131 match_files(contents, cwd); 00132 00133 PyObject *result = PyList_New(contents.size()); 00134 for (size_t i = 0; i < contents.size(); ++i) { 00135 const string &filename = contents[i]; 00136 PyObject *str = PyString_FromStringAndSize(filename.data(), filename.size()); 00137 PyList_SET_ITEM(result, i, str); 00138 } 00139 00140 return result; 00141 } 00142 #endif // HAVE_PYTHON 00143 00144 //////////////////////////////////////////////////////////////////// 00145 // Function: GlobPattern::r_match_files 00146 // Access: Private 00147 // Description: The recursive implementation of match_files(). 00148 //////////////////////////////////////////////////////////////////// 00149 int GlobPattern:: 00150 r_match_files(const Filename &prefix, const string &suffix, 00151 vector_string &results, const Filename &cwd) { 00152 string next_pattern, next_suffix; 00153 00154 size_t slash = suffix.find('/'); 00155 if (slash == string::npos) { 00156 next_pattern = suffix; 00157 } else { 00158 next_pattern = suffix.substr(0, slash); 00159 next_suffix = suffix.substr(slash + 1); 00160 } 00161 00162 Filename parent_dir; 00163 if (prefix.is_local() && !cwd.empty()) { 00164 parent_dir = Filename(cwd, prefix); 00165 } else { 00166 parent_dir = prefix; 00167 } 00168 00169 GlobPattern next_glob(next_pattern); 00170 00171 if (!has_glob_characters()) { 00172 // If there are no special characters in the pattern, it's a 00173 // literal match. 00174 if (suffix.empty()) { 00175 // Time to stop. 00176 Filename single_filename(parent_dir, _pattern); 00177 if (single_filename.exists()) { 00178 results.push_back(Filename(prefix, _pattern)); 00179 return 1; 00180 } 00181 return 0; 00182 } 00183 00184 return next_glob.r_match_files(Filename(prefix, _pattern), 00185 next_suffix, results, cwd); 00186 00187 } 00188 00189 // If there *are* special glob characters, we must attempt to 00190 // match the pattern against the files in this directory. 00191 00192 vector_string dir_files; 00193 if (!parent_dir.scan_directory(dir_files)) { 00194 // Not a directory, or unable to read directory; stop here. 00195 return 0; 00196 } 00197 00198 // Now go through each file in the directory looking for one that 00199 // matches the pattern. 00200 int num_matched = 0; 00201 00202 vector_string::const_iterator fi; 00203 for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) { 00204 const string &local_file = (*fi); 00205 if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) { 00206 if (matches(local_file)) { 00207 // We have a match; continue. 00208 if (suffix.empty()) { 00209 results.push_back(Filename(prefix, local_file)); 00210 num_matched++; 00211 } else { 00212 num_matched += next_glob.r_match_files(Filename(prefix, local_file), 00213 next_suffix, results, cwd); 00214 } 00215 } 00216 } 00217 } 00218 00219 return num_matched; 00220 } 00221 00222 //////////////////////////////////////////////////////////////////// 00223 // Function: GlobPattern::matches_substr 00224 // Access: Private 00225 // Description: The recursive implementation of matches(). This 00226 // returns true if the pattern substring [pi, pend) 00227 // matches the candidate substring [ci, cend), false 00228 // otherwise. 00229 //////////////////////////////////////////////////////////////////// 00230 bool GlobPattern:: 00231 matches_substr(string::const_iterator pi, string::const_iterator pend, 00232 string::const_iterator ci, string::const_iterator cend) const { 00233 // If we run out of pattern or candidate string, it's a match only 00234 // if they both ran out at the same time. 00235 if (pi == pend || ci == cend) { 00236 // A special exception: we allow ci to reach the end before pi, 00237 // only if pi is one character before the end and that last 00238 // character is '*'. 00239 if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') { 00240 return true; 00241 } 00242 return (pi == pend && ci == cend); 00243 } 00244 00245 switch (*pi) { 00246 00247 case '*': 00248 // A '*' in the pattern string means to match any sequence of zero 00249 // or more characters in the candidate string. This means we have 00250 // to recurse twice: either consume one character of the candidate 00251 // string and continue to try matching the *, or stop trying to 00252 // match the * here. 00253 if (_nomatch_chars.find(*ci) == string::npos) { 00254 return 00255 matches_substr(pi, pend, ci + 1, cend) || 00256 matches_substr(pi + 1, pend, ci, cend); 00257 } else { 00258 // On the other hand, if this is one of the nomatch chars, we 00259 // can only stop here. 00260 return matches_substr(pi + 1, pend, ci, cend); 00261 } 00262 00263 case '?': 00264 // A '?' in the pattern string means to match exactly one 00265 // character in the candidate string. That's easy. 00266 return matches_substr(pi + 1, pend, ci + 1, cend); 00267 00268 case '[': 00269 // An open square bracket begins a set. 00270 ++pi; 00271 if ((*pi) == '!') { 00272 ++pi; 00273 if (matches_set(pi, pend, *ci)) { 00274 return false; 00275 } 00276 } else { 00277 if (!matches_set(pi, pend, *ci)) { 00278 return false; 00279 } 00280 } 00281 if (pi == pend) { 00282 // Oops, there wasn't a closing square bracket. 00283 return false; 00284 } 00285 return matches_substr(pi + 1, pend, ci + 1, cend); 00286 00287 case '\\': 00288 // A backslash escapes the next special character. 00289 ++pi; 00290 if (pi == pend) { 00291 return false; 00292 } 00293 // fall through. 00294 00295 default: 00296 // Anything else means to match exactly that. 00297 if (_case_sensitive) { 00298 if ((*pi) != (*ci)) { 00299 return false; 00300 } 00301 } else { 00302 if (tolower(*pi) != tolower(*ci)) { 00303 return false; 00304 } 00305 } 00306 return matches_substr(pi + 1, pend, ci + 1, cend); 00307 } 00308 } 00309 00310 00311 //////////////////////////////////////////////////////////////////// 00312 // Function: GlobPattern::matches_set 00313 // Access: Private 00314 // Description: Called when an unescaped open square bracked is 00315 // scanned, this is called with pi positioned after the 00316 // opening square bracket, scans the set sequence, 00317 // leaving pi positioned on the closing square bracket, 00318 // and returns true if the indicated character matches 00319 // the set of characters indicated, false otherwise. 00320 //////////////////////////////////////////////////////////////////// 00321 bool GlobPattern:: 00322 matches_set(string::const_iterator &pi, string::const_iterator pend, 00323 char ch) const { 00324 bool matched = false; 00325 00326 while (pi != pend && (*pi) != ']') { 00327 if ((*pi) == '\\') { 00328 // Backslash escapes the next character. 00329 ++pi; 00330 if (pi == pend) { 00331 return false; 00332 } 00333 } 00334 00335 if (ch == (*pi)) { 00336 matched = true; 00337 } 00338 00339 // Maybe it's an a-z style range? 00340 char start = (*pi); 00341 ++pi; 00342 if (pi != pend && (*pi) == '-') { 00343 ++pi; 00344 if (pi != pend && (*pi) != ']') { 00345 // Yes, we have a range: start-end. 00346 00347 if ((*pi) == '\\') { 00348 // Backslash escapes. 00349 ++pi; 00350 if (pi == pend) { 00351 return false; 00352 } 00353 } 00354 00355 char end = (*pi); 00356 ++pi; 00357 00358 if (ch >= start && ch <= end || 00359 (!_case_sensitive && 00360 ((tolower(ch) >= start && tolower(ch) <= end) || 00361 (toupper(ch) >= start && toupper(ch) <= end)))) { 00362 matched = true; 00363 } 00364 } else { 00365 // This was a - at the end of the string. 00366 if (ch == '-') { 00367 matched = true; 00368 } 00369 } 00370 } 00371 } 00372 00373 return matched; 00374 } 00375 00376 00377