Panda3D
|
00001 // Filename: globPattern.cxx 00002 // Created by: drose (30May00) 00003 // 00004 //////////////////////////////////////////////////////////////////// 00005 // 00006 // PANDA 3D SOFTWARE 00007 // Copyright (c) Carnegie Mellon University. All rights reserved. 00008 // 00009 // All use of this software is subject to the terms of the revised BSD 00010 // license. You should have received a copy of this license along 00011 // with this source code in a file named "LICENSE." 00012 // 00013 //////////////////////////////////////////////////////////////////// 00014 00015 #include "globPattern.h" 00016 #include <ctype.h> 00017 00018 //////////////////////////////////////////////////////////////////// 00019 // Function: GlobPattern::has_glob_characters 00020 // Access: Public 00021 // Description: Returns true if the pattern includes any special 00022 // globbing characters, or false if it is just a literal 00023 // string. 00024 //////////////////////////////////////////////////////////////////// 00025 bool GlobPattern:: 00026 has_glob_characters() const { 00027 string::const_iterator pi; 00028 pi = _pattern.begin(); 00029 while (pi != _pattern.end()) { 00030 switch (*pi) { 00031 case '*': 00032 case '?': 00033 case '[': 00034 return true; 00035 00036 case '\\': 00037 ++pi; 00038 if (pi == _pattern.end()) { 00039 return false; 00040 } 00041 } 00042 ++pi; 00043 } 00044 return false; 00045 } 00046 00047 //////////////////////////////////////////////////////////////////// 00048 // Function: GlobPattern::get_const_prefix 00049 // Access: Public 00050 // Description: Returns the initial part of the pattern before the 00051 // first glob character. Since many glob patterns begin 00052 // with a sequence of static characters and end with one 00053 // or more glob characters, this can be used to 00054 // optimized searches through sorted indices. 00055 //////////////////////////////////////////////////////////////////// 00056 string GlobPattern:: 00057 get_const_prefix() const { 00058 string prefix; 00059 00060 size_t p = 0; // current point 00061 size_t q = 0; // starting point 00062 while (p < _pattern.size()) { 00063 switch (_pattern[p]) { 00064 case '*': 00065 case '?': 00066 case '[': 00067 return prefix + _pattern.substr(q, p - q); 00068 00069 case '\\': 00070 // Skip over the backslash. 00071 prefix += _pattern.substr(q, p - q); 00072 ++p; 00073 q = p; 00074 } 00075 ++p; 00076 } 00077 return prefix += _pattern.substr(q, p - q); 00078 } 00079 00080 //////////////////////////////////////////////////////////////////// 00081 // Function: GlobPattern::match_files 00082 // Access: Public 00083 // Description: Treats the GlobPattern as a filename pattern, and 00084 // returns a list of any actual files that match the 00085 // pattern. This is the behavior of the standard Posix 00086 // glob() function. Any part of the filename may 00087 // contain glob characters, including intermediate 00088 // directory names. 00089 // 00090 // If cwd is specified, it is the directory that 00091 // relative filenames are taken to be relative to; 00092 // otherwise, the actual current working directory is 00093 // assumed. 00094 // 00095 // The return value is the number of files matched, 00096 // which are added to the results vector. 00097 //////////////////////////////////////////////////////////////////// 00098 int GlobPattern:: 00099 match_files(vector_string &results, const Filename &cwd) { 00100 string prefix, pattern, suffix; 00101 00102 string source = _pattern; 00103 if (!source.empty() && source[0] == '/') { 00104 // If the first character is a slash, that becomes the prefix. 00105 prefix = "/"; 00106 source = source.substr(1); 00107 } 00108 00109 size_t slash = source.find('/'); 00110 if (slash == string::npos) { 00111 pattern = source; 00112 } else { 00113 pattern = source.substr(0, slash); 00114 suffix = source.substr(slash + 1); 00115 } 00116 00117 GlobPattern glob(pattern); 00118 return glob.r_match_files(prefix, suffix, results, cwd); 00119 } 00120 00121 //////////////////////////////////////////////////////////////////// 00122 // Function: GlobPattern::r_match_files 00123 // Access: Private 00124 // Description: The recursive implementation of match_files(). 00125 //////////////////////////////////////////////////////////////////// 00126 int GlobPattern:: 00127 r_match_files(const Filename &prefix, const string &suffix, 00128 vector_string &results, const Filename &cwd) { 00129 string next_pattern, next_suffix; 00130 00131 size_t slash = suffix.find('/'); 00132 if (slash == string::npos) { 00133 next_pattern = suffix; 00134 } else { 00135 next_pattern = suffix.substr(0, slash); 00136 next_suffix = suffix.substr(slash + 1); 00137 } 00138 00139 Filename parent_dir; 00140 if (prefix.is_local() && !cwd.empty()) { 00141 parent_dir = Filename(cwd, prefix); 00142 } else { 00143 parent_dir = prefix; 00144 } 00145 00146 GlobPattern next_glob(next_pattern); 00147 00148 if (!has_glob_characters()) { 00149 // If there are no special characters in the pattern, it's a 00150 // literal match. 00151 if (suffix.empty()) { 00152 // Time to stop. 00153 Filename single_filename(parent_dir, _pattern); 00154 if (single_filename.exists()) { 00155 results.push_back(Filename(prefix, _pattern)); 00156 return 1; 00157 } 00158 return 0; 00159 } 00160 00161 return next_glob.r_match_files(Filename(prefix, _pattern), 00162 next_suffix, results, cwd); 00163 00164 } 00165 00166 // If there *are* special glob characters, we must attempt to 00167 // match the pattern against the files in this directory. 00168 00169 vector_string dir_files; 00170 if (!parent_dir.scan_directory(dir_files)) { 00171 // Not a directory, or unable to read directory; stop here. 00172 return 0; 00173 } 00174 00175 // Now go through each file in the directory looking for one that 00176 // matches the pattern. 00177 int num_matched = 0; 00178 00179 vector_string::const_iterator fi; 00180 for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) { 00181 const string &local_file = (*fi); 00182 if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) { 00183 if (matches(local_file)) { 00184 // We have a match; continue. 00185 if (suffix.empty()) { 00186 results.push_back(Filename(prefix, local_file)); 00187 num_matched++; 00188 } else { 00189 num_matched += next_glob.r_match_files(Filename(prefix, local_file), 00190 next_suffix, results, cwd); 00191 } 00192 } 00193 } 00194 } 00195 00196 return num_matched; 00197 } 00198 00199 //////////////////////////////////////////////////////////////////// 00200 // Function: GlobPattern::matches_substr 00201 // Access: Private 00202 // Description: The recursive implementation of matches(). This 00203 // returns true if the pattern substring [pi, pend) 00204 // matches the candidate substring [ci, cend), false 00205 // otherwise. 00206 //////////////////////////////////////////////////////////////////// 00207 bool GlobPattern:: 00208 matches_substr(string::const_iterator pi, string::const_iterator pend, 00209 string::const_iterator ci, string::const_iterator cend) const { 00210 // If we run out of pattern or candidate string, it's a match only 00211 // if they both ran out at the same time. 00212 if (pi == pend || ci == cend) { 00213 // A special exception: we allow ci to reach the end before pi, 00214 // only if pi is one character before the end and that last 00215 // character is '*'. 00216 if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') { 00217 return true; 00218 } 00219 return (pi == pend && ci == cend); 00220 } 00221 00222 switch (*pi) { 00223 00224 case '*': 00225 // A '*' in the pattern string means to match any sequence of zero 00226 // or more characters in the candidate string. This means we have 00227 // to recurse twice: either consume one character of the candidate 00228 // string and continue to try matching the *, or stop trying to 00229 // match the * here. 00230 if (_nomatch_chars.find(*ci) == string::npos) { 00231 return 00232 matches_substr(pi, pend, ci + 1, cend) || 00233 matches_substr(pi + 1, pend, ci, cend); 00234 } else { 00235 // On the other hand, if this is one of the nomatch chars, we 00236 // can only stop here. 00237 return matches_substr(pi + 1, pend, ci, cend); 00238 } 00239 00240 case '?': 00241 // A '?' in the pattern string means to match exactly one 00242 // character in the candidate string. That's easy. 00243 return matches_substr(pi + 1, pend, ci + 1, cend); 00244 00245 case '[': 00246 // An open square bracket begins a set. 00247 ++pi; 00248 if ((*pi) == '!') { 00249 ++pi; 00250 if (matches_set(pi, pend, *ci)) { 00251 return false; 00252 } 00253 } else { 00254 if (!matches_set(pi, pend, *ci)) { 00255 return false; 00256 } 00257 } 00258 if (pi == pend) { 00259 // Oops, there wasn't a closing square bracket. 00260 return false; 00261 } 00262 return matches_substr(pi + 1, pend, ci + 1, cend); 00263 00264 case '\\': 00265 // A backslash escapes the next special character. 00266 ++pi; 00267 if (pi == pend) { 00268 return false; 00269 } 00270 // fall through. 00271 00272 default: 00273 // Anything else means to match exactly that. 00274 if (_case_sensitive) { 00275 if ((*pi) != (*ci)) { 00276 return false; 00277 } 00278 } else { 00279 if (tolower(*pi) != tolower(*ci)) { 00280 return false; 00281 } 00282 } 00283 return matches_substr(pi + 1, pend, ci + 1, cend); 00284 } 00285 } 00286 00287 00288 //////////////////////////////////////////////////////////////////// 00289 // Function: GlobPattern::matches_set 00290 // Access: Private 00291 // Description: Called when an unescaped open square bracked is 00292 // scanned, this is called with pi positioned after the 00293 // opening square bracket, scans the set sequence, 00294 // leaving pi positioned on the closing square bracket, 00295 // and returns true if the indicated character matches 00296 // the set of characters indicated, false otherwise. 00297 //////////////////////////////////////////////////////////////////// 00298 bool GlobPattern:: 00299 matches_set(string::const_iterator &pi, string::const_iterator pend, 00300 char ch) const { 00301 bool matched = false; 00302 00303 while (pi != pend && (*pi) != ']') { 00304 if ((*pi) == '\\') { 00305 // Backslash escapes the next character. 00306 ++pi; 00307 if (pi == pend) { 00308 return false; 00309 } 00310 } 00311 00312 if (ch == (*pi)) { 00313 matched = true; 00314 } 00315 00316 // Maybe it's an a-z style range? 00317 char start = (*pi); 00318 ++pi; 00319 if (pi != pend && (*pi) == '-') { 00320 ++pi; 00321 if (pi != pend && (*pi) != ']') { 00322 // Yes, we have a range: start-end. 00323 00324 if ((*pi) == '\\') { 00325 // Backslash escapes. 00326 ++pi; 00327 if (pi == pend) { 00328 return false; 00329 } 00330 } 00331 00332 char end = (*pi); 00333 ++pi; 00334 00335 if (ch >= start && ch <= end || 00336 (!_case_sensitive && 00337 ((tolower(ch) >= start && tolower(ch) <= end) || 00338 (toupper(ch) >= start && toupper(ch) <= end)))) { 00339 matched = true; 00340 } 00341 } else { 00342 // This was a - at the end of the string. 00343 if (ch == '-') { 00344 matched = true; 00345 } 00346 } 00347 } 00348 } 00349 00350 return matched; 00351 } 00352 00353 00354