Panda3D

patchfile.cxx

00001 // Filename: patchfile.cxx
00002 // Created by:  darren, mike (09Jan97)
00003 //
00004 ////////////////////////////////////////////////////////////////////
00005 //
00006 // PANDA 3D SOFTWARE
00007 // Copyright (c) Carnegie Mellon University.  All rights reserved.
00008 //
00009 // All use of this software is subject to the terms of the revised BSD
00010 // license.  You should have received a copy of this license along
00011 // with this source code in a file named "LICENSE."
00012 //
00013 ////////////////////////////////////////////////////////////////////
00014 
00015 #include "pandabase.h"
00016 
00017 #ifdef HAVE_OPENSSL
00018 
00019 #include "config_express.h"
00020 #include "error_utils.h"
00021 #include "patchfile.h"
00022 #include "streamReader.h"
00023 #include "streamWriter.h"
00024 #include "multifile.h"
00025 #include "hashVal.h"
00026 #include "virtualFileSystem.h"
00027 
00028 #include <string.h>  // for strstr
00029 
00030 #ifdef HAVE_TAR
00031 #include "libtar.h"
00032 #include <fcntl.h>  // for O_RDONLY
00033 #endif  // HAVE_TAR
00034 
#ifdef HAVE_TAR
// Definition of the class-wide stream pointer; it only exists in
// builds that have libtar support, and it starts out null.
istream *Patchfile::_tar_istream = (istream *)NULL;
#endif  // HAVE_TAR
00038 
00039 ////////////////////////////////////////////////////////////////////
00040 
00041 // this actually slows things down...
00042 //#define USE_MD5_FOR_HASHTABLE_INDEX_VALUES
00043 
00044 // Patch File Format ///////////////////////////////////////////////
00045 ///// IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE INSTALLER
00046 ////////////////////////////////////////////////////////////////////
00047 // [ HEADER ]
00048 //   4 bytes  0xfeebfaac ("magic number")
00049 //            (older patch files have a magic number 0xfeebfaab,
00050 //            indicating they are version number 0.)
00051 //   2 bytes  version number (if magic number == 0xfeebfaac)
00052 //   4 bytes  length of starting file (if version >= 1)
00053 //  16 bytes  MD5 of starting file    (if version >= 1)
00054 //   4 bytes  length of resulting patched file
00055 //  16 bytes  MD5 of resultant patched file
00056 
00057 // Note that MD5 hashes are written in the order observed by
00058 // HashVal::read_stream() and HashVal::write_stream(), which is not
00059 // the normal linear order.  (Each group of four bytes is reversed.)
00060 
// Byte count of a version 0 header.
const int _v0_header_length =
  4 +    // magic number
  4 +    // length of resulting patched file
  16;    // MD5 of resultant patched file

// Byte count of a version 1 (or later) header.
const int _v1_header_length =
  4 +    // magic number
  2 +    // version number
  4 +    // length of starting file
  16 +   // MD5 of starting file
  4 +    // length of resulting patched file
  16;    // MD5 of resultant patched file
00063 //
00064 // [ ADD/COPY pairs; repeated N times ]
00065 //   2 bytes  AL = ADD length
00066 //  AL bytes  bytes to add
00067 //   2 bytes  CL = COPY length
00068 //   4 bytes  offset of data to copy from original file, if CL != 0.
00069 //            If version >= 2, offset is relative to end of previous
00070 //            copy block; if version < 2, offset is relative to
00071 //            beginning of file.
00072 //
00073 // [ TERMINATOR ]
00074 //   2 bytes  zero-length ADD
00075 //   2 bytes  zero-length COPY
00076 ////////////////////////////////////////////////////////////////////
00077 ////////////////////////////////////////////////////////////////////
00078 
00079 ////////////////////////////////////////////////////////////////////
00080 // Defines
00081 ////////////////////////////////////////////////////////////////////
00082 const PN_uint32 Patchfile::_v0_magic_number = 0xfeebfaab;
00083 const PN_uint32 Patchfile::_magic_number = 0xfeebfaac;
00084 
00085 // Created version 1 on 11/2/02 to store length and MD5 of original file.
00086 // To version 2 on 11/2/02 to store copy offsets as relative.
00087 const PN_uint16 Patchfile::_current_version = 2;
00088 
00089 const PN_uint32 Patchfile::_HASH_BITS = 24;
00090 const PN_uint32 Patchfile::_HASHTABLESIZE = PN_uint32(1) << Patchfile::_HASH_BITS;
00091 const PN_uint32 Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
00092 const PN_uint32 Patchfile::_NULL_VALUE = PN_uint32(0) - 1;
00093 const PN_uint32 Patchfile::_MAX_RUN_LENGTH = (PN_uint32(1) << 16) - 1;
00094 const PN_uint32 Patchfile::_HASH_MASK = (PN_uint32(1) << Patchfile::_HASH_BITS) - 1;
00095 
00096 ////////////////////////////////////////////////////////////////////
00097 //     Function: Patchfile::Constructor
00098 //       Access: Public
00099 //  Description: Create a patch file and initializes internal data
00100 ////////////////////////////////////////////////////////////////////
00101 Patchfile::
00102 Patchfile() {
00103   PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
00104   init(buffer);
00105 }
00106 
00107 ////////////////////////////////////////////////////////////////////
00108 //     Function: Patchfile::Constructor
00109 //       Access: Public
00110 //  Description: Create patch file with buffer to patch
00111 ////////////////////////////////////////////////////////////////////
00112 Patchfile::
00113 Patchfile(PT(Buffer) buffer) {
00114   init(buffer);
00115 }
00116 
00117 ////////////////////////////////////////////////////////////////////
00118 //     Function: Patchfile::init
00119 //       Access: Private
00120 //  Description:
00121 ////////////////////////////////////////////////////////////////////
00122 void Patchfile::
00123 init(PT(Buffer) buffer) {
00124   _rename_output_to_orig = false;
00125   _delete_patchfile = false;
00126   _hash_table = NULL;
00127   _initiated = false;
00128   nassertv(!buffer.is_null());
00129   _buffer = buffer;
00130 
00131   _version_number = 0;
00132   _allow_multifile = true;
00133 
00134   _patch_stream = NULL;
00135   _origfile_stream = NULL;
00136 
00137   reset_footprint_length();
00138 }
00139 
00140 ////////////////////////////////////////////////////////////////////
00141 //     Function: Patchfile::Destructor
00142 //       Access: Public
00143 //  Description:
00144 ////////////////////////////////////////////////////////////////////
00145 Patchfile::
00146 ~Patchfile() {
00147   if (_hash_table != (PN_uint32 *)NULL) {
00148     PANDA_FREE_ARRAY(_hash_table);
00149   }
00150 
00151   if (_initiated) {
00152     cleanup();
00153   }
00154 
00155   nassertv(_patch_stream == NULL);
00156   nassertv(_origfile_stream == NULL);
00157 }
00158 
00159 ////////////////////////////////////////////////////////////////////
00160 //     Function: Patchfile::cleanup
00161 //       Access: Private
00162 //  Description: Closes and clean up internal data structures
00163 ////////////////////////////////////////////////////////////////////
00164 void Patchfile::
00165 cleanup() {
00166   if (!_initiated) {
00167     express_cat.error()
00168       << "Patchfile::cleanup() - Patching has not been initiated"
00169       << endl;
00170     return;
00171   }
00172 
00173   // close files
00174   VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
00175   if (_origfile_stream != NULL) {
00176     vfs->close_read_file(_origfile_stream);
00177     _origfile_stream = NULL;
00178   }
00179   if (_patch_stream != NULL) {
00180     vfs->close_read_file(_patch_stream);
00181     _patch_stream = NULL;
00182   }
00183   _write_stream.close();
00184 
00185   _initiated = false;
00186 }
00187 
00188 ////////////////////////////////////////////////////////////////////
00189 ///// PATCH FILE APPLY MEMBER FUNCTIONS
00190 /////
00191 ////////////////////
00192 ///// NOTE: this patch-application functionality unfortunately has to be
00193 /////       duplicated in the Installer. It is contained in the file
00194 /////       installerApplyPatch.cxx
00195 /////       PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY OF THIS
00196 /////       LOGIC CHANGES! (i.e. if the patch file format changes)
00197 ////////////////////
00198 ////////////////////////////////////////////////////////////////////
00199 
00200 ////////////////////////////////////////////////////////////////////
00201 //     Function: Patchfile::initiate
00202 //       Access: Published
00203 //  Description: Set up to apply the patch to the file (original
00204 //               file and patch are destroyed in the process).
00205 ////////////////////////////////////////////////////////////////////
00206 int Patchfile::
00207 initiate(const Filename &patch_file, const Filename &file) {
00208   int result = initiate(patch_file, file, Filename::temporary("", "patch_"));
00209   _rename_output_to_orig = true;
00210   _delete_patchfile = !keep_temporary_files;
00211   return result;
00212 }
00213 
00214 ////////////////////////////////////////////////////////////////////
00215 //     Function: Patchfile::initiate
00216 //       Access: Published
00217 //  Description: Set up to apply the patch to the file.  In this form,
00218 //               neither the original file nor the patch file are
00219 //               destroyed.
00220 ////////////////////////////////////////////////////////////////////
00221 int Patchfile::
00222 initiate(const Filename &patch_file, const Filename &orig_file,
00223          const Filename &target_file) {
00224   if (_initiated) {
00225     express_cat.error()
00226       << "Patchfile::initiate() - Patching has already been initiated"
00227       << endl;
00228     return EU_error_abort;
00229   }
00230 
00231   nassertr(orig_file != target_file, EU_error_abort);
00232 
00233   VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
00234 
00235   // Open the original file for read
00236   nassertr(_origfile_stream == NULL, EU_error_abort);
00237   _orig_file = orig_file;
00238   _orig_file.set_binary();
00239   _origfile_stream = vfs->open_read_file(_orig_file, false);
00240   if (_origfile_stream == NULL) {
00241     express_cat.error()
00242       << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
00243     return get_write_error();
00244   }
00245 
00246   // Open the temp file for write
00247   _output_file = target_file;
00248   _output_file.set_binary();
00249   if (!_output_file.open_write(_write_stream)) {
00250     express_cat.error()
00251       << "Patchfile::initiate() - Failed to open file: " << _output_file << endl;
00252     return get_write_error();
00253   }
00254 
00255   if (express_cat.is_debug()) {
00256     express_cat.debug()
00257       << "Patchfile using output file " << _output_file << "\n";
00258   }
00259 
00260   int result = internal_read_header(patch_file);
00261   _total_bytes_processed = 0;
00262 
00263   _initiated = true;
00264   return result;
00265 }
00266 
00267 ////////////////////////////////////////////////////////////////////
00268 //     Function: Patchfile::read_header
00269 //       Access: Published
00270 //  Description: Opens the patch file for reading, and gets the header
00271 //               information from the file but does not begin to do
00272 //               any real work.  This can be used to query the data
00273 //               stored in the patch.
00274 ////////////////////////////////////////////////////////////////////
00275 int Patchfile::
00276 read_header(const Filename &patch_file) {
00277   if (_initiated) {
00278     express_cat.error()
00279       << "Patchfile::initiate() - Patching has already been initiated"
00280       << endl;
00281     return EU_error_abort;
00282   }
00283 
00284   int result = internal_read_header(patch_file);
00285   if (_patch_stream != NULL) {
00286     VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
00287     vfs->close_read_file(_patch_stream);
00288     _patch_stream = NULL;
00289   }
00290   return result;
00291 }
00292 
00293 ////////////////////////////////////////////////////////////////////
00294 //     Function: Patchfile::run
00295 //       Access: Published
00296 //  Description: Perform one buffer's worth of patching
00297 //               Returns EU_ok while patching
00298 //               Returns EU_success when done
00299 //               If error happens will return one of:
00300 //               EU_error_abort : Patching has not been initiated
00301 //               EU_error_file_invalid : file is corrupted
00302 //               EU_error_invalid_checksum : incompatible patch file
00303 //               EU_error_write_file_rename : could not rename file
00304 ////////////////////////////////////////////////////////////////////
00305 int Patchfile::
00306 run() {
00307   // Now patch the file using the given buffer
00308   int buflen;
00309   int bytes_read;
00310   PN_uint16 ADD_length;
00311   PN_uint16 COPY_length;
00312   PN_int32 COPY_offset;
00313 
00314   if (_initiated == false) {
00315     express_cat.error()
00316       << "Patchfile::run() - Patching has not been initiated"
00317       << endl;
00318     return EU_error_abort;
00319   }
00320 
00321   nassertr(_patch_stream != NULL, EU_error_abort);
00322   nassertr(_origfile_stream != NULL, EU_error_abort);
00323   StreamReader patch_reader(*_patch_stream);
00324 
00325   buflen = _buffer->get_length();
00326   bytes_read = 0;
00327 
00328   while (bytes_read < buflen) {
00329     ///////////
00330     // read # of ADD bytes
00331     nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
00332     ADD_length = patch_reader.get_uint16();
00333     if (_patch_stream->fail()) {
00334       express_cat.error()
00335         << "Truncated patch file.\n";
00336       return EU_error_file_invalid;
00337     }
00338 
00339     bytes_read += (int)ADD_length;
00340     _total_bytes_processed += (int)ADD_length;
00341     if (_total_bytes_processed > _total_bytes_to_process) {
00342       express_cat.error()
00343         << "Runaway patch file.\n";
00344       return EU_error_file_invalid;
00345     }
00346 
00347     // if there are bytes to add, read them from patch file and write them to output
00348     if (express_cat.is_spam() && ADD_length != 0) {
00349       express_cat.spam()
00350         << "ADD: " << ADD_length << " (to "
00351         << _write_stream.tellp() << ")" << endl;
00352     }
00353 
00354     PN_uint32 bytes_left = (PN_uint32)ADD_length;
00355     while (bytes_left > 0) {
00356       PN_uint32 bytes_this_time = (PN_uint32) min(bytes_left, (PN_uint32) buflen);
00357       _patch_stream->read(_buffer->_buffer, bytes_this_time);
00358       if (_patch_stream->fail()) {
00359         express_cat.error()
00360           << "Truncated patch file.\n";
00361         return EU_error_file_invalid;
00362       }
00363       _write_stream.write(_buffer->_buffer, bytes_this_time);
00364       bytes_left -= bytes_this_time;
00365     }
00366 
00367     ///////////
00368     // read # of COPY bytes
00369     nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
00370     COPY_length = patch_reader.get_uint16();
00371     if (_patch_stream->fail()) {
00372       express_cat.error()
00373         << "Truncated patch file.\n";
00374       return EU_error_file_invalid;
00375     }
00376 
00377     bytes_read += (int)COPY_length;
00378     _total_bytes_processed += (int)COPY_length;
00379     if (_total_bytes_processed > _total_bytes_to_process) {
00380       express_cat.error()
00381         << "Runaway patch file.\n";
00382       return EU_error_file_invalid;
00383     }
00384 
00385     // if there are bytes to copy, read them from original file and write them to output
00386     if (0 != COPY_length) {
00387       // read copy offset
00388       nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
00389       COPY_offset = patch_reader.get_int32();
00390       if (_patch_stream->fail()) {
00391         express_cat.error()
00392           << "Truncated patch file.\n";
00393         return EU_error_file_invalid;
00394       }
00395 
00396       // seek to the copy source pos
00397       if (_version_number < 2) {
00398         _origfile_stream->seekg(COPY_offset, ios::beg);
00399       } else {
00400         _origfile_stream->seekg(COPY_offset, ios::cur);
00401       }
00402       if (_origfile_stream->fail()) {
00403         express_cat.error()
00404           << "Invalid copy offset in patch file.\n";
00405         return EU_error_file_invalid;
00406       }
00407 
00408       if (express_cat.is_spam()) {
00409         express_cat.spam()
00410           << "COPY: " << COPY_length << " bytes from offset "
00411           << COPY_offset << " (from " << _origfile_stream->tellg()
00412           << " to " << _write_stream.tellp() << ")"
00413           << endl;
00414       }
00415 
00416       // read the copy bytes from original file and write them to output
00417       PN_uint32 bytes_left = (PN_uint32)COPY_length;
00418 
00419       while (bytes_left > 0) {
00420         PN_uint32 bytes_this_time = (PN_uint32) min(bytes_left, (PN_uint32) buflen);
00421         _origfile_stream->read(_buffer->_buffer, bytes_this_time);
00422         if (_origfile_stream->fail()) {
00423           express_cat.error()
00424             << "Invalid copy length in patch file.\n";
00425           return EU_error_file_invalid;
00426         }
00427         _write_stream.write(_buffer->_buffer, bytes_this_time);
00428         bytes_left -= bytes_this_time;
00429       }
00430     }
00431 
00432     // if we got a pair of zero-length ADD and COPY blocks, we're done
00433     if ((0 == ADD_length) && (0 == COPY_length)) {
00434       cleanup();
00435 
00436       if (express_cat.is_debug()) {
00437         express_cat.debug()
00438           //<< "result file = " << _result_file_length
00439           << " total bytes = " << _total_bytes_processed << endl;
00440       }
00441 
00442       // check the MD5 from the patch file against the newly patched file
00443       {
00444         HashVal MD5_actual;
00445         MD5_actual.hash_file(_output_file);
00446         if (_MD5_ofResult != MD5_actual) {
00447           // Whoops, patching screwed up somehow.
00448           if (_origfile_stream != NULL) {
00449             VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
00450             vfs->close_read_file(_origfile_stream);
00451             _origfile_stream = NULL;
00452           }
00453           _write_stream.close();
00454 
00455           express_cat.info()
00456             << "Patching produced incorrect checksum.  Got:\n"
00457             << "    " << MD5_actual
00458             << "\nExpected:\n"
00459             << "    " << _MD5_ofResult
00460             << "\n";
00461 
00462           // This is a fine time to double-check the starting
00463           // checksum.
00464           if (!has_source_hash()) {
00465             express_cat.info()
00466               << "No source hash in patch file to verify.\n";
00467           } else {
00468             HashVal MD5_orig;
00469             MD5_orig.hash_file(_orig_file);
00470             if (MD5_orig != get_source_hash()) {
00471               express_cat.info()
00472                 << "Started from incorrect source file.  Got:\n"
00473                 << "    " << MD5_orig
00474                 << "\nExpected:\n"
00475                 << "    " << get_source_hash()
00476                 << "\n";
00477             } else {
00478               express_cat.info()
00479                 << "Started from correct source file:\n"
00480                 << "    " << MD5_orig
00481                 << "\n";
00482             }
00483           }
00484 
00485           // delete the temp file and the patch file
00486           if (_rename_output_to_orig) {
00487             _output_file.unlink();
00488           }
00489           if (_delete_patchfile) {
00490             _patch_file.unlink();
00491           }
00492           // return "invalid checksum"
00493           return EU_error_invalid_checksum;
00494         }
00495       }
00496 
00497       // delete the patch file
00498       if (_delete_patchfile) {
00499         _patch_file.unlink();
00500       }
00501 
00502       // rename the temp file to the original file name
00503       if (_rename_output_to_orig) {
00504         _orig_file.unlink();
00505         if (!_output_file.rename_to(_orig_file)) {
00506           express_cat.error()
00507             << "Patchfile::run() failed to rename temp file to: " << _orig_file
00508             << endl;
00509           return EU_error_write_file_rename;
00510         }
00511       }
00512 
00513       return EU_success;
00514     }
00515   }
00516 
00517   return EU_ok;
00518 }
00519 
00520 ////////////////////////////////////////////////////////////////////
00521 //     Function: Patchfile::apply
00522 //       Access: Public
00523 //  Description: Patches the entire file in one call
00524 //               returns true on success and false on error
00525 //
00526 //               This version will delete the patch file and overwrite
00527 //               the original file.
00528 ////////////////////////////////////////////////////////////////////
00529 bool Patchfile::
00530 apply(Filename &patch_file, Filename &file) {
00531   int ret = initiate(patch_file, file);
00532   if (ret < 0)
00533     return false;
00534   for (;;) {
00535     ret = run();
00536     if (ret == EU_success)
00537       return true;
00538     if (ret < 0)
00539       return false;
00540   }
00541   return false;
00542 }
00543 
00544 ////////////////////////////////////////////////////////////////////
00545 //     Function: Patchfile::apply
00546 //       Access: Public
00547 //  Description: Patches the entire file in one call
00548 //               returns true on success and false on error
00549 //
00550 //               This version will not delete any files.
00551 ////////////////////////////////////////////////////////////////////
00552 bool Patchfile::
00553 apply(Filename &patch_file, Filename &orig_file, const Filename &target_file) {
00554   int ret = initiate(patch_file, orig_file, target_file);
00555   if (ret < 0)
00556     return false;
00557   for (;;) {
00558     ret = run();
00559     if (ret == EU_success)
00560       return true;
00561     if (ret < 0)
00562       return false;
00563   }
00564   return false;
00565 }
00566 
00567 
00568 ////////////////////////////////////////////////////////////////////
00569 //     Function: Patchfile::internal_read_header
00570 //       Access: Private
00571 //  Description: Reads the header and leaves the patch file open.
00572 ////////////////////////////////////////////////////////////////////
00573 int Patchfile::
00574 internal_read_header(const Filename &patch_file) {
00575   // Open the patch file for read
00576   VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
00577   nassertr(_patch_stream == NULL, EU_error_abort);
00578   _patch_file = patch_file;
00579   _patch_file.set_binary();
00580   _patch_stream = vfs->open_read_file(_patch_file, true);
00581   if (_patch_stream == NULL) {
00582     express_cat.error()
00583       << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
00584     return get_write_error();
00585   }
00586 
00587   /////////////
00588   // read header, make sure the patch file is valid
00589 
00590   StreamReader patch_reader(*_patch_stream);
00591 
00592   // check the magic number
00593   nassertr(_buffer->get_length() >= _v0_header_length, false);
00594   PN_uint32 magic_number = patch_reader.get_uint32();
00595   if (magic_number != _magic_number && magic_number != _v0_magic_number) {
00596     express_cat.error()
00597       << "Invalid patch file: " << _patch_file << endl;
00598     return EU_error_file_invalid;
00599   }
00600 
00601   _version_number = 0;
00602   if (magic_number != _v0_magic_number) {
00603     _version_number = patch_reader.get_uint16();
00604   }
00605   if (_version_number > _current_version) {
00606     express_cat.error()
00607       << "Can't read version " << _version_number << " patch files: "
00608       << _patch_file << endl;
00609     return EU_error_file_invalid;
00610   }
00611 
00612   if (_version_number >= 1) {
00613     // Get the length of the source file.
00614     /*PN_uint32 source_file_length =*/ patch_reader.get_uint32();
00615 
00616     // get the MD5 of the source file.
00617     _MD5_ofSource.read_stream(patch_reader);
00618   }
00619 
00620   // get the length of the patched result file
00621   _total_bytes_to_process = patch_reader.get_uint32();
00622 
00623   // get the MD5 of the resultant patched file
00624   _MD5_ofResult.read_stream(patch_reader);
00625 
00626   express_cat.debug()
00627     << "Patchfile::initiate() - valid patchfile" << endl;
00628 
00629   return EU_success;
00630 }
00631 
00632 ////////////////////////////////////////////////////////////////////
00633 ///// PATCH FILE BUILDING MEMBER FUNCTIONS
00634 ////////////////////////////////////////////////////////////////////
00635 
00636 ////////////////////////////////////////////////////////////////////
00637 //     Function: Patchfile::calc_hash
00638 //       Access: Private
00639 //  Description:
00640 ////////////////////////////////////////////////////////////////////
00641 PN_uint32 Patchfile::
00642 calc_hash(const char *buffer) {
00643 #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
00644   HashVal hash;
00645   hash.hash_buffer(buffer, _footprint_length);
00646 
00647   //cout << PN_uint16(hash.get_value(0)) << " ";
00648 
00649   return PN_uint16(hash.get_value(0));
00650 #else
00651   PN_uint32 hash_value = 0;
00652 
00653   for(int i = 0; i < (int)_footprint_length; i++) {
00654     // this is probably not such a good hash. to be replaced
00655     /// --> TRIED MD5, was not worth it for the execution-time hit on 800Mhz PC
00656     hash_value ^= PN_uint32(*buffer) << ((i * 2) % Patchfile::_HASH_BITS);
00657     buffer++;
00658   }
00659 
00660   // use the bits that overflowed past the end of the hash bit range
00661   // (this is intended for _HASH_BITS == 24)
00662   hash_value ^= (hash_value >> Patchfile::_HASH_BITS);
00663 
00664   //cout << hash_value << " ";
00665 
00666   return hash_value & _HASH_MASK;
00667 #endif
00668 }
00669 
00670 ////////////////////////////////////////////////////////////////////
00671 //     Function: Patchfile::build_hash_link_tables
00672 //       Access: Private
00673 //  Description:
00674 //               The hash and link tables allow for a quick, linear
00675 //               search of all locations in the file that begin with
00676 //               a particular sequence of bytes, or "footprint."
00677 //
00678 //               The hash table is a table of offsets into the file,
00679 //               with one entry for every possible footprint hash
00680 //               value. For a hash of a footprint, the entry at the
00681 //               offset of the hash value provides an initial location
00682 //               in the file that has a matching footprint.
00683 //
00684 //               The link table is a large linked list of file offsets,
00685 //               with one entry for every byte in the file. Each offset
00686 //               in the link table will point to another offset that
00687 //               has the same footprint at the corresponding offset in the
00688 //               actual file. Starting with an offset taken from the hash
00689 //               table, one can rapidly produce a list of offsets that
00690 //               all have the same footprint.
00691 ////////////////////////////////////////////////////////////////////
00692 void Patchfile::
00693 build_hash_link_tables(const char *buffer_orig, PN_uint32 length_orig,
00694   PN_uint32 *hash_table, PN_uint32 *link_table) {
00695 
00696   PN_uint32 i;
00697 
00698   // clear hash table
00699   for(i = 0; i < _HASHTABLESIZE; i++) {
00700     hash_table[i] = _NULL_VALUE;
00701   }
00702 
00703   // clear link table
00704   for(i = 0; i < length_orig; i++) {
00705     link_table[i] = _NULL_VALUE;
00706   }
00707 
00708   if(length_orig < _footprint_length) return;
00709 
00710   // run through original file and hash each footprint
00711   for(i = 0; i < (length_orig - _footprint_length); i++) {
00712 
00713     PN_uint32 hash_value = calc_hash(&buffer_orig[i]);
00714 
00715     // we must now store this file index in the hash table
00716     // at the offset of the hash value
00717 
00718     // to account for multiple file offsets with identical
00719     // hash values, there is a link table with an entry for
00720     // every footprint in the file. We create linked lists
00721     // of offsets in the link table.
00722 
00723     // first, set the value in the link table for the current
00724     // offset to whatever the current list head is (the
00725     // value in the hash table) (note that this only works
00726     // because the hash and link tables both use
00727     // _NULL_VALUE to indicate a null index)
00728     link_table[i] = hash_table[hash_value];
00729 
00730     // set the new list head; store the current offset in the
00731     // hash table at the offset of the footprint's hash value
00732     hash_table[hash_value] = i;
00733 
00734     /*
00735     if (_NULL_VALUE == hash_table[hash_value]) {
00736       // hash entry is empty, store this offset
00737       hash_table[hash_value] = i;
00738     } else {
00739       // hash entry is taken, go to the link table
00740       PN_uint32 link_offset = hash_table[hash_value];
00741 
00742       while (_NULL_VALUE != link_table[link_offset]) {
00743         link_offset = link_table[link_offset];
00744       }
00745       link_table[link_offset] = i;
00746     }
00747     */
00748   }
00749 }
00750 
00751 ////////////////////////////////////////////////////////////////////
00752 //     Function: Patchfile::calc_match_length
00753 //       Access: Private
00754 //  Description:
00755 //               This function calculates the length of a match between
00756 //               two strings of bytes
00757 ////////////////////////////////////////////////////////////////////
00758 PN_uint32 Patchfile::
00759 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length,
00760                   PN_uint32 min_length) {
00761   // early out: look ahead and sample the end of the minimum range
00762   if (min_length > 2) {
00763     if (min_length >= max_length)
00764       return 0;
00765     if (buf1[min_length] != buf2[min_length] ||
00766         buf1[min_length-1] != buf2[min_length-1] ||
00767         buf1[min_length-2] != buf2[min_length-2]) {
00768       return 0;
00769     }
00770   }
00771 
00772   PN_uint32 length = 0;
00773   while ((length < max_length) && (*buf1 == *buf2)) {
00774     buf1++, buf2++, length++;
00775   }
00776   return length;
00777 }
00778 
00779 ////////////////////////////////////////////////////////////////////
00780 //     Function: Patchfile::find_longest_match
00781 //       Access: Private
00782 //  Description:
00783 //               This function will find the longest string in the
00784 //               original file that matches a string in the new file.
00785 ////////////////////////////////////////////////////////////////////
00786 void Patchfile::
00787 find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_length,
00788   PN_uint32 *hash_table, PN_uint32 *link_table, const char* buffer_orig,
00789   PN_uint32 length_orig, const char* buffer_new, PN_uint32 length_new) {
00790 
00791   // set length to a safe value
00792   copy_length = 0;
00793 
00794   // get offset of matching string (in orig file) from hash table
00795   PN_uint32 hash_value = calc_hash(&buffer_new[new_pos]);
00796 
00797   // if no match, bail
00798   if (_NULL_VALUE == hash_table[hash_value])
00799     return;
00800 
00801   copy_pos = hash_table[hash_value];
00802 
00803   // calc match length
00804   copy_length = (PN_uint16)calc_match_length(&buffer_new[new_pos],
00805                                              &buffer_orig[copy_pos],
00806                                              min(min((length_new - new_pos),
00807                                                      (length_orig - copy_pos)),
00808                                                  _MAX_RUN_LENGTH),
00809                                              0);
00810 
00811   // run through link table, see if we find any longer matches
00812   PN_uint32 match_offset;
00813   PN_uint16 match_length;
00814   match_offset = link_table[copy_pos];
00815 
00816   while (match_offset != _NULL_VALUE) {
00817     match_length = (PN_uint16)calc_match_length(&buffer_new[new_pos],
00818                                                 &buffer_orig[match_offset],
00819                                                 min(min((length_new - new_pos),
00820                                                         (length_orig - match_offset)),
00821                                                     _MAX_RUN_LENGTH),
00822                                                 copy_length);
00823 
00824     // have we found a longer match?
00825     if (match_length > copy_length) {
00826       copy_pos = match_offset;
00827       copy_length = match_length;
00828     }
00829 
00830     // traverse the link table
00831     match_offset = link_table[match_offset];
00832   }
00833 }
00834 
00835 ////////////////////////////////////////////////////////////////////
00836 //     Function: Patchfile::emit_ADD
00837 //       Access: Private
00838 //  Description:
00839 ////////////////////////////////////////////////////////////////////
00840 void Patchfile::
00841 emit_ADD(ostream &write_stream, PN_uint32 length, const char* buffer) {
00842   nassertv(length == (PN_uint16)length); //we only write a uint16
00843 
00844   if (express_cat.is_spam()) {
00845     express_cat.spam()
00846       << "ADD: " << length << " (to " << _add_pos << ")" << endl;
00847   }
00848 
00849   // write ADD length
00850   StreamWriter patch_writer(write_stream);
00851   patch_writer.add_uint16((PN_uint16)length);
00852 
00853   // if there are bytes to add, add them
00854   if (length > 0) {
00855     patch_writer.append_data(buffer, (PN_uint16)length);
00856   }
00857 
00858   _add_pos += length;
00859 }
00860 
00861 ////////////////////////////////////////////////////////////////////
00862 //     Function: Patchfile::emit_COPY
00863 //       Access: Private
00864 //  Description:
00865 ////////////////////////////////////////////////////////////////////
00866 void Patchfile::
00867 emit_COPY(ostream &write_stream, PN_uint32 length, PN_uint32 copy_pos) {
00868   nassertv(length == (PN_uint16)length); //we only write a uint16
00869 
00870   PN_int32 offset = (int)copy_pos - (int)_last_copy_pos;
00871   if (express_cat.is_spam()) {
00872     express_cat.spam()
00873       << "COPY: " << length << " bytes from offset " << offset
00874       << " (from " << copy_pos << " to " << _add_pos << ")" << endl;
00875   }
00876 
00877   // write COPY length
00878   StreamWriter patch_writer(write_stream);
00879   patch_writer.add_uint16((PN_uint16)length);
00880 
00881   if ((PN_uint16)length != 0) {
00882     // write COPY offset
00883     patch_writer.add_int32(offset);
00884     _last_copy_pos = copy_pos + length;
00885   }
00886 
00887   _add_pos += length;
00888 }
00889 
00890 ////////////////////////////////////////////////////////////////////
00891 //     Function: Patchfile::emit_add_and_copy
00892 //       Access: Private
00893 //  Description: Emits an add/copy pair.  If necessary, repeats the
00894 //               pair as needed to work around the 16-bit chunk size
00895 //               limit.
00896 ////////////////////////////////////////////////////////////////////
00897 void Patchfile::
00898 emit_add_and_copy(ostream &write_stream,
00899                   PN_uint32 add_length, const char *add_buffer,
00900                   PN_uint32 copy_length, PN_uint32 copy_pos) {
00901   if (add_length == 0 && copy_length == 0) {
00902     // Don't accidentally emit a termination code.
00903     return;
00904   }
00905 
00906   static const PN_uint16 max_write = 65535;
00907   while (add_length > max_write) {
00908     // Overflow.  This chunk is too large to fit into a single
00909     // ADD block, so we have to write it as multiple ADDs.
00910     emit_ADD(write_stream, max_write, add_buffer);
00911     add_buffer += max_write;
00912     add_length -= max_write;
00913     emit_COPY(write_stream, 0, 0);
00914   }
00915 
00916   emit_ADD(write_stream, add_length, add_buffer);
00917 
00918   while (copy_length > max_write) {
00919     // Overflow.
00920     emit_COPY(write_stream, max_write, copy_pos);
00921     copy_pos += max_write;
00922     copy_length -= max_write;
00923     emit_ADD(write_stream, 0, NULL);
00924   }
00925 
00926   emit_COPY(write_stream, copy_length, copy_pos);
00927 }
00928 
00929 ////////////////////////////////////////////////////////////////////
00930 //     Function: Patchfile::cache_add_and_copy
00931 //       Access: Private
00932 //  Description: Potentially emits one or more add/copy pairs.  The
00933 //               current state is saved, so as to minimize wasted
00934 //               emits from consecutive adds or copies.
00935 ////////////////////////////////////////////////////////////////////
00936 void Patchfile::
00937 cache_add_and_copy(ostream &write_stream,
00938                    PN_uint32 add_length, const char *add_buffer,
00939                    PN_uint32 copy_length, PN_uint32 copy_pos) {
00940   if (add_length != 0) {
00941     if (_cache_copy_length != 0) {
00942       // Have to flush.
00943       cache_flush(write_stream);
00944     }
00945     // Add the string to the current cache.
00946     _cache_add_data += string(add_buffer, add_length);
00947   }
00948 
00949   if (copy_length != 0) {
00950     if (_cache_copy_length == 0) {
00951       // Start a new copy phase.
00952       _cache_copy_start = copy_pos;
00953       _cache_copy_length = copy_length;
00954 
00955     } else if (_cache_copy_start + _cache_copy_length == copy_pos) {
00956       // We can just tack on the copy to what we've already got.
00957       _cache_copy_length += copy_length;
00958 
00959     } else {
00960       // It's a discontinuous copy.  We have to flush.
00961       cache_flush(write_stream);
00962       _cache_copy_start = copy_pos;
00963       _cache_copy_length = copy_length;
00964     }
00965   }
00966 }
00967 
00968 ////////////////////////////////////////////////////////////////////
00969 //     Function: Patchfile::cache_flush
00970 //       Access: Private
00971 //  Description: Closes any copy or add phases that are still open
00972 //               after a previous call to cache_add_and_copy().
00973 ////////////////////////////////////////////////////////////////////
00974 void Patchfile::
00975 cache_flush(ostream &write_stream) {
00976   emit_add_and_copy(write_stream,
00977                     _cache_add_data.size(), _cache_add_data.data(),
00978                     _cache_copy_length, _cache_copy_start);
00979   _cache_add_data = string();
00980   _cache_copy_length = 0;
00981 }
00982 
00983 
00984 ////////////////////////////////////////////////////////////////////
00985 //     Function: Patchfile::write_header
00986 //       Access: Private
00987 //  Description:
00988 //               Writes the patchfile header.
00989 ////////////////////////////////////////////////////////////////////
00990 void Patchfile::
00991 write_header(ostream &write_stream,
00992              istream &stream_orig, istream &stream_new) {
00993   // prepare to write the patch file header
00994 
00995   // write the patch file header
00996   StreamWriter patch_writer(write_stream);
00997   patch_writer.add_uint32(_magic_number);
00998   patch_writer.add_uint16(_current_version);
00999 
01000   stream_orig.seekg(0, ios::end);
01001   streampos source_file_length = stream_orig.tellg();
01002   patch_writer.add_uint32((PN_uint32)source_file_length);
01003 
01004   // calc MD5 of original file
01005   _MD5_ofSource.hash_stream(stream_orig);
01006   // add it to the header
01007   _MD5_ofSource.write_stream(patch_writer);
01008 
01009   if (express_cat.is_debug()) {
01010     express_cat.debug()
01011       << "Orig: " << _MD5_ofSource << "\n";
01012   }
01013 
01014   stream_new.seekg(0, ios::end);
01015   streampos result_file_length = stream_new.tellg();
01016   patch_writer.add_uint32((PN_uint32)result_file_length);
01017 
01018   // calc MD5 of resultant patched file
01019   _MD5_ofResult.hash_stream(stream_new);
01020   // add it to the header
01021   _MD5_ofResult.write_stream(patch_writer);
01022 
01023   if (express_cat.is_debug()) {
01024     express_cat.debug()
01025       << " New: " << _MD5_ofResult << "\n";
01026   }
01027 }
01028 
01029 ////////////////////////////////////////////////////////////////////
01030 //     Function: Patchfile::write_terminator
01031 //       Access: Private
01032 //  Description: Writes the patchfile terminator.
01033 ////////////////////////////////////////////////////////////////////
01034 void Patchfile::
01035 write_terminator(ostream &write_stream) {
01036   cache_flush(write_stream);
01037   // write terminator (null ADD, null COPY)
01038   emit_ADD(write_stream, 0, NULL);
01039   emit_COPY(write_stream, 0, 0);
01040 }
01041 
01042 ////////////////////////////////////////////////////////////////////
01043 //     Function: Patchfile::compute_file_patches
01044 //       Access: Private
01045 //  Description: Computes the patches for the entire file (if it is
01046 //               not a multifile) or for a single subfile (if it is)
01047 //
01048 //               Returns true if successful, false on error.
01049 ////////////////////////////////////////////////////////////////////
01050 bool Patchfile::
01051 compute_file_patches(ostream &write_stream,
01052                      PN_uint32 offset_orig, PN_uint32 offset_new,
01053                      istream &stream_orig, istream &stream_new) {
01054   // read in original file
01055   stream_orig.seekg(0, ios::end);
01056   nassertr(stream_orig, false);
01057   PN_uint32 source_file_length = stream_orig.tellg();
01058   if (express_cat.is_debug()) {
01059     express_cat.debug()
01060       << "Allocating " << source_file_length << " bytes to read orig\n";
01061   }
01062 
01063   char *buffer_orig = (char *)PANDA_MALLOC_ARRAY(source_file_length);
01064   stream_orig.seekg(0, ios::beg);
01065   stream_orig.read(buffer_orig, source_file_length);
01066 
01067   // read in new file
01068   stream_new.seekg(0, ios::end);
01069   PN_uint32 result_file_length = stream_new.tellg();
01070   nassertr(stream_new, false);
01071   if (express_cat.is_debug()) {
01072     express_cat.debug()
01073       << "Allocating " << result_file_length << " bytes to read new\n";
01074   }
01075 
01076   char *buffer_new = (char *)PANDA_MALLOC_ARRAY(result_file_length);
01077   stream_new.seekg(0, ios::beg);
01078   stream_new.read(buffer_new, result_file_length);
01079 
01080   // allocate hash/link tables
01081   if (_hash_table == (PN_uint32 *)NULL) {
01082     if (express_cat.is_debug()) {
01083       express_cat.debug()
01084         << "Allocating hashtable of size " << _HASHTABLESIZE << " * 4\n";
01085     }
01086     _hash_table = (PN_uint32 *)PANDA_MALLOC_ARRAY(_HASHTABLESIZE * sizeof(PN_uint32));
01087   }
01088 
01089   if (express_cat.is_debug()) {
01090     express_cat.debug()
01091       << "Allocating linktable of size " << source_file_length << " * 4\n";
01092   }
01093 
01094   PN_uint32 *link_table = (PN_uint32 *)PANDA_MALLOC_ARRAY(source_file_length * sizeof(PN_uint32));
01095 
01096   // build hash and link tables for original file
01097   build_hash_link_tables(buffer_orig, source_file_length, _hash_table, link_table);
01098 
01099   // run through new file
01100 
01101   PN_uint32 new_pos = 0;
01102   PN_uint32 start_pos = new_pos; // this is the position for the start of ADD operations
01103 
01104   if(((PN_uint32) result_file_length) >= _footprint_length)
01105   {
01106     while (new_pos < (result_file_length - _footprint_length)) {
01107 
01108       // find best match for current position
01109       PN_uint32 COPY_pos;
01110       PN_uint16 COPY_length;
01111 
01112       find_longest_match(new_pos, COPY_pos, COPY_length, _hash_table, link_table,
01113         buffer_orig, source_file_length, buffer_new, result_file_length);
01114 
01115       // if no match or match not longer than footprint length, skip to next byte
01116       if (COPY_length < _footprint_length) {
01117         // go to next byte
01118         new_pos++;
01119       } else {
01120         // emit ADD for all skipped bytes
01121         int num_skipped = (int)new_pos - (int)start_pos;
01122         if (express_cat.is_spam()) {
01123           express_cat.spam()
01124             << "build: num_skipped = " << num_skipped
01125             << endl;
01126         }
01127         cache_add_and_copy(write_stream, num_skipped, &buffer_new[start_pos],
01128                            COPY_length, COPY_pos + offset_orig);
01129         new_pos += (PN_uint32)COPY_length;
01130         start_pos = new_pos;
01131       }
01132     }
01133   }
01134 
01135   if (express_cat.is_spam()) {
01136     express_cat.spam()
01137       << "build: result_file_length = " << result_file_length
01138       << " start_pos = " << start_pos
01139       << endl;
01140   }
01141 
01142   // are there still more bytes left in the new file?
01143   if (start_pos != result_file_length) {
01144     // emit ADD for all remaining bytes
01145 
01146     PN_uint32 remaining_bytes = result_file_length - start_pos;
01147     cache_add_and_copy(write_stream, remaining_bytes, &buffer_new[start_pos],
01148                        0, 0);
01149     start_pos += remaining_bytes;
01150   }
01151 
01152   PANDA_FREE_ARRAY(link_table);
01153 
01154   PANDA_FREE_ARRAY(buffer_orig);
01155   PANDA_FREE_ARRAY(buffer_new);
01156 
01157   return true;
01158 }
01159 
01160 ////////////////////////////////////////////////////////////////////
01161 //     Function: Patchfile::compute_mf_patches
01162 //       Access: Private
01163 //  Description: Computes patches for the files, knowing that they are
01164 //               both Panda Multifiles.  This will build patches one
01165 //               subfile at a time, which can potentially be much,
01166 //               much faster for large Multifiles that contain many
01167 //               small subfiles.
01168 ////////////////////////////////////////////////////////////////////
01169 bool Patchfile::
01170 compute_mf_patches(ostream &write_stream,
01171                    PN_uint32 offset_orig, PN_uint32 offset_new,
01172                    istream &stream_orig, istream &stream_new) {
01173   Multifile mf_orig, mf_new;
01174   IStreamWrapper stream_origw(stream_orig);
01175   IStreamWrapper stream_neww(stream_new);
01176   if (!mf_orig.open_read(&stream_origw) ||
01177       !mf_new.open_read(&stream_neww)) {
01178     express_cat.error()
01179       << "Input multifiles appear to be corrupt.\n";
01180     return false;
01181   }
01182 
01183   if (mf_new.needs_repack()) {
01184     express_cat.error()
01185       << "Input multifiles need to be repacked.\n";
01186     return false;
01187   }
01188 
01189   // First, compute the patch for the header / index.
01190 
01191   {
01192     ISubStream index_orig(&stream_origw, 0, mf_orig.get_index_end());
01193     ISubStream index_new(&stream_neww, 0, mf_new.get_index_end());
01194     if (!do_compute_patches("", "",
01195                             write_stream, offset_orig, offset_new,
01196                             index_orig, index_new)) {
01197       return false;
01198     }
01199     nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + mf_new.get_index_end(), false);
01200   }
01201 
01202   // Now walk through each subfile in the new multifile.  If a
01203   // particular subfile exists in both source files, we compute the
01204   // patches for the subfile; for a new subfile, we trivially add it.
01205   // If a subfile has been removed, we simply don't add it (we'll
01206   // never even notice this case).
01207   int new_num_subfiles = mf_new.get_num_subfiles();
01208   for (int ni = 0; ni < new_num_subfiles; ++ni) {
01209     nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + mf_new.get_subfile_internal_start(ni), false);
01210     string name = mf_new.get_subfile_name(ni);
01211     int oi = mf_orig.find_subfile(name);
01212 
01213     if (oi < 0) {
01214       // This is a newly-added subfile.  Add it the hard way.
01215       express_cat.info()
01216         << "Adding subfile " << mf_new.get_subfile_name(ni) << "\n";
01217 
01218       streampos new_start = mf_new.get_subfile_internal_start(ni);
01219       size_t new_size = mf_new.get_subfile_internal_length(ni);
01220       char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
01221       stream_new.seekg(new_start, ios::beg);
01222       stream_new.read(buffer_new, new_size);
01223       cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
01224       PANDA_FREE_ARRAY(buffer_new);
01225 
01226     } else {
01227       // This subfile exists in both the original and the new files.
01228       // Patch it.
01229       streampos orig_start = mf_orig.get_subfile_internal_start(oi);
01230       size_t orig_size = mf_orig.get_subfile_internal_length(oi);
01231 
01232       streampos new_start = mf_new.get_subfile_internal_start(ni);
01233       size_t new_size = mf_new.get_subfile_internal_length(ni);
01234 
01235       if (!patch_subfile(write_stream, offset_orig, offset_new,
01236                          mf_new.get_subfile_name(ni),
01237                          stream_origw, orig_start, orig_start + (streampos)orig_size,
01238                          stream_neww, new_start, new_start + (streampos)new_size)) {
01239         return false;
01240       }
01241     }
01242   }
01243 
01244   return true;
01245 }
01246 
#ifdef HAVE_TAR
////////////////////////////////////////////////////////////////////
//     Function: Patchfile::read_tar
//       Access: Private
//  Description: Uses libtar to walk the tar data in stream and
//               appends one TarSubfile to tar for each entry,
//               recording where its header, its data, and its
//               trailing bytes lie within the stream.  A final,
//               nameless entry is appended for whatever follows the
//               last entry.
//
//               libtar is pointed at the stream through the static
//               _tar_istream pointer for the duration of the call.
//
//               Returns true if the last th_read() call returned 1,
//               false otherwise (e.g. it is not a tar file).
////////////////////////////////////////////////////////////////////
bool Patchfile::
read_tar(TarDef &tar, istream &stream) {
  TAR *tfile;

  // Route all of libtar's I/O through our own callbacks.
  tartype_t tt;
  tt.openfunc = tar_openfunc;
  tt.closefunc = tar_closefunc;
  tt.readfunc = tar_readfunc;
  tt.writefunc = tar_writefunc;

  stream.seekg(0, ios::beg);
  nassertr(_tar_istream == NULL, false);
  _tar_istream = &stream;

  const int open_result = tar_open(&tfile, (char *)"dummy", &tt, O_RDONLY, 0, 0);
  if (open_result != 0) {
    _tar_istream = NULL;
    return false;
  }

  // Each th_read() consumes one entry header.  The stream position
  // observed right after the header, and again after skipping the
  // entry's contents, gives us the extents of that entry.
  streampos last_pos = 0;
  int flag;
  for (flag = th_read(tfile); flag == 0; flag = th_read(tfile)) {
    TarSubfile entry;
    entry._name = th_get_pathname(tfile);
    entry._header_start = last_pos;
    entry._data_start = stream.tellg();
    entry._data_end = entry._data_start + (streampos)th_get_size(tfile);
    tar_skip_regfile(tfile);
    entry._end = stream.tellg();
    tar.push_back(entry);

    last_pos = entry._end;
  }

  // The remainder of the stream becomes one last entry with no name
  // and no data; only its header region is non-empty.
  TarSubfile tail;
  tail._header_start = last_pos;
  stream.clear();
  stream.seekg(0, ios::end);
  tail._data_start = stream.tellg();
  tail._data_end = tail._data_start;
  tail._end = tail._data_start;
  tar.push_back(tail);

  tar_close(tfile);
  _tar_istream = NULL;

  // NOTE(review): 1 is presumably libtar's end-of-archive result for
  // th_read(); confirm against the libtar documentation.
  return (flag == 1);
}
#endif  // HAVE_TAR
01309 
#ifdef HAVE_TAR
////////////////////////////////////////////////////////////////////
//     Function: Patchfile::compute_tar_patches
//       Access: Private
//  Description: Computes patches for two streams that are both tar
//               files, one entry at a time, in the same spirit as
//               compute_mf_patches().  An entry of the new tar file
//               that is also found in the original is patched
//               against it; any other entry is added literally.
//
//               tar_orig and tar_new should have been filled in by
//               previous calls to read_tar().  Note that tar_orig is
//               sorted in place by this call.
////////////////////////////////////////////////////////////////////
bool Patchfile::
compute_tar_patches(ostream &write_stream,
                    PN_uint32 offset_orig, PN_uint32 offset_new,
                    istream &stream_orig, istream &stream_new,
                    TarDef &tar_orig, TarDef &tar_new) {

  // The original index is sorted so entries can be looked up
  // quickly.  The new index must stay in on-disk order, because that
  // is the order in which the output has to be generated.
  tar_orig.sort();

  IStreamWrapper stream_origw(stream_orig);
  IStreamWrapper stream_neww(stream_new);

  // Entries that exist only in the original are never visited, so
  // removed entries simply do not make it into the result.
  streampos last_pos = 0;
  TarDef::const_iterator ni;
  for (ni = tar_new.begin(); ni != tar_new.end(); ++ni) {
    const TarSubfile &sf_new =(*ni);

    // Each entry has to begin exactly where the previous one ended.
    nassertr(sf_new._header_start == last_pos, false);

    TarDef::const_iterator oi = tar_orig.find(sf_new);

    if (oi != tar_orig.end()) {
      // Found in both tar files.  The header, the data, and the
      // footer (extra bytes after the data, possibly introduced by
      // tar's blocking) are patched as three separate pieces, so
      // that a nested multifile in the data can be detected
      // accurately.  Only the data piece is given the entry's name.
      const TarSubfile &sf_orig =(*oi);

      const bool patched =
        patch_subfile(write_stream, offset_orig, offset_new, "",
                      stream_origw, sf_orig._header_start, sf_orig._data_start,
                      stream_neww, sf_new._header_start, sf_new._data_start) &&
        patch_subfile(write_stream, offset_orig, offset_new, sf_new._name,
                      stream_origw, sf_orig._data_start, sf_orig._data_end,
                      stream_neww, sf_new._data_start, sf_new._data_end) &&
        patch_subfile(write_stream, offset_orig, offset_new, "",
                      stream_origw, sf_orig._data_end, sf_orig._end,
                      stream_neww, sf_new._data_end, sf_new._end);
      if (!patched) {
        return false;
      }

    } else {
      // Not in the original at all: add the entire entry, header
      // included, as literal data.
      express_cat.info()
        << "Adding subfile " << sf_new._name << "\n";

      streampos new_start = sf_new._header_start;
      size_t new_size = sf_new._end - sf_new._header_start;
      char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
      stream_new.seekg(new_start, ios::beg);
      stream_new.read(buffer_new, new_size);
      cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
      PANDA_FREE_ARRAY(buffer_new);
    }

    last_pos = sf_new._end;
  }

  return true;
}
#endif  // HAVE_TAR
01398 
#ifdef HAVE_TAR
////////////////////////////////////////////////////////////////////
//     Function: Patchfile::tar_openfunc
//       Access: Private, Static
//  Description: The "open" callback installed into libtar by
//               read_tar().  libtar reads from our istream rather
//               than from a file it opens itself, so there is
//               nothing to open; all arguments are ignored and 0 is
//               returned.
////////////////////////////////////////////////////////////////////
int Patchfile::
tar_openfunc(const char *, int, ...) {
  // The stream is already open by the time libtar asks.
  return 0;
}
#endif  // HAVE_TAR
01413 
#ifdef HAVE_TAR
////////////////////////////////////////////////////////////////////
//     Function: Patchfile::tar_closefunc
//       Access: Private, Static
//  Description: The "close" callback installed into libtar by
//               read_tar().  Since tar_openfunc() never opens
//               anything, there is nothing to close here either; the
//               argument is ignored and 0 is returned.
////////////////////////////////////////////////////////////////////
int Patchfile::
tar_closefunc(int) {
  // Nothing was opened, so nothing needs closing.
  return 0;
}
#endif  // HAVE_TAR
01427 
#ifdef HAVE_TAR
////////////////////////////////////////////////////////////////////
//     Function: Patchfile::tar_readfunc
//       Access: Private, Static
//  Description: The "read" callback installed into libtar by
//               read_tar().  Reads up to nbytes bytes from the
//               istream currently held in _tar_istream into buffer,
//               and returns the number of bytes actually read.  The
//               first argument is ignored.
////////////////////////////////////////////////////////////////////
ssize_t Patchfile::
tar_readfunc(int, void *buffer, size_t nbytes) {
  nassertr(_tar_istream != NULL, 0);

  char *dest = (char *)buffer;
  _tar_istream->read(dest, nbytes);

  // gcount() is what the read above managed to deliver.
  return (ssize_t)_tar_istream->gcount();
}
#endif  // HAVE_TAR
01442 
#ifdef HAVE_TAR
////////////////////////////////////////////////////////////////////
//     Function: Patchfile::tar_writefunc
//       Access: Private, Static
//  Description: The "write" callback installed into libtar by
//               read_tar().  libtar is used here only for reading,
//               so this should never be called: it raises an
//               assertion and returns -1.
////////////////////////////////////////////////////////////////////
ssize_t Patchfile::
tar_writefunc(int, const void *, size_t) {
  // Reaching this point at all is an error.
  nassertr(false, -1);
  return -1;
}
#endif  // HAVE_TAR
01458 
01459 ////////////////////////////////////////////////////////////////////
01460 //     Function: Patchfile::build
01461 //       Access: Public
01462 //  Description:
01463 //               This implementation uses the "greedy differencing
01464 //               algorithm" described in the masters thesis
01465 //               "Differential Compression: A Generalized Solution
01466 //               for Binary Files" by Randal C. Burns (p.13).
01467 //               For an original file of size M and a new file of
01468 //               size N, this algorithm is O(M) in space and
01469 //               O(M*N) (worst-case) in time.
01470 //               return false on error
01471 ////////////////////////////////////////////////////////////////////
01472 bool Patchfile::
01473 build(Filename file_orig, Filename file_new, Filename patch_name) {
01474   patch_name.set_binary();
01475 
01476   // Open the original file for read
01477   pifstream stream_orig;
01478   file_orig.set_binary();
01479   if (!file_orig.open_read(stream_orig)) {
01480     express_cat.error()
01481       << "Patchfile::build() - Failed to open file: " << file_orig << endl;
01482     return false;
01483   }
01484 
01485   // Open the new file for read
01486   pifstream stream_new;
01487   file_new.set_binary();
01488   if (!file_new.open_read(stream_new)) {
01489     express_cat.error()
01490       << "Patchfile::build() - Failed to open file: " << file_new << endl;
01491     return false;
01492   }
01493 
01494   // Open patch file for write
01495   pofstream write_stream;
01496   if (!patch_name.open_write(write_stream)) {
01497     express_cat.error()
01498       << "Patchfile::build() - Failed to open file: " << patch_name << endl;
01499     return false;
01500   }
01501 
01502   _last_copy_pos = 0;
01503   _add_pos = 0;
01504   _cache_add_data = string();
01505   _cache_copy_start = 0;
01506   _cache_copy_length = 0;
01507 
01508   write_header(write_stream, stream_orig, stream_new);
01509 
01510   if (!do_compute_patches(file_orig, file_new,
01511                           write_stream, 0, 0,
01512                           stream_orig, stream_new)) {
01513     return false;
01514   }
01515 
01516   write_terminator(write_stream);
01517 
01518   if (express_cat.is_debug()) {
01519     express_cat.debug()
01520       << "Patch file will generate " << _add_pos << "-byte file.\n";
01521   }
01522 
01523 #ifndef NDEBUG
01524  {
01525    // Make sure the resulting file would be the right size.
01526    stream_new.seekg(0, ios::end);
01527    streampos result_file_length = stream_new.tellg();
01528    nassertr(_add_pos == result_file_length, false);
01529  }
01530 #endif  // NDEBUG
01531 
01532   return (_last_copy_pos != 0);
01533 }
01534 
01535 ////////////////////////////////////////////////////////////////////
01536 //     Function: Patchfile::do_compute_patches
01537 //       Access: Private
01538 //  Description: Computes the patches for the indicated A to B files,
01539 //               or subfiles.  Checks for multifiles or tar files
01540 //               before falling back to whole-file patching.
01541 ////////////////////////////////////////////////////////////////////
01542 bool Patchfile::
01543 do_compute_patches(const Filename &file_orig, const Filename &file_new,
01544                    ostream &write_stream,
01545                    PN_uint32 offset_orig, PN_uint32 offset_new,
01546                    istream &stream_orig, istream &stream_new) {
01547   nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new, false);
01548 
01549   // Check whether our input files are Panda multifiles or tar files.
01550   bool is_multifile = false;
01551 #ifdef HAVE_TAR
01552   bool is_tarfile = false;
01553   TarDef tar_orig, tar_new;
01554 #endif  // HAVE_TAR
01555 
01556   if (_allow_multifile) {
01557     if (strstr(file_orig.get_basename().c_str(), ".mf") != NULL ||
01558         strstr(file_new.get_basename().c_str(), ".mf") != NULL) {
01559       // Read the first n bytes of both files for the Multifile magic
01560       // number.
01561       string magic_number = Multifile::get_magic_number();
01562       char *buffer = (char *)PANDA_MALLOC_ARRAY(magic_number.size());
01563       stream_orig.seekg(0, ios::beg);
01564       stream_orig.read(buffer, magic_number.size());
01565 
01566       if (stream_orig.gcount() == (int)magic_number.size() &&
01567           memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
01568         stream_new.seekg(0, ios::beg);
01569         stream_new.read(buffer, magic_number.size());
01570         if (stream_new.gcount() == (int)magic_number.size() &&
01571             memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
01572           is_multifile = true;
01573         }
01574       }
01575       PANDA_FREE_ARRAY(buffer);
01576     }
01577 #ifdef HAVE_TAR
01578     if (strstr(file_orig.get_basename().c_str(), ".tar") != NULL ||
01579         strstr(file_new.get_basename().c_str(), ".tar") != NULL) {
01580       if (read_tar(tar_orig, stream_orig) &&
01581           read_tar(tar_new, stream_new)) {
01582         is_tarfile = true;
01583       }
01584     }
01585 #endif  // HAVE_TAR
01586   }
01587 
01588   if (is_multifile) {
01589     if (express_cat.is_debug()) {
01590       express_cat.debug()
01591         << file_orig.get_basename() << " appears to be a Panda Multifile.\n";
01592     }
01593     if (!compute_mf_patches(write_stream, offset_orig, offset_new,
01594                             stream_orig, stream_new)) {
01595       return false;
01596     }
01597 #ifdef HAVE_TAR
01598   } else if (is_tarfile) {
01599     if (express_cat.is_debug()) {
01600       express_cat.debug()
01601         << file_orig.get_basename() << " appears to be a tar file.\n";
01602     }
01603     if (!compute_tar_patches(write_stream, offset_orig, offset_new,
01604                              stream_orig, stream_new, tar_orig, tar_new)) {
01605       return false;
01606     }
01607 #endif  // HAVE_TAR
01608   } else {
01609     if (express_cat.is_debug()) {
01610       express_cat.debug()
01611         << file_orig.get_basename() << " is not a multifile.\n";
01612     }
01613     if (!compute_file_patches(write_stream, offset_orig, offset_new,
01614                               stream_orig, stream_new)) {
01615       return false;
01616     }
01617   }
01618 
01619   return true;
01620 }
01621 
01622 ////////////////////////////////////////////////////////////////////
01623 //     Function: Patchfile::patch_subfile
01624 //       Access: Private
01625 //  Description: Generates patches for a nested subfile of a Panda
01626 //               Multifile or a tar file.
01627 ////////////////////////////////////////////////////////////////////
01628 bool Patchfile::
01629 patch_subfile(ostream &write_stream,
01630               PN_uint32 offset_orig, PN_uint32 offset_new,
01631               const Filename &filename,
01632               IStreamWrapper &stream_orig, streampos orig_start, streampos orig_end,
01633               IStreamWrapper &stream_new, streampos new_start, streampos new_end) {
01634   nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + new_start, false);
01635 
01636   size_t new_size = new_end - new_start;
01637   size_t orig_size = orig_end - orig_start;
01638 
01639   ISubStream subfile_orig(&stream_orig, orig_start, orig_end);
01640   ISubStream subfile_new(&stream_new, new_start, new_end);
01641 
01642   bool is_unchanged = false;
01643   if (orig_size == new_size) {
01644     HashVal hash_orig, hash_new;
01645     hash_orig.hash_stream(subfile_orig);
01646     hash_new.hash_stream(subfile_new);
01647 
01648     if (hash_orig == hash_new) {
01649       // Actually, the subfile is unchanged; just emit it.
01650       is_unchanged = true;
01651     }
01652   }
01653 
01654   if (is_unchanged) {
01655     if (express_cat.is_debug() && !filename.empty()) {
01656       express_cat.debug()
01657         << "Keeping subfile " << filename << "\n";
01658     }
01659     cache_add_and_copy(write_stream, 0, NULL,
01660                        orig_size, offset_orig + orig_start);
01661 
01662   } else {
01663     if (!filename.empty()) {
01664       express_cat.info()
01665         << "Patching subfile " << filename << "\n";
01666     }
01667 
01668     if (!do_compute_patches(filename, filename, write_stream,
01669                             offset_orig + orig_start, offset_new + new_start,
01670                             subfile_orig, subfile_new)) {
01671       return false;
01672     }
01673   }
01674 
01675   return true;
01676 }
01677 
01678 #endif // HAVE_OPENSSL
 All Classes Functions Variables Enumerations