Panda3D
patchfile.cxx
1 // Filename: patchfile.cxx
2 // Created by: darren, mike (09Jan97)
3 //
4 ////////////////////////////////////////////////////////////////////
5 //
6 // PANDA 3D SOFTWARE
7 // Copyright (c) Carnegie Mellon University. All rights reserved.
8 //
9 // All use of this software is subject to the terms of the revised BSD
10 // license. You should have received a copy of this license along
11 // with this source code in a file named "LICENSE."
12 //
13 ////////////////////////////////////////////////////////////////////
14 
15 #include "pandabase.h"
16 
17 #ifdef HAVE_OPENSSL
18 
19 #include "config_express.h"
20 #include "error_utils.h"
21 #include "patchfile.h"
22 #include "streamReader.h"
23 #include "streamWriter.h"
24 #include "multifile.h"
25 #include "hashVal.h"
26 #include "virtualFileSystem.h"
27 
28 #include <string.h> // for strstr
29 
30 #ifdef HAVE_TAR
31 #include "libtar.h"
32 #include <fcntl.h> // for O_RDONLY
33 #endif // HAVE_TAR
34 
35 #ifdef HAVE_TAR
36 istream *Patchfile::_tar_istream = NULL;
37 #endif // HAVE_TAR
38 
39 ////////////////////////////////////////////////////////////////////
40 
41 // this actually slows things down...
42 //#define USE_MD5_FOR_HASHTABLE_INDEX_VALUES
43 
44 // Patch File Format ///////////////////////////////////////////////
45 ///// IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE INSTALLER
46 ////////////////////////////////////////////////////////////////////
47 // [ HEADER ]
48 // 4 bytes 0xfeebfaac ("magic number")
49 // (older patch files have a magic number 0xfeebfaab,
50 // indicating they are version number 0.)
51 // 2 bytes version number (if magic number == 0xfeebfaac)
52 // 4 bytes length of starting file (if version >= 1)
53 // 16 bytes MD5 of starting file (if version >= 1)
54 // 4 bytes length of resulting patched file
55 // 16 bytes MD5 of resultant patched file
56 
57 // Note that MD5 hashes are written in the order observed by
58 // HashVal::read_stream() and HashVal::write_stream(), which is not
59 // the normal linear order. (Each group of four bytes is reversed.)
60 
61 const int _v0_header_length = 4 + 4 + 16;
62 const int _v1_header_length = 4 + 2 + 4 + 16 + 4 + 16;
63 //
64 // [ ADD/COPY pairs; repeated N times ]
65 // 2 bytes AL = ADD length
66 // AL bytes bytes to add
67 // 2 bytes CL = COPY length
68 // 4 bytes offset of data to copy from original file, if CL != 0.
69 // If version >= 2, offset is relative to end of previous
70 // copy block; if version < 2, offset is relative to
71 // beginning of file.
72 //
73 // [ TERMINATOR ]
74 // 2 bytes zero-length ADD
75 // 2 bytes zero-length COPY
76 ////////////////////////////////////////////////////////////////////
77 ////////////////////////////////////////////////////////////////////
78 
79 ////////////////////////////////////////////////////////////////////
80 // Defines
81 ////////////////////////////////////////////////////////////////////
82 const PN_uint32 Patchfile::_v0_magic_number = 0xfeebfaab;
83 const PN_uint32 Patchfile::_magic_number = 0xfeebfaac;
84 
85 // Created version 1 on 11/2/02 to store length and MD5 of original file.
86 // To version 2 on 11/2/02 to store copy offsets as relative.
87 const PN_uint16 Patchfile::_current_version = 2;
88 
89 const PN_uint32 Patchfile::_HASH_BITS = 24;
90 const PN_uint32 Patchfile::_HASHTABLESIZE = PN_uint32(1) << Patchfile::_HASH_BITS;
91 const PN_uint32 Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
92 const PN_uint32 Patchfile::_NULL_VALUE = PN_uint32(0) - 1;
93 const PN_uint32 Patchfile::_MAX_RUN_LENGTH = (PN_uint32(1) << 16) - 1;
94 const PN_uint32 Patchfile::_HASH_MASK = (PN_uint32(1) << Patchfile::_HASH_BITS) - 1;
95 
96 ////////////////////////////////////////////////////////////////////
97 // Function: Patchfile::Constructor
98 // Access: Public
99 // Description: Create a patch file and initializes internal data
100 ////////////////////////////////////////////////////////////////////
101 Patchfile::
102 Patchfile() {
103  PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
104  init(buffer);
105 }
106 
107 ////////////////////////////////////////////////////////////////////
108 // Function: Patchfile::Constructor
109 // Access: Public
110 // Description: Create patch file with buffer to patch
111 ////////////////////////////////////////////////////////////////////
112 Patchfile::
113 Patchfile(PT(Buffer) buffer) {
114  init(buffer);
115 }
116 
117 ////////////////////////////////////////////////////////////////////
118 // Function: Patchfile::init
119 // Access: Private
120 // Description:
121 ////////////////////////////////////////////////////////////////////
122 void Patchfile::
123 init(PT(Buffer) buffer) {
124  _rename_output_to_orig = false;
125  _delete_patchfile = false;
126  _hash_table = NULL;
127  _initiated = false;
128  nassertv(!buffer.is_null());
129  _buffer = buffer;
130 
131  _version_number = 0;
132  _allow_multifile = true;
133 
134  _patch_stream = NULL;
135  _origfile_stream = NULL;
136 
137  reset_footprint_length();
138 }
139 
140 ////////////////////////////////////////////////////////////////////
141 // Function: Patchfile::Destructor
142 // Access: Public
143 // Description:
144 ////////////////////////////////////////////////////////////////////
145 Patchfile::
146 ~Patchfile() {
147  if (_hash_table != (PN_uint32 *)NULL) {
148  PANDA_FREE_ARRAY(_hash_table);
149  }
150 
151  if (_initiated) {
152  cleanup();
153  }
154 
155  nassertv(_patch_stream == NULL);
156  nassertv(_origfile_stream == NULL);
157 }
158 
159 ////////////////////////////////////////////////////////////////////
160 // Function: Patchfile::cleanup
161 // Access: Private
162 // Description: Closes and clean up internal data structures
163 ////////////////////////////////////////////////////////////////////
164 void Patchfile::
165 cleanup() {
166  if (!_initiated) {
167  express_cat.error()
168  << "Patchfile::cleanup() - Patching has not been initiated"
169  << endl;
170  return;
171  }
172 
173  // close files
175  if (_origfile_stream != NULL) {
176  vfs->close_read_file(_origfile_stream);
177  _origfile_stream = NULL;
178  }
179  if (_patch_stream != NULL) {
180  vfs->close_read_file(_patch_stream);
181  _patch_stream = NULL;
182  }
183  _write_stream.close();
184 
185  _initiated = false;
186 }
187 
188 ////////////////////////////////////////////////////////////////////
189 ///// PATCH FILE APPLY MEMBER FUNCTIONS
190 /////
191 ////////////////////
192 ///// NOTE: this patch-application functionality unfortunately has to be
193 ///// duplicated in the Installer. It is contained in the file
194 ///// installerApplyPatch.cxx
195 ///// PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY OF THIS
196 ///// LOGIC CHANGES! (i.e. if the patch file format changes)
197 ////////////////////
198 ////////////////////////////////////////////////////////////////////
199 
200 ////////////////////////////////////////////////////////////////////
201 // Function: Patchfile::initiate
202 // Access: Published
203 // Description: Set up to apply the patch to the file (original
204 // file and patch are destroyed in the process).
205 ////////////////////////////////////////////////////////////////////
206 int Patchfile::
207 initiate(const Filename &patch_file, const Filename &file) {
208  int result = initiate(patch_file, file, Filename::temporary("", "patch_"));
209  _rename_output_to_orig = true;
210  _delete_patchfile = !keep_temporary_files;
211  return result;
212 }
213 
214 ////////////////////////////////////////////////////////////////////
215 // Function: Patchfile::initiate
216 // Access: Published
217 // Description: Set up to apply the patch to the file. In this form,
218 // neither the original file nor the patch file are
219 // destroyed.
220 ////////////////////////////////////////////////////////////////////
221 int Patchfile::
222 initiate(const Filename &patch_file, const Filename &orig_file,
223  const Filename &target_file) {
224  if (_initiated) {
225  express_cat.error()
226  << "Patchfile::initiate() - Patching has already been initiated"
227  << endl;
228  return EU_error_abort;
229  }
230 
231  nassertr(orig_file != target_file, EU_error_abort);
232 
234 
235  // Open the original file for read
236  nassertr(_origfile_stream == NULL, EU_error_abort);
237  _orig_file = orig_file;
238  _orig_file.set_binary();
239  _origfile_stream = vfs->open_read_file(_orig_file, false);
240  if (_origfile_stream == NULL) {
241  express_cat.error()
242  << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
243  return get_write_error();
244  }
245 
246  // Open the temp file for write
247  _output_file = target_file;
248  _output_file.set_binary();
249  if (!_output_file.open_write(_write_stream)) {
250  express_cat.error()
251  << "Patchfile::initiate() - Failed to open file: " << _output_file << endl;
252  return get_write_error();
253  }
254 
255  if (express_cat.is_debug()) {
256  express_cat.debug()
257  << "Patchfile using output file " << _output_file << "\n";
258  }
259 
260  int result = internal_read_header(patch_file);
261  _total_bytes_processed = 0;
262 
263  _initiated = true;
264  return result;
265 }
266 
267 ////////////////////////////////////////////////////////////////////
268 // Function: Patchfile::read_header
269 // Access: Published
270 // Description: Opens the patch file for reading, and gets the header
271 // information from the file but does not begin to do
272 // any real work. This can be used to query the data
273 // stored in the patch.
274 ////////////////////////////////////////////////////////////////////
275 int Patchfile::
276 read_header(const Filename &patch_file) {
277  if (_initiated) {
278  express_cat.error()
279  << "Patchfile::initiate() - Patching has already been initiated"
280  << endl;
281  return EU_error_abort;
282  }
283 
284  int result = internal_read_header(patch_file);
285  if (_patch_stream != NULL) {
287  vfs->close_read_file(_patch_stream);
288  _patch_stream = NULL;
289  }
290  return result;
291 }
292 
293 ////////////////////////////////////////////////////////////////////
294 // Function: Patchfile::run
295 // Access: Published
296 // Description: Perform one buffer's worth of patching
297 // Returns EU_ok while patching
298 // Returns EU_success when done
299 // If error happens will return one of:
300 // EU_error_abort : Patching has not been initiated
301 // EU_error_file_invalid : file is corrupted
302 // EU_error_invalid_checksum : incompatible patch file
303 // EU_error_write_file_rename : could not rename file
304 ////////////////////////////////////////////////////////////////////
305 int Patchfile::
306 run() {
307  // Now patch the file using the given buffer
308  int buflen;
309  int bytes_read;
310  PN_uint16 ADD_length;
311  PN_uint16 COPY_length;
312  PN_int32 COPY_offset;
313 
314  if (_initiated == false) {
315  express_cat.error()
316  << "Patchfile::run() - Patching has not been initiated"
317  << endl;
318  return EU_error_abort;
319  }
320 
321  nassertr(_patch_stream != NULL, EU_error_abort);
322  nassertr(_origfile_stream != NULL, EU_error_abort);
323  StreamReader patch_reader(*_patch_stream);
324 
325  buflen = _buffer->get_length();
326  bytes_read = 0;
327 
328  while (bytes_read < buflen) {
329  ///////////
330  // read # of ADD bytes
331  nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
332  ADD_length = patch_reader.get_uint16();
333  if (_patch_stream->fail()) {
334  express_cat.error()
335  << "Truncated patch file.\n";
336  return EU_error_file_invalid;
337  }
338 
339  bytes_read += (int)ADD_length;
340  _total_bytes_processed += (int)ADD_length;
341  if (_total_bytes_processed > _total_bytes_to_process) {
342  express_cat.error()
343  << "Runaway patch file.\n";
344  return EU_error_file_invalid;
345  }
346 
347  // if there are bytes to add, read them from patch file and write them to output
348  if (express_cat.is_spam() && ADD_length != 0) {
349  express_cat.spam()
350  << "ADD: " << ADD_length << " (to "
351  << _write_stream.tellp() << ")" << endl;
352  }
353 
354  PN_uint32 bytes_left = (PN_uint32)ADD_length;
355  while (bytes_left > 0) {
356  PN_uint32 bytes_this_time = (PN_uint32) min(bytes_left, (PN_uint32) buflen);
357  _patch_stream->read(_buffer->_buffer, bytes_this_time);
358  if (_patch_stream->fail()) {
359  express_cat.error()
360  << "Truncated patch file.\n";
361  return EU_error_file_invalid;
362  }
363  _write_stream.write(_buffer->_buffer, bytes_this_time);
364  bytes_left -= bytes_this_time;
365  }
366 
367  ///////////
368  // read # of COPY bytes
369  nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
370  COPY_length = patch_reader.get_uint16();
371  if (_patch_stream->fail()) {
372  express_cat.error()
373  << "Truncated patch file.\n";
374  return EU_error_file_invalid;
375  }
376 
377  bytes_read += (int)COPY_length;
378  _total_bytes_processed += (int)COPY_length;
379  if (_total_bytes_processed > _total_bytes_to_process) {
380  express_cat.error()
381  << "Runaway patch file.\n";
382  return EU_error_file_invalid;
383  }
384 
385  // if there are bytes to copy, read them from original file and write them to output
386  if (0 != COPY_length) {
387  // read copy offset
388  nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
389  COPY_offset = patch_reader.get_int32();
390  if (_patch_stream->fail()) {
391  express_cat.error()
392  << "Truncated patch file.\n";
393  return EU_error_file_invalid;
394  }
395 
396  // seek to the copy source pos
397  if (_version_number < 2) {
398  _origfile_stream->seekg(COPY_offset, ios::beg);
399  } else {
400  _origfile_stream->seekg(COPY_offset, ios::cur);
401  }
402  if (_origfile_stream->fail()) {
403  express_cat.error()
404  << "Invalid copy offset in patch file.\n";
405  return EU_error_file_invalid;
406  }
407 
408  if (express_cat.is_spam()) {
409  express_cat.spam()
410  << "COPY: " << COPY_length << " bytes from offset "
411  << COPY_offset << " (from " << _origfile_stream->tellg()
412  << " to " << _write_stream.tellp() << ")"
413  << endl;
414  }
415 
416  // read the copy bytes from original file and write them to output
417  PN_uint32 bytes_left = (PN_uint32)COPY_length;
418 
419  while (bytes_left > 0) {
420  PN_uint32 bytes_this_time = (PN_uint32) min(bytes_left, (PN_uint32) buflen);
421  _origfile_stream->read(_buffer->_buffer, bytes_this_time);
422  if (_origfile_stream->fail()) {
423  express_cat.error()
424  << "Invalid copy length in patch file.\n";
425  return EU_error_file_invalid;
426  }
427  _write_stream.write(_buffer->_buffer, bytes_this_time);
428  bytes_left -= bytes_this_time;
429  }
430  }
431 
432  // if we got a pair of zero-length ADD and COPY blocks, we're done
433  if ((0 == ADD_length) && (0 == COPY_length)) {
434  cleanup();
435 
436  if (express_cat.is_debug()) {
437  express_cat.debug()
438  //<< "result file = " << _result_file_length
439  << " total bytes = " << _total_bytes_processed << endl;
440  }
441 
442  // check the MD5 from the patch file against the newly patched file
443  {
444  HashVal MD5_actual;
445  MD5_actual.hash_file(_output_file);
446  if (_MD5_ofResult != MD5_actual) {
447  // Whoops, patching screwed up somehow.
448  if (_origfile_stream != NULL) {
450  vfs->close_read_file(_origfile_stream);
451  _origfile_stream = NULL;
452  }
453  _write_stream.close();
454 
455  express_cat.info()
456  << "Patching produced incorrect checksum. Got:\n"
457  << " " << MD5_actual
458  << "\nExpected:\n"
459  << " " << _MD5_ofResult
460  << "\n";
461 
462  // This is a fine time to double-check the starting
463  // checksum.
464  if (!has_source_hash()) {
465  express_cat.info()
466  << "No source hash in patch file to verify.\n";
467  } else {
468  HashVal MD5_orig;
469  MD5_orig.hash_file(_orig_file);
470  if (MD5_orig != get_source_hash()) {
471  express_cat.info()
472  << "Started from incorrect source file. Got:\n"
473  << " " << MD5_orig
474  << "\nExpected:\n"
475  << " " << get_source_hash()
476  << "\n";
477  } else {
478  express_cat.info()
479  << "Started from correct source file:\n"
480  << " " << MD5_orig
481  << "\n";
482  }
483  }
484 
485  // delete the temp file and the patch file
486  if (_rename_output_to_orig) {
487  _output_file.unlink();
488  }
489  if (_delete_patchfile) {
490  _patch_file.unlink();
491  }
492  // return "invalid checksum"
493  return EU_error_invalid_checksum;
494  }
495  }
496 
497  // delete the patch file
498  if (_delete_patchfile) {
499  _patch_file.unlink();
500  }
501 
502  // rename the temp file to the original file name
503  if (_rename_output_to_orig) {
504  _orig_file.unlink();
505  if (!_output_file.rename_to(_orig_file)) {
506  express_cat.error()
507  << "Patchfile::run() failed to rename temp file to: " << _orig_file
508  << endl;
509  return EU_error_write_file_rename;
510  }
511  }
512 
513  return EU_success;
514  }
515  }
516 
517  return EU_ok;
518 }
519 
520 ////////////////////////////////////////////////////////////////////
521 // Function: Patchfile::apply
522 // Access: Public
523 // Description: Patches the entire file in one call
524 // returns true on success and false on error
525 //
526 // This version will delete the patch file and overwrite
527 // the original file.
528 ////////////////////////////////////////////////////////////////////
529 bool Patchfile::
530 apply(Filename &patch_file, Filename &file) {
531  int ret = initiate(patch_file, file);
532  if (ret < 0)
533  return false;
534  for (;;) {
535  ret = run();
536  if (ret == EU_success)
537  return true;
538  if (ret < 0)
539  return false;
540  }
541  return false;
542 }
543 
544 ////////////////////////////////////////////////////////////////////
545 // Function: Patchfile::apply
546 // Access: Public
547 // Description: Patches the entire file in one call
548 // returns true on success and false on error
549 //
550 // This version will not delete any files.
551 ////////////////////////////////////////////////////////////////////
552 bool Patchfile::
553 apply(Filename &patch_file, Filename &orig_file, const Filename &target_file) {
554  int ret = initiate(patch_file, orig_file, target_file);
555  if (ret < 0)
556  return false;
557  for (;;) {
558  ret = run();
559  if (ret == EU_success)
560  return true;
561  if (ret < 0)
562  return false;
563  }
564  return false;
565 }
566 
567 
568 ////////////////////////////////////////////////////////////////////
569 // Function: Patchfile::internal_read_header
570 // Access: Private
571 // Description: Reads the header and leaves the patch file open.
572 ////////////////////////////////////////////////////////////////////
573 int Patchfile::
574 internal_read_header(const Filename &patch_file) {
575  // Open the patch file for read
577  nassertr(_patch_stream == NULL, EU_error_abort);
578  _patch_file = patch_file;
579  _patch_file.set_binary();
580  _patch_stream = vfs->open_read_file(_patch_file, true);
581  if (_patch_stream == NULL) {
582  express_cat.error()
583  << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
584  return get_write_error();
585  }
586 
587  /////////////
588  // read header, make sure the patch file is valid
589 
590  StreamReader patch_reader(*_patch_stream);
591 
592  // check the magic number
593  nassertr(_buffer->get_length() >= _v0_header_length, false);
594  PN_uint32 magic_number = patch_reader.get_uint32();
595  if (magic_number != _magic_number && magic_number != _v0_magic_number) {
596  express_cat.error()
597  << "Invalid patch file: " << _patch_file << endl;
598  return EU_error_file_invalid;
599  }
600 
601  _version_number = 0;
602  if (magic_number != _v0_magic_number) {
603  _version_number = patch_reader.get_uint16();
604  }
605  if (_version_number > _current_version) {
606  express_cat.error()
607  << "Can't read version " << _version_number << " patch files: "
608  << _patch_file << endl;
609  return EU_error_file_invalid;
610  }
611 
612  if (_version_number >= 1) {
613  // Get the length of the source file.
614  /*PN_uint32 source_file_length =*/ patch_reader.get_uint32();
615 
616  // get the MD5 of the source file.
617  _MD5_ofSource.read_stream(patch_reader);
618  }
619 
620  // get the length of the patched result file
621  _total_bytes_to_process = patch_reader.get_uint32();
622 
623  // get the MD5 of the resultant patched file
624  _MD5_ofResult.read_stream(patch_reader);
625 
626  express_cat.debug()
627  << "Patchfile::initiate() - valid patchfile" << endl;
628 
629  return EU_success;
630 }
631 
632 ////////////////////////////////////////////////////////////////////
633 ///// PATCH FILE BUILDING MEMBER FUNCTIONS
634 ////////////////////////////////////////////////////////////////////
635 
636 ////////////////////////////////////////////////////////////////////
637 // Function: Patchfile::calc_hash
638 // Access: Private
639 // Description:
640 ////////////////////////////////////////////////////////////////////
641 PN_uint32 Patchfile::
642 calc_hash(const char *buffer) {
643 #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
644  HashVal hash;
645  hash.hash_buffer(buffer, _footprint_length);
646 
647  //cout << PN_uint16(hash.get_value(0)) << " ";
648 
649  return PN_uint16(hash.get_value(0));
650 #else
651  PN_uint32 hash_value = 0;
652 
653  for(int i = 0; i < (int)_footprint_length; i++) {
654  // this is probably not such a good hash. to be replaced
655  /// --> TRIED MD5, was not worth it for the execution-time hit on 800Mhz PC
656  hash_value ^= PN_uint32(*buffer) << ((i * 2) % Patchfile::_HASH_BITS);
657  buffer++;
658  }
659 
660  // use the bits that overflowed past the end of the hash bit range
661  // (this is intended for _HASH_BITS == 24)
662  hash_value ^= (hash_value >> Patchfile::_HASH_BITS);
663 
664  //cout << hash_value << " ";
665 
666  return hash_value & _HASH_MASK;
667 #endif
668 }
669 
670 ////////////////////////////////////////////////////////////////////
671 // Function: Patchfile::build_hash_link_tables
672 // Access: Private
673 // Description:
674 // The hash and link tables allow for a quick, linear
675 // search of all locations in the file that begin with
676 // a particular sequence of bytes, or "footprint."
677 //
678 // The hash table is a table of offsets into the file,
679 // with one entry for every possible footprint hash
680 // value. For a hash of a footprint, the entry at the
681 // offset of the hash value provides an initial location
682 // in the file that has a matching footprint.
683 //
684 // The link table is a large linked list of file offsets,
685 // with one entry for every byte in the file. Each offset
686 // in the link table will point to another offset that
687 // has the same footprint at the corresponding offset in the
688 // actual file. Starting with an offset taken from the hash
689 // table, one can rapidly produce a list of offsets that
690 // all have the same footprint.
691 ////////////////////////////////////////////////////////////////////
692 void Patchfile::
693 build_hash_link_tables(const char *buffer_orig, PN_uint32 length_orig,
694  PN_uint32 *hash_table, PN_uint32 *link_table) {
695 
696  PN_uint32 i;
697 
698  // clear hash table
699  for(i = 0; i < _HASHTABLESIZE; i++) {
700  hash_table[i] = _NULL_VALUE;
701  }
702 
703  // clear link table
704  for(i = 0; i < length_orig; i++) {
705  link_table[i] = _NULL_VALUE;
706  }
707 
708  if(length_orig < _footprint_length) return;
709 
710  // run through original file and hash each footprint
711  for(i = 0; i < (length_orig - _footprint_length); i++) {
712 
713  PN_uint32 hash_value = calc_hash(&buffer_orig[i]);
714 
715  // we must now store this file index in the hash table
716  // at the offset of the hash value
717 
718  // to account for multiple file offsets with identical
719  // hash values, there is a link table with an entry for
720  // every footprint in the file. We create linked lists
721  // of offsets in the link table.
722 
723  // first, set the value in the link table for the current
724  // offset to whatever the current list head is (the
725  // value in the hash table) (note that this only works
726  // because the hash and link tables both use
727  // _NULL_VALUE to indicate a null index)
728  link_table[i] = hash_table[hash_value];
729 
730  // set the new list head; store the current offset in the
731  // hash table at the offset of the footprint's hash value
732  hash_table[hash_value] = i;
733 
734  /*
735  if (_NULL_VALUE == hash_table[hash_value]) {
736  // hash entry is empty, store this offset
737  hash_table[hash_value] = i;
738  } else {
739  // hash entry is taken, go to the link table
740  PN_uint32 link_offset = hash_table[hash_value];
741 
742  while (_NULL_VALUE != link_table[link_offset]) {
743  link_offset = link_table[link_offset];
744  }
745  link_table[link_offset] = i;
746  }
747  */
748  }
749 }
750 
751 ////////////////////////////////////////////////////////////////////
752 // Function: Patchfile::calc_match_length
753 // Access: Private
754 // Description:
755 // This function calculates the length of a match between
756 // two strings of bytes
757 ////////////////////////////////////////////////////////////////////
758 PN_uint32 Patchfile::
759 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length,
760  PN_uint32 min_length) {
761  // early out: look ahead and sample the end of the minimum range
762  if (min_length > 2) {
763  if (min_length >= max_length)
764  return 0;
765  if (buf1[min_length] != buf2[min_length] ||
766  buf1[min_length-1] != buf2[min_length-1] ||
767  buf1[min_length-2] != buf2[min_length-2]) {
768  return 0;
769  }
770  }
771 
772  PN_uint32 length = 0;
773  while ((length < max_length) && (*buf1 == *buf2)) {
774  buf1++, buf2++, length++;
775  }
776  return length;
777 }
778 
779 ////////////////////////////////////////////////////////////////////
780 // Function: Patchfile::find_longest_match
781 // Access: Private
782 // Description:
783 // This function will find the longest string in the
784 // original file that matches a string in the new file.
785 ////////////////////////////////////////////////////////////////////
786 void Patchfile::
787 find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_length,
788  PN_uint32 *hash_table, PN_uint32 *link_table, const char* buffer_orig,
789  PN_uint32 length_orig, const char* buffer_new, PN_uint32 length_new) {
790 
791  // set length to a safe value
792  copy_length = 0;
793 
794  // get offset of matching string (in orig file) from hash table
795  PN_uint32 hash_value = calc_hash(&buffer_new[new_pos]);
796 
797  // if no match, bail
798  if (_NULL_VALUE == hash_table[hash_value])
799  return;
800 
801  copy_pos = hash_table[hash_value];
802 
803  // calc match length
804  copy_length = (PN_uint16)calc_match_length(&buffer_new[new_pos],
805  &buffer_orig[copy_pos],
806  min(min((length_new - new_pos),
807  (length_orig - copy_pos)),
808  _MAX_RUN_LENGTH),
809  0);
810 
811  // run through link table, see if we find any longer matches
812  PN_uint32 match_offset;
813  PN_uint16 match_length;
814  match_offset = link_table[copy_pos];
815 
816  while (match_offset != _NULL_VALUE) {
817  match_length = (PN_uint16)calc_match_length(&buffer_new[new_pos],
818  &buffer_orig[match_offset],
819  min(min((length_new - new_pos),
820  (length_orig - match_offset)),
821  _MAX_RUN_LENGTH),
822  copy_length);
823 
824  // have we found a longer match?
825  if (match_length > copy_length) {
826  copy_pos = match_offset;
827  copy_length = match_length;
828  }
829 
830  // traverse the link table
831  match_offset = link_table[match_offset];
832  }
833 }
834 
835 ////////////////////////////////////////////////////////////////////
836 // Function: Patchfile::emit_ADD
837 // Access: Private
838 // Description:
839 ////////////////////////////////////////////////////////////////////
840 void Patchfile::
841 emit_ADD(ostream &write_stream, PN_uint32 length, const char* buffer) {
842  nassertv(length == (PN_uint16)length); //we only write a uint16
843 
844  if (express_cat.is_spam()) {
845  express_cat.spam()
846  << "ADD: " << length << " (to " << _add_pos << ")" << endl;
847  }
848 
849  // write ADD length
850  StreamWriter patch_writer(write_stream);
851  patch_writer.add_uint16((PN_uint16)length);
852 
853  // if there are bytes to add, add them
854  if (length > 0) {
855  patch_writer.append_data(buffer, (PN_uint16)length);
856  }
857 
858  _add_pos += length;
859 }
860 
861 ////////////////////////////////////////////////////////////////////
862 // Function: Patchfile::emit_COPY
863 // Access: Private
864 // Description:
865 ////////////////////////////////////////////////////////////////////
866 void Patchfile::
867 emit_COPY(ostream &write_stream, PN_uint32 length, PN_uint32 copy_pos) {
868  nassertv(length == (PN_uint16)length); //we only write a uint16
869 
870  PN_int32 offset = (int)copy_pos - (int)_last_copy_pos;
871  if (express_cat.is_spam()) {
872  express_cat.spam()
873  << "COPY: " << length << " bytes from offset " << offset
874  << " (from " << copy_pos << " to " << _add_pos << ")" << endl;
875  }
876 
877  // write COPY length
878  StreamWriter patch_writer(write_stream);
879  patch_writer.add_uint16((PN_uint16)length);
880 
881  if ((PN_uint16)length != 0) {
882  // write COPY offset
883  patch_writer.add_int32(offset);
884  _last_copy_pos = copy_pos + length;
885  }
886 
887  _add_pos += length;
888 }
889 
890 ////////////////////////////////////////////////////////////////////
891 // Function: Patchfile::emit_add_and_copy
892 // Access: Private
893 // Description: Emits an add/copy pair. If necessary, repeats the
894 // pair as needed to work around the 16-bit chunk size
895 // limit.
896 ////////////////////////////////////////////////////////////////////
897 void Patchfile::
898 emit_add_and_copy(ostream &write_stream,
899  PN_uint32 add_length, const char *add_buffer,
900  PN_uint32 copy_length, PN_uint32 copy_pos) {
901  if (add_length == 0 && copy_length == 0) {
902  // Don't accidentally emit a termination code.
903  return;
904  }
905 
906  static const PN_uint16 max_write = 65535;
907  while (add_length > max_write) {
908  // Overflow. This chunk is too large to fit into a single
909  // ADD block, so we have to write it as multiple ADDs.
910  emit_ADD(write_stream, max_write, add_buffer);
911  add_buffer += max_write;
912  add_length -= max_write;
913  emit_COPY(write_stream, 0, 0);
914  }
915 
916  emit_ADD(write_stream, add_length, add_buffer);
917 
918  while (copy_length > max_write) {
919  // Overflow.
920  emit_COPY(write_stream, max_write, copy_pos);
921  copy_pos += max_write;
922  copy_length -= max_write;
923  emit_ADD(write_stream, 0, NULL);
924  }
925 
926  emit_COPY(write_stream, copy_length, copy_pos);
927 }
928 
929 ////////////////////////////////////////////////////////////////////
930 // Function: Patchfile::cache_add_and_copy
931 // Access: Private
932 // Description: Potentially emits one or more add/copy pairs. The
933 // current state is saved, so as to minimize wasted
934 // emits from consecutive adds or copies.
935 ////////////////////////////////////////////////////////////////////
936 void Patchfile::
937 cache_add_and_copy(ostream &write_stream,
938  PN_uint32 add_length, const char *add_buffer,
939  PN_uint32 copy_length, PN_uint32 copy_pos) {
940  if (add_length != 0) {
941  if (_cache_copy_length != 0) {
942  // Have to flush.
943  cache_flush(write_stream);
944  }
945  // Add the string to the current cache.
946  _cache_add_data += string(add_buffer, add_length);
947  }
948 
949  if (copy_length != 0) {
950  if (_cache_copy_length == 0) {
951  // Start a new copy phase.
952  _cache_copy_start = copy_pos;
953  _cache_copy_length = copy_length;
954 
955  } else if (_cache_copy_start + _cache_copy_length == copy_pos) {
956  // We can just tack on the copy to what we've already got.
957  _cache_copy_length += copy_length;
958 
959  } else {
960  // It's a discontinuous copy. We have to flush.
961  cache_flush(write_stream);
962  _cache_copy_start = copy_pos;
963  _cache_copy_length = copy_length;
964  }
965  }
966 }
967 
968 ////////////////////////////////////////////////////////////////////
969 // Function: Patchfile::cache_flush
970 // Access: Private
971 // Description: Closes any copy or add phases that are still open
972 // after a previous call to cache_add_and_copy().
973 ////////////////////////////////////////////////////////////////////
974 void Patchfile::
975 cache_flush(ostream &write_stream) {
976  emit_add_and_copy(write_stream,
977  _cache_add_data.size(), _cache_add_data.data(),
978  _cache_copy_length, _cache_copy_start);
979  _cache_add_data = string();
980  _cache_copy_length = 0;
981 }
982 
983 
984 ////////////////////////////////////////////////////////////////////
985 // Function: Patchfile::write_header
986 // Access: Private
987 // Description:
988 // Writes the patchfile header.
989 ////////////////////////////////////////////////////////////////////
990 void Patchfile::
991 write_header(ostream &write_stream,
992  istream &stream_orig, istream &stream_new) {
993  // prepare to write the patch file header
994 
995  // write the patch file header
996  StreamWriter patch_writer(write_stream);
997  patch_writer.add_uint32(_magic_number);
998  patch_writer.add_uint16(_current_version);
999 
1000  stream_orig.seekg(0, ios::end);
1001  streampos source_file_length = stream_orig.tellg();
1002  patch_writer.add_uint32((PN_uint32)source_file_length);
1003 
1004  // calc MD5 of original file
1005  _MD5_ofSource.hash_stream(stream_orig);
1006  // add it to the header
1007  _MD5_ofSource.write_stream(patch_writer);
1008 
1009  if (express_cat.is_debug()) {
1010  express_cat.debug()
1011  << "Orig: " << _MD5_ofSource << "\n";
1012  }
1013 
1014  stream_new.seekg(0, ios::end);
1015  streampos result_file_length = stream_new.tellg();
1016  patch_writer.add_uint32((PN_uint32)result_file_length);
1017 
1018  // calc MD5 of resultant patched file
1019  _MD5_ofResult.hash_stream(stream_new);
1020  // add it to the header
1021  _MD5_ofResult.write_stream(patch_writer);
1022 
1023  if (express_cat.is_debug()) {
1024  express_cat.debug()
1025  << " New: " << _MD5_ofResult << "\n";
1026  }
1027 }
1028 
1029 ////////////////////////////////////////////////////////////////////
1030 // Function: Patchfile::write_terminator
1031 // Access: Private
1032 // Description: Writes the patchfile terminator.
1033 ////////////////////////////////////////////////////////////////////
1034 void Patchfile::
1035 write_terminator(ostream &write_stream) {
1036  cache_flush(write_stream);
1037  // write terminator (null ADD, null COPY)
1038  emit_ADD(write_stream, 0, NULL);
1039  emit_COPY(write_stream, 0, 0);
1040 }
1041 
1042 ////////////////////////////////////////////////////////////////////
1043 // Function: Patchfile::compute_file_patches
1044 // Access: Private
1045 // Description: Computes the patches for the entire file (if it is
1046 // not a multifile) or for a single subfile (if it is)
1047 //
1048 // Returns true if successful, false on error.
1049 ////////////////////////////////////////////////////////////////////
1050 bool Patchfile::
1051 compute_file_patches(ostream &write_stream,
1052  PN_uint32 offset_orig, PN_uint32 offset_new,
1053  istream &stream_orig, istream &stream_new) {
1054  // read in original file
1055  stream_orig.seekg(0, ios::end);
1056  nassertr(stream_orig, false);
1057  PN_uint32 source_file_length = stream_orig.tellg();
1058  if (express_cat.is_debug()) {
1059  express_cat.debug()
1060  << "Allocating " << source_file_length << " bytes to read orig\n";
1061  }
1062 
1063  char *buffer_orig = (char *)PANDA_MALLOC_ARRAY(source_file_length);
1064  stream_orig.seekg(0, ios::beg);
1065  stream_orig.read(buffer_orig, source_file_length);
1066 
1067  // read in new file
1068  stream_new.seekg(0, ios::end);
1069  PN_uint32 result_file_length = stream_new.tellg();
1070  nassertr(stream_new, false);
1071  if (express_cat.is_debug()) {
1072  express_cat.debug()
1073  << "Allocating " << result_file_length << " bytes to read new\n";
1074  }
1075 
1076  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(result_file_length);
1077  stream_new.seekg(0, ios::beg);
1078  stream_new.read(buffer_new, result_file_length);
1079 
1080  // allocate hash/link tables
1081  if (_hash_table == (PN_uint32 *)NULL) {
1082  if (express_cat.is_debug()) {
1083  express_cat.debug()
1084  << "Allocating hashtable of size " << _HASHTABLESIZE << " * 4\n";
1085  }
1086  _hash_table = (PN_uint32 *)PANDA_MALLOC_ARRAY(_HASHTABLESIZE * sizeof(PN_uint32));
1087  }
1088 
1089  if (express_cat.is_debug()) {
1090  express_cat.debug()
1091  << "Allocating linktable of size " << source_file_length << " * 4\n";
1092  }
1093 
1094  PN_uint32 *link_table = (PN_uint32 *)PANDA_MALLOC_ARRAY(source_file_length * sizeof(PN_uint32));
1095 
1096  // build hash and link tables for original file
1097  build_hash_link_tables(buffer_orig, source_file_length, _hash_table, link_table);
1098 
1099  // run through new file
1100 
1101  PN_uint32 new_pos = 0;
1102  PN_uint32 start_pos = new_pos; // this is the position for the start of ADD operations
1103 
1104  if(((PN_uint32) result_file_length) >= _footprint_length)
1105  {
1106  while (new_pos < (result_file_length - _footprint_length)) {
1107 
1108  // find best match for current position
1109  PN_uint32 COPY_pos;
1110  PN_uint16 COPY_length;
1111 
1112  find_longest_match(new_pos, COPY_pos, COPY_length, _hash_table, link_table,
1113  buffer_orig, source_file_length, buffer_new, result_file_length);
1114 
1115  // if no match or match not longer than footprint length, skip to next byte
1116  if (COPY_length < _footprint_length) {
1117  // go to next byte
1118  new_pos++;
1119  } else {
1120  // emit ADD for all skipped bytes
1121  int num_skipped = (int)new_pos - (int)start_pos;
1122  if (express_cat.is_spam()) {
1123  express_cat.spam()
1124  << "build: num_skipped = " << num_skipped
1125  << endl;
1126  }
1127  cache_add_and_copy(write_stream, num_skipped, &buffer_new[start_pos],
1128  COPY_length, COPY_pos + offset_orig);
1129  new_pos += (PN_uint32)COPY_length;
1130  start_pos = new_pos;
1131  }
1132  }
1133  }
1134 
1135  if (express_cat.is_spam()) {
1136  express_cat.spam()
1137  << "build: result_file_length = " << result_file_length
1138  << " start_pos = " << start_pos
1139  << endl;
1140  }
1141 
1142  // are there still more bytes left in the new file?
1143  if (start_pos != result_file_length) {
1144  // emit ADD for all remaining bytes
1145 
1146  PN_uint32 remaining_bytes = result_file_length - start_pos;
1147  cache_add_and_copy(write_stream, remaining_bytes, &buffer_new[start_pos],
1148  0, 0);
1149  start_pos += remaining_bytes;
1150  }
1151 
1152  PANDA_FREE_ARRAY(link_table);
1153 
1154  PANDA_FREE_ARRAY(buffer_orig);
1155  PANDA_FREE_ARRAY(buffer_new);
1156 
1157  return true;
1158 }
1159 
1160 ////////////////////////////////////////////////////////////////////
1161 // Function: Patchfile::compute_mf_patches
1162 // Access: Private
1163 // Description: Computes patches for the files, knowing that they are
1164 // both Panda Multifiles. This will build patches one
1165 // subfile at a time, which can potentially be much,
1166 // much faster for large Multifiles that contain many
1167 // small subfiles.
1168 ////////////////////////////////////////////////////////////////////
1169 bool Patchfile::
1170 compute_mf_patches(ostream &write_stream,
1171  PN_uint32 offset_orig, PN_uint32 offset_new,
1172  istream &stream_orig, istream &stream_new) {
1173  Multifile mf_orig, mf_new;
1174  IStreamWrapper stream_origw(stream_orig);
1175  IStreamWrapper stream_neww(stream_new);
1176  if (!mf_orig.open_read(&stream_origw) ||
1177  !mf_new.open_read(&stream_neww)) {
1178  express_cat.error()
1179  << "Input multifiles appear to be corrupt.\n";
1180  return false;
1181  }
1182 
1183  if (mf_new.needs_repack()) {
1184  express_cat.error()
1185  << "Input multifiles need to be repacked.\n";
1186  return false;
1187  }
1188 
1189  // First, compute the patch for the header / index.
1190 
1191  {
1192  ISubStream index_orig(&stream_origw, 0, mf_orig.get_index_end());
1193  ISubStream index_new(&stream_neww, 0, mf_new.get_index_end());
1194  if (!do_compute_patches("", "",
1195  write_stream, offset_orig, offset_new,
1196  index_orig, index_new)) {
1197  return false;
1198  }
1199  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + mf_new.get_index_end(), false);
1200  }
1201 
1202  // Now walk through each subfile in the new multifile. If a
1203  // particular subfile exists in both source files, we compute the
1204  // patches for the subfile; for a new subfile, we trivially add it.
1205  // If a subfile has been removed, we simply don't add it (we'll
1206  // never even notice this case).
1207  int new_num_subfiles = mf_new.get_num_subfiles();
1208  for (int ni = 0; ni < new_num_subfiles; ++ni) {
1209  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + mf_new.get_subfile_internal_start(ni), false);
1210  string name = mf_new.get_subfile_name(ni);
1211  int oi = mf_orig.find_subfile(name);
1212 
1213  if (oi < 0) {
1214  // This is a newly-added subfile. Add it the hard way.
1215  express_cat.info()
1216  << "Adding subfile " << mf_new.get_subfile_name(ni) << "\n";
1217 
1218  streampos new_start = mf_new.get_subfile_internal_start(ni);
1219  size_t new_size = mf_new.get_subfile_internal_length(ni);
1220  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
1221  stream_new.seekg(new_start, ios::beg);
1222  stream_new.read(buffer_new, new_size);
1223  cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
1224  PANDA_FREE_ARRAY(buffer_new);
1225 
1226  } else {
1227  // This subfile exists in both the original and the new files.
1228  // Patch it.
1229  streampos orig_start = mf_orig.get_subfile_internal_start(oi);
1230  size_t orig_size = mf_orig.get_subfile_internal_length(oi);
1231 
1232  streampos new_start = mf_new.get_subfile_internal_start(ni);
1233  size_t new_size = mf_new.get_subfile_internal_length(ni);
1234 
1235  if (!patch_subfile(write_stream, offset_orig, offset_new,
1236  mf_new.get_subfile_name(ni),
1237  stream_origw, orig_start, orig_start + (streampos)orig_size,
1238  stream_neww, new_start, new_start + (streampos)new_size)) {
1239  return false;
1240  }
1241  }
1242  }
1243 
1244  return true;
1245 }
1246 
1247 #ifdef HAVE_TAR
1248 ////////////////////////////////////////////////////////////////////
1249 // Function: Patchfile::read_tar
1250 // Access: Private
1251 // Description: Uses libtar to extract the location within the tar
1252 // file of each of the subfiles. Returns true if the
1253 // tar file is read successfully, false if there is an
1254 // error (e.g. it is not a tar file).
1255 ////////////////////////////////////////////////////////////////////
1256 bool Patchfile::
1257 read_tar(TarDef &tar, istream &stream) {
1258  TAR *tfile;
1259  tartype_t tt;
1260  tt.openfunc = tar_openfunc;
1261  tt.closefunc = tar_closefunc;
1262  tt.readfunc = tar_readfunc;
1263  tt.writefunc = tar_writefunc;
1264 
1265  stream.seekg(0, ios::beg);
1266  nassertr(_tar_istream == NULL, false);
1267  _tar_istream = &stream;
1268  if (tar_open(&tfile, (char *)"dummy", &tt, O_RDONLY, 0, 0) != 0) {
1269  _tar_istream = NULL;
1270  return false;
1271  }
1272 
1273  // Walk through the tar file, noting the current file position as we
1274  // reach each subfile. Use this information to infer the start and
1275  // end of each subfile within the stream.
1276 
1277  streampos last_pos = 0;
1278  int flag = th_read(tfile);
1279  while (flag == 0) {
1280  TarSubfile subfile;
1281  subfile._name = th_get_pathname(tfile);
1282  subfile._header_start = last_pos;
1283  subfile._data_start = stream.tellg();
1284  subfile._data_end = subfile._data_start + (streampos)th_get_size(tfile);
1285  tar_skip_regfile(tfile);
1286  subfile._end = stream.tellg();
1287  tar.push_back(subfile);
1288 
1289  last_pos = subfile._end;
1290  flag = th_read(tfile);
1291  }
1292 
1293  // Create one more "subfile" for the bytes at the tail of the file.
1294  // This subfile has no name.
1295  TarSubfile subfile;
1296  subfile._header_start = last_pos;
1297  stream.clear();
1298  stream.seekg(0, ios::end);
1299  subfile._data_start = stream.tellg();
1300  subfile._data_end = subfile._data_start;
1301  subfile._end = subfile._data_start;
1302  tar.push_back(subfile);
1303 
1304  tar_close(tfile);
1305  _tar_istream = NULL;
1306  return (flag == 1);
1307 }
1308 #endif // HAVE_TAR
1309 
1310 #ifdef HAVE_TAR
1311 ////////////////////////////////////////////////////////////////////
1312 // Function: Patchfile::compute_tar_patches
1313 // Access: Private
1314 // Description: Computes patches for the files, knowing that they are
1315 // both tar files. This is similar to
1316 // compute_mf_patches().
1317 //
1318 // The tar indexes should have been built up by a
1319 // previous call to read_tar().
1320 ////////////////////////////////////////////////////////////////////
1321 bool Patchfile::
1322 compute_tar_patches(ostream &write_stream,
1323  PN_uint32 offset_orig, PN_uint32 offset_new,
1324  istream &stream_orig, istream &stream_new,
1325  TarDef &tar_orig, TarDef &tar_new) {
1326 
1327  // Sort the orig list by filename, so we can quickly look up files
1328  // from the new list.
1329  tar_orig.sort();
1330 
1331  // However, it is important to keep the new list in its original,
1332  // on-disk order.
1333 
1334  // Walk through each subfile in the new tar file. If a particular
1335  // subfile exists in both source files, we compute the patches for
1336  // the subfile; for a new subfile, we trivially add it. If a
1337  // subfile has been removed, we simply don't add it (we'll never
1338  // even notice this case).
1339 
1340  IStreamWrapper stream_origw(stream_orig);
1341  IStreamWrapper stream_neww(stream_new);
1342 
1343  TarDef::const_iterator ni;
1344  streampos last_pos = 0;
1345  for (ni = tar_new.begin(); ni != tar_new.end(); ++ni) {
1346  const TarSubfile &sf_new =(*ni);
1347  nassertr(sf_new._header_start == last_pos, false);
1348 
1349  TarDef::const_iterator oi = tar_orig.find(sf_new);
1350 
1351  if (oi == tar_orig.end()) {
1352  // This is a newly-added subfile. Add it the hard way.
1353  express_cat.info()
1354  << "Adding subfile " << sf_new._name << "\n";
1355 
1356  streampos new_start = sf_new._header_start;
1357  size_t new_size = sf_new._end - sf_new._header_start;
1358  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
1359  stream_new.seekg(new_start, ios::beg);
1360  stream_new.read(buffer_new, new_size);
1361  cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
1362  PANDA_FREE_ARRAY(buffer_new);
1363 
1364  } else {
1365  // This subfile exists in both the original and the new files.
1366  // Patch it.
1367  const TarSubfile &sf_orig =(*oi);
1368 
1369  // We patch the header and data of the file separately, so we
1370  // can accurately detect nested multifiles. The extra data at
1371  // the end of the file (possibly introduced by a tar file's
1372  // blocking) is the footer, which is also patched separately.
1373  if (!patch_subfile(write_stream, offset_orig, offset_new, "",
1374  stream_origw, sf_orig._header_start, sf_orig._data_start,
1375  stream_neww, sf_new._header_start, sf_new._data_start)) {
1376  return false;
1377  }
1378 
1379  if (!patch_subfile(write_stream, offset_orig, offset_new, sf_new._name,
1380  stream_origw, sf_orig._data_start, sf_orig._data_end,
1381  stream_neww, sf_new._data_start, sf_new._data_end)) {
1382  return false;
1383  }
1384 
1385  if (!patch_subfile(write_stream, offset_orig, offset_new, "",
1386  stream_origw, sf_orig._data_end, sf_orig._end,
1387  stream_neww, sf_new._data_end, sf_new._end)) {
1388  return false;
1389  }
1390  }
1391 
1392  last_pos = sf_new._end;
1393  }
1394 
1395  return true;
1396 }
1397 #endif // HAVE_TAR
1398 
1399 #ifdef HAVE_TAR
1400 ////////////////////////////////////////////////////////////////////
1401 // Function: Patchfile::tar_openfunc
1402 // Access: Private, Static
1403 // Description: A callback function to redirect libtar to read from
1404 // our istream instead of using low-level Unix I/O.
1405 ////////////////////////////////////////////////////////////////////
1406 int Patchfile::
1407 tar_openfunc(const char *, int, ...) {
1408  // Since we don't actually open a file--the stream is already
1409  // open--we do nothing here.
1410  return 0;
1411 }
1412 #endif // HAVE_TAR
1413 
1414 #ifdef HAVE_TAR
1415 ////////////////////////////////////////////////////////////////////
1416 // Function: Patchfile::tar_closefunc
1417 // Access: Private, Static
1418 // Description: A callback function to redirect libtar to read from
1419 // our istream instead of using low-level Unix I/O.
1420 ////////////////////////////////////////////////////////////////////
1421 int Patchfile::
1422 tar_closefunc(int) {
1423  // Since we don't actually open a file, no need to close it either.
1424  return 0;
1425 }
1426 #endif // HAVE_TAR
1427 
1428 #ifdef HAVE_TAR
1429 ////////////////////////////////////////////////////////////////////
1430 // Function: Patchfile::tar_readfunc
1431 // Access: Private, Static
1432 // Description: A callback function to redirect libtar to read from
1433 // our istream instead of using low-level Unix I/O.
1434 ////////////////////////////////////////////////////////////////////
1435 ssize_t Patchfile::
1436 tar_readfunc(int, void *buffer, size_t nbytes) {
1437  nassertr(_tar_istream != NULL, 0);
1438  _tar_istream->read((char *)buffer, nbytes);
1439  return (ssize_t)_tar_istream->gcount();
1440 }
1441 #endif // HAVE_TAR
1442 
1443 #ifdef HAVE_TAR
1444 ////////////////////////////////////////////////////////////////////
1445 // Function: Patchfile::tar_writefunc
1446 // Access: Private, Static
1447 // Description: A callback function to redirect libtar to read from
1448 // our istream instead of using low-level Unix I/O.
1449 ////////////////////////////////////////////////////////////////////
1450 ssize_t Patchfile::
1451 tar_writefunc(int, const void *, size_t) {
1452  // Since we use libtar only for reading, it is an error if this
1453  // method gets called.
1454  nassertr(false, -1);
1455  return -1;
1456 }
1457 #endif // HAVE_TAR
1458 
1459 ////////////////////////////////////////////////////////////////////
1460 // Function: Patchfile::build
1461 // Access: Public
1462 // Description:
1463 // This implementation uses the "greedy differencing
1464 // algorithm" described in the masters thesis
1465 // "Differential Compression: A Generalized Solution
1466 // for Binary Files" by Randal C. Burns (p.13).
1467 // For an original file of size M and a new file of
1468 // size N, this algorithm is O(M) in space and
1469 // O(M*N) (worst-case) in time.
1470 // return false on error
1471 ////////////////////////////////////////////////////////////////////
1472 bool Patchfile::
1473 build(Filename file_orig, Filename file_new, Filename patch_name) {
1474  patch_name.set_binary();
1475 
1476  // Open the original file for read
1477  pifstream stream_orig;
1478  file_orig.set_binary();
1479  if (!file_orig.open_read(stream_orig)) {
1480  express_cat.error()
1481  << "Patchfile::build() - Failed to open file: " << file_orig << endl;
1482  return false;
1483  }
1484 
1485  // Open the new file for read
1486  pifstream stream_new;
1487  file_new.set_binary();
1488  if (!file_new.open_read(stream_new)) {
1489  express_cat.error()
1490  << "Patchfile::build() - Failed to open file: " << file_new << endl;
1491  return false;
1492  }
1493 
1494  // Open patch file for write
1495  pofstream write_stream;
1496  if (!patch_name.open_write(write_stream)) {
1497  express_cat.error()
1498  << "Patchfile::build() - Failed to open file: " << patch_name << endl;
1499  return false;
1500  }
1501 
1502  _last_copy_pos = 0;
1503  _add_pos = 0;
1504  _cache_add_data = string();
1505  _cache_copy_start = 0;
1506  _cache_copy_length = 0;
1507 
1508  write_header(write_stream, stream_orig, stream_new);
1509 
1510  if (!do_compute_patches(file_orig, file_new,
1511  write_stream, 0, 0,
1512  stream_orig, stream_new)) {
1513  return false;
1514  }
1515 
1516  write_terminator(write_stream);
1517 
1518  if (express_cat.is_debug()) {
1519  express_cat.debug()
1520  << "Patch file will generate " << _add_pos << "-byte file.\n";
1521  }
1522 
1523 #ifndef NDEBUG
1524  {
1525  // Make sure the resulting file would be the right size.
1526  stream_new.seekg(0, ios::end);
1527  streampos result_file_length = stream_new.tellg();
1528  nassertr(_add_pos == result_file_length, false);
1529  }
1530 #endif // NDEBUG
1531 
1532  return (_last_copy_pos != 0);
1533 }
1534 
1535 ////////////////////////////////////////////////////////////////////
1536 // Function: Patchfile::do_compute_patches
1537 // Access: Private
1538 // Description: Computes the patches for the indicated A to B files,
1539 // or subfiles. Checks for multifiles or tar files
1540 // before falling back to whole-file patching.
1541 ////////////////////////////////////////////////////////////////////
1542 bool Patchfile::
1543 do_compute_patches(const Filename &file_orig, const Filename &file_new,
1544  ostream &write_stream,
1545  PN_uint32 offset_orig, PN_uint32 offset_new,
1546  istream &stream_orig, istream &stream_new) {
1547  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new, false);
1548 
1549  // Check whether our input files are Panda multifiles or tar files.
1550  bool is_multifile = false;
1551 #ifdef HAVE_TAR
1552  bool is_tarfile = false;
1553  TarDef tar_orig, tar_new;
1554 #endif // HAVE_TAR
1555 
1556  if (_allow_multifile) {
1557  if (strstr(file_orig.get_basename().c_str(), ".mf") != NULL ||
1558  strstr(file_new.get_basename().c_str(), ".mf") != NULL) {
1559  // Read the first n bytes of both files for the Multifile magic
1560  // number.
1561  string magic_number = Multifile::get_magic_number();
1562  char *buffer = (char *)PANDA_MALLOC_ARRAY(magic_number.size());
1563  stream_orig.seekg(0, ios::beg);
1564  stream_orig.read(buffer, magic_number.size());
1565 
1566  if (stream_orig.gcount() == (int)magic_number.size() &&
1567  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1568  stream_new.seekg(0, ios::beg);
1569  stream_new.read(buffer, magic_number.size());
1570  if (stream_new.gcount() == (int)magic_number.size() &&
1571  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1572  is_multifile = true;
1573  }
1574  }
1575  PANDA_FREE_ARRAY(buffer);
1576  }
1577 #ifdef HAVE_TAR
1578  if (strstr(file_orig.get_basename().c_str(), ".tar") != NULL ||
1579  strstr(file_new.get_basename().c_str(), ".tar") != NULL) {
1580  if (read_tar(tar_orig, stream_orig) &&
1581  read_tar(tar_new, stream_new)) {
1582  is_tarfile = true;
1583  }
1584  }
1585 #endif // HAVE_TAR
1586  }
1587 
1588  if (is_multifile) {
1589  if (express_cat.is_debug()) {
1590  express_cat.debug()
1591  << file_orig.get_basename() << " appears to be a Panda Multifile.\n";
1592  }
1593  if (!compute_mf_patches(write_stream, offset_orig, offset_new,
1594  stream_orig, stream_new)) {
1595  return false;
1596  }
1597 #ifdef HAVE_TAR
1598  } else if (is_tarfile) {
1599  if (express_cat.is_debug()) {
1600  express_cat.debug()
1601  << file_orig.get_basename() << " appears to be a tar file.\n";
1602  }
1603  if (!compute_tar_patches(write_stream, offset_orig, offset_new,
1604  stream_orig, stream_new, tar_orig, tar_new)) {
1605  return false;
1606  }
1607 #endif // HAVE_TAR
1608  } else {
1609  if (express_cat.is_debug()) {
1610  express_cat.debug()
1611  << file_orig.get_basename() << " is not a multifile.\n";
1612  }
1613  if (!compute_file_patches(write_stream, offset_orig, offset_new,
1614  stream_orig, stream_new)) {
1615  return false;
1616  }
1617  }
1618 
1619  return true;
1620 }
1621 
1622 ////////////////////////////////////////////////////////////////////
1623 // Function: Patchfile::patch_subfile
1624 // Access: Private
1625 // Description: Generates patches for a nested subfile of a Panda
1626 // Multifile or a tar file.
1627 ////////////////////////////////////////////////////////////////////
1628 bool Patchfile::
1629 patch_subfile(ostream &write_stream,
1630  PN_uint32 offset_orig, PN_uint32 offset_new,
1631  const Filename &filename,
1632  IStreamWrapper &stream_orig, streampos orig_start, streampos orig_end,
1633  IStreamWrapper &stream_new, streampos new_start, streampos new_end) {
1634  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + new_start, false);
1635 
1636  size_t new_size = new_end - new_start;
1637  size_t orig_size = orig_end - orig_start;
1638 
1639  ISubStream subfile_orig(&stream_orig, orig_start, orig_end);
1640  ISubStream subfile_new(&stream_new, new_start, new_end);
1641 
1642  bool is_unchanged = false;
1643  if (orig_size == new_size) {
1644  HashVal hash_orig, hash_new;
1645  hash_orig.hash_stream(subfile_orig);
1646  hash_new.hash_stream(subfile_new);
1647 
1648  if (hash_orig == hash_new) {
1649  // Actually, the subfile is unchanged; just emit it.
1650  is_unchanged = true;
1651  }
1652  }
1653 
1654  if (is_unchanged) {
1655  if (express_cat.is_debug() && !filename.empty()) {
1656  express_cat.debug()
1657  << "Keeping subfile " << filename << "\n";
1658  }
1659  cache_add_and_copy(write_stream, 0, NULL,
1660  orig_size, offset_orig + orig_start);
1661 
1662  } else {
1663  if (!filename.empty()) {
1664  express_cat.info()
1665  << "Patching subfile " << filename << "\n";
1666  }
1667 
1668  if (!do_compute_patches(filename, filename, write_stream,
1669  offset_orig + orig_start, offset_new + new_start,
1670  subfile_orig, subfile_new)) {
1671  return false;
1672  }
1673  }
1674 
1675  return true;
1676 }
1677 
1678 #endif // HAVE_OPENSSL
A StreamWriter object is used to write sequential binary data directly to an ostream.
Definition: streamWriter.h:33
string get_basename() const
Returns the basename part of the filename.
Definition: filename.I:436
bool open_write(ofstream &stream, bool truncate=true) const
Opens the indicated ifstream for writing the file, if possible.
Definition: filename.cxx:2045
size_t get_subfile_internal_length(int index) const
Returns the number of bytes the indicated subfile consumes within the archive.
Definition: multifile.cxx:1818
A hierarchy of directories and files that appears to be one continuous file system, even though the files may originate from several different sources that may not be related to the actual OS&#39;s file system.
bool needs_repack() const
Returns true if the Multifile index is suboptimal and should be repacked.
Definition: multifile.I:71
istream * open_read_file(const Filename &filename, bool auto_unwrap) const
Convenience function; returns a newly allocated istream if the file exists and can be read...
void set_binary()
Indicates that the filename represents a binary file.
Definition: filename.I:494
int find_subfile(const string &subfile_name) const
Returns the index of the subfile with the indicated name, or -1 if the named subfile is not within th...
Definition: multifile.cxx:1561
streampos get_index_end() const
Returns the first byte that is guaranteed to follow any index byte already written to disk in the Mul...
Definition: multifile.cxx:1785
Stores a 128-bit value that represents the hashed contents (typically MD5) of a file or buffer...
Definition: hashVal.h:32
static void close_read_file(istream *stream)
Closes a file opened by a previous call to open_read_file().
bool open_read(ifstream &stream) const
Opens the indicated ifstream for reading the file, if possible.
Definition: filename.cxx:2003
static Filename temporary(const string &dirname, const string &prefix, const string &suffix=string(), Type type=T_general)
Generates a temporary filename within the indicated directory, using the indicated prefix...
Definition: filename.cxx:439
Definition: buffer.h:26
An istream object that presents a subwindow into another istream.
Definition: subStream.h:34
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:44
static string get_magic_number()
Returns a string with the first n bytes written to a Multifile, to identify it as a Multifile...
Definition: multifile.I:376
This class provides a locking wrapper around an arbitrary istream pointer.
Definition: streamWrapper.h:53
static VirtualFileSystem * get_global_ptr()
Returns the default global VirtualFileSystem.
bool open_read(const Filename &multifile_name, const streampos &offset=0)
Opens the named Multifile on disk for reading.
Definition: multifile.cxx:190
A file that contains a set of files.
Definition: multifile.h:34
A class to read sequential binary data directly from an istream.
Definition: streamReader.h:30
streampos get_subfile_internal_start(int index) const
Returns the starting byte position within the Multifile at which the indicated subfile begins...
Definition: multifile.cxx:1801
const string & get_subfile_name(int index) const
Returns the name of the nth subfile.
Definition: multifile.cxx:1686
int get_num_subfiles() const
Returns the number of subfiles within the Multifile.
Definition: multifile.cxx:1549