Panda3D
patchfile.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file patchfile.cxx
10  * @author darren, mike
11  * @date 1997-01-09
12  */
13 
14 #include "pandabase.h"
15 
16 #ifdef HAVE_OPENSSL
17 
18 #include "config_express.h"
19 #include "error_utils.h"
20 #include "patchfile.h"
21 #include "streamReader.h"
22 #include "streamWriter.h"
23 #include "multifile.h"
24 #include "hashVal.h"
25 #include "virtualFileSystem.h"
26 
27 #include <string.h> // for strstr
28 
29 #ifdef HAVE_TAR
30 #include <libtar.h>
31 #include <fcntl.h> // for O_RDONLY
32 #endif // HAVE_TAR
33 
34 #ifdef HAVE_TAR
35 std::istream *Patchfile::_tar_istream = nullptr;
36 #endif // HAVE_TAR
37 
38 using std::endl;
39 using std::ios;
40 using std::istream;
41 using std::min;
42 using std::ostream;
43 using std::streampos;
44 using std::string;
45 
// Defining USE_MD5_FOR_HASHTABLE_INDEX_VALUES actually slows things down, so
// it is left disabled:
// #define USE_MD5_FOR_HASHTABLE_INDEX_VALUES
48 
/*
 * Patch File Format
 * IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE INSTALLER
 *
 * [ HEADER ]
 *    4 bytes  0xfeebfaac ("magic number")
 *             (older patch files have a magic number 0xfeebfaab, indicating
 *             they are version number 0.)
 *    2 bytes  version number (if magic number == 0xfeebfaac)
 *    4 bytes  length of starting file (if version >= 1)
 *   16 bytes  MD5 of starting file (if version >= 1)
 *    4 bytes  length of resulting patched file
 *   16 bytes  MD5 of resultant patched file
 *
 * Note that MD5 hashes are written in the order observed by
 * HashVal::read_stream() and HashVal::write_stream(), which is not the normal
 * linear order. (Each group of four bytes is reversed.)
 */
60 
// Size in bytes of a version-0 header: magic number (4), result length (4)
// and result MD5 (16).
const int _v0_header_length = 4 + 4 + 16;
// Size in bytes of a version >= 1 header: magic number (4), version (2),
// source length (4), source MD5 (16), result length (4) and result MD5 (16).
const int _v1_header_length = 4 + 2 + 4 + 16 + 4 + 16;
/*
 * [ ADD/COPY pairs; repeated N times ]
 *    2 bytes  AL = ADD length
 *   AL bytes  bytes to add
 *    2 bytes  CL = COPY length
 *    4 bytes  offset of data to copy from original file, if CL != 0.
 *             If version >= 2, offset is relative to end of previous copy
 *             block; if version < 2, offset is relative to beginning of
 *             file.
 *
 * [ TERMINATOR ]
 *    2 bytes  zero-length ADD
 *    2 bytes  zero-length COPY
 */
70 
// Defines

// Magic number of version-0 patch files, which carry no version field.
const uint32_t Patchfile::_v0_magic_number = 0xfeebfaab;
// Magic number of patch files that carry an explicit version number.
const uint32_t Patchfile::_magic_number = 0xfeebfaac;

// Created version 1 on 11202 to store length and MD5 of original file. To
// version 2 on 11202 to store copy offsets as relative.
// NOTE(review): "11202" looks like a date that lost its separators - confirm.
const uint16_t Patchfile::_current_version = 2;

// Number of bits in a footprint hash value; the hash table has one entry for
// every possible hash value.
const uint32_t Patchfile::_HASH_BITS = 24;
const uint32_t Patchfile::_HASHTABLESIZE = uint32_t(1) << Patchfile::_HASH_BITS;
const uint32_t Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
// All bits set (0xffffffff); marks an empty hash-table or link-table entry.
const uint32_t Patchfile::_NULL_VALUE = uint32_t(0) - 1;
// Largest run that fits in the 16-bit ADD/COPY length fields (65535).
const uint32_t Patchfile::_MAX_RUN_LENGTH = (uint32_t(1) << 16) - 1;
// Mask that keeps only the low _HASH_BITS bits of a hash value.
const uint32_t Patchfile::_HASH_MASK = (uint32_t(1) << Patchfile::_HASH_BITS) - 1;
85 
86 /**
87  * Create a patch file and initializes internal data
88  */
89 Patchfile::
90 Patchfile() {
91  PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
92  init(buffer);
93 }
94 
/**
 * Creates a Patchfile that uses the caller-supplied buffer as its work
 * buffer.  All remaining setup is delegated to init().
 */
Patchfile::
Patchfile(PT(Buffer) buffer) {
  init(buffer);
}
102 
103 /**
104  *
105  */
106 void Patchfile::
107 init(PT(Buffer) buffer) {
108  _rename_output_to_orig = false;
109  _delete_patchfile = false;
110  _hash_table = nullptr;
111  _initiated = false;
112  nassertv(!buffer.is_null());
113  _buffer = buffer;
114 
115  _version_number = 0;
116  _allow_multifile = true;
117 
118  _patch_stream = nullptr;
119  _origfile_stream = nullptr;
120 
121  reset_footprint_length();
122 }
123 
124 /**
125  *
126  */
127 Patchfile::
128 ~Patchfile() {
129  if (_hash_table != nullptr) {
130  PANDA_FREE_ARRAY(_hash_table);
131  }
132 
133  if (_initiated) {
134  cleanup();
135  }
136 
137  nassertv(_patch_stream == nullptr);
138  nassertv(_origfile_stream == nullptr);
139 }
140 
141 /**
142  * Closes and clean up internal data structures
143  */
144 void Patchfile::
145 cleanup() {
146  if (!_initiated) {
147  express_cat.error()
148  << "Patchfile::cleanup() - Patching has not been initiated"
149  << endl;
150  return;
151  }
152 
153  // close files
155  if (_origfile_stream != nullptr) {
156  vfs->close_read_file(_origfile_stream);
157  _origfile_stream = nullptr;
158  }
159  if (_patch_stream != nullptr) {
160  vfs->close_read_file(_patch_stream);
161  _patch_stream = nullptr;
162  }
163  _write_stream.close();
164 
165  _initiated = false;
166 }
167 
168 // PATCH FILE APPLY MEMBER FUNCTIONS
169 
170 // NOTE: this patch-application functionality unfortunately has to be
171 // duplicated in the Installer. It is contained in the file
172 // installerApplyPatch.cxx PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY
173 // OF THIS LOGIC CHANGES! (i.e. if the patch file format changes)
174 
175 /**
176  * Set up to apply the patch to the file (original file and patch are
177  * destroyed in the process).
178  */
179 int Patchfile::
180 initiate(const Filename &patch_file, const Filename &file) {
181  int result = initiate(patch_file, file, Filename::temporary("", "patch_"));
182  _rename_output_to_orig = true;
183  _delete_patchfile = !keep_temporary_files;
184  return result;
185 }
186 
187 /**
188  * Set up to apply the patch to the file. In this form, neither the original
189  * file nor the patch file are destroyed.
190  */
191 int Patchfile::
192 initiate(const Filename &patch_file, const Filename &orig_file,
193  const Filename &target_file) {
194  if (_initiated) {
195  express_cat.error()
196  << "Patchfile::initiate() - Patching has already been initiated"
197  << endl;
198  return EU_error_abort;
199  }
200 
201  nassertr(orig_file != target_file, EU_error_abort);
202 
204 
205  // Open the original file for read
206  nassertr(_origfile_stream == nullptr, EU_error_abort);
207  _orig_file = orig_file;
208  _orig_file.set_binary();
209  _origfile_stream = vfs->open_read_file(_orig_file, false);
210  if (_origfile_stream == nullptr) {
211  express_cat.error()
212  << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
213  return get_write_error();
214  }
215 
216  // Open the temp file for write
217  _output_file = target_file;
218  _output_file.set_binary();
219  if (!_output_file.open_write(_write_stream)) {
220  express_cat.error()
221  << "Patchfile::initiate() - Failed to open file: " << _output_file << endl;
222  return get_write_error();
223  }
224 
225  if (express_cat.is_debug()) {
226  express_cat.debug()
227  << "Patchfile using output file " << _output_file << "\n";
228  }
229 
230  int result = internal_read_header(patch_file);
231  _total_bytes_processed = 0;
232 
233  _initiated = true;
234  return result;
235 }
236 
237 /**
238  * Opens the patch file for reading, and gets the header information from the
239  * file but does not begin to do any real work. This can be used to query the
240  * data stored in the patch.
241  */
242 int Patchfile::
243 read_header(const Filename &patch_file) {
244  if (_initiated) {
245  express_cat.error()
246  << "Patchfile::initiate() - Patching has already been initiated"
247  << endl;
248  return EU_error_abort;
249  }
250 
251  int result = internal_read_header(patch_file);
252  if (_patch_stream != nullptr) {
254  vfs->close_read_file(_patch_stream);
255  _patch_stream = nullptr;
256  }
257  return result;
258 }
259 
260 /**
261  * Perform one buffer's worth of patching Returns EU_ok while patching Returns
262  * EU_success when done If error happens will return one of: EU_error_abort :
263  * Patching has not been initiated EU_error_file_invalid : file is corrupted
264  * EU_error_invalid_checksum : incompatible patch file
265  * EU_error_write_file_rename : could not rename file
266  */
267 int Patchfile::
268 run() {
269  // Now patch the file using the given buffer
270  int buflen;
271  int bytes_read;
272  uint16_t ADD_length;
273  uint16_t COPY_length;
274  int32_t COPY_offset;
275 
276  if (_initiated == false) {
277  express_cat.error()
278  << "Patchfile::run() - Patching has not been initiated"
279  << endl;
280  return EU_error_abort;
281  }
282 
283  nassertr(_patch_stream != nullptr, EU_error_abort);
284  nassertr(_origfile_stream != nullptr, EU_error_abort);
285  StreamReader patch_reader(*_patch_stream);
286 
287  buflen = _buffer->get_length();
288  bytes_read = 0;
289 
290  while (bytes_read < buflen) {
291  // read # of ADD bytes
292  nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
293  ADD_length = patch_reader.get_uint16();
294  if (_patch_stream->fail()) {
295  express_cat.error()
296  << "Truncated patch file.\n";
297  return EU_error_file_invalid;
298  }
299 
300  bytes_read += (int)ADD_length;
301  _total_bytes_processed += (int)ADD_length;
302  if (_total_bytes_processed > _total_bytes_to_process) {
303  express_cat.error()
304  << "Runaway patch file.\n";
305  return EU_error_file_invalid;
306  }
307 
308  // if there are bytes to add, read them from patch file and write them to
309  // output
310  if (express_cat.is_spam() && ADD_length != 0) {
311  express_cat.spam()
312  << "ADD: " << ADD_length << " (to "
313  << _write_stream.tellp() << ")" << endl;
314  }
315 
316  uint32_t bytes_left = (uint32_t)ADD_length;
317  while (bytes_left > 0) {
318  uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
319  _patch_stream->read(_buffer->_buffer, bytes_this_time);
320  if (_patch_stream->fail()) {
321  express_cat.error()
322  << "Truncated patch file.\n";
323  return EU_error_file_invalid;
324  }
325  _write_stream.write(_buffer->_buffer, bytes_this_time);
326  bytes_left -= bytes_this_time;
327  }
328 
329  // read # of COPY bytes
330  nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
331  COPY_length = patch_reader.get_uint16();
332  if (_patch_stream->fail()) {
333  express_cat.error()
334  << "Truncated patch file.\n";
335  return EU_error_file_invalid;
336  }
337 
338  bytes_read += (int)COPY_length;
339  _total_bytes_processed += (int)COPY_length;
340  if (_total_bytes_processed > _total_bytes_to_process) {
341  express_cat.error()
342  << "Runaway patch file.\n";
343  return EU_error_file_invalid;
344  }
345 
346  // if there are bytes to copy, read them from original file and write them
347  // to output
348  if (0 != COPY_length) {
349  // read copy offset
350  nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
351  COPY_offset = patch_reader.get_int32();
352  if (_patch_stream->fail()) {
353  express_cat.error()
354  << "Truncated patch file.\n";
355  return EU_error_file_invalid;
356  }
357 
358  // seek to the copy source pos
359  if (_version_number < 2) {
360  _origfile_stream->seekg(COPY_offset, ios::beg);
361  } else {
362  _origfile_stream->seekg(COPY_offset, ios::cur);
363  }
364  if (_origfile_stream->fail()) {
365  express_cat.error()
366  << "Invalid copy offset in patch file.\n";
367  return EU_error_file_invalid;
368  }
369 
370  if (express_cat.is_spam()) {
371  express_cat.spam()
372  << "COPY: " << COPY_length << " bytes from offset "
373  << COPY_offset << " (from " << _origfile_stream->tellg()
374  << " to " << _write_stream.tellp() << ")"
375  << endl;
376  }
377 
378  // read the copy bytes from original file and write them to output
379  uint32_t bytes_left = (uint32_t)COPY_length;
380 
381  while (bytes_left > 0) {
382  uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
383  _origfile_stream->read(_buffer->_buffer, bytes_this_time);
384  if (_origfile_stream->fail()) {
385  express_cat.error()
386  << "Invalid copy length in patch file.\n";
387  return EU_error_file_invalid;
388  }
389  _write_stream.write(_buffer->_buffer, bytes_this_time);
390  bytes_left -= bytes_this_time;
391  }
392  }
393 
394  // if we got a pair of zero-length ADD and COPY blocks, we're done
395  if ((0 == ADD_length) && (0 == COPY_length)) {
396  cleanup();
397 
398  if (express_cat.is_debug()) {
399  express_cat.debug()
400  // << "result file = " << _result_file_length
401  << " total bytes = " << _total_bytes_processed << endl;
402  }
403 
404  // check the MD5 from the patch file against the newly patched file
405  {
406  HashVal MD5_actual;
407  MD5_actual.hash_file(_output_file);
408  if (_MD5_ofResult != MD5_actual) {
409  // Whoops, patching screwed up somehow.
410  if (_origfile_stream != nullptr) {
412  vfs->close_read_file(_origfile_stream);
413  _origfile_stream = nullptr;
414  }
415  _write_stream.close();
416 
417  express_cat.info()
418  << "Patching produced incorrect checksum. Got:\n"
419  << " " << MD5_actual
420  << "\nExpected:\n"
421  << " " << _MD5_ofResult
422  << "\n";
423 
424  // This is a fine time to double-check the starting checksum.
425  if (!has_source_hash()) {
426  express_cat.info()
427  << "No source hash in patch file to verify.\n";
428  } else {
429  HashVal MD5_orig;
430  MD5_orig.hash_file(_orig_file);
431  if (MD5_orig != get_source_hash()) {
432  express_cat.info()
433  << "Started from incorrect source file. Got:\n"
434  << " " << MD5_orig
435  << "\nExpected:\n"
436  << " " << get_source_hash()
437  << "\n";
438  } else {
439  express_cat.info()
440  << "Started from correct source file:\n"
441  << " " << MD5_orig
442  << "\n";
443  }
444  }
445 
446  // delete the temp file and the patch file
447  if (_rename_output_to_orig) {
448  _output_file.unlink();
449  }
450  if (_delete_patchfile) {
451  _patch_file.unlink();
452  }
453  // return "invalid checksum"
454  return EU_error_invalid_checksum;
455  }
456  }
457 
458  // delete the patch file
459  if (_delete_patchfile) {
460  _patch_file.unlink();
461  }
462 
463  // rename the temp file to the original file name
464  if (_rename_output_to_orig) {
465  _orig_file.unlink();
466  if (!_output_file.rename_to(_orig_file)) {
467  express_cat.error()
468  << "Patchfile::run() failed to rename temp file to: " << _orig_file
469  << endl;
470  return EU_error_write_file_rename;
471  }
472  }
473 
474  return EU_success;
475  }
476  }
477 
478  return EU_ok;
479 }
480 
481 /**
482  * Patches the entire file in one call returns true on success and false on
483  * error
484  *
485  * This version will delete the patch file and overwrite the original file.
486  */
487 bool Patchfile::
488 apply(Filename &patch_file, Filename &file) {
489  int ret = initiate(patch_file, file);
490  if (ret < 0)
491  return false;
492  for (;;) {
493  ret = run();
494  if (ret == EU_success)
495  return true;
496  if (ret < 0)
497  return false;
498  }
499  return false;
500 }
501 
502 /**
503  * Patches the entire file in one call returns true on success and false on
504  * error
505  *
506  * This version will not delete any files.
507  */
508 bool Patchfile::
509 apply(Filename &patch_file, Filename &orig_file, const Filename &target_file) {
510  int ret = initiate(patch_file, orig_file, target_file);
511  if (ret < 0)
512  return false;
513  for (;;) {
514  ret = run();
515  if (ret == EU_success)
516  return true;
517  if (ret < 0)
518  return false;
519  }
520  return false;
521 }
522 
523 
524 /**
525  * Reads the header and leaves the patch file open.
526  */
527 int Patchfile::
528 internal_read_header(const Filename &patch_file) {
529  // Open the patch file for read
531  nassertr(_patch_stream == nullptr, EU_error_abort);
532  _patch_file = patch_file;
533  _patch_file.set_binary();
534  _patch_stream = vfs->open_read_file(_patch_file, true);
535  if (_patch_stream == nullptr) {
536  express_cat.error()
537  << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
538  return get_write_error();
539  }
540 
541  // read header, make sure the patch file is valid
542  StreamReader patch_reader(*_patch_stream);
543 
544  // check the magic number
545  nassertr(_buffer->get_length() >= _v0_header_length, false);
546  uint32_t magic_number = patch_reader.get_uint32();
547  if (magic_number != _magic_number && magic_number != _v0_magic_number) {
548  express_cat.error()
549  << "Invalid patch file: " << _patch_file << endl;
550  return EU_error_file_invalid;
551  }
552 
553  _version_number = 0;
554  if (magic_number != _v0_magic_number) {
555  _version_number = patch_reader.get_uint16();
556  }
557  if (_version_number > _current_version) {
558  express_cat.error()
559  << "Can't read version " << _version_number << " patch files: "
560  << _patch_file << endl;
561  return EU_error_file_invalid;
562  }
563 
564  if (_version_number >= 1) {
565  // Get the length of the source file.
566  /*uint32_t source_file_length =*/ patch_reader.get_uint32();
567 
568  // get the MD5 of the source file.
569  _MD5_ofSource.read_stream(patch_reader);
570  }
571 
572  // get the length of the patched result file
573  _total_bytes_to_process = patch_reader.get_uint32();
574 
575  // get the MD5 of the resultant patched file
576  _MD5_ofResult.read_stream(patch_reader);
577 
578  express_cat.debug()
579  << "Patchfile::initiate() - valid patchfile" << endl;
580 
581  return EU_success;
582 }
583 
584 // PATCH FILE BUILDING MEMBER FUNCTIONS
585 
586 /**
587  *
588  */
589 uint32_t Patchfile::
590 calc_hash(const char *buffer) {
591 #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
592  HashVal hash;
593  hash.hash_buffer(buffer, _footprint_length);
594 
595  // cout << uint16_t(hash.get_value(0)) << " ";
596 
597  return uint16_t(hash.get_value(0));
598 #else
599  uint32_t hash_value = 0;
600 
601  for(int i = 0; i < (int)_footprint_length; i++) {
602  // this is probably not such a good hash. to be replaced --> TRIED MD5,
603  // was not worth it for the execution-time hit on 800Mhz PC
604  hash_value ^= uint32_t(*buffer) << ((i * 2) % Patchfile::_HASH_BITS);
605  buffer++;
606  }
607 
608  // use the bits that overflowed past the end of the hash bit range (this is
609  // intended for _HASH_BITS == 24)
610  hash_value ^= (hash_value >> Patchfile::_HASH_BITS);
611 
612  // cout << hash_value << " ";
613 
614  return hash_value & _HASH_MASK;
615 #endif
616 }
617 
618 /**
619  *
620  * The hash and link tables allow for a quick, linear search of all locations
621  * in the file that begin with a particular sequence of bytes, or "footprint."
622  *
623  * The hash table is a table of offsets into the file, with one entry for
624  * every possible footprint hash value. For a hash of a footprint, the entry
625  * at the offset of the hash value provides an initial location in the file
626  * that has a matching footprint.
627  *
628  * The link table is a large linked list of file offsets, with one entry for
629  * every byte in the file. Each offset in the link table will point to
630  * another offset that has the same footprint at the corresponding offset in
631  * the actual file. Starting with an offset taken from the hash table, one
632  * can rapidly produce a list of offsets that all have the same footprint.
633  */
634 void Patchfile::
635 build_hash_link_tables(const char *buffer_orig, uint32_t length_orig,
636  uint32_t *hash_table, uint32_t *link_table) {
637 
638  uint32_t i;
639 
640  // clear hash table
641  for(i = 0; i < _HASHTABLESIZE; i++) {
642  hash_table[i] = _NULL_VALUE;
643  }
644 
645  // clear link table
646  for(i = 0; i < length_orig; i++) {
647  link_table[i] = _NULL_VALUE;
648  }
649 
650  if(length_orig < _footprint_length) return;
651 
652  // run through original file and hash each footprint
653  for(i = 0; i < (length_orig - _footprint_length); i++) {
654 
655  uint32_t hash_value = calc_hash(&buffer_orig[i]);
656 
657  // we must now store this file index in the hash table at the offset of
658  // the hash value
659 
660  // to account for multiple file offsets with identical hash values, there
661  // is a link table with an entry for every footprint in the file. We
662  // create linked lists of offsets in the link table.
663 
664  // first, set the value in the link table for the current offset to
665  // whatever the current list head is (the value in the hash table) (note
666  // that this only works because the hash and link tables both use
667  // _NULL_VALUE to indicate a null index)
668  link_table[i] = hash_table[hash_value];
669 
670  // set the new list head; store the current offset in the hash table at
671  // the offset of the footprint's hash value
672  hash_table[hash_value] = i;
673 
674  /*
675  if (_NULL_VALUE == hash_table[hash_value]) {
676  // hash entry is empty, store this offset
677  hash_table[hash_value] = i;
678  } else {
679  // hash entry is taken, go to the link table
680  uint32_t link_offset = hash_table[hash_value];
681 
682  while (_NULL_VALUE != link_table[link_offset]) {
683  link_offset = link_table[link_offset];
684  }
685  link_table[link_offset] = i;
686  }
687  */
688  }
689 }
690 
691 /**
692  *
693  * This function calculates the length of a match between two strings of bytes
694  */
695 uint32_t Patchfile::
696 calc_match_length(const char* buf1, const char* buf2, uint32_t max_length,
697  uint32_t min_length) {
698  // early out: look ahead and sample the end of the minimum range
699  if (min_length > 2) {
700  if (min_length >= max_length)
701  return 0;
702  if (buf1[min_length] != buf2[min_length] ||
703  buf1[min_length-1] != buf2[min_length-1] ||
704  buf1[min_length-2] != buf2[min_length-2]) {
705  return 0;
706  }
707  }
708 
709  uint32_t length = 0;
710  while ((length < max_length) && (*buf1 == *buf2)) {
711  buf1++, buf2++, length++;
712  }
713  return length;
714 }
715 
716 /**
717  *
718  * This function will find the longest string in the original file that
719  * matches a string in the new file.
720  */
721 void Patchfile::
722 find_longest_match(uint32_t new_pos, uint32_t &copy_pos, uint16_t &copy_length,
723  uint32_t *hash_table, uint32_t *link_table, const char* buffer_orig,
724  uint32_t length_orig, const char* buffer_new, uint32_t length_new) {
725 
726  // set length to a safe value
727  copy_length = 0;
728 
729  // get offset of matching string (in orig file) from hash table
730  uint32_t hash_value = calc_hash(&buffer_new[new_pos]);
731 
732  // if no match, bail
733  if (_NULL_VALUE == hash_table[hash_value])
734  return;
735 
736  copy_pos = hash_table[hash_value];
737 
738  // calc match length
739  copy_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
740  &buffer_orig[copy_pos],
741  min(min((length_new - new_pos),
742  (length_orig - copy_pos)),
743  _MAX_RUN_LENGTH),
744  0);
745 
746  // run through link table, see if we find any longer matches
747  uint32_t match_offset;
748  uint16_t match_length;
749  match_offset = link_table[copy_pos];
750 
751  while (match_offset != _NULL_VALUE) {
752  match_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
753  &buffer_orig[match_offset],
754  min(min((length_new - new_pos),
755  (length_orig - match_offset)),
756  _MAX_RUN_LENGTH),
757  copy_length);
758 
759  // have we found a longer match?
760  if (match_length > copy_length) {
761  copy_pos = match_offset;
762  copy_length = match_length;
763  }
764 
765  // traverse the link table
766  match_offset = link_table[match_offset];
767  }
768 }
769 
770 /**
771  *
772  */
773 void Patchfile::
774 emit_ADD(ostream &write_stream, uint32_t length, const char* buffer) {
775  nassertv(length == (uint16_t)length); //we only write a uint16
776 
777  if (express_cat.is_spam()) {
778  express_cat.spam()
779  << "ADD: " << length << " (to " << _add_pos << ")" << endl;
780  }
781 
782  // write ADD length
783  StreamWriter patch_writer(write_stream);
784  patch_writer.add_uint16((uint16_t)length);
785 
786  // if there are bytes to add, add them
787  if (length > 0) {
788  patch_writer.append_data(buffer, (uint16_t)length);
789  }
790 
791  _add_pos += length;
792 }
793 
794 /**
795  *
796  */
797 void Patchfile::
798 emit_COPY(ostream &write_stream, uint32_t length, uint32_t copy_pos) {
799  nassertv(length == (uint16_t)length); //we only write a uint16
800 
801  int32_t offset = (int)copy_pos - (int)_last_copy_pos;
802  if (express_cat.is_spam()) {
803  express_cat.spam()
804  << "COPY: " << length << " bytes from offset " << offset
805  << " (from " << copy_pos << " to " << _add_pos << ")" << endl;
806  }
807 
808  // write COPY length
809  StreamWriter patch_writer(write_stream);
810  patch_writer.add_uint16((uint16_t)length);
811 
812  if ((uint16_t)length != 0) {
813  // write COPY offset
814  patch_writer.add_int32(offset);
815  _last_copy_pos = copy_pos + length;
816  }
817 
818  _add_pos += length;
819 }
820 
821 /**
822  * Emits an add/copy pair. If necessary, repeats the pair as needed to work
823  * around the 16-bit chunk size limit.
824  */
825 void Patchfile::
826 emit_add_and_copy(ostream &write_stream,
827  uint32_t add_length, const char *add_buffer,
828  uint32_t copy_length, uint32_t copy_pos) {
829  if (add_length == 0 && copy_length == 0) {
830  // Don't accidentally emit a termination code.
831  return;
832  }
833 
834  static const uint16_t max_write = 65535;
835  while (add_length > max_write) {
836  // Overflow. This chunk is too large to fit into a single ADD block, so
837  // we have to write it as multiple ADDs.
838  emit_ADD(write_stream, max_write, add_buffer);
839  add_buffer += max_write;
840  add_length -= max_write;
841  emit_COPY(write_stream, 0, 0);
842  }
843 
844  emit_ADD(write_stream, add_length, add_buffer);
845 
846  while (copy_length > max_write) {
847  // Overflow.
848  emit_COPY(write_stream, max_write, copy_pos);
849  copy_pos += max_write;
850  copy_length -= max_write;
851  emit_ADD(write_stream, 0, nullptr);
852  }
853 
854  emit_COPY(write_stream, copy_length, copy_pos);
855 }
856 
857 /**
858  * Potentially emits one or more add/copy pairs. The current state is saved,
859  * so as to minimize wasted emits from consecutive adds or copies.
860  */
861 void Patchfile::
862 cache_add_and_copy(ostream &write_stream,
863  uint32_t add_length, const char *add_buffer,
864  uint32_t copy_length, uint32_t copy_pos) {
865  if (add_length != 0) {
866  if (_cache_copy_length != 0) {
867  // Have to flush.
868  cache_flush(write_stream);
869  }
870  // Add the string to the current cache.
871  _cache_add_data += string(add_buffer, add_length);
872  }
873 
874  if (copy_length != 0) {
875  if (_cache_copy_length == 0) {
876  // Start a new copy phase.
877  _cache_copy_start = copy_pos;
878  _cache_copy_length = copy_length;
879 
880  } else if (_cache_copy_start + _cache_copy_length == copy_pos) {
881  // We can just tack on the copy to what we've already got.
882  _cache_copy_length += copy_length;
883 
884  } else {
885  // It's a discontinuous copy. We have to flush.
886  cache_flush(write_stream);
887  _cache_copy_start = copy_pos;
888  _cache_copy_length = copy_length;
889  }
890  }
891 }
892 
893 /**
894  * Closes any copy or add phases that are still open after a previous call to
895  * cache_add_and_copy().
896  */
897 void Patchfile::
898 cache_flush(ostream &write_stream) {
899  emit_add_and_copy(write_stream,
900  _cache_add_data.size(), _cache_add_data.data(),
901  _cache_copy_length, _cache_copy_start);
902  _cache_add_data = string();
903  _cache_copy_length = 0;
904 }
905 
906 
907 /**
908  *
909  * Writes the patchfile header.
910  */
911 void Patchfile::
912 write_header(ostream &write_stream,
913  istream &stream_orig, istream &stream_new) {
914  // prepare to write the patch file header
915 
916  // write the patch file header
917  StreamWriter patch_writer(write_stream);
918  patch_writer.add_uint32(_magic_number);
919  patch_writer.add_uint16(_current_version);
920 
921  stream_orig.seekg(0, ios::end);
922  streampos source_file_length = stream_orig.tellg();
923  patch_writer.add_uint32((uint32_t)source_file_length);
924 
925  // calc MD5 of original file
926  _MD5_ofSource.hash_stream(stream_orig);
927  // add it to the header
928  _MD5_ofSource.write_stream(patch_writer);
929 
930  if (express_cat.is_debug()) {
931  express_cat.debug()
932  << "Orig: " << _MD5_ofSource << "\n";
933  }
934 
935  stream_new.seekg(0, ios::end);
936  streampos result_file_length = stream_new.tellg();
937  patch_writer.add_uint32((uint32_t)result_file_length);
938 
939  // calc MD5 of resultant patched file
940  _MD5_ofResult.hash_stream(stream_new);
941  // add it to the header
942  _MD5_ofResult.write_stream(patch_writer);
943 
944  if (express_cat.is_debug()) {
945  express_cat.debug()
946  << " New: " << _MD5_ofResult << "\n";
947  }
948 }
949 
/**
 * Writes the patchfile terminator.  Any add/copy data still held in the
 * cache is flushed first, then a zero-length ADD followed by a zero-length
 * COPY is written.
 */
void Patchfile::
write_terminator(ostream &write_stream) {
  // Emit whatever cache_add_and_copy() is still holding.
  cache_flush(write_stream);
  // write terminator (null ADD, null COPY)
  emit_ADD(write_stream, 0, nullptr);
  emit_COPY(write_stream, 0, 0);
}
960 
961 /**
962  * Computes the patches for the entire file (if it is not a multifile) or for
963  * a single subfile (if it is)
964  *
965  * Returns true if successful, false on error.
966  */
967 bool Patchfile::
968 compute_file_patches(ostream &write_stream,
969  uint32_t offset_orig, uint32_t offset_new,
970  istream &stream_orig, istream &stream_new) {
971  // read in original file
972  stream_orig.seekg(0, ios::end);
973  nassertr(stream_orig, false);
974  uint32_t source_file_length = stream_orig.tellg();
975  if (express_cat.is_debug()) {
976  express_cat.debug()
977  << "Allocating " << source_file_length << " bytes to read orig\n";
978  }
979 
980  char *buffer_orig = (char *)PANDA_MALLOC_ARRAY(source_file_length);
981  stream_orig.seekg(0, ios::beg);
982  stream_orig.read(buffer_orig, source_file_length);
983 
984  // read in new file
985  stream_new.seekg(0, ios::end);
986  uint32_t result_file_length = stream_new.tellg();
987  nassertr(stream_new, false);
988  if (express_cat.is_debug()) {
989  express_cat.debug()
990  << "Allocating " << result_file_length << " bytes to read new\n";
991  }
992 
993  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(result_file_length);
994  stream_new.seekg(0, ios::beg);
995  stream_new.read(buffer_new, result_file_length);
996 
997  // allocate hashlink tables
998  if (_hash_table == nullptr) {
999  if (express_cat.is_debug()) {
1000  express_cat.debug()
1001  << "Allocating hashtable of size " << _HASHTABLESIZE << " * 4\n";
1002  }
1003  _hash_table = (uint32_t *)PANDA_MALLOC_ARRAY(_HASHTABLESIZE * sizeof(uint32_t));
1004  }
1005 
1006  if (express_cat.is_debug()) {
1007  express_cat.debug()
1008  << "Allocating linktable of size " << source_file_length << " * 4\n";
1009  }
1010 
1011  uint32_t *link_table = (uint32_t *)PANDA_MALLOC_ARRAY(source_file_length * sizeof(uint32_t));
1012 
1013  // build hash and link tables for original file
1014  build_hash_link_tables(buffer_orig, source_file_length, _hash_table, link_table);
1015 
1016  // run through new file
1017 
1018  uint32_t new_pos = 0;
1019  uint32_t start_pos = new_pos; // this is the position for the start of ADD operations
1020 
1021  if(((uint32_t) result_file_length) >= _footprint_length)
1022  {
1023  while (new_pos < (result_file_length - _footprint_length)) {
1024 
1025  // find best match for current position
1026  uint32_t COPY_pos;
1027  uint16_t COPY_length;
1028 
1029  find_longest_match(new_pos, COPY_pos, COPY_length, _hash_table, link_table,
1030  buffer_orig, source_file_length, buffer_new, result_file_length);
1031 
1032  // if no match or match not longer than footprint length, skip to next
1033  // byte
1034  if (COPY_length < _footprint_length) {
1035  // go to next byte
1036  new_pos++;
1037  } else {
1038  // emit ADD for all skipped bytes
1039  int num_skipped = (int)new_pos - (int)start_pos;
1040  if (express_cat.is_spam()) {
1041  express_cat.spam()
1042  << "build: num_skipped = " << num_skipped
1043  << endl;
1044  }
1045  cache_add_and_copy(write_stream, num_skipped, &buffer_new[start_pos],
1046  COPY_length, COPY_pos + offset_orig);
1047  new_pos += (uint32_t)COPY_length;
1048  start_pos = new_pos;
1049  }
1050  }
1051  }
1052 
1053  if (express_cat.is_spam()) {
1054  express_cat.spam()
1055  << "build: result_file_length = " << result_file_length
1056  << " start_pos = " << start_pos
1057  << endl;
1058  }
1059 
1060  // are there still more bytes left in the new file?
1061  if (start_pos != result_file_length) {
1062  // emit ADD for all remaining bytes
1063 
1064  uint32_t remaining_bytes = result_file_length - start_pos;
1065  cache_add_and_copy(write_stream, remaining_bytes, &buffer_new[start_pos],
1066  0, 0);
1067  start_pos += remaining_bytes;
1068  }
1069 
1070  PANDA_FREE_ARRAY(link_table);
1071 
1072  PANDA_FREE_ARRAY(buffer_orig);
1073  PANDA_FREE_ARRAY(buffer_new);
1074 
1075  return true;
1076 }
1077 
1078 /**
1079  * Computes patches for the files, knowing that they are both Panda
1080  * Multifiles. This will build patches one subfile at a time, which can
1081  * potentially be much, much faster for large Multifiles that contain many
1082  * small subfiles.
1083  */
1084 bool Patchfile::
1085 compute_mf_patches(ostream &write_stream,
1086  uint32_t offset_orig, uint32_t offset_new,
1087  istream &stream_orig, istream &stream_new) {
1088  Multifile mf_orig, mf_new;
1089  IStreamWrapper stream_origw(stream_orig);
1090  IStreamWrapper stream_neww(stream_new);
1091  if (!mf_orig.open_read(&stream_origw) ||
1092  !mf_new.open_read(&stream_neww)) {
1093  express_cat.error()
1094  << "Input multifiles appear to be corrupt.\n";
1095  return false;
1096  }
1097 
1098  if (mf_new.needs_repack()) {
1099  express_cat.error()
1100  << "Input multifiles need to be repacked.\n";
1101  return false;
1102  }
1103 
1104  // First, compute the patch for the header index.
1105 
1106  {
1107  ISubStream index_orig(&stream_origw, 0, mf_orig.get_index_end());
1108  ISubStream index_new(&stream_neww, 0, mf_new.get_index_end());
1109  if (!do_compute_patches("", "",
1110  write_stream, offset_orig, offset_new,
1111  index_orig, index_new)) {
1112  return false;
1113  }
1114  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_index_end(), false);
1115  }
1116 
1117  // Now walk through each subfile in the new multifile. If a particular
1118  // subfile exists in both source files, we compute the patches for the
1119  // subfile; for a new subfile, we trivially add it. If a subfile has been
1120  // removed, we simply don't add it (we'll never even notice this case).
1121  int new_num_subfiles = mf_new.get_num_subfiles();
1122  for (int ni = 0; ni < new_num_subfiles; ++ni) {
1123  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_subfile_internal_start(ni), false);
1124  string name = mf_new.get_subfile_name(ni);
1125  int oi = mf_orig.find_subfile(name);
1126 
1127  if (oi < 0) {
1128  // This is a newly-added subfile. Add it the hard way.
1129  express_cat.info()
1130  << "Adding subfile " << mf_new.get_subfile_name(ni) << "\n";
1131 
1132  streampos new_start = mf_new.get_subfile_internal_start(ni);
1133  size_t new_size = mf_new.get_subfile_internal_length(ni);
1134  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
1135  stream_new.seekg(new_start, ios::beg);
1136  stream_new.read(buffer_new, new_size);
1137  cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
1138  PANDA_FREE_ARRAY(buffer_new);
1139 
1140  } else {
1141  // This subfile exists in both the original and the new files. Patch
1142  // it.
1143  streampos orig_start = mf_orig.get_subfile_internal_start(oi);
1144  size_t orig_size = mf_orig.get_subfile_internal_length(oi);
1145 
1146  streampos new_start = mf_new.get_subfile_internal_start(ni);
1147  size_t new_size = mf_new.get_subfile_internal_length(ni);
1148 
1149  if (!patch_subfile(write_stream, offset_orig, offset_new,
1150  mf_new.get_subfile_name(ni),
1151  stream_origw, orig_start, orig_start + (streampos)orig_size,
1152  stream_neww, new_start, new_start + (streampos)new_size)) {
1153  return false;
1154  }
1155  }
1156  }
1157 
1158  return true;
1159 }
1160 
#ifdef HAVE_TAR
/**
 * Walks the indicated stream with libtar and records, in tar, the byte
 * ranges occupied by each subfile (header start, data start, data end, and
 * end of entry).  One extra unnamed entry is appended to cover whatever
 * bytes follow the last subfile.
 *
 * Returns true if the tar file is read successfully, false if there is an
 * error (e.g. it is not a tar file).  Note that tar may have been partially
 * filled in even when false is returned.
 */
bool Patchfile::
read_tar(TarDef &tar, istream &stream) {
  // Route libtar's I/O through our own callbacks, which operate on
  // _tar_istream rather than on a real file descriptor.
  tartype_t io_funcs;
  io_funcs.openfunc = tar_openfunc;
  io_funcs.closefunc = tar_closefunc;
  io_funcs.readfunc = tar_readfunc;
  io_funcs.writefunc = tar_writefunc;

  stream.seekg(0, ios::beg);

  // Only one stream can be read through the callbacks at a time.
  nassertr(_tar_istream == nullptr, false);
  _tar_istream = &stream;

  TAR *handle;
  if (tar_open(&handle, (char *)"dummy", &io_funcs, O_RDONLY, 0, 0) != 0) {
    _tar_istream = nullptr;
    return false;
  }

  // Step through the archive one header at a time.  The stream position
  // observed before and after skipping each entry's data tells us where that
  // entry's pieces lie within the stream.
  streampos prev_end = 0;
  int status;
  for (status = th_read(handle); status == 0; status = th_read(handle)) {
    TarSubfile entry;
    entry._name = th_get_pathname(handle);
    entry._header_start = prev_end;
    entry._data_start = stream.tellg();
    entry._data_end = entry._data_start + (streampos)th_get_size(handle);
    tar_skip_regfile(handle);
    entry._end = stream.tellg();
    tar.push_back(entry);

    prev_end = entry._end;
  }

  // Finally, record a nameless pseudo-subfile that begins where the last
  // real entry ended.  Its data range is empty and sits at the end of the
  // stream.
  TarSubfile tail;
  tail._header_start = prev_end;
  stream.clear();
  stream.seekg(0, ios::end);
  tail._data_start = stream.tellg();
  tail._data_end = tail._data_start;
  tail._end = tail._data_start;
  tar.push_back(tail);

  tar_close(handle);
  _tar_istream = nullptr;

  // NOTE(review): presumably th_read() yields 1 at a clean end-of-archive
  // and a different nonzero value on error -- confirm against libtar.
  return (status == 1);
}
#endif  // HAVE_TAR
1220 
#ifdef HAVE_TAR
/**
 * Computes patches for the files, knowing that they are both tar files.  This
 * is similar to compute_mf_patches().
 *
 * The tar indexes should have been built up by a previous call to read_tar().
 * Note that tar_orig is sorted in place by this call.
 *
 * Returns true on success; returns false if a newly-added subfile cannot be
 * read in full or if any nested patch computation fails.
 */
bool Patchfile::
compute_tar_patches(ostream &write_stream,
                    uint32_t offset_orig, uint32_t offset_new,
                    istream &stream_orig, istream &stream_new,
                    TarDef &tar_orig, TarDef &tar_new) {

  // Sort the orig list by filename, so we can quickly look up files from the
  // new list.
  tar_orig.sort();

  // However, it is important to keep the new list in its original, on-disk
  // order.

  // Walk through each subfile in the new tar file.  If a particular subfile
  // exists in both source files, we compute the patches for the subfile; for
  // a new subfile, we trivially add it.  If a subfile has been removed, we
  // simply don't add it (we'll never even notice this case).

  IStreamWrapper stream_origw(stream_orig);
  IStreamWrapper stream_neww(stream_new);

  streampos last_pos = 0;
  for (const TarSubfile &sf_new : tar_new) {
    // Entries must be contiguous: each begins where the previous one ended.
    nassertr(sf_new._header_start == last_pos, false);

    TarDef::const_iterator oi = tar_orig.find(sf_new);

    if (oi == tar_orig.end()) {
      // This is a newly-added subfile.  Add it the hard way.
      express_cat.info()
        << "Adding subfile " << sf_new._name << "\n";

      const streampos new_start = sf_new._header_start;
      const size_t new_size = sf_new._end - sf_new._header_start;
      char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);

      // Earlier reads may have left eofbit/failbit set on the stream, which
      // would turn the seek and read below into silent no-ops.
      stream_new.clear();
      stream_new.seekg(new_start, ios::beg);
      stream_new.read(buffer_new, new_size);
      if ((size_t)stream_new.gcount() != new_size) {
        // Never write an uninitialized buffer into the patch.
        express_cat.error()
          << "Failed to read subfile " << sf_new._name << " from new tar file.\n";
        PANDA_FREE_ARRAY(buffer_new);
        return false;
      }

      cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
      PANDA_FREE_ARRAY(buffer_new);

    } else {
      // This subfile exists in both the original and the new files.  Patch
      // it.
      const TarSubfile &sf_orig = (*oi);

      // We patch the header and data of the file separately, so we can
      // accurately detect nested multifiles.  The extra data at the end of
      // the file (possibly introduced by a tar file's blocking) is the
      // footer, which is also patched separately.
      if (!patch_subfile(write_stream, offset_orig, offset_new, "",
                         stream_origw, sf_orig._header_start, sf_orig._data_start,
                         stream_neww, sf_new._header_start, sf_new._data_start)) {
        return false;
      }

      if (!patch_subfile(write_stream, offset_orig, offset_new, sf_new._name,
                         stream_origw, sf_orig._data_start, sf_orig._data_end,
                         stream_neww, sf_new._data_start, sf_new._data_end)) {
        return false;
      }

      if (!patch_subfile(write_stream, offset_orig, offset_new, "",
                         stream_origw, sf_orig._data_end, sf_orig._end,
                         stream_neww, sf_new._data_end, sf_new._end)) {
        return false;
      }
    }

    last_pos = sf_new._end;
  }

  return true;
}
#endif  // HAVE_TAR
1304 
#ifdef HAVE_TAR
/**
 * The "open" callback handed to libtar by read_tar().  All arguments are
 * ignored and 0 is always returned.
 */
int Patchfile::
tar_openfunc(const char *, int, ...) {
  // The istream we read from is already open by the time libtar asks, so
  // there is no work to do here.
  return 0;
}
#endif  // HAVE_TAR
1317 
#ifdef HAVE_TAR
/**
 * The "close" callback handed to libtar by read_tar().  The argument is
 * ignored and 0 is always returned.
 */
int Patchfile::
tar_closefunc(int) {
  // Nothing was opened by tar_openfunc(), so nothing needs closing.
  return 0;
}
#endif  // HAVE_TAR
1329 
#ifdef HAVE_TAR
/**
 * The "read" callback handed to libtar by read_tar().  Reads up to nbytes
 * from _tar_istream into buffer, in place of low-level Unix I/O, and returns
 * the number of bytes the stream reports as read.  The file descriptor
 * argument is ignored.
 */
ssize_t Patchfile::
tar_readfunc(int, void *buffer, size_t nbytes) {
  // read_tar() must have installed a stream before libtar calls us.
  nassertr(_tar_istream != nullptr, 0);

  istream &source = *_tar_istream;
  source.read((char *)buffer, nbytes);
  return (ssize_t)source.gcount();
}
#endif  // HAVE_TAR
1342 
#ifdef HAVE_TAR
/**
 * The "write" callback handed to libtar by read_tar().  Writing is not
 * supported: this asserts and returns -1.  All arguments are ignored.
 */
ssize_t Patchfile::
tar_writefunc(int, const void *, size_t) {
  // libtar is used strictly for reading here, so reaching this function
  // indicates a bug.
  nassertr(false, -1);

  // Still reached when the assertion above does not return.
  return -1;
}
#endif  // HAVE_TAR
1356 
1357 /**
1358  *
1359  * This implementation uses the "greedy differencing algorithm" described in
1360  * the masters thesis "Differential Compression: A Generalized Solution for
1361  * Binary Files" by Randal C. Burns (p.13). For an original file of size M and
1362  * a new file of size N, this algorithm is O(M) in space and O(M*N) (worst-
1363  * case) in time. return false on error
1364  */
1365 bool Patchfile::
1366 build(Filename file_orig, Filename file_new, Filename patch_name) {
1367  patch_name.set_binary();
1368 
1369  // Open the original file for read
1370  pifstream stream_orig;
1371  file_orig.set_binary();
1372  if (!file_orig.open_read(stream_orig)) {
1373  express_cat.error()
1374  << "Patchfile::build() - Failed to open file: " << file_orig << endl;
1375  return false;
1376  }
1377 
1378  // Open the new file for read
1379  pifstream stream_new;
1380  file_new.set_binary();
1381  if (!file_new.open_read(stream_new)) {
1382  express_cat.error()
1383  << "Patchfile::build() - Failed to open file: " << file_new << endl;
1384  return false;
1385  }
1386 
1387  // Open patch file for write
1388  pofstream write_stream;
1389  if (!patch_name.open_write(write_stream)) {
1390  express_cat.error()
1391  << "Patchfile::build() - Failed to open file: " << patch_name << endl;
1392  return false;
1393  }
1394 
1395  _last_copy_pos = 0;
1396  _add_pos = 0;
1397  _cache_add_data = string();
1398  _cache_copy_start = 0;
1399  _cache_copy_length = 0;
1400 
1401  write_header(write_stream, stream_orig, stream_new);
1402 
1403  if (!do_compute_patches(file_orig, file_new,
1404  write_stream, 0, 0,
1405  stream_orig, stream_new)) {
1406  return false;
1407  }
1408 
1409  write_terminator(write_stream);
1410 
1411  if (express_cat.is_debug()) {
1412  express_cat.debug()
1413  << "Patch file will generate " << _add_pos << "-byte file.\n";
1414  }
1415 
1416 #ifndef NDEBUG
1417  {
1418  // Make sure the resulting file would be the right size.
1419  stream_new.seekg(0, ios::end);
1420  streampos result_file_length = stream_new.tellg();
1421  nassertr(_add_pos == result_file_length, false);
1422  }
1423 #endif // NDEBUG
1424 
1425  return (_last_copy_pos != 0);
1426 }
1427 
1428 /**
1429  * Computes the patches for the indicated A to B files, or subfiles. Checks
1430  * for multifiles or tar files before falling back to whole-file patching.
1431  */
1432 bool Patchfile::
1433 do_compute_patches(const Filename &file_orig, const Filename &file_new,
1434  ostream &write_stream,
1435  uint32_t offset_orig, uint32_t offset_new,
1436  istream &stream_orig, istream &stream_new) {
1437  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new, false);
1438 
1439  // Check whether our input files are Panda multifiles or tar files.
1440  bool is_multifile = false;
1441 #ifdef HAVE_TAR
1442  bool is_tarfile = false;
1443  TarDef tar_orig, tar_new;
1444 #endif // HAVE_TAR
1445 
1446  if (_allow_multifile) {
1447  if (strstr(file_orig.get_basename().c_str(), ".mf") != nullptr ||
1448  strstr(file_new.get_basename().c_str(), ".mf") != nullptr) {
1449  // Read the first n bytes of both files for the Multifile magic number.
1450  string magic_number = Multifile::get_magic_number();
1451  char *buffer = (char *)PANDA_MALLOC_ARRAY(magic_number.size());
1452  stream_orig.seekg(0, ios::beg);
1453  stream_orig.read(buffer, magic_number.size());
1454 
1455  if (stream_orig.gcount() == (int)magic_number.size() &&
1456  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1457  stream_new.seekg(0, ios::beg);
1458  stream_new.read(buffer, magic_number.size());
1459  if (stream_new.gcount() == (int)magic_number.size() &&
1460  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1461  is_multifile = true;
1462  }
1463  }
1464  PANDA_FREE_ARRAY(buffer);
1465  }
1466 #ifdef HAVE_TAR
1467  if (strstr(file_orig.get_basename().c_str(), ".tar") != nullptr ||
1468  strstr(file_new.get_basename().c_str(), ".tar") != nullptr) {
1469  if (read_tar(tar_orig, stream_orig) &&
1470  read_tar(tar_new, stream_new)) {
1471  is_tarfile = true;
1472  }
1473  }
1474 #endif // HAVE_TAR
1475  }
1476 
1477  if (is_multifile) {
1478  if (express_cat.is_debug()) {
1479  express_cat.debug()
1480  << file_orig.get_basename() << " appears to be a Panda Multifile.\n";
1481  }
1482  if (!compute_mf_patches(write_stream, offset_orig, offset_new,
1483  stream_orig, stream_new)) {
1484  return false;
1485  }
1486 #ifdef HAVE_TAR
1487  } else if (is_tarfile) {
1488  if (express_cat.is_debug()) {
1489  express_cat.debug()
1490  << file_orig.get_basename() << " appears to be a tar file.\n";
1491  }
1492  if (!compute_tar_patches(write_stream, offset_orig, offset_new,
1493  stream_orig, stream_new, tar_orig, tar_new)) {
1494  return false;
1495  }
1496 #endif // HAVE_TAR
1497  } else {
1498  if (express_cat.is_debug()) {
1499  express_cat.debug()
1500  << file_orig.get_basename() << " is not a multifile.\n";
1501  }
1502  if (!compute_file_patches(write_stream, offset_orig, offset_new,
1503  stream_orig, stream_new)) {
1504  return false;
1505  }
1506  }
1507 
1508  return true;
1509 }
1510 
1511 /**
1512  * Generates patches for a nested subfile of a Panda Multifile or a tar file.
1513  */
1514 bool Patchfile::
1515 patch_subfile(ostream &write_stream,
1516  uint32_t offset_orig, uint32_t offset_new,
1517  const Filename &filename,
1518  IStreamWrapper &stream_orig, streampos orig_start, streampos orig_end,
1519  IStreamWrapper &stream_new, streampos new_start, streampos new_end) {
1520  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)new_start, false);
1521 
1522  size_t new_size = new_end - new_start;
1523  size_t orig_size = orig_end - orig_start;
1524 
1525  ISubStream subfile_orig(&stream_orig, orig_start, orig_end);
1526  ISubStream subfile_new(&stream_new, new_start, new_end);
1527 
1528  bool is_unchanged = false;
1529  if (orig_size == new_size) {
1530  HashVal hash_orig, hash_new;
1531  hash_orig.hash_stream(subfile_orig);
1532  hash_new.hash_stream(subfile_new);
1533 
1534  if (hash_orig == hash_new) {
1535  // Actually, the subfile is unchanged; just emit it.
1536  is_unchanged = true;
1537  }
1538  }
1539 
1540  if (is_unchanged) {
1541  if (express_cat.is_debug() && !filename.empty()) {
1542  express_cat.debug()
1543  << "Keeping subfile " << filename << "\n";
1544  }
1545  cache_add_and_copy(write_stream, 0, nullptr,
1546  orig_size, offset_orig + orig_start);
1547 
1548  } else {
1549  if (!filename.empty()) {
1550  express_cat.info()
1551  << "Patching subfile " << filename << "\n";
1552  }
1553 
1554  if (!do_compute_patches(filename, filename, write_stream,
1555  offset_orig + orig_start, offset_new + new_start,
1556  subfile_orig, subfile_new)) {
1557  return false;
1558  }
1559  }
1560 
1561  return true;
1562 }
1563 
1564 #endif // HAVE_OPENSSL
A StreamWriter object is used to write sequential binary data directly to an ostream.
Definition: streamWriter.h:29
bool open_write(std::ofstream &stream, bool truncate=true) const
Opens the indicated ofstream for writing the file, if possible.
Definition: filename.cxx:1899
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
size_t get_subfile_internal_length(int index) const
Returns the number of bytes the indicated subfile consumes within the archive.
Definition: multifile.cxx:1580
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A hierarchy of directories and files that appears to be one continuous file system, even though the files may originate from several different sources that may not be related to the actual OS's file system.
bool needs_repack() const
Returns true if the Multifile index is suboptimal and should be repacked.
Definition: multifile.I:55
std::istream * open_read_file(const Filename &filename, bool auto_unwrap) const
Convenience function; returns a newly allocated istream if the file exists and can be read...
void set_binary()
Indicates that the filename represents a binary file.
Definition: filename.I:414
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
bool open_read(std::ifstream &stream) const
Opens the indicated ifstream for reading the file, if possible.
Definition: filename.cxx:1863
std::streampos get_index_end() const
Returns the first byte that is guaranteed to follow any index byte already written to disk in the Mul...
Definition: multifile.cxx:1555
Stores a 128-bit value that represents the hashed contents (typically MD5) of a file or buffer...
Definition: hashVal.h:31
static void close_read_file(std::istream *stream)
Closes a file opened by a previous call to open_read_file().
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
Definition: buffer.h:24
An istream object that presents a subwindow into another istream.
Definition: subStream.h:30
int find_subfile(const std::string &subfile_name) const
Returns the index of the subfile with the indicated name, or -1 if the named subfile is not within th...
Definition: multifile.cxx:1367
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:39
get_subfile_name
Returns the name of the nth subfile.
Definition: multifile.h:117
This class provides a locking wrapper around an arbitrary istream pointer.
Definition: streamWrapper.h:59
static VirtualFileSystem * get_global_ptr()
Returns the default global VirtualFileSystem.
get_magic_number
Returns a string with the first n bytes written to a Multifile, to identify it as a Multifile...
Definition: multifile.h:139
std::string get_basename() const
Returns the basename part of the filename.
Definition: filename.I:367
get_num_subfiles
Returns the number of subfiles within the Multifile.
Definition: multifile.h:117
A file that contains a set of files.
Definition: multifile.h:37
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static Filename temporary(const std::string &dirname, const std::string &prefix, const std::string &suffix=std::string(), Type type=T_general)
Generates a temporary filename within the indicated directory, using the indicated prefix...
Definition: filename.cxx:424
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A class to read sequential binary data directly from an istream.
Definition: streamReader.h:28
std::streampos get_subfile_internal_start(int index) const
Returns the starting byte position within the Multifile at which the indicated subfile begins...
Definition: multifile.cxx:1567
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.