Panda3D
patchfile.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file patchfile.cxx
10  * @author darren, mike
11  * @date 1997-01-09
12  */
13 
14 #include "pandabase.h"
15 
16 #ifdef HAVE_OPENSSL
17 
18 #include "config_express.h"
19 #include "error_utils.h"
20 #include "patchfile.h"
21 #include "streamReader.h"
22 #include "streamWriter.h"
23 #include "multifile.h"
24 #include "hashVal.h"
25 #include "virtualFileSystem.h"
26 
27 #include <string.h> // for strstr
28 
29 #ifdef HAVE_TAR
30 #include <libtar.h>
31 #include <fcntl.h> // for O_RDONLY
32 #endif // HAVE_TAR
33 
34 #ifdef HAVE_TAR
35 std::istream *Patchfile::_tar_istream = nullptr;
36 #endif // HAVE_TAR
37 
38 using std::endl;
39 using std::ios;
40 using std::istream;
41 using std::min;
42 using std::ostream;
43 using std::streampos;
44 using std::string;
45 
// Defining USE_MD5_FOR_HASHTABLE_INDEX_VALUES actually slows things down, so
// it is left disabled:
// #define USE_MD5_FOR_HASHTABLE_INDEX_VALUES
48 
/*
 * Patch File Format
 * IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE INSTALLER
 *
 * [ HEADER ]
 *    4 bytes  0xfeebfaac ("magic number")
 *             (older patch files have a magic number 0xfeebfaab,
 *             indicating they are version number 0.)
 *    2 bytes  version number (if magic number == 0xfeebfaac)
 *    4 bytes  length of starting file (if version >= 1)
 *   16 bytes  MD5 of starting file (if version >= 1)
 *    4 bytes  length of resulting patched file
 *   16 bytes  MD5 of resultant patched file
 *
 * Note that MD5 hashes are written in the order observed by
 * HashVal::read_stream() and HashVal::write_stream(), which is not the
 * normal linear order.  (Each group of four bytes is reversed.)
 */
60 
// Total header sizes in bytes, summed field by field in on-disk order.
const int _v0_header_length =
  4 +    // magic number
  4 +    // length of the patched result
  16;    // MD5 of the patched result
const int _v1_header_length =
  4 +    // magic number
  2 +    // version number
  4 +    // length of the starting file
  16 +   // MD5 of the starting file
  4 +    // length of the patched result
  16;    // MD5 of the patched result
/*
 * [ ADDCOPY pairs; repeated N times ]
 *    2 bytes  AL = ADD length
 *   AL bytes  bytes to add
 *    2 bytes  CL = COPY length
 *    4 bytes  offset of data to copy from original file, if CL != 0.
 *             If version >= 2, offset is relative to end of previous copy
 *             block; if version < 2, offset is relative to beginning of
 *             file.
 *
 * [ TERMINATOR ]
 *    2 bytes  zero-length ADD
 *    2 bytes  zero-length COPY
 */
70 
71 // Defines
72 const uint32_t Patchfile::_v0_magic_number = 0xfeebfaab;
73 const uint32_t Patchfile::_magic_number = 0xfeebfaac;
74 
75 // Created version 1 on 11202 to store length and MD5 of original file. To
76 // version 2 on 11202 to store copy offsets as relative.
77 const uint16_t Patchfile::_current_version = 2;
78 
79 const uint32_t Patchfile::_HASH_BITS = 24;
80 const uint32_t Patchfile::_HASHTABLESIZE = uint32_t(1) << Patchfile::_HASH_BITS;
81 const uint32_t Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
82 const uint32_t Patchfile::_NULL_VALUE = uint32_t(0) - 1;
83 const uint32_t Patchfile::_MAX_RUN_LENGTH = (uint32_t(1) << 16) - 1;
84 const uint32_t Patchfile::_HASH_MASK = (uint32_t(1) << Patchfile::_HASH_BITS) - 1;
85 
86 /**
87  * Create a patch file and initializes internal data
88  */
89 Patchfile::
90 Patchfile() {
91  PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
92  init(buffer);
93 }
94 
95 /**
96  * Create patch file with buffer to patch
97  */
98 Patchfile::
99 Patchfile(PT(Buffer) buffer) {
100  init(buffer);
101 }
102 
103 /**
104  *
105  */
106 void Patchfile::
107 init(PT(Buffer) buffer) {
108  _rename_output_to_orig = false;
109  _delete_patchfile = false;
110  _hash_table = nullptr;
111  _initiated = false;
112  nassertv(!buffer.is_null());
113  _buffer = buffer;
114 
115  _version_number = 0;
116  _allow_multifile = true;
117 
118  _patch_stream = nullptr;
119  _origfile_stream = nullptr;
120 
121  reset_footprint_length();
122 }
123 
124 /**
125  *
126  */
127 Patchfile::
128 ~Patchfile() {
129  if (_hash_table != nullptr) {
130  PANDA_FREE_ARRAY(_hash_table);
131  }
132 
133  if (_initiated) {
134  cleanup();
135  }
136 
137  nassertv(_patch_stream == nullptr);
138  nassertv(_origfile_stream == nullptr);
139 }
140 
141 /**
142  * Closes and clean up internal data structures
143  */
144 void Patchfile::
145 cleanup() {
146  if (!_initiated) {
147  express_cat.error()
148  << "Patchfile::cleanup() - Patching has not been initiated"
149  << endl;
150  return;
151  }
152 
153  // close files
155  if (_origfile_stream != nullptr) {
156  vfs->close_read_file(_origfile_stream);
157  _origfile_stream = nullptr;
158  }
159  if (_patch_stream != nullptr) {
160  vfs->close_read_file(_patch_stream);
161  _patch_stream = nullptr;
162  }
163  _write_stream.close();
164 
165  _initiated = false;
166 }
167 
168 // PATCH FILE APPLY MEMBER FUNCTIONS
169 
170 // NOTE: this patch-application functionality unfortunately has to be
171 // duplicated in the Installer. It is contained in the file
172 // installerApplyPatch.cxx PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY
173 // OF THIS LOGIC CHANGES! (i.e. if the patch file format changes)
174 
175 /**
176  * Set up to apply the patch to the file (original file and patch are
177  * destroyed in the process).
178  */
179 int Patchfile::
180 initiate(const Filename &patch_file, const Filename &file) {
181  int result = initiate(patch_file, file, Filename::temporary("", "patch_"));
182  _rename_output_to_orig = true;
183  _delete_patchfile = !keep_temporary_files;
184  return result;
185 }
186 
187 /**
188  * Set up to apply the patch to the file. In this form, neither the original
189  * file nor the patch file are destroyed.
190  */
191 int Patchfile::
192 initiate(const Filename &patch_file, const Filename &orig_file,
193  const Filename &target_file) {
194  if (_initiated) {
195  express_cat.error()
196  << "Patchfile::initiate() - Patching has already been initiated"
197  << endl;
198  return EU_error_abort;
199  }
200 
201  nassertr(orig_file != target_file, EU_error_abort);
202 
204 
205  // Open the original file for read
206  nassertr(_origfile_stream == nullptr, EU_error_abort);
207  _orig_file = orig_file;
208  _orig_file.set_binary();
209  _origfile_stream = vfs->open_read_file(_orig_file, false);
210  if (_origfile_stream == nullptr) {
211  express_cat.error()
212  << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
213  return get_write_error();
214  }
215 
216  // Open the temp file for write
217  _output_file = target_file;
218  _output_file.set_binary();
219  if (!_output_file.open_write(_write_stream)) {
220  express_cat.error()
221  << "Patchfile::initiate() - Failed to open file: " << _output_file << endl;
222  return get_write_error();
223  }
224 
225  if (express_cat.is_debug()) {
226  express_cat.debug()
227  << "Patchfile using output file " << _output_file << "\n";
228  }
229 
230  int result = internal_read_header(patch_file);
231  _total_bytes_processed = 0;
232 
233  _initiated = true;
234  return result;
235 }
236 
237 /**
238  * Opens the patch file for reading, and gets the header information from the
239  * file but does not begin to do any real work. This can be used to query the
240  * data stored in the patch.
241  */
242 int Patchfile::
243 read_header(const Filename &patch_file) {
244  if (_initiated) {
245  express_cat.error()
246  << "Patchfile::initiate() - Patching has already been initiated"
247  << endl;
248  return EU_error_abort;
249  }
250 
251  int result = internal_read_header(patch_file);
252  if (_patch_stream != nullptr) {
254  vfs->close_read_file(_patch_stream);
255  _patch_stream = nullptr;
256  }
257  return result;
258 }
259 
260 /**
261  * Perform one buffer's worth of patching Returns EU_ok while patching Returns
262  * EU_success when done If error happens will return one of: EU_error_abort :
263  * Patching has not been initiated EU_error_file_invalid : file is corrupted
264  * EU_error_invalid_checksum : incompatible patch file
265  * EU_error_write_file_rename : could not rename file
266  */
267 int Patchfile::
268 run() {
269  // Now patch the file using the given buffer
270  int buflen;
271  int bytes_read;
272  uint16_t ADD_length;
273  uint16_t COPY_length;
274  int32_t COPY_offset;
275 
276  if (_initiated == false) {
277  express_cat.error()
278  << "Patchfile::run() - Patching has not been initiated"
279  << endl;
280  return EU_error_abort;
281  }
282 
283  nassertr(_patch_stream != nullptr, EU_error_abort);
284  nassertr(_origfile_stream != nullptr, EU_error_abort);
285  StreamReader patch_reader(*_patch_stream);
286 
287  buflen = _buffer->get_length();
288  bytes_read = 0;
289 
290  while (bytes_read < buflen) {
291  // read # of ADD bytes
292  nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
293  ADD_length = patch_reader.get_uint16();
294  if (_patch_stream->fail()) {
295  express_cat.error()
296  << "Truncated patch file.\n";
297  return EU_error_file_invalid;
298  }
299 
300  bytes_read += (int)ADD_length;
301  _total_bytes_processed += (int)ADD_length;
302  if (_total_bytes_processed > _total_bytes_to_process) {
303  express_cat.error()
304  << "Runaway patch file.\n";
305  return EU_error_file_invalid;
306  }
307 
308  // if there are bytes to add, read them from patch file and write them to
309  // output
310  if (express_cat.is_spam() && ADD_length != 0) {
311  express_cat.spam()
312  << "ADD: " << ADD_length << " (to "
313  << _write_stream.tellp() << ")" << endl;
314  }
315 
316  uint32_t bytes_left = (uint32_t)ADD_length;
317  while (bytes_left > 0) {
318  uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
319  _patch_stream->read(_buffer->_buffer, bytes_this_time);
320  if (_patch_stream->fail()) {
321  express_cat.error()
322  << "Truncated patch file.\n";
323  return EU_error_file_invalid;
324  }
325  _write_stream.write(_buffer->_buffer, bytes_this_time);
326  bytes_left -= bytes_this_time;
327  }
328 
329  // read # of COPY bytes
330  nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
331  COPY_length = patch_reader.get_uint16();
332  if (_patch_stream->fail()) {
333  express_cat.error()
334  << "Truncated patch file.\n";
335  return EU_error_file_invalid;
336  }
337 
338  bytes_read += (int)COPY_length;
339  _total_bytes_processed += (int)COPY_length;
340  if (_total_bytes_processed > _total_bytes_to_process) {
341  express_cat.error()
342  << "Runaway patch file.\n";
343  return EU_error_file_invalid;
344  }
345 
346  // if there are bytes to copy, read them from original file and write them
347  // to output
348  if (0 != COPY_length) {
349  // read copy offset
350  nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
351  COPY_offset = patch_reader.get_int32();
352  if (_patch_stream->fail()) {
353  express_cat.error()
354  << "Truncated patch file.\n";
355  return EU_error_file_invalid;
356  }
357 
358  // seek to the copy source pos
359  if (_version_number < 2) {
360  _origfile_stream->seekg(COPY_offset, ios::beg);
361  } else {
362  _origfile_stream->seekg(COPY_offset, ios::cur);
363  }
364  if (_origfile_stream->fail()) {
365  express_cat.error()
366  << "Invalid copy offset in patch file.\n";
367  return EU_error_file_invalid;
368  }
369 
370  if (express_cat.is_spam()) {
371  express_cat.spam()
372  << "COPY: " << COPY_length << " bytes from offset "
373  << COPY_offset << " (from " << _origfile_stream->tellg()
374  << " to " << _write_stream.tellp() << ")"
375  << endl;
376  }
377 
378  // read the copy bytes from original file and write them to output
379  uint32_t bytes_left = (uint32_t)COPY_length;
380 
381  while (bytes_left > 0) {
382  uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
383  _origfile_stream->read(_buffer->_buffer, bytes_this_time);
384  if (_origfile_stream->fail()) {
385  express_cat.error()
386  << "Invalid copy length in patch file.\n";
387  return EU_error_file_invalid;
388  }
389  _write_stream.write(_buffer->_buffer, bytes_this_time);
390  bytes_left -= bytes_this_time;
391  }
392  }
393 
394  // if we got a pair of zero-length ADD and COPY blocks, we're done
395  if ((0 == ADD_length) && (0 == COPY_length)) {
396  cleanup();
397 
398  if (express_cat.is_debug()) {
399  express_cat.debug()
400  // << "result file = " << _result_file_length
401  << " total bytes = " << _total_bytes_processed << endl;
402  }
403 
404  // check the MD5 from the patch file against the newly patched file
405  {
406  HashVal MD5_actual;
407  MD5_actual.hash_file(_output_file);
408  if (_MD5_ofResult != MD5_actual) {
409  // Whoops, patching screwed up somehow.
410  if (_origfile_stream != nullptr) {
412  vfs->close_read_file(_origfile_stream);
413  _origfile_stream = nullptr;
414  }
415  _write_stream.close();
416 
417  express_cat.info()
418  << "Patching produced incorrect checksum. Got:\n"
419  << " " << MD5_actual
420  << "\nExpected:\n"
421  << " " << _MD5_ofResult
422  << "\n";
423 
424  // This is a fine time to double-check the starting checksum.
425  if (!has_source_hash()) {
426  express_cat.info()
427  << "No source hash in patch file to verify.\n";
428  } else {
429  HashVal MD5_orig;
430  MD5_orig.hash_file(_orig_file);
431  if (MD5_orig != get_source_hash()) {
432  express_cat.info()
433  << "Started from incorrect source file. Got:\n"
434  << " " << MD5_orig
435  << "\nExpected:\n"
436  << " " << get_source_hash()
437  << "\n";
438  } else {
439  express_cat.info()
440  << "Started from correct source file:\n"
441  << " " << MD5_orig
442  << "\n";
443  }
444  }
445 
446  // delete the temp file and the patch file
447  if (_rename_output_to_orig) {
448  _output_file.unlink();
449  }
450  if (_delete_patchfile) {
451  _patch_file.unlink();
452  }
453  // return "invalid checksum"
454  return EU_error_invalid_checksum;
455  }
456  }
457 
458  // delete the patch file
459  if (_delete_patchfile) {
460  _patch_file.unlink();
461  }
462 
463  // rename the temp file to the original file name
464  if (_rename_output_to_orig) {
465  _orig_file.unlink();
466  if (!_output_file.rename_to(_orig_file)) {
467  express_cat.error()
468  << "Patchfile::run() failed to rename temp file to: " << _orig_file
469  << endl;
470  return EU_error_write_file_rename;
471  }
472  }
473 
474  return EU_success;
475  }
476  }
477 
478  return EU_ok;
479 }
480 
481 /**
482  * Patches the entire file in one call returns true on success and false on
483  * error
484  *
485  * This version will delete the patch file and overwrite the original file.
486  */
487 bool Patchfile::
488 apply(Filename &patch_file, Filename &file) {
489  int ret = initiate(patch_file, file);
490  if (ret < 0)
491  return false;
492  for (;;) {
493  ret = run();
494  if (ret == EU_success)
495  return true;
496  if (ret < 0)
497  return false;
498  }
499  return false;
500 }
501 
502 /**
503  * Patches the entire file in one call returns true on success and false on
504  * error
505  *
506  * This version will not delete any files.
507  */
508 bool Patchfile::
509 apply(Filename &patch_file, Filename &orig_file, const Filename &target_file) {
510  int ret = initiate(patch_file, orig_file, target_file);
511  if (ret < 0)
512  return false;
513  for (;;) {
514  ret = run();
515  if (ret == EU_success)
516  return true;
517  if (ret < 0)
518  return false;
519  }
520  return false;
521 }
522 
523 
524 /**
525  * Reads the header and leaves the patch file open.
526  */
527 int Patchfile::
528 internal_read_header(const Filename &patch_file) {
529  // Open the patch file for read
531  nassertr(_patch_stream == nullptr, EU_error_abort);
532  _patch_file = patch_file;
533  _patch_file.set_binary();
534  _patch_stream = vfs->open_read_file(_patch_file, true);
535  if (_patch_stream == nullptr) {
536  express_cat.error()
537  << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
538  return get_write_error();
539  }
540 
541  // read header, make sure the patch file is valid
542  StreamReader patch_reader(*_patch_stream);
543 
544  // check the magic number
545  nassertr(_buffer->get_length() >= _v0_header_length, false);
546  uint32_t magic_number = patch_reader.get_uint32();
547  if (magic_number != _magic_number && magic_number != _v0_magic_number) {
548  express_cat.error()
549  << "Invalid patch file: " << _patch_file << endl;
550  return EU_error_file_invalid;
551  }
552 
553  _version_number = 0;
554  if (magic_number != _v0_magic_number) {
555  _version_number = patch_reader.get_uint16();
556  }
557  if (_version_number > _current_version) {
558  express_cat.error()
559  << "Can't read version " << _version_number << " patch files: "
560  << _patch_file << endl;
561  return EU_error_file_invalid;
562  }
563 
564  if (_version_number >= 1) {
565  // Get the length of the source file.
566  /*uint32_t source_file_length =*/ patch_reader.get_uint32();
567 
568  // get the MD5 of the source file.
569  _MD5_ofSource.read_stream(patch_reader);
570  }
571 
572  // get the length of the patched result file
573  _total_bytes_to_process = patch_reader.get_uint32();
574 
575  // get the MD5 of the resultant patched file
576  _MD5_ofResult.read_stream(patch_reader);
577 
578  express_cat.debug()
579  << "Patchfile::initiate() - valid patchfile" << endl;
580 
581  return EU_success;
582 }
583 
584 // PATCH FILE BUILDING MEMBER FUNCTIONS
585 
586 /**
587  *
588  */
589 uint32_t Patchfile::
590 calc_hash(const char *buffer) {
591 #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
592  HashVal hash;
593  hash.hash_buffer(buffer, _footprint_length);
594 
595  // cout << uint16_t(hash.get_value(0)) << " ";
596 
597  return uint16_t(hash.get_value(0));
598 #else
599  uint32_t hash_value = 0;
600 
601  for(int i = 0; i < (int)_footprint_length; i++) {
602  // this is probably not such a good hash. to be replaced --> TRIED MD5,
603  // was not worth it for the execution-time hit on 800Mhz PC
604  hash_value ^= uint32_t(*buffer) << ((i * 2) % Patchfile::_HASH_BITS);
605  buffer++;
606  }
607 
608  // use the bits that overflowed past the end of the hash bit range (this is
609  // intended for _HASH_BITS == 24)
610  hash_value ^= (hash_value >> Patchfile::_HASH_BITS);
611 
612  // cout << hash_value << " ";
613 
614  return hash_value & _HASH_MASK;
615 #endif
616 }
617 
618 /**
619  *
620  * The hash and link tables allow for a quick, linear search of all locations
621  * in the file that begin with a particular sequence of bytes, or "footprint."
622  *
623  * The hash table is a table of offsets into the file, with one entry for
624  * every possible footprint hash value. For a hash of a footprint, the entry
625  * at the offset of the hash value provides an initial location in the file
626  * that has a matching footprint.
627  *
628  * The link table is a large linked list of file offsets, with one entry for
629  * every byte in the file. Each offset in the link table will point to
630  * another offset that has the same footprint at the corresponding offset in
631  * the actual file. Starting with an offset taken from the hash table, one
632  * can rapidly produce a list of offsets that all have the same footprint.
633  */
634 void Patchfile::
635 build_hash_link_tables(const char *buffer_orig, uint32_t length_orig,
636  uint32_t *hash_table, uint32_t *link_table) {
637 
638  uint32_t i;
639 
640  // clear hash table
641  for(i = 0; i < _HASHTABLESIZE; i++) {
642  hash_table[i] = _NULL_VALUE;
643  }
644 
645  // clear link table
646  for(i = 0; i < length_orig; i++) {
647  link_table[i] = _NULL_VALUE;
648  }
649 
650  if(length_orig < _footprint_length) return;
651 
652  // run through original file and hash each footprint
653  for(i = 0; i < (length_orig - _footprint_length); i++) {
654 
655  uint32_t hash_value = calc_hash(&buffer_orig[i]);
656 
657  // we must now store this file index in the hash table at the offset of
658  // the hash value
659 
660  // to account for multiple file offsets with identical hash values, there
661  // is a link table with an entry for every footprint in the file. We
662  // create linked lists of offsets in the link table.
663 
664  // first, set the value in the link table for the current offset to
665  // whatever the current list head is (the value in the hash table) (note
666  // that this only works because the hash and link tables both use
667  // _NULL_VALUE to indicate a null index)
668  link_table[i] = hash_table[hash_value];
669 
670  // set the new list head; store the current offset in the hash table at
671  // the offset of the footprint's hash value
672  hash_table[hash_value] = i;
673 
674  /*
675  if (_NULL_VALUE == hash_table[hash_value]) {
676  // hash entry is empty, store this offset
677  hash_table[hash_value] = i;
678  } else {
679  // hash entry is taken, go to the link table
680  uint32_t link_offset = hash_table[hash_value];
681 
682  while (_NULL_VALUE != link_table[link_offset]) {
683  link_offset = link_table[link_offset];
684  }
685  link_table[link_offset] = i;
686  }
687  */
688  }
689 }
690 
691 /**
692  *
693  * This function calculates the length of a match between two strings of bytes
694  */
695 uint32_t Patchfile::
696 calc_match_length(const char* buf1, const char* buf2, uint32_t max_length,
697  uint32_t min_length) {
698  // early out: look ahead and sample the end of the minimum range
699  if (min_length > 2) {
700  if (min_length >= max_length)
701  return 0;
702  if (buf1[min_length] != buf2[min_length] ||
703  buf1[min_length-1] != buf2[min_length-1] ||
704  buf1[min_length-2] != buf2[min_length-2]) {
705  return 0;
706  }
707  }
708 
709  uint32_t length = 0;
710  while ((length < max_length) && (*buf1 == *buf2)) {
711  buf1++, buf2++, length++;
712  }
713  return length;
714 }
715 
716 /**
717  *
718  * This function will find the longest string in the original file that
719  * matches a string in the new file.
720  */
721 void Patchfile::
722 find_longest_match(uint32_t new_pos, uint32_t &copy_pos, uint16_t &copy_length,
723  uint32_t *hash_table, uint32_t *link_table, const char* buffer_orig,
724  uint32_t length_orig, const char* buffer_new, uint32_t length_new) {
725 
726  // set length to a safe value
727  copy_length = 0;
728 
729  // get offset of matching string (in orig file) from hash table
730  uint32_t hash_value = calc_hash(&buffer_new[new_pos]);
731 
732  // if no match, bail
733  if (_NULL_VALUE == hash_table[hash_value])
734  return;
735 
736  copy_pos = hash_table[hash_value];
737 
738  // calc match length
739  copy_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
740  &buffer_orig[copy_pos],
741  min(min((length_new - new_pos),
742  (length_orig - copy_pos)),
743  _MAX_RUN_LENGTH),
744  0);
745 
746  // run through link table, see if we find any longer matches
747  uint32_t match_offset;
748  uint16_t match_length;
749  match_offset = link_table[copy_pos];
750 
751  while (match_offset != _NULL_VALUE) {
752  match_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
753  &buffer_orig[match_offset],
754  min(min((length_new - new_pos),
755  (length_orig - match_offset)),
756  _MAX_RUN_LENGTH),
757  copy_length);
758 
759  // have we found a longer match?
760  if (match_length > copy_length) {
761  copy_pos = match_offset;
762  copy_length = match_length;
763  }
764 
765  // traverse the link table
766  match_offset = link_table[match_offset];
767  }
768 }
769 
770 /**
771  *
772  */
773 void Patchfile::
774 emit_ADD(ostream &write_stream, uint32_t length, const char* buffer) {
775  nassertv(length == (uint16_t)length); //we only write a uint16
776 
777  if (express_cat.is_spam()) {
778  express_cat.spam()
779  << "ADD: " << length << " (to " << _add_pos << ")" << endl;
780  }
781 
782  // write ADD length
783  StreamWriter patch_writer(write_stream);
784  patch_writer.add_uint16((uint16_t)length);
785 
786  // if there are bytes to add, add them
787  if (length > 0) {
788  patch_writer.append_data(buffer, (uint16_t)length);
789  }
790 
791  _add_pos += length;
792 }
793 
794 /**
795  *
796  */
797 void Patchfile::
798 emit_COPY(ostream &write_stream, uint32_t length, uint32_t copy_pos) {
799  nassertv(length == (uint16_t)length); //we only write a uint16
800 
801  int32_t offset = (int)copy_pos - (int)_last_copy_pos;
802  if (express_cat.is_spam()) {
803  express_cat.spam()
804  << "COPY: " << length << " bytes from offset " << offset
805  << " (from " << copy_pos << " to " << _add_pos << ")" << endl;
806  }
807 
808  // write COPY length
809  StreamWriter patch_writer(write_stream);
810  patch_writer.add_uint16((uint16_t)length);
811 
812  if ((uint16_t)length != 0) {
813  // write COPY offset
814  patch_writer.add_int32(offset);
815  _last_copy_pos = copy_pos + length;
816  }
817 
818  _add_pos += length;
819 }
820 
821 /**
822  * Emits an add/copy pair. If necessary, repeats the pair as needed to work
823  * around the 16-bit chunk size limit.
824  */
825 void Patchfile::
826 emit_add_and_copy(ostream &write_stream,
827  uint32_t add_length, const char *add_buffer,
828  uint32_t copy_length, uint32_t copy_pos) {
829  if (add_length == 0 && copy_length == 0) {
830  // Don't accidentally emit a termination code.
831  return;
832  }
833 
834  static const uint16_t max_write = 65535;
835  while (add_length > max_write) {
836  // Overflow. This chunk is too large to fit into a single ADD block, so
837  // we have to write it as multiple ADDs.
838  emit_ADD(write_stream, max_write, add_buffer);
839  add_buffer += max_write;
840  add_length -= max_write;
841  emit_COPY(write_stream, 0, 0);
842  }
843 
844  emit_ADD(write_stream, add_length, add_buffer);
845 
846  while (copy_length > max_write) {
847  // Overflow.
848  emit_COPY(write_stream, max_write, copy_pos);
849  copy_pos += max_write;
850  copy_length -= max_write;
851  emit_ADD(write_stream, 0, nullptr);
852  }
853 
854  emit_COPY(write_stream, copy_length, copy_pos);
855 }
856 
857 /**
858  * Potentially emits one or more add/copy pairs. The current state is saved,
859  * so as to minimize wasted emits from consecutive adds or copies.
860  */
861 void Patchfile::
862 cache_add_and_copy(ostream &write_stream,
863  uint32_t add_length, const char *add_buffer,
864  uint32_t copy_length, uint32_t copy_pos) {
865  if (add_length != 0) {
866  if (_cache_copy_length != 0) {
867  // Have to flush.
868  cache_flush(write_stream);
869  }
870  // Add the string to the current cache.
871  _cache_add_data += string(add_buffer, add_length);
872  }
873 
874  if (copy_length != 0) {
875  if (_cache_copy_length == 0) {
876  // Start a new copy phase.
877  _cache_copy_start = copy_pos;
878  _cache_copy_length = copy_length;
879 
880  } else if (_cache_copy_start + _cache_copy_length == copy_pos) {
881  // We can just tack on the copy to what we've already got.
882  _cache_copy_length += copy_length;
883 
884  } else {
885  // It's a discontinuous copy. We have to flush.
886  cache_flush(write_stream);
887  _cache_copy_start = copy_pos;
888  _cache_copy_length = copy_length;
889  }
890  }
891 }
892 
893 /**
894  * Closes any copy or add phases that are still open after a previous call to
895  * cache_add_and_copy().
896  */
897 void Patchfile::
898 cache_flush(ostream &write_stream) {
899  emit_add_and_copy(write_stream,
900  _cache_add_data.size(), _cache_add_data.data(),
901  _cache_copy_length, _cache_copy_start);
902  _cache_add_data = string();
903  _cache_copy_length = 0;
904 }
905 
906 
907 /**
908  *
909  * Writes the patchfile header.
910  */
911 void Patchfile::
912 write_header(ostream &write_stream,
913  istream &stream_orig, istream &stream_new) {
914  // prepare to write the patch file header
915 
916  // write the patch file header
917  StreamWriter patch_writer(write_stream);
918  patch_writer.add_uint32(_magic_number);
919  patch_writer.add_uint16(_current_version);
920 
921  stream_orig.seekg(0, ios::end);
922  streampos source_file_length = stream_orig.tellg();
923  patch_writer.add_uint32((uint32_t)source_file_length);
924 
925  // calc MD5 of original file
926  _MD5_ofSource.hash_stream(stream_orig);
927  // add it to the header
928  _MD5_ofSource.write_stream(patch_writer);
929 
930  if (express_cat.is_debug()) {
931  express_cat.debug()
932  << "Orig: " << _MD5_ofSource << "\n";
933  }
934 
935  stream_new.seekg(0, ios::end);
936  streampos result_file_length = stream_new.tellg();
937  patch_writer.add_uint32((uint32_t)result_file_length);
938 
939  // calc MD5 of resultant patched file
940  _MD5_ofResult.hash_stream(stream_new);
941  // add it to the header
942  _MD5_ofResult.write_stream(patch_writer);
943 
944  if (express_cat.is_debug()) {
945  express_cat.debug()
946  << " New: " << _MD5_ofResult << "\n";
947  }
948 }
949 
950 /**
951  * Writes the patchfile terminator.
952  */
953 void Patchfile::
954 write_terminator(ostream &write_stream) {
955  cache_flush(write_stream);
956  // write terminator (null ADD, null COPY)
957  emit_ADD(write_stream, 0, nullptr);
958  emit_COPY(write_stream, 0, 0);
959 }
960 
961 /**
962  * Computes the patches for the entire file (if it is not a multifile) or for
963  * a single subfile (if it is)
964  *
965  * Returns true if successful, false on error.
966  */
967 bool Patchfile::
968 compute_file_patches(ostream &write_stream,
969  uint32_t offset_orig, uint32_t offset_new,
970  istream &stream_orig, istream &stream_new) {
971  // read in original file
972  stream_orig.seekg(0, ios::end);
973  nassertr(stream_orig, false);
974  uint32_t source_file_length = stream_orig.tellg();
975  if (express_cat.is_debug()) {
976  express_cat.debug()
977  << "Allocating " << source_file_length << " bytes to read orig\n";
978  }
979 
980  char *buffer_orig = (char *)PANDA_MALLOC_ARRAY(source_file_length);
981  stream_orig.seekg(0, ios::beg);
982  stream_orig.read(buffer_orig, source_file_length);
983 
984  // read in new file
985  stream_new.seekg(0, ios::end);
986  uint32_t result_file_length = stream_new.tellg();
987  nassertr(stream_new, false);
988  if (express_cat.is_debug()) {
989  express_cat.debug()
990  << "Allocating " << result_file_length << " bytes to read new\n";
991  }
992 
993  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(result_file_length);
994  stream_new.seekg(0, ios::beg);
995  stream_new.read(buffer_new, result_file_length);
996 
997  // allocate hashlink tables
998  if (_hash_table == nullptr) {
999  if (express_cat.is_debug()) {
1000  express_cat.debug()
1001  << "Allocating hashtable of size " << _HASHTABLESIZE << " * 4\n";
1002  }
1003  _hash_table = (uint32_t *)PANDA_MALLOC_ARRAY(_HASHTABLESIZE * sizeof(uint32_t));
1004  }
1005 
1006  if (express_cat.is_debug()) {
1007  express_cat.debug()
1008  << "Allocating linktable of size " << source_file_length << " * 4\n";
1009  }
1010 
1011  uint32_t *link_table = (uint32_t *)PANDA_MALLOC_ARRAY(source_file_length * sizeof(uint32_t));
1012 
1013  // build hash and link tables for original file
1014  build_hash_link_tables(buffer_orig, source_file_length, _hash_table, link_table);
1015 
1016  // run through new file
1017 
1018  uint32_t new_pos = 0;
1019  uint32_t start_pos = new_pos; // this is the position for the start of ADD operations
1020 
1021  if(((uint32_t) result_file_length) >= _footprint_length)
1022  {
1023  while (new_pos < (result_file_length - _footprint_length)) {
1024 
1025  // find best match for current position
1026  uint32_t COPY_pos;
1027  uint16_t COPY_length;
1028 
1029  find_longest_match(new_pos, COPY_pos, COPY_length, _hash_table, link_table,
1030  buffer_orig, source_file_length, buffer_new, result_file_length);
1031 
1032  // if no match or match not longer than footprint length, skip to next
1033  // byte
1034  if (COPY_length < _footprint_length) {
1035  // go to next byte
1036  new_pos++;
1037  } else {
1038  // emit ADD for all skipped bytes
1039  int num_skipped = (int)new_pos - (int)start_pos;
1040  if (express_cat.is_spam()) {
1041  express_cat.spam()
1042  << "build: num_skipped = " << num_skipped
1043  << endl;
1044  }
1045  cache_add_and_copy(write_stream, num_skipped, &buffer_new[start_pos],
1046  COPY_length, COPY_pos + offset_orig);
1047  new_pos += (uint32_t)COPY_length;
1048  start_pos = new_pos;
1049  }
1050  }
1051  }
1052 
1053  if (express_cat.is_spam()) {
1054  express_cat.spam()
1055  << "build: result_file_length = " << result_file_length
1056  << " start_pos = " << start_pos
1057  << endl;
1058  }
1059 
1060  // are there still more bytes left in the new file?
1061  if (start_pos != result_file_length) {
1062  // emit ADD for all remaining bytes
1063 
1064  uint32_t remaining_bytes = result_file_length - start_pos;
1065  cache_add_and_copy(write_stream, remaining_bytes, &buffer_new[start_pos],
1066  0, 0);
1067  start_pos += remaining_bytes;
1068  }
1069 
1070  PANDA_FREE_ARRAY(link_table);
1071 
1072  PANDA_FREE_ARRAY(buffer_orig);
1073  PANDA_FREE_ARRAY(buffer_new);
1074 
1075  return true;
1076 }
1077 
1078 /**
1079  * Computes patches for the files, knowing that they are both Panda
1080  * Multifiles. This will build patches one subfile at a time, which can
1081  * potentially be much, much faster for large Multifiles that contain many
1082  * small subfiles.
1083  */
1084 bool Patchfile::
1085 compute_mf_patches(ostream &write_stream,
1086  uint32_t offset_orig, uint32_t offset_new,
1087  istream &stream_orig, istream &stream_new) {
1088  Multifile mf_orig, mf_new;
1089  IStreamWrapper stream_origw(stream_orig);
1090  IStreamWrapper stream_neww(stream_new);
1091  if (!mf_orig.open_read(&stream_origw) ||
1092  !mf_new.open_read(&stream_neww)) {
1093  express_cat.error()
1094  << "Input multifiles appear to be corrupt.\n";
1095  return false;
1096  }
1097 
1098  if (mf_new.needs_repack()) {
1099  express_cat.error()
1100  << "Input multifiles need to be repacked.\n";
1101  return false;
1102  }
1103 
1104  // First, compute the patch for the header index.
1105 
1106  {
1107  ISubStream index_orig(&stream_origw, 0, mf_orig.get_index_end());
1108  ISubStream index_new(&stream_neww, 0, mf_new.get_index_end());
1109  if (!do_compute_patches("", "",
1110  write_stream, offset_orig, offset_new,
1111  index_orig, index_new)) {
1112  return false;
1113  }
1114  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_index_end(), false);
1115  }
1116 
1117  // Now walk through each subfile in the new multifile. If a particular
1118  // subfile exists in both source files, we compute the patches for the
1119  // subfile; for a new subfile, we trivially add it. If a subfile has been
1120  // removed, we simply don't add it (we'll never even notice this case).
1121  int new_num_subfiles = mf_new.get_num_subfiles();
1122  for (int ni = 0; ni < new_num_subfiles; ++ni) {
1123  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_subfile_internal_start(ni), false);
1124  string name = mf_new.get_subfile_name(ni);
1125  int oi = mf_orig.find_subfile(name);
1126 
1127  if (oi < 0) {
1128  // This is a newly-added subfile. Add it the hard way.
1129  express_cat.info()
1130  << "Adding subfile " << mf_new.get_subfile_name(ni) << "\n";
1131 
1132  streampos new_start = mf_new.get_subfile_internal_start(ni);
1133  size_t new_size = mf_new.get_subfile_internal_length(ni);
1134  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
1135  stream_new.seekg(new_start, ios::beg);
1136  stream_new.read(buffer_new, new_size);
1137  cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
1138  PANDA_FREE_ARRAY(buffer_new);
1139 
1140  } else {
1141  // This subfile exists in both the original and the new files. Patch
1142  // it.
1143  streampos orig_start = mf_orig.get_subfile_internal_start(oi);
1144  size_t orig_size = mf_orig.get_subfile_internal_length(oi);
1145 
1146  streampos new_start = mf_new.get_subfile_internal_start(ni);
1147  size_t new_size = mf_new.get_subfile_internal_length(ni);
1148 
1149  if (!patch_subfile(write_stream, offset_orig, offset_new,
1150  mf_new.get_subfile_name(ni),
1151  stream_origw, orig_start, orig_start + (streampos)orig_size,
1152  stream_neww, new_start, new_start + (streampos)new_size)) {
1153  return false;
1154  }
1155  }
1156  }
1157 
1158  return true;
1159 }
1160 
1161 #ifdef HAVE_TAR
1162 /**
1163  * Uses libtar to extract the location within the tar file of each of the
1164  * subfiles. Returns true if the tar file is read successfully, false if
1165  * there is an error (e.g. it is not a tar file).
1166  */
1167 bool Patchfile::
1168 read_tar(TarDef &tar, istream &stream) {
1169  TAR *tfile;
1170  tartype_t tt;
1171  tt.openfunc = tar_openfunc;
1172  tt.closefunc = tar_closefunc;
1173  tt.readfunc = tar_readfunc;
1174  tt.writefunc = tar_writefunc;
1175 
1176  stream.seekg(0, ios::beg);
1177  nassertr(_tar_istream == nullptr, false);
1178  _tar_istream = &stream;
1179  if (tar_open(&tfile, (char *)"dummy", &tt, O_RDONLY, 0, 0) != 0) {
1180  _tar_istream = nullptr;
1181  return false;
1182  }
1183 
1184  // Walk through the tar file, noting the current file position as we reach
1185  // each subfile. Use this information to infer the start and end of each
1186  // subfile within the stream.
1187 
1188  streampos last_pos = 0;
1189  int flag = th_read(tfile);
1190  while (flag == 0) {
1191  TarSubfile subfile;
1192  subfile._name = th_get_pathname(tfile);
1193  subfile._header_start = last_pos;
1194  subfile._data_start = stream.tellg();
1195  subfile._data_end = subfile._data_start + (streampos)th_get_size(tfile);
1196  tar_skip_regfile(tfile);
1197  subfile._end = stream.tellg();
1198  tar.push_back(subfile);
1199 
1200  last_pos = subfile._end;
1201  flag = th_read(tfile);
1202  }
1203 
1204  // Create one more "subfile" for the bytes at the tail of the file. This
1205  // subfile has no name.
1206  TarSubfile subfile;
1207  subfile._header_start = last_pos;
1208  stream.clear();
1209  stream.seekg(0, ios::end);
1210  subfile._data_start = stream.tellg();
1211  subfile._data_end = subfile._data_start;
1212  subfile._end = subfile._data_start;
1213  tar.push_back(subfile);
1214 
1215  tar_close(tfile);
1216  _tar_istream = nullptr;
1217  return (flag == 1);
1218 }
1219 #endif // HAVE_TAR
1220 
1221 #ifdef HAVE_TAR
1222 /**
1223  * Computes patches for the files, knowing that they are both tar files. This
1224  * is similar to compute_mf_patches().
1225  *
1226  * The tar indexes should have been built up by a previous call to read_tar().
1227  */
1228 bool Patchfile::
1229 compute_tar_patches(ostream &write_stream,
1230  uint32_t offset_orig, uint32_t offset_new,
1231  istream &stream_orig, istream &stream_new,
1232  TarDef &tar_orig, TarDef &tar_new) {
1233 
1234  // Sort the orig list by filename, so we can quickly look up files from the
1235  // new list.
1236  tar_orig.sort();
1237 
1238  // However, it is important to keep the new list in its original, on-disk
1239  // order.
1240 
1241  // Walk through each subfile in the new tar file. If a particular subfile
1242  // exists in both source files, we compute the patches for the subfile; for
1243  // a new subfile, we trivially add it. If a subfile has been removed, we
1244  // simply don't add it (we'll never even notice this case).
1245 
1246  IStreamWrapper stream_origw(stream_orig);
1247  IStreamWrapper stream_neww(stream_new);
1248 
1249  TarDef::const_iterator ni;
1250  streampos last_pos = 0;
1251  for (ni = tar_new.begin(); ni != tar_new.end(); ++ni) {
1252  const TarSubfile &sf_new =(*ni);
1253  nassertr(sf_new._header_start == last_pos, false);
1254 
1255  TarDef::const_iterator oi = tar_orig.find(sf_new);
1256 
1257  if (oi == tar_orig.end()) {
1258  // This is a newly-added subfile. Add it the hard way.
1259  express_cat.info()
1260  << "Adding subfile " << sf_new._name << "\n";
1261 
1262  streampos new_start = sf_new._header_start;
1263  size_t new_size = sf_new._end - sf_new._header_start;
1264  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
1265  stream_new.seekg(new_start, ios::beg);
1266  stream_new.read(buffer_new, new_size);
1267  cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
1268  PANDA_FREE_ARRAY(buffer_new);
1269 
1270  } else {
1271  // This subfile exists in both the original and the new files. Patch
1272  // it.
1273  const TarSubfile &sf_orig =(*oi);
1274 
1275  // We patch the header and data of the file separately, so we can
1276  // accurately detect nested multifiles. The extra data at the end of
1277  // the file (possibly introduced by a tar file's blocking) is the
1278  // footer, which is also patched separately.
1279  if (!patch_subfile(write_stream, offset_orig, offset_new, "",
1280  stream_origw, sf_orig._header_start, sf_orig._data_start,
1281  stream_neww, sf_new._header_start, sf_new._data_start)) {
1282  return false;
1283  }
1284 
1285  if (!patch_subfile(write_stream, offset_orig, offset_new, sf_new._name,
1286  stream_origw, sf_orig._data_start, sf_orig._data_end,
1287  stream_neww, sf_new._data_start, sf_new._data_end)) {
1288  return false;
1289  }
1290 
1291  if (!patch_subfile(write_stream, offset_orig, offset_new, "",
1292  stream_origw, sf_orig._data_end, sf_orig._end,
1293  stream_neww, sf_new._data_end, sf_new._end)) {
1294  return false;
1295  }
1296  }
1297 
1298  last_pos = sf_new._end;
1299  }
1300 
1301  return true;
1302 }
1303 #endif // HAVE_TAR
1304 
1305 #ifdef HAVE_TAR
1306 /**
1307  * A callback function to redirect libtar to read from our istream instead of
1308  * using low-level Unix I/O.
1309  */
1310 int Patchfile::
1311 tar_openfunc(const char *, int, ...) {
1312  // Since we don't actually open a file--the stream is already open--we do
1313  // nothing here.
1314  return 0;
1315 }
1316 #endif // HAVE_TAR
1317 
1318 #ifdef HAVE_TAR
1319 /**
1320  * A callback function to redirect libtar to read from our istream instead of
1321  * using low-level Unix I/O.
1322  */
1323 int Patchfile::
1324 tar_closefunc(int) {
1325  // Since we don't actually open a file, no need to close it either.
1326  return 0;
1327 }
1328 #endif // HAVE_TAR
1329 
1330 #ifdef HAVE_TAR
1331 /**
1332  * A callback function to redirect libtar to read from our istream instead of
1333  * using low-level Unix I/O.
1334  */
1335 ssize_t Patchfile::
1336 tar_readfunc(int, void *buffer, size_t nbytes) {
1337  nassertr(_tar_istream != nullptr, 0);
1338  _tar_istream->read((char *)buffer, nbytes);
1339  return (ssize_t)_tar_istream->gcount();
1340 }
1341 #endif // HAVE_TAR
1342 
1343 #ifdef HAVE_TAR
1344 /**
1345  * A callback function to redirect libtar to read from our istream instead of
1346  * using low-level Unix I/O.
1347  */
1348 ssize_t Patchfile::
1349 tar_writefunc(int, const void *, size_t) {
1350  // Since we use libtar only for reading, it is an error if this method gets
1351  // called.
1352  nassertr(false, -1);
1353  return -1;
1354 }
1355 #endif // HAVE_TAR
1356 
1357 /**
1358  *
1359  * This implementation uses the "greedy differencing algorithm" described in
1360  * the masters thesis "Differential Compression: A Generalized Solution for
1361  * Binary Files" by Randal C. Burns (p.13). For an original file of size M and
1362  * a new file of size N, this algorithm is O(M) in space and O(M*N) (worst-
1363  * case) in time. return false on error
1364  */
1365 bool Patchfile::
1366 build(Filename file_orig, Filename file_new, Filename patch_name) {
1367  patch_name.set_binary();
1368 
1369  // Open the original file for read
1370  pifstream stream_orig;
1371  file_orig.set_binary();
1372  if (!file_orig.open_read(stream_orig)) {
1373  express_cat.error()
1374  << "Patchfile::build() - Failed to open file: " << file_orig << endl;
1375  return false;
1376  }
1377 
1378  // Open the new file for read
1379  pifstream stream_new;
1380  file_new.set_binary();
1381  if (!file_new.open_read(stream_new)) {
1382  express_cat.error()
1383  << "Patchfile::build() - Failed to open file: " << file_new << endl;
1384  return false;
1385  }
1386 
1387  // Open patch file for write
1388  pofstream write_stream;
1389  if (!patch_name.open_write(write_stream)) {
1390  express_cat.error()
1391  << "Patchfile::build() - Failed to open file: " << patch_name << endl;
1392  return false;
1393  }
1394 
1395  _last_copy_pos = 0;
1396  _add_pos = 0;
1397  _cache_add_data = string();
1398  _cache_copy_start = 0;
1399  _cache_copy_length = 0;
1400 
1401  write_header(write_stream, stream_orig, stream_new);
1402 
1403  if (!do_compute_patches(file_orig, file_new,
1404  write_stream, 0, 0,
1405  stream_orig, stream_new)) {
1406  return false;
1407  }
1408 
1409  write_terminator(write_stream);
1410 
1411  if (express_cat.is_debug()) {
1412  express_cat.debug()
1413  << "Patch file will generate " << _add_pos << "-byte file.\n";
1414  }
1415 
1416 #ifndef NDEBUG
1417  {
1418  // Make sure the resulting file would be the right size.
1419  stream_new.seekg(0, ios::end);
1420  streampos result_file_length = stream_new.tellg();
1421  nassertr(_add_pos == result_file_length, false);
1422  }
1423 #endif // NDEBUG
1424 
1425  return (_last_copy_pos != 0);
1426 }
1427 
1428 /**
1429  * Computes the patches for the indicated A to B files, or subfiles. Checks
1430  * for multifiles or tar files before falling back to whole-file patching.
1431  */
1432 bool Patchfile::
1433 do_compute_patches(const Filename &file_orig, const Filename &file_new,
1434  ostream &write_stream,
1435  uint32_t offset_orig, uint32_t offset_new,
1436  istream &stream_orig, istream &stream_new) {
1437  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new, false);
1438 
1439  // Check whether our input files are Panda multifiles or tar files.
1440  bool is_multifile = false;
1441 #ifdef HAVE_TAR
1442  bool is_tarfile = false;
1443  TarDef tar_orig, tar_new;
1444 #endif // HAVE_TAR
1445 
1446  if (_allow_multifile) {
1447  if (strstr(file_orig.get_basename().c_str(), ".mf") != nullptr ||
1448  strstr(file_new.get_basename().c_str(), ".mf") != nullptr) {
1449  // Read the first n bytes of both files for the Multifile magic number.
1450  string magic_number = Multifile::get_magic_number();
1451  char *buffer = (char *)PANDA_MALLOC_ARRAY(magic_number.size());
1452  stream_orig.seekg(0, ios::beg);
1453  stream_orig.read(buffer, magic_number.size());
1454 
1455  if (stream_orig.gcount() == (int)magic_number.size() &&
1456  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1457  stream_new.seekg(0, ios::beg);
1458  stream_new.read(buffer, magic_number.size());
1459  if (stream_new.gcount() == (int)magic_number.size() &&
1460  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1461  is_multifile = true;
1462  }
1463  }
1464  PANDA_FREE_ARRAY(buffer);
1465  }
1466 #ifdef HAVE_TAR
1467  if (strstr(file_orig.get_basename().c_str(), ".tar") != nullptr ||
1468  strstr(file_new.get_basename().c_str(), ".tar") != nullptr) {
1469  if (read_tar(tar_orig, stream_orig) &&
1470  read_tar(tar_new, stream_new)) {
1471  is_tarfile = true;
1472  }
1473  }
1474 #endif // HAVE_TAR
1475  }
1476 
1477  if (is_multifile) {
1478  if (express_cat.is_debug()) {
1479  express_cat.debug()
1480  << file_orig.get_basename() << " appears to be a Panda Multifile.\n";
1481  }
1482  if (!compute_mf_patches(write_stream, offset_orig, offset_new,
1483  stream_orig, stream_new)) {
1484  return false;
1485  }
1486 #ifdef HAVE_TAR
1487  } else if (is_tarfile) {
1488  if (express_cat.is_debug()) {
1489  express_cat.debug()
1490  << file_orig.get_basename() << " appears to be a tar file.\n";
1491  }
1492  if (!compute_tar_patches(write_stream, offset_orig, offset_new,
1493  stream_orig, stream_new, tar_orig, tar_new)) {
1494  return false;
1495  }
1496 #endif // HAVE_TAR
1497  } else {
1498  if (express_cat.is_debug()) {
1499  express_cat.debug()
1500  << file_orig.get_basename() << " is not a multifile.\n";
1501  }
1502  if (!compute_file_patches(write_stream, offset_orig, offset_new,
1503  stream_orig, stream_new)) {
1504  return false;
1505  }
1506  }
1507 
1508  return true;
1509 }
1510 
1511 /**
1512  * Generates patches for a nested subfile of a Panda Multifile or a tar file.
1513  */
1514 bool Patchfile::
1515 patch_subfile(ostream &write_stream,
1516  uint32_t offset_orig, uint32_t offset_new,
1517  const Filename &filename,
1518  IStreamWrapper &stream_orig, streampos orig_start, streampos orig_end,
1519  IStreamWrapper &stream_new, streampos new_start, streampos new_end) {
1520  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)new_start, false);
1521 
1522  size_t new_size = new_end - new_start;
1523  size_t orig_size = orig_end - orig_start;
1524 
1525  ISubStream subfile_orig(&stream_orig, orig_start, orig_end);
1526  ISubStream subfile_new(&stream_new, new_start, new_end);
1527 
1528  bool is_unchanged = false;
1529  if (orig_size == new_size) {
1530  HashVal hash_orig, hash_new;
1531  hash_orig.hash_stream(subfile_orig);
1532  hash_new.hash_stream(subfile_new);
1533 
1534  if (hash_orig == hash_new) {
1535  // Actually, the subfile is unchanged; just emit it.
1536  is_unchanged = true;
1537  }
1538  }
1539 
1540  if (is_unchanged) {
1541  if (express_cat.is_debug() && !filename.empty()) {
1542  express_cat.debug()
1543  << "Keeping subfile " << filename << "\n";
1544  }
1545  cache_add_and_copy(write_stream, 0, nullptr,
1546  orig_size, offset_orig + orig_start);
1547 
1548  } else {
1549  if (!filename.empty()) {
1550  express_cat.info()
1551  << "Patching subfile " << filename << "\n";
1552  }
1553 
1554  if (!do_compute_patches(filename, filename, write_stream,
1555  offset_orig + orig_start, offset_new + new_start,
1556  subfile_orig, subfile_new)) {
1557  return false;
1558  }
1559  }
1560 
1561  return true;
1562 }
1563 
1564 #endif // HAVE_OPENSSL
A StreamWriter object is used to write sequential binary data directly to an ostream.
Definition: streamWriter.h:29
bool open_write(std::ofstream &stream, bool truncate=true) const
Opens the indicated ofstream for writing the file, if possible.
Definition: filename.cxx:1899
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
size_t get_subfile_internal_length(int index) const
Returns the number of bytes the indicated subfile consumes within the archive.
Definition: multifile.cxx:1580
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A hierarchy of directories and files that appears to be one continuous file system,...
bool needs_repack() const
Returns true if the Multifile index is suboptimal and should be repacked.
Definition: multifile.I:55
std::istream * open_read_file(const Filename &filename, bool auto_unwrap) const
Convenience function; returns a newly allocated istream if the file exists and can be read,...
void set_binary()
Indicates that the filename represents a binary file.
Definition: filename.I:414
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
bool open_read(std::ifstream &stream) const
Opens the indicated ifstream for reading the file, if possible.
Definition: filename.cxx:1863
std::streampos get_index_end() const
Returns the first byte that is guaranteed to follow any index byte already written to disk in the Mul...
Definition: multifile.cxx:1555
Stores a 128-bit value that represents the hashed contents (typically MD5) of a file or buffer.
Definition: hashVal.h:31
static void close_read_file(std::istream *stream)
Closes a file opened by a previous call to open_read_file().
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
Definition: buffer.h:24
An istream object that presents a subwindow into another istream.
Definition: subStream.h:30
int find_subfile(const std::string &subfile_name) const
Returns the index of the subfile with the indicated name, or -1 if the named subfile is not within th...
Definition: multifile.cxx:1367
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:39
get_subfile_name
Returns the name of the nth subfile.
Definition: multifile.h:117
This class provides a locking wrapper around an arbitrary istream pointer.
Definition: streamWrapper.h:59
static VirtualFileSystem * get_global_ptr()
Returns the default global VirtualFileSystem.
get_magic_number
Returns a string with the first n bytes written to a Multifile, to identify it as a Multifile.
Definition: multifile.h:139
std::string get_basename() const
Returns the basename part of the filename.
Definition: filename.I:367
get_num_subfiles
Returns the number of subfiles within the Multifile.
Definition: multifile.h:117
A file that contains a set of files.
Definition: multifile.h:37
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static Filename temporary(const std::string &dirname, const std::string &prefix, const std::string &suffix=std::string(), Type type=T_general)
Generates a temporary filename within the indicated directory, using the indicated prefix.
Definition: filename.cxx:424
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A class to read sequential binary data directly from an istream.
Definition: streamReader.h:28
std::streampos get_subfile_internal_start(int index) const
Returns the starting byte position within the Multifile at which the indicated subfile begins.
Definition: multifile.cxx:1567
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.