Panda3D
patchfile.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file patchfile.cxx
10  * @author darren, mike
11  * @date 1997-01-09
12  */
13 
14 #include "pandabase.h"
15 
16 #ifdef HAVE_OPENSSL
17 
18 #include "config_express.h"
19 #include "error_utils.h"
20 #include "patchfile.h"
21 #include "streamReader.h"
22 #include "streamWriter.h"
23 #include "multifile.h"
24 #include "hashVal.h"
25 #include "virtualFileSystem.h"
26 
27 #include <string.h> // for strstr
28 
29 #ifdef HAVE_TAR
30 #include <libtar.h>
31 #include <fcntl.h> // for O_RDONLY
32 #endif // HAVE_TAR
33 
34 #ifdef HAVE_TAR
35 std::istream *Patchfile::_tar_istream = nullptr;
36 #endif // HAVE_TAR
37 
38 using std::endl;
39 using std::ios;
40 using std::istream;
41 using std::min;
42 using std::ostream;
43 using std::streampos;
44 using std::string;
45 
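// Defining USE_MD5_FOR_HASHTABLE_INDEX_VALUES makes calc_hash() derive its
// hash table index values from an MD5 hash.  This actually slows things down,
// so the define is left commented out:
// #define USE_MD5_FOR_HASHTABLE_INDEX_VALUES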
48 
49 /*
50  * Patch File Format
51  * IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE INSTALLER
52  *
53  * [ HEADER ]
54  * 4 bytes 0xfeebfaac ("magic number")
55  * (older patch files have a magic number 0xfeebfaab,
56  * indicating they are version number 0.)
57  * 2 bytes version number (if magic number == 0xfeebfaac)
58  * 4 bytes length of starting file (if version >= 1)
59  * 16 bytes MD5 of starting file (if version >= 1)
60  * 4 bytes length of resulting patched file
61  * 16 bytes MD5 of resultant patched file
62  *
63  * Note that MD5 hashes are written in the order observed by
64  * HashVal::read_stream() and HashVal::write_stream(), which is not
65  * the normal linear order. (Each group of four bytes is reversed.)
66  */
67 
// Size in bytes of a version 0 header: magic number (4), length of the
// patched result (4) and MD5 of the patched result (16).
const int _v0_header_length = 4 + 4 + 16;
// Size in bytes of a version >= 1 header: the version 0 fields plus the
// version number (2), the source file length (4) and the source MD5 (16).
const int _v1_header_length = _v0_header_length + 2 + 4 + 16;
70 /*
71  * [ ADD/COPY pairs; repeated N times ]
72  * 2 bytes AL = ADD length
73  * AL bytes bytes to add
74  * 2 bytes CL = COPY length
75  * 4 bytes offset of data to copy from original file, if CL != 0.
76  * If version >= 2, offset is relative to end of previous
77  * copy block; if version < 2, offset is relative to
78  * beginning of file.
79  *
80  * [ TERMINATOR ]
81  * 2 bytes zero-length ADD
82  * 2 bytes zero-length COPY
83  */
84 
85 // Defines
86 const uint32_t Patchfile::_v0_magic_number = 0xfeebfaab;
87 const uint32_t Patchfile::_magic_number = 0xfeebfaac;
88 
89 // Created version 1 on 11202 to store length and MD5 of original file. To
90 // version 2 on 11202 to store copy offsets as relative.
91 const uint16_t Patchfile::_current_version = 2;
92 
93 const uint32_t Patchfile::_HASH_BITS = 24;
94 const uint32_t Patchfile::_HASHTABLESIZE = uint32_t(1) << Patchfile::_HASH_BITS;
95 const uint32_t Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
96 const uint32_t Patchfile::_NULL_VALUE = uint32_t(0) - 1;
97 const uint32_t Patchfile::_MAX_RUN_LENGTH = (uint32_t(1) << 16) - 1;
98 const uint32_t Patchfile::_HASH_MASK = (uint32_t(1) << Patchfile::_HASH_BITS) - 1;
99 
100 /**
101  * Create a patch file and initializes internal data
102  */
103 Patchfile::
104 Patchfile() {
105  PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
106  init(buffer);
107 }
108 
109 /**
110  * Create patch file with buffer to patch
111  */
112 Patchfile::
113 Patchfile(PT(Buffer) buffer) {
114  init(buffer);
115 }
116 
117 /**
118  *
119  */
120 void Patchfile::
121 init(PT(Buffer) buffer) {
122  _rename_output_to_orig = false;
123  _delete_patchfile = false;
124  _hash_table = nullptr;
125  _initiated = false;
126  nassertv(!buffer.is_null());
127  _buffer = buffer;
128 
129  _version_number = 0;
130  _allow_multifile = true;
131 
132  _patch_stream = nullptr;
133  _origfile_stream = nullptr;
134 
135  reset_footprint_length();
136 }
137 
138 /**
139  *
140  */
141 Patchfile::
142 ~Patchfile() {
143  if (_hash_table != nullptr) {
144  PANDA_FREE_ARRAY(_hash_table);
145  }
146 
147  if (_initiated) {
148  cleanup();
149  }
150 
151  nassertv(_patch_stream == nullptr);
152  nassertv(_origfile_stream == nullptr);
153 }
154 
155 /**
156  * Closes and clean up internal data structures
157  */
158 void Patchfile::
159 cleanup() {
160  if (!_initiated) {
161  express_cat.error()
162  << "Patchfile::cleanup() - Patching has not been initiated"
163  << endl;
164  return;
165  }
166 
167  // close files
169  if (_origfile_stream != nullptr) {
170  vfs->close_read_file(_origfile_stream);
171  _origfile_stream = nullptr;
172  }
173  if (_patch_stream != nullptr) {
174  vfs->close_read_file(_patch_stream);
175  _patch_stream = nullptr;
176  }
177  _write_stream.close();
178 
179  _initiated = false;
180 }
181 
182 // PATCH FILE APPLY MEMBER FUNCTIONS
183 
184 // NOTE: this patch-application functionality unfortunately has to be
185 // duplicated in the Installer. It is contained in the file
186 // installerApplyPatch.cxx PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY
187 // OF THIS LOGIC CHANGES! (i.e. if the patch file format changes)
188 
189 /**
190  * Set up to apply the patch to the file (original file and patch are
191  * destroyed in the process).
192  */
193 int Patchfile::
194 initiate(const Filename &patch_file, const Filename &file) {
195  int result = initiate(patch_file, file, Filename::temporary("", "patch_"));
196  _rename_output_to_orig = true;
197  _delete_patchfile = !keep_temporary_files;
198  return result;
199 }
200 
201 /**
202  * Set up to apply the patch to the file. In this form, neither the original
203  * file nor the patch file are destroyed.
204  */
205 int Patchfile::
206 initiate(const Filename &patch_file, const Filename &orig_file,
207  const Filename &target_file) {
208  if (_initiated) {
209  express_cat.error()
210  << "Patchfile::initiate() - Patching has already been initiated"
211  << endl;
212  return EU_error_abort;
213  }
214 
215  nassertr(orig_file != target_file, EU_error_abort);
216 
218 
219  // Open the original file for read
220  nassertr(_origfile_stream == nullptr, EU_error_abort);
221  _orig_file = orig_file;
222  _orig_file.set_binary();
223  _origfile_stream = vfs->open_read_file(_orig_file, false);
224  if (_origfile_stream == nullptr) {
225  express_cat.error()
226  << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
227  return get_write_error();
228  }
229 
230  // Open the temp file for write
231  _output_file = target_file;
232  _output_file.set_binary();
233  if (!_output_file.open_write(_write_stream)) {
234  express_cat.error()
235  << "Patchfile::initiate() - Failed to open file: " << _output_file << endl;
236  return get_write_error();
237  }
238 
239  if (express_cat.is_debug()) {
240  express_cat.debug()
241  << "Patchfile using output file " << _output_file << "\n";
242  }
243 
244  int result = internal_read_header(patch_file);
245  _total_bytes_processed = 0;
246 
247  _initiated = true;
248  return result;
249 }
250 
251 /**
252  * Opens the patch file for reading, and gets the header information from the
253  * file but does not begin to do any real work. This can be used to query the
254  * data stored in the patch.
255  */
256 int Patchfile::
257 read_header(const Filename &patch_file) {
258  if (_initiated) {
259  express_cat.error()
260  << "Patchfile::initiate() - Patching has already been initiated"
261  << endl;
262  return EU_error_abort;
263  }
264 
265  int result = internal_read_header(patch_file);
266  if (_patch_stream != nullptr) {
268  vfs->close_read_file(_patch_stream);
269  _patch_stream = nullptr;
270  }
271  return result;
272 }
273 
274 /**
275  * Perform one buffer's worth of patching.
276  * Returns one of the following values:
277  * @li @c EU_ok : while patching
278  * @li @c EU_success : when done
279  * @li @c EU_error_abort : Patching has not been initiated
280  * @li @c EU_error_file_invalid : file is corrupted
281  * @li @c EU_error_invalid_checksum : incompatible patch file
282  * @li @c EU_error_write_file_rename : could not rename file
283  */
284 int Patchfile::
285 run() {
286  // Now patch the file using the given buffer
287  int buflen;
288  int bytes_read;
289  uint16_t ADD_length;
290  uint16_t COPY_length;
291  int32_t COPY_offset;
292 
293  if (_initiated == false) {
294  express_cat.error()
295  << "Patchfile::run() - Patching has not been initiated"
296  << endl;
297  return EU_error_abort;
298  }
299 
300  nassertr(_patch_stream != nullptr, EU_error_abort);
301  nassertr(_origfile_stream != nullptr, EU_error_abort);
302  StreamReader patch_reader(*_patch_stream);
303 
304  buflen = _buffer->get_length();
305  bytes_read = 0;
306 
307  while (bytes_read < buflen) {
308  // read # of ADD bytes
309  nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
310  ADD_length = patch_reader.get_uint16();
311  if (_patch_stream->fail()) {
312  express_cat.error()
313  << "Truncated patch file.\n";
314  return EU_error_file_invalid;
315  }
316 
317  bytes_read += (int)ADD_length;
318  _total_bytes_processed += (int)ADD_length;
319  if (_total_bytes_processed > _total_bytes_to_process) {
320  express_cat.error()
321  << "Runaway patch file.\n";
322  return EU_error_file_invalid;
323  }
324 
325  // if there are bytes to add, read them from patch file and write them to
326  // output
327  if (express_cat.is_spam() && ADD_length != 0) {
328  express_cat.spam()
329  << "ADD: " << ADD_length << " (to "
330  << _write_stream.tellp() << ")" << endl;
331  }
332 
333  uint32_t bytes_left = (uint32_t)ADD_length;
334  while (bytes_left > 0) {
335  uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
336  _patch_stream->read(_buffer->_buffer, bytes_this_time);
337  if (_patch_stream->fail()) {
338  express_cat.error()
339  << "Truncated patch file.\n";
340  return EU_error_file_invalid;
341  }
342  _write_stream.write(_buffer->_buffer, bytes_this_time);
343  bytes_left -= bytes_this_time;
344  }
345 
346  // read # of COPY bytes
347  nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
348  COPY_length = patch_reader.get_uint16();
349  if (_patch_stream->fail()) {
350  express_cat.error()
351  << "Truncated patch file.\n";
352  return EU_error_file_invalid;
353  }
354 
355  bytes_read += (int)COPY_length;
356  _total_bytes_processed += (int)COPY_length;
357  if (_total_bytes_processed > _total_bytes_to_process) {
358  express_cat.error()
359  << "Runaway patch file.\n";
360  return EU_error_file_invalid;
361  }
362 
363  // if there are bytes to copy, read them from original file and write them
364  // to output
365  if (0 != COPY_length) {
366  // read copy offset
367  nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
368  COPY_offset = patch_reader.get_int32();
369  if (_patch_stream->fail()) {
370  express_cat.error()
371  << "Truncated patch file.\n";
372  return EU_error_file_invalid;
373  }
374 
375  // seek to the copy source pos
376  if (_version_number < 2) {
377  _origfile_stream->seekg(COPY_offset, ios::beg);
378  } else {
379  _origfile_stream->seekg(COPY_offset, ios::cur);
380  }
381  if (_origfile_stream->fail()) {
382  express_cat.error()
383  << "Invalid copy offset in patch file.\n";
384  return EU_error_file_invalid;
385  }
386 
387  if (express_cat.is_spam()) {
388  express_cat.spam()
389  << "COPY: " << COPY_length << " bytes from offset "
390  << COPY_offset << " (from " << _origfile_stream->tellg()
391  << " to " << _write_stream.tellp() << ")"
392  << endl;
393  }
394 
395  // read the copy bytes from original file and write them to output
396  uint32_t bytes_left = (uint32_t)COPY_length;
397 
398  while (bytes_left > 0) {
399  uint32_t bytes_this_time = (uint32_t) min(bytes_left, (uint32_t) buflen);
400  _origfile_stream->read(_buffer->_buffer, bytes_this_time);
401  if (_origfile_stream->fail()) {
402  express_cat.error()
403  << "Invalid copy length in patch file.\n";
404  return EU_error_file_invalid;
405  }
406  _write_stream.write(_buffer->_buffer, bytes_this_time);
407  bytes_left -= bytes_this_time;
408  }
409  }
410 
411  // if we got a pair of zero-length ADD and COPY blocks, we're done
412  if ((0 == ADD_length) && (0 == COPY_length)) {
413  cleanup();
414 
415  if (express_cat.is_debug()) {
416  express_cat.debug()
417  // << "result file = " << _result_file_length
418  << " total bytes = " << _total_bytes_processed << endl;
419  }
420 
421  // check the MD5 from the patch file against the newly patched file
422  {
423  HashVal MD5_actual;
424  MD5_actual.hash_file(_output_file);
425  if (_MD5_ofResult != MD5_actual) {
426  // Whoops, patching screwed up somehow.
427  if (_origfile_stream != nullptr) {
429  vfs->close_read_file(_origfile_stream);
430  _origfile_stream = nullptr;
431  }
432  _write_stream.close();
433 
434  express_cat.info()
435  << "Patching produced incorrect checksum. Got:\n"
436  << " " << MD5_actual
437  << "\nExpected:\n"
438  << " " << _MD5_ofResult
439  << "\n";
440 
441  // This is a fine time to double-check the starting checksum.
442  if (!has_source_hash()) {
443  express_cat.info()
444  << "No source hash in patch file to verify.\n";
445  } else {
446  HashVal MD5_orig;
447  MD5_orig.hash_file(_orig_file);
448  if (MD5_orig != get_source_hash()) {
449  express_cat.info()
450  << "Started from incorrect source file. Got:\n"
451  << " " << MD5_orig
452  << "\nExpected:\n"
453  << " " << get_source_hash()
454  << "\n";
455  } else {
456  express_cat.info()
457  << "Started from correct source file:\n"
458  << " " << MD5_orig
459  << "\n";
460  }
461  }
462 
463  // delete the temp file and the patch file
464  if (_rename_output_to_orig) {
465  _output_file.unlink();
466  }
467  if (_delete_patchfile) {
468  _patch_file.unlink();
469  }
470  // return "invalid checksum"
471  return EU_error_invalid_checksum;
472  }
473  }
474 
475  // delete the patch file
476  if (_delete_patchfile) {
477  _patch_file.unlink();
478  }
479 
480  // rename the temp file to the original file name
481  if (_rename_output_to_orig) {
482  _orig_file.unlink();
483  if (!_output_file.rename_to(_orig_file)) {
484  express_cat.error()
485  << "Patchfile::run() failed to rename temp file to: " << _orig_file
486  << endl;
487  return EU_error_write_file_rename;
488  }
489  }
490 
491  return EU_success;
492  }
493  }
494 
495  return EU_ok;
496 }
497 
498 /**
499  * Patches the entire file in one call returns true on success and false on
500  * error
501  *
502  * This version will delete the patch file and overwrite the original file.
503  */
504 bool Patchfile::
505 apply(Filename &patch_file, Filename &file) {
506  int ret = initiate(patch_file, file);
507  if (ret < 0)
508  return false;
509  for (;;) {
510  ret = run();
511  if (ret == EU_success)
512  return true;
513  if (ret < 0)
514  return false;
515  }
516  return false;
517 }
518 
519 /**
520  * Patches the entire file in one call returns true on success and false on
521  * error
522  *
523  * This version will not delete any files.
524  */
525 bool Patchfile::
526 apply(Filename &patch_file, Filename &orig_file, const Filename &target_file) {
527  int ret = initiate(patch_file, orig_file, target_file);
528  if (ret < 0)
529  return false;
530  for (;;) {
531  ret = run();
532  if (ret == EU_success)
533  return true;
534  if (ret < 0)
535  return false;
536  }
537  return false;
538 }
539 
540 
541 /**
542  * Reads the header and leaves the patch file open.
543  */
544 int Patchfile::
545 internal_read_header(const Filename &patch_file) {
546  // Open the patch file for read
548  nassertr(_patch_stream == nullptr, EU_error_abort);
549  _patch_file = patch_file;
550  _patch_file.set_binary();
551  _patch_stream = vfs->open_read_file(_patch_file, true);
552  if (_patch_stream == nullptr) {
553  express_cat.error()
554  << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
555  return get_write_error();
556  }
557 
558  // read header, make sure the patch file is valid
559  StreamReader patch_reader(*_patch_stream);
560 
561  // check the magic number
562  nassertr(_buffer->get_length() >= _v0_header_length, false);
563  uint32_t magic_number = patch_reader.get_uint32();
564  if (magic_number != _magic_number && magic_number != _v0_magic_number) {
565  express_cat.error()
566  << "Invalid patch file: " << _patch_file << endl;
567  return EU_error_file_invalid;
568  }
569 
570  _version_number = 0;
571  if (magic_number != _v0_magic_number) {
572  _version_number = patch_reader.get_uint16();
573  }
574  if (_version_number > _current_version) {
575  express_cat.error()
576  << "Can't read version " << _version_number << " patch files: "
577  << _patch_file << endl;
578  return EU_error_file_invalid;
579  }
580 
581  if (_version_number >= 1) {
582  // Get the length of the source file.
583  /*uint32_t source_file_length =*/ patch_reader.get_uint32();
584 
585  // get the MD5 of the source file.
586  _MD5_ofSource.read_stream(patch_reader);
587  }
588 
589  // get the length of the patched result file
590  _total_bytes_to_process = patch_reader.get_uint32();
591 
592  // get the MD5 of the resultant patched file
593  _MD5_ofResult.read_stream(patch_reader);
594 
595  express_cat.debug()
596  << "Patchfile::initiate() - valid patchfile" << endl;
597 
598  return EU_success;
599 }
600 
601 // PATCH FILE BUILDING MEMBER FUNCTIONS
602 
603 /**
604  *
605  */
606 uint32_t Patchfile::
607 calc_hash(const char *buffer) {
608 #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
609  HashVal hash;
610  hash.hash_buffer(buffer, _footprint_length);
611 
612  // cout << uint16_t(hash.get_value(0)) << " ";
613 
614  return uint16_t(hash.get_value(0));
615 #else
616  uint32_t hash_value = 0;
617 
618  for(int i = 0; i < (int)_footprint_length; i++) {
619  // this is probably not such a good hash. to be replaced --> TRIED MD5,
620  // was not worth it for the execution-time hit on 800Mhz PC
621  hash_value ^= uint32_t(*buffer) << ((i * 2) % Patchfile::_HASH_BITS);
622  buffer++;
623  }
624 
625  // use the bits that overflowed past the end of the hash bit range (this is
626  // intended for _HASH_BITS == 24)
627  hash_value ^= (hash_value >> Patchfile::_HASH_BITS);
628 
629  // cout << hash_value << " ";
630 
631  return hash_value & _HASH_MASK;
632 #endif
633 }
634 
635 /**
636  *
637  * The hash and link tables allow for a quick, linear search of all locations
638  * in the file that begin with a particular sequence of bytes, or "footprint."
639  *
640  * The hash table is a table of offsets into the file, with one entry for
641  * every possible footprint hash value. For a hash of a footprint, the entry
642  * at the offset of the hash value provides an initial location in the file
643  * that has a matching footprint.
644  *
645  * The link table is a large linked list of file offsets, with one entry for
646  * every byte in the file. Each offset in the link table will point to
647  * another offset that has the same footprint at the corresponding offset in
648  * the actual file. Starting with an offset taken from the hash table, one
649  * can rapidly produce a list of offsets that all have the same footprint.
650  */
651 void Patchfile::
652 build_hash_link_tables(const char *buffer_orig, uint32_t length_orig,
653  uint32_t *hash_table, uint32_t *link_table) {
654 
655  uint32_t i;
656 
657  // clear hash table
658  for(i = 0; i < _HASHTABLESIZE; i++) {
659  hash_table[i] = _NULL_VALUE;
660  }
661 
662  // clear link table
663  for(i = 0; i < length_orig; i++) {
664  link_table[i] = _NULL_VALUE;
665  }
666 
667  if(length_orig < _footprint_length) return;
668 
669  // run through original file and hash each footprint
670  for(i = 0; i < (length_orig - _footprint_length); i++) {
671 
672  uint32_t hash_value = calc_hash(&buffer_orig[i]);
673 
674  // we must now store this file index in the hash table at the offset of
675  // the hash value
676 
677  // to account for multiple file offsets with identical hash values, there
678  // is a link table with an entry for every footprint in the file. We
679  // create linked lists of offsets in the link table.
680 
681  // first, set the value in the link table for the current offset to
682  // whatever the current list head is (the value in the hash table) (note
683  // that this only works because the hash and link tables both use
684  // _NULL_VALUE to indicate a null index)
685  link_table[i] = hash_table[hash_value];
686 
687  // set the new list head; store the current offset in the hash table at
688  // the offset of the footprint's hash value
689  hash_table[hash_value] = i;
690 
691  /*
692  if (_NULL_VALUE == hash_table[hash_value]) {
693  // hash entry is empty, store this offset
694  hash_table[hash_value] = i;
695  } else {
696  // hash entry is taken, go to the link table
697  uint32_t link_offset = hash_table[hash_value];
698 
699  while (_NULL_VALUE != link_table[link_offset]) {
700  link_offset = link_table[link_offset];
701  }
702  link_table[link_offset] = i;
703  }
704  */
705  }
706 }
707 
708 /**
709  *
710  * This function calculates the length of a match between two strings of bytes
711  */
712 uint32_t Patchfile::
713 calc_match_length(const char* buf1, const char* buf2, uint32_t max_length,
714  uint32_t min_length) {
715  // early out: look ahead and sample the end of the minimum range
716  if (min_length > 2) {
717  if (min_length >= max_length)
718  return 0;
719  if (buf1[min_length] != buf2[min_length] ||
720  buf1[min_length-1] != buf2[min_length-1] ||
721  buf1[min_length-2] != buf2[min_length-2]) {
722  return 0;
723  }
724  }
725 
726  uint32_t length = 0;
727  while ((length < max_length) && (*buf1 == *buf2)) {
728  buf1++, buf2++, length++;
729  }
730  return length;
731 }
732 
733 /**
734  *
735  * This function will find the longest string in the original file that
736  * matches a string in the new file.
737  */
738 void Patchfile::
739 find_longest_match(uint32_t new_pos, uint32_t &copy_pos, uint16_t &copy_length,
740  uint32_t *hash_table, uint32_t *link_table, const char* buffer_orig,
741  uint32_t length_orig, const char* buffer_new, uint32_t length_new) {
742 
743  // set length to a safe value
744  copy_length = 0;
745 
746  // get offset of matching string (in orig file) from hash table
747  uint32_t hash_value = calc_hash(&buffer_new[new_pos]);
748 
749  // if no match, bail
750  if (_NULL_VALUE == hash_table[hash_value])
751  return;
752 
753  copy_pos = hash_table[hash_value];
754 
755  // calc match length
756  copy_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
757  &buffer_orig[copy_pos],
758  min(min((length_new - new_pos),
759  (length_orig - copy_pos)),
760  _MAX_RUN_LENGTH),
761  0);
762 
763  // run through link table, see if we find any longer matches
764  uint32_t match_offset;
765  uint16_t match_length;
766  match_offset = link_table[copy_pos];
767 
768  while (match_offset != _NULL_VALUE) {
769  match_length = (uint16_t)calc_match_length(&buffer_new[new_pos],
770  &buffer_orig[match_offset],
771  min(min((length_new - new_pos),
772  (length_orig - match_offset)),
773  _MAX_RUN_LENGTH),
774  copy_length);
775 
776  // have we found a longer match?
777  if (match_length > copy_length) {
778  copy_pos = match_offset;
779  copy_length = match_length;
780  }
781 
782  // traverse the link table
783  match_offset = link_table[match_offset];
784  }
785 }
786 
787 /**
788  *
789  */
790 void Patchfile::
791 emit_ADD(ostream &write_stream, uint32_t length, const char* buffer) {
792  nassertv(length == (uint16_t)length); //we only write a uint16
793 
794  if (express_cat.is_spam()) {
795  express_cat.spam()
796  << "ADD: " << length << " (to " << _add_pos << ")" << endl;
797  }
798 
799  // write ADD length
800  StreamWriter patch_writer(write_stream);
801  patch_writer.add_uint16((uint16_t)length);
802 
803  // if there are bytes to add, add them
804  if (length > 0) {
805  patch_writer.append_data(buffer, (uint16_t)length);
806  }
807 
808  _add_pos += length;
809 }
810 
811 /**
812  *
813  */
814 void Patchfile::
815 emit_COPY(ostream &write_stream, uint32_t length, uint32_t copy_pos) {
816  nassertv(length == (uint16_t)length); //we only write a uint16
817 
818  int32_t offset = (int)copy_pos - (int)_last_copy_pos;
819  if (express_cat.is_spam()) {
820  express_cat.spam()
821  << "COPY: " << length << " bytes from offset " << offset
822  << " (from " << copy_pos << " to " << _add_pos << ")" << endl;
823  }
824 
825  // write COPY length
826  StreamWriter patch_writer(write_stream);
827  patch_writer.add_uint16((uint16_t)length);
828 
829  if ((uint16_t)length != 0) {
830  // write COPY offset
831  patch_writer.add_int32(offset);
832  _last_copy_pos = copy_pos + length;
833  }
834 
835  _add_pos += length;
836 }
837 
838 /**
839  * Emits an add/copy pair. If necessary, repeats the pair as needed to work
840  * around the 16-bit chunk size limit.
841  */
842 void Patchfile::
843 emit_add_and_copy(ostream &write_stream,
844  uint32_t add_length, const char *add_buffer,
845  uint32_t copy_length, uint32_t copy_pos) {
846  if (add_length == 0 && copy_length == 0) {
847  // Don't accidentally emit a termination code.
848  return;
849  }
850 
851  static const uint16_t max_write = 65535;
852  while (add_length > max_write) {
853  // Overflow. This chunk is too large to fit into a single ADD block, so
854  // we have to write it as multiple ADDs.
855  emit_ADD(write_stream, max_write, add_buffer);
856  add_buffer += max_write;
857  add_length -= max_write;
858  emit_COPY(write_stream, 0, 0);
859  }
860 
861  emit_ADD(write_stream, add_length, add_buffer);
862 
863  while (copy_length > max_write) {
864  // Overflow.
865  emit_COPY(write_stream, max_write, copy_pos);
866  copy_pos += max_write;
867  copy_length -= max_write;
868  emit_ADD(write_stream, 0, nullptr);
869  }
870 
871  emit_COPY(write_stream, copy_length, copy_pos);
872 }
873 
874 /**
875  * Potentially emits one or more add/copy pairs. The current state is saved,
876  * so as to minimize wasted emits from consecutive adds or copies.
877  */
878 void Patchfile::
879 cache_add_and_copy(ostream &write_stream,
880  uint32_t add_length, const char *add_buffer,
881  uint32_t copy_length, uint32_t copy_pos) {
882  if (add_length != 0) {
883  if (_cache_copy_length != 0) {
884  // Have to flush.
885  cache_flush(write_stream);
886  }
887  // Add the string to the current cache.
888  _cache_add_data += string(add_buffer, add_length);
889  }
890 
891  if (copy_length != 0) {
892  if (_cache_copy_length == 0) {
893  // Start a new copy phase.
894  _cache_copy_start = copy_pos;
895  _cache_copy_length = copy_length;
896 
897  } else if (_cache_copy_start + _cache_copy_length == copy_pos) {
898  // We can just tack on the copy to what we've already got.
899  _cache_copy_length += copy_length;
900 
901  } else {
902  // It's a discontinuous copy. We have to flush.
903  cache_flush(write_stream);
904  _cache_copy_start = copy_pos;
905  _cache_copy_length = copy_length;
906  }
907  }
908 }
909 
910 /**
911  * Closes any copy or add phases that are still open after a previous call to
912  * cache_add_and_copy().
913  */
914 void Patchfile::
915 cache_flush(ostream &write_stream) {
916  emit_add_and_copy(write_stream,
917  _cache_add_data.size(), _cache_add_data.data(),
918  _cache_copy_length, _cache_copy_start);
919  _cache_add_data = string();
920  _cache_copy_length = 0;
921 }
922 
923 
924 /**
925  *
926  * Writes the patchfile header.
927  */
928 void Patchfile::
929 write_header(ostream &write_stream,
930  istream &stream_orig, istream &stream_new) {
931  // prepare to write the patch file header
932 
933  // write the patch file header
934  StreamWriter patch_writer(write_stream);
935  patch_writer.add_uint32(_magic_number);
936  patch_writer.add_uint16(_current_version);
937 
938  stream_orig.seekg(0, ios::end);
939  streampos source_file_length = stream_orig.tellg();
940  patch_writer.add_uint32((uint32_t)source_file_length);
941 
942  // calc MD5 of original file
943  _MD5_ofSource.hash_stream(stream_orig);
944  // add it to the header
945  _MD5_ofSource.write_stream(patch_writer);
946 
947  if (express_cat.is_debug()) {
948  express_cat.debug()
949  << "Orig: " << _MD5_ofSource << "\n";
950  }
951 
952  stream_new.seekg(0, ios::end);
953  streampos result_file_length = stream_new.tellg();
954  patch_writer.add_uint32((uint32_t)result_file_length);
955 
956  // calc MD5 of resultant patched file
957  _MD5_ofResult.hash_stream(stream_new);
958  // add it to the header
959  _MD5_ofResult.write_stream(patch_writer);
960 
961  if (express_cat.is_debug()) {
962  express_cat.debug()
963  << " New: " << _MD5_ofResult << "\n";
964  }
965 }
966 
967 /**
968  * Writes the patchfile terminator.
969  */
970 void Patchfile::
971 write_terminator(ostream &write_stream) {
972  cache_flush(write_stream);
973  // write terminator (null ADD, null COPY)
974  emit_ADD(write_stream, 0, nullptr);
975  emit_COPY(write_stream, 0, 0);
976 }
977 
978 /**
979  * Computes the patches for the entire file (if it is not a multifile) or for
980  * a single subfile (if it is)
981  *
982  * Returns true if successful, false on error.
983  */
984 bool Patchfile::
985 compute_file_patches(ostream &write_stream,
986  uint32_t offset_orig, uint32_t offset_new,
987  istream &stream_orig, istream &stream_new) {
988  // read in original file
989  stream_orig.seekg(0, ios::end);
990  nassertr(stream_orig, false);
991  uint32_t source_file_length = stream_orig.tellg();
992  if (express_cat.is_debug()) {
993  express_cat.debug()
994  << "Allocating " << source_file_length << " bytes to read orig\n";
995  }
996 
997  char *buffer_orig = (char *)PANDA_MALLOC_ARRAY(source_file_length);
998  stream_orig.seekg(0, ios::beg);
999  stream_orig.read(buffer_orig, source_file_length);
1000 
1001  // read in new file
1002  stream_new.seekg(0, ios::end);
1003  uint32_t result_file_length = stream_new.tellg();
1004  nassertr(stream_new, false);
1005  if (express_cat.is_debug()) {
1006  express_cat.debug()
1007  << "Allocating " << result_file_length << " bytes to read new\n";
1008  }
1009 
1010  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(result_file_length);
1011  stream_new.seekg(0, ios::beg);
1012  stream_new.read(buffer_new, result_file_length);
1013 
1014  // allocate hashlink tables
1015  if (_hash_table == nullptr) {
1016  if (express_cat.is_debug()) {
1017  express_cat.debug()
1018  << "Allocating hashtable of size " << _HASHTABLESIZE << " * 4\n";
1019  }
1020  _hash_table = (uint32_t *)PANDA_MALLOC_ARRAY(_HASHTABLESIZE * sizeof(uint32_t));
1021  }
1022 
1023  if (express_cat.is_debug()) {
1024  express_cat.debug()
1025  << "Allocating linktable of size " << source_file_length << " * 4\n";
1026  }
1027 
1028  uint32_t *link_table = (uint32_t *)PANDA_MALLOC_ARRAY(source_file_length * sizeof(uint32_t));
1029 
1030  // build hash and link tables for original file
1031  build_hash_link_tables(buffer_orig, source_file_length, _hash_table, link_table);
1032 
1033  // run through new file
1034 
1035  uint32_t new_pos = 0;
1036  uint32_t start_pos = new_pos; // this is the position for the start of ADD operations
1037 
1038  if(((uint32_t) result_file_length) >= _footprint_length)
1039  {
1040  while (new_pos < (result_file_length - _footprint_length)) {
1041 
1042  // find best match for current position
1043  uint32_t COPY_pos;
1044  uint16_t COPY_length;
1045 
1046  find_longest_match(new_pos, COPY_pos, COPY_length, _hash_table, link_table,
1047  buffer_orig, source_file_length, buffer_new, result_file_length);
1048 
1049  // if no match or match not longer than footprint length, skip to next
1050  // byte
1051  if (COPY_length < _footprint_length) {
1052  // go to next byte
1053  new_pos++;
1054  } else {
1055  // emit ADD for all skipped bytes
1056  int num_skipped = (int)new_pos - (int)start_pos;
1057  if (express_cat.is_spam()) {
1058  express_cat.spam()
1059  << "build: num_skipped = " << num_skipped
1060  << endl;
1061  }
1062  cache_add_and_copy(write_stream, num_skipped, &buffer_new[start_pos],
1063  COPY_length, COPY_pos + offset_orig);
1064  new_pos += (uint32_t)COPY_length;
1065  start_pos = new_pos;
1066  }
1067  }
1068  }
1069 
1070  if (express_cat.is_spam()) {
1071  express_cat.spam()
1072  << "build: result_file_length = " << result_file_length
1073  << " start_pos = " << start_pos
1074  << endl;
1075  }
1076 
1077  // are there still more bytes left in the new file?
1078  if (start_pos != result_file_length) {
1079  // emit ADD for all remaining bytes
1080 
1081  uint32_t remaining_bytes = result_file_length - start_pos;
1082  cache_add_and_copy(write_stream, remaining_bytes, &buffer_new[start_pos],
1083  0, 0);
1084  start_pos += remaining_bytes;
1085  }
1086 
1087  PANDA_FREE_ARRAY(link_table);
1088 
1089  PANDA_FREE_ARRAY(buffer_orig);
1090  PANDA_FREE_ARRAY(buffer_new);
1091 
1092  return true;
1093 }
1094 
1095 /**
1096  * Computes patches for the files, knowing that they are both Panda
1097  * Multifiles. This will build patches one subfile at a time, which can
1098  * potentially be much, much faster for large Multifiles that contain many
1099  * small subfiles.
1100  */
1101 bool Patchfile::
1102 compute_mf_patches(ostream &write_stream,
1103  uint32_t offset_orig, uint32_t offset_new,
1104  istream &stream_orig, istream &stream_new) {
1105  Multifile mf_orig, mf_new;
1106  IStreamWrapper stream_origw(stream_orig);
1107  IStreamWrapper stream_neww(stream_new);
1108  if (!mf_orig.open_read(&stream_origw) ||
1109  !mf_new.open_read(&stream_neww)) {
1110  express_cat.error()
1111  << "Input multifiles appear to be corrupt.\n";
1112  return false;
1113  }
1114 
1115  if (mf_new.needs_repack()) {
1116  express_cat.error()
1117  << "Input multifiles need to be repacked.\n";
1118  return false;
1119  }
1120 
1121  // First, compute the patch for the header index.
1122 
1123  {
1124  ISubStream index_orig(&stream_origw, 0, mf_orig.get_index_end());
1125  ISubStream index_new(&stream_neww, 0, mf_new.get_index_end());
1126  if (!do_compute_patches("", "",
1127  write_stream, offset_orig, offset_new,
1128  index_orig, index_new)) {
1129  return false;
1130  }
1131  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_index_end(), false);
1132  }
1133 
1134  // Now walk through each subfile in the new multifile. If a particular
1135  // subfile exists in both source files, we compute the patches for the
1136  // subfile; for a new subfile, we trivially add it. If a subfile has been
1137  // removed, we simply don't add it (we'll never even notice this case).
1138  int new_num_subfiles = mf_new.get_num_subfiles();
1139  for (int ni = 0; ni < new_num_subfiles; ++ni) {
1140  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)mf_new.get_subfile_internal_start(ni), false);
1141  string name = mf_new.get_subfile_name(ni);
1142  int oi = mf_orig.find_subfile(name);
1143 
1144  if (oi < 0) {
1145  // This is a newly-added subfile. Add it the hard way.
1146  express_cat.info()
1147  << "Adding subfile " << mf_new.get_subfile_name(ni) << "\n";
1148 
1149  streampos new_start = mf_new.get_subfile_internal_start(ni);
1150  size_t new_size = mf_new.get_subfile_internal_length(ni);
1151  char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);
1152  stream_new.seekg(new_start, ios::beg);
1153  stream_new.read(buffer_new, new_size);
1154  cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
1155  PANDA_FREE_ARRAY(buffer_new);
1156 
1157  } else {
1158  // This subfile exists in both the original and the new files. Patch
1159  // it.
1160  streampos orig_start = mf_orig.get_subfile_internal_start(oi);
1161  size_t orig_size = mf_orig.get_subfile_internal_length(oi);
1162 
1163  streampos new_start = mf_new.get_subfile_internal_start(ni);
1164  size_t new_size = mf_new.get_subfile_internal_length(ni);
1165 
1166  if (!patch_subfile(write_stream, offset_orig, offset_new,
1167  mf_new.get_subfile_name(ni),
1168  stream_origw, orig_start, orig_start + (streampos)orig_size,
1169  stream_neww, new_start, new_start + (streampos)new_size)) {
1170  return false;
1171  }
1172  }
1173  }
1174 
1175  return true;
1176 }
1177 
#ifdef HAVE_TAR
/**
 * Walks the tar archive in the given stream with libtar and records, in
 * `tar`, where each member's header, data and trailing bytes lie within the
 * stream.  One extra, unnamed entry is appended for whatever follows the last
 * member.
 *
 * libtar reads through the static _tar_istream pointer, which is set for the
 * duration of this call and reset to nullptr before returning.
 *
 * Returns true if the walk ended with th_read() returning 1, false if
 * tar_open() failed or th_read() stopped with any other value (e.g. it is not
 * a tar file).
 */
bool Patchfile::
read_tar(TarDef &tar, istream &stream) {
  // Route all of libtar's I/O through our own callbacks.
  tartype_t callbacks;
  callbacks.openfunc = tar_openfunc;
  callbacks.closefunc = tar_closefunc;
  callbacks.readfunc = tar_readfunc;
  callbacks.writefunc = tar_writefunc;

  stream.seekg(0, ios::beg);
  nassertr(_tar_istream == nullptr, false);
  _tar_istream = &stream;

  TAR *archive;
  if (tar_open(&archive, (char *)"dummy", &callbacks, O_RDONLY, 0, 0) != 0) {
    _tar_istream = nullptr;
    return false;
  }

  // Step through the members one header at a time.  The stream position
  // before and after skipping each member's data tells us where the member
  // starts and ends.
  streampos prev_end = 0;
  int status;
  while ((status = th_read(archive)) == 0) {
    TarSubfile entry;
    entry._name = th_get_pathname(archive);
    entry._header_start = prev_end;
    entry._data_start = stream.tellg();
    entry._data_end = entry._data_start + (streampos)th_get_size(archive);
    tar_skip_regfile(archive);
    entry._end = stream.tellg();
    tar.push_back(entry);

    prev_end = entry._end;
  }

  // Whatever remains after the last member becomes one final, nameless
  // entry.  The stream state is cleared first so the seek to the end works.
  TarSubfile trailer;
  trailer._header_start = prev_end;
  stream.clear();
  stream.seekg(0, ios::end);
  const streampos stream_end = stream.tellg();
  trailer._data_start = stream_end;
  trailer._data_end = stream_end;
  trailer._end = stream_end;
  tar.push_back(trailer);

  tar_close(archive);
  _tar_istream = nullptr;
  return status == 1;
}
#endif  // HAVE_TAR
1237 
#ifdef HAVE_TAR
/**
 * Computes patches for the files, knowing that they are both tar files.  This
 * is similar to compute_mf_patches().
 *
 * The tar indexes should have been built up by a previous call to read_tar().
 * Note that tar_orig is sorted in place by this call; tar_new is walked in
 * the order given.
 *
 * offset_orig and offset_new are added to the entry positions to form the
 * positions handed on to the patch operations.
 *
 * Returns true on success; false if a newly-added entry cannot be read in
 * full or if patching any piece fails.
 */
bool Patchfile::
compute_tar_patches(ostream &write_stream,
                    uint32_t offset_orig, uint32_t offset_new,
                    istream &stream_orig, istream &stream_new,
                    TarDef &tar_orig, TarDef &tar_new) {

  // Sort the orig list by filename, so we can quickly look up files from the
  // new list.
  tar_orig.sort();

  // However, it is important to keep the new list in its original, on-disk
  // order.

  // Walk through each subfile in the new tar file.  If a particular subfile
  // exists in both source files, we compute the patches for the subfile; for
  // a new subfile, we trivially add it.  If a subfile has been removed, we
  // simply don't add it (we'll never even notice this case).

  IStreamWrapper stream_origw(stream_orig);
  IStreamWrapper stream_neww(stream_new);

  TarDef::const_iterator ni;
  streampos last_pos = 0;
  for (ni = tar_new.begin(); ni != tar_new.end(); ++ni) {
    const TarSubfile &sf_new = (*ni);
    // Entries must be contiguous: each begins where the previous one ended.
    nassertr(sf_new._header_start == last_pos, false);

    TarDef::const_iterator oi = tar_orig.find(sf_new);

    if (oi == tar_orig.end()) {
      // This is a newly-added subfile.  Add it the hard way.
      express_cat.info()
        << "Adding subfile " << sf_new._name << "\n";

      streampos new_start = sf_new._header_start;
      size_t new_size = sf_new._end - sf_new._header_start;
      char *buffer_new = (char *)PANDA_MALLOC_ARRAY(new_size);

      // Reset any stale error state first; seekg() is a no-op on a stream
      // whose failbit is set.
      stream_new.clear();
      stream_new.seekg(new_start, ios::beg);
      stream_new.read(buffer_new, new_size);
      if ((size_t)stream_new.gcount() != new_size) {
        // A short read would otherwise put uninitialized bytes in the patch.
        express_cat.error()
          << "Unable to read subfile " << sf_new._name
          << " from new tar file.\n";
        PANDA_FREE_ARRAY(buffer_new);
        return false;
      }

      cache_add_and_copy(write_stream, new_size, buffer_new, 0, 0);
      PANDA_FREE_ARRAY(buffer_new);

    } else {
      // This subfile exists in both the original and the new files.  Patch
      // it.
      const TarSubfile &sf_orig = (*oi);

      // We patch the header and data of the file separately, so we can
      // accurately detect nested multifiles.  The extra data at the end of
      // the file (possibly introduced by a tar file's blocking) is the
      // footer, which is also patched separately.
      if (!patch_subfile(write_stream, offset_orig, offset_new, "",
                         stream_origw, sf_orig._header_start, sf_orig._data_start,
                         stream_neww, sf_new._header_start, sf_new._data_start)) {
        return false;
      }

      if (!patch_subfile(write_stream, offset_orig, offset_new, sf_new._name,
                         stream_origw, sf_orig._data_start, sf_orig._data_end,
                         stream_neww, sf_new._data_start, sf_new._data_end)) {
        return false;
      }

      if (!patch_subfile(write_stream, offset_orig, offset_new, "",
                         stream_origw, sf_orig._data_end, sf_orig._end,
                         stream_neww, sf_new._data_end, sf_new._end)) {
        return false;
      }
    }

    last_pos = sf_new._end;
  }

  return true;
}
#endif  // HAVE_TAR
1321 
#ifdef HAVE_TAR
/**
 * The "open" callback handed to libtar.  All arguments are ignored and 0 is
 * always returned.
 */
int Patchfile::
tar_openfunc(const char *, int, ...) {
  // The istream we read from is already open, so there is nothing to do.
  return 0;
}
#endif  // HAVE_TAR
1334 
#ifdef HAVE_TAR
/**
 * The "close" callback handed to libtar.  The argument is ignored and 0 is
 * always returned.
 */
int Patchfile::
tar_closefunc(int) {
  // Nothing was opened by tar_openfunc(), so nothing needs closing here.
  return 0;
}
#endif  // HAVE_TAR
1346 
#ifdef HAVE_TAR
/**
 * The "read" callback handed to libtar.  Instead of using low-level Unix
 * I/O, it reads up to nbytes bytes from the istream in _tar_istream into
 * buffer, and returns the number of bytes actually read.  The descriptor
 * argument is ignored.
 */
ssize_t Patchfile::
tar_readfunc(int, void *buffer, size_t nbytes) {
  nassertr(_tar_istream != nullptr, 0);

  istream &source = *_tar_istream;
  source.read((char *)buffer, nbytes);
  const ssize_t num_read = (ssize_t)source.gcount();
  return num_read;
}
#endif  // HAVE_TAR
1359 
#ifdef HAVE_TAR
/**
 * The "write" callback handed to libtar.  All arguments are ignored; the
 * call trips an assertion and returns -1.
 */
ssize_t Patchfile::
tar_writefunc(int, const void *, size_t) {
  // libtar is used here for reading only, so reaching this callback is an
  // error.
  nassertr(false, -1);
  return -1;
}
#endif  // HAVE_TAR
1373 
1374 /**
1375  *
1376  * This implementation uses the "greedy differencing algorithm" described in
1377  * the masters thesis "Differential Compression: A Generalized Solution for
1378  * Binary Files" by Randal C. Burns (p.13). For an original file of size M and
1379  * a new file of size N, this algorithm is O(M) in space and O(M*N) (worst-
1380  * case) in time. return false on error
1381  */
1382 bool Patchfile::
1383 build(Filename file_orig, Filename file_new, Filename patch_name) {
1384  patch_name.set_binary();
1385 
1386  // Open the original file for read
1387  pifstream stream_orig;
1388  file_orig.set_binary();
1389  if (!file_orig.open_read(stream_orig)) {
1390  express_cat.error()
1391  << "Patchfile::build() - Failed to open file: " << file_orig << endl;
1392  return false;
1393  }
1394 
1395  // Open the new file for read
1396  pifstream stream_new;
1397  file_new.set_binary();
1398  if (!file_new.open_read(stream_new)) {
1399  express_cat.error()
1400  << "Patchfile::build() - Failed to open file: " << file_new << endl;
1401  return false;
1402  }
1403 
1404  // Open patch file for write
1405  pofstream write_stream;
1406  if (!patch_name.open_write(write_stream)) {
1407  express_cat.error()
1408  << "Patchfile::build() - Failed to open file: " << patch_name << endl;
1409  return false;
1410  }
1411 
1412  _last_copy_pos = 0;
1413  _add_pos = 0;
1414  _cache_add_data = string();
1415  _cache_copy_start = 0;
1416  _cache_copy_length = 0;
1417 
1418  write_header(write_stream, stream_orig, stream_new);
1419 
1420  if (!do_compute_patches(file_orig, file_new,
1421  write_stream, 0, 0,
1422  stream_orig, stream_new)) {
1423  return false;
1424  }
1425 
1426  write_terminator(write_stream);
1427 
1428  if (express_cat.is_debug()) {
1429  express_cat.debug()
1430  << "Patch file will generate " << _add_pos << "-byte file.\n";
1431  }
1432 
1433 #ifndef NDEBUG
1434  {
1435  // Make sure the resulting file would be the right size.
1436  stream_new.seekg(0, ios::end);
1437  streampos result_file_length = stream_new.tellg();
1438  nassertr(_add_pos == result_file_length, false);
1439  }
1440 #endif // NDEBUG
1441 
1442  return (_last_copy_pos != 0);
1443 }
1444 
1445 /**
1446  * Computes the patches for the indicated A to B files, or subfiles. Checks
1447  * for multifiles or tar files before falling back to whole-file patching.
1448  */
1449 bool Patchfile::
1450 do_compute_patches(const Filename &file_orig, const Filename &file_new,
1451  ostream &write_stream,
1452  uint32_t offset_orig, uint32_t offset_new,
1453  istream &stream_orig, istream &stream_new) {
1454  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new, false);
1455 
1456  // Check whether our input files are Panda multifiles or tar files.
1457  bool is_multifile = false;
1458 #ifdef HAVE_TAR
1459  bool is_tarfile = false;
1460  TarDef tar_orig, tar_new;
1461 #endif // HAVE_TAR
1462 
1463  if (_allow_multifile) {
1464  if (strstr(file_orig.get_basename().c_str(), ".mf") != nullptr ||
1465  strstr(file_new.get_basename().c_str(), ".mf") != nullptr) {
1466  // Read the first n bytes of both files for the Multifile magic number.
1467  string magic_number = Multifile::get_magic_number();
1468  char *buffer = (char *)PANDA_MALLOC_ARRAY(magic_number.size());
1469  stream_orig.seekg(0, ios::beg);
1470  stream_orig.read(buffer, magic_number.size());
1471 
1472  if (stream_orig.gcount() == (int)magic_number.size() &&
1473  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1474  stream_new.seekg(0, ios::beg);
1475  stream_new.read(buffer, magic_number.size());
1476  if (stream_new.gcount() == (int)magic_number.size() &&
1477  memcmp(buffer, magic_number.data(), magic_number.size()) == 0) {
1478  is_multifile = true;
1479  }
1480  }
1481  PANDA_FREE_ARRAY(buffer);
1482  }
1483 #ifdef HAVE_TAR
1484  if (strstr(file_orig.get_basename().c_str(), ".tar") != nullptr ||
1485  strstr(file_new.get_basename().c_str(), ".tar") != nullptr) {
1486  if (read_tar(tar_orig, stream_orig) &&
1487  read_tar(tar_new, stream_new)) {
1488  is_tarfile = true;
1489  }
1490  }
1491 #endif // HAVE_TAR
1492  }
1493 
1494  if (is_multifile) {
1495  if (express_cat.is_debug()) {
1496  express_cat.debug()
1497  << file_orig.get_basename() << " appears to be a Panda Multifile.\n";
1498  }
1499  if (!compute_mf_patches(write_stream, offset_orig, offset_new,
1500  stream_orig, stream_new)) {
1501  return false;
1502  }
1503 #ifdef HAVE_TAR
1504  } else if (is_tarfile) {
1505  if (express_cat.is_debug()) {
1506  express_cat.debug()
1507  << file_orig.get_basename() << " appears to be a tar file.\n";
1508  }
1509  if (!compute_tar_patches(write_stream, offset_orig, offset_new,
1510  stream_orig, stream_new, tar_orig, tar_new)) {
1511  return false;
1512  }
1513 #endif // HAVE_TAR
1514  } else {
1515  if (express_cat.is_debug()) {
1516  express_cat.debug()
1517  << file_orig.get_basename() << " is not a multifile.\n";
1518  }
1519  if (!compute_file_patches(write_stream, offset_orig, offset_new,
1520  stream_orig, stream_new)) {
1521  return false;
1522  }
1523  }
1524 
1525  return true;
1526 }
1527 
1528 /**
1529  * Generates patches for a nested subfile of a Panda Multifile or a tar file.
1530  */
1531 bool Patchfile::
1532 patch_subfile(ostream &write_stream,
1533  uint32_t offset_orig, uint32_t offset_new,
1534  const Filename &filename,
1535  IStreamWrapper &stream_orig, streampos orig_start, streampos orig_end,
1536  IStreamWrapper &stream_new, streampos new_start, streampos new_end) {
1537  nassertr(_add_pos + _cache_add_data.size() + _cache_copy_length == offset_new + (uint32_t)new_start, false);
1538 
1539  size_t new_size = new_end - new_start;
1540  size_t orig_size = orig_end - orig_start;
1541 
1542  ISubStream subfile_orig(&stream_orig, orig_start, orig_end);
1543  ISubStream subfile_new(&stream_new, new_start, new_end);
1544 
1545  bool is_unchanged = false;
1546  if (orig_size == new_size) {
1547  HashVal hash_orig, hash_new;
1548  hash_orig.hash_stream(subfile_orig);
1549  hash_new.hash_stream(subfile_new);
1550 
1551  if (hash_orig == hash_new) {
1552  // Actually, the subfile is unchanged; just emit it.
1553  is_unchanged = true;
1554  }
1555  }
1556 
1557  if (is_unchanged) {
1558  if (express_cat.is_debug() && !filename.empty()) {
1559  express_cat.debug()
1560  << "Keeping subfile " << filename << "\n";
1561  }
1562  cache_add_and_copy(write_stream, 0, nullptr,
1563  orig_size, offset_orig + orig_start);
1564 
1565  } else {
1566  if (!filename.empty()) {
1567  express_cat.info()
1568  << "Patching subfile " << filename << "\n";
1569  }
1570 
1571  if (!do_compute_patches(filename, filename, write_stream,
1572  offset_orig + orig_start, offset_new + new_start,
1573  subfile_orig, subfile_new)) {
1574  return false;
1575  }
1576  }
1577 
1578  return true;
1579 }
1580 
1581 #endif // HAVE_OPENSSL
Definition: buffer.h:24
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:39
std::string get_basename() const
Returns the basename part of the filename.
Definition: filename.I:367
bool open_read(std::ifstream &stream) const
Opens the indicated ifstream for reading the file, if possible.
Definition: filename.cxx:1863
void set_binary()
Indicates that the filename represents a binary file.
Definition: filename.I:414
static Filename temporary(const std::string &dirname, const std::string &prefix, const std::string &suffix=std::string(), Type type=T_general)
Generates a temporary filename within the indicated directory, using the indicated prefix.
Definition: filename.cxx:424
bool open_write(std::ofstream &stream, bool truncate=true) const
Opens the indicated ofstream for writing the file, if possible.
Definition: filename.cxx:1899
Stores a 128-bit value that represents the hashed contents (typically MD5) of a file or buffer.
Definition: hashVal.h:31
This class provides a locking wrapper around an arbitrary istream pointer.
Definition: streamWrapper.h:59
An istream object that presents a subwindow into another istream.
Definition: subStream.h:30
A file that contains a set of files.
Definition: multifile.h:37
std::streampos get_index_end() const
Returns the first byte that is guaranteed to follow any index byte already written to disk in the Mul...
Definition: multifile.cxx:1555
get_num_subfiles
Returns the number of subfiles within the Multifile.
Definition: multifile.h:118
int find_subfile(const std::string &subfile_name) const
Returns the index of the subfile with the indicated name, or -1 if the named subfile is not within th...
Definition: multifile.cxx:1367
bool needs_repack() const
Returns true if the Multifile index is suboptimal and should be repacked.
Definition: multifile.I:55
std::streampos get_subfile_internal_start(int index) const
Returns the starting byte position within the Multifile at which the indicated subfile begins.
Definition: multifile.cxx:1567
size_t get_subfile_internal_length(int index) const
Returns the number of bytes the indicated subfile consumes within the archive.
Definition: multifile.cxx:1580
get_magic_number
Returns a string with the first n bytes written to a Multifile, to identify it as a Multifile.
Definition: multifile.h:140
get_subfile_name
Returns the name of the nth subfile.
Definition: multifile.h:118
A class to read sequential binary data directly from an istream.
Definition: streamReader.h:28
A StreamWriter object is used to write sequential binary data directly to an ostream.
Definition: streamWriter.h:29
A hierarchy of directories and files that appears to be one continuous file system,...
static void close_read_file(std::istream *stream)
Closes a file opened by a previous call to open_read_file().
std::istream * open_read_file(const Filename &filename, bool auto_unwrap) const
Convenience function; returns a newly allocated istream if the file exists and can be read,...
static VirtualFileSystem * get_global_ptr()
Returns the default global VirtualFileSystem.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.