Panda3D
httpChannel.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file httpChannel.cxx
10  * @author drose
11  * @date 2002-09-24
12  */
13 
14 #include "httpChannel.h"
15 #include "httpClient.h"
16 #include "httpCookie.h"
17 #include "bioStream.h"
18 #include "chunkedStream.h"
19 #include "identityStream.h"
20 #include "config_downloader.h"
21 #include "virtualFileSystem.h"
22 #include "virtualFileMountHTTP.h"
23 #include "ramfile.h"
24 #include "globPattern.h"
25 
26 #include <stdio.h>
27 
28 #ifdef HAVE_OPENSSL
29 
30 #include "openSSLWrapper.h"
31 
32 #if defined(WIN32_VC) || defined(WIN64_VC)
33  #include <WinSock2.h>
34  #include <windows.h> // for select()
35  #undef X509_NAME
36 #endif // WIN32_VC
37 
38 using std::istream;
39 using std::min;
40 using std::ostream;
41 using std::ostringstream;
42 using std::string;
43 
44 TypeHandle HTTPChannel::_type_handle;
45 
46 #define _NOTIFY_HTTP_CHANNEL_ID "[" << this << "] "
47 
48 /**
49  *
50  */
51 HTTPChannel::
52 HTTPChannel(HTTPClient *client) :
53  _client(client)
54 {
55  if (downloader_cat.is_debug()) {
56  downloader_cat.debug()
57  << _NOTIFY_HTTP_CHANNEL_ID
58  << "created.\n";
59  }
60 
61  ConfigVariableDouble extra_ssl_handshake_time
62  ("extra-ssl-handshake-time", 0.0,
63  PRC_DESC("This specifies how much extra time to try to establish"
64  "the ssl handshake before we bail."));
65  _extra_ssl_handshake_time = extra_ssl_handshake_time;
66  _proxy_next_index = 0;
67  _persistent_connection = false;
68  _allow_proxy = true;
69  _proxy_tunnel = http_proxy_tunnel;
70  _connect_timeout = http_connect_timeout;
71  _http_timeout = http_timeout;
72  _skip_body_size = http_skip_body_size;
73  _idle_timeout = http_idle_timeout;
74  _blocking_connect = false;
75  _download_throttle = download_throttle;
76  _max_bytes_per_second = downloader_byte_rate;
77  _seconds_per_update = downloader_frequency;
78  _max_updates_per_second = 1.0f / _seconds_per_update;
79  _bytes_per_update = int(_max_bytes_per_second * _seconds_per_update);
80 
81  // _nonblocking is true if the socket is actually in non-blocking mode.
82  _nonblocking = false;
83 
84  // _wanted_nonblocking is true if the user specifically requested one of the
85  // non-blocking interfaces. It is false if the socket is only incidentally
86  // non-blocking (for instance, because SIMPLE_THREADS is on).
87  _wanted_nonblocking = false;
88 
89  _want_ssl = false;
90  _proxy_serves_document = false;
91  _proxy_tunnel_now = false;
92  _first_byte_requested = 0;
93  _last_byte_requested = 0;
94  _first_byte_delivered = 0;
95  _last_byte_delivered = 0;
96  _read_index = 0;
97  _expected_file_size = 0;
98  _file_size = 0;
99  _transfer_file_size = 0;
100  _got_expected_file_size = false;
101  _got_file_size = false;
102  _got_transfer_file_size = false;
103  _bytes_downloaded = 0;
104  _bytes_requested = 0;
105  _status_entry = StatusEntry();
106  _response_type = RT_none;
107  _http_version = _client->get_http_version();
108  _http_version_string = _client->get_http_version_string();
109  _content_type = "application/x-www-form-urlencoded";
110  _state = S_new;
111  _done_state = S_new;
112  _started_download = false;
113  _sent_so_far = 0;
114  _body_stream = nullptr;
115  _owns_body_stream = false;
116  _sbio = nullptr;
117  _cipher_list = _client->get_cipher_list();
118  _last_status_code = 0;
119  _last_run_time = 0.0f;
120  _download_to_ramfile = nullptr;
121  _download_to_stream = nullptr;
122 }
123 
124 /**
125  *
126  */
127 HTTPChannel::
128 ~HTTPChannel() {
129  if (downloader_cat.is_debug()) {
130  downloader_cat.debug()
131  << _NOTIFY_HTTP_CHANNEL_ID
132  << "destroyed.\n";
133  }
134 
135  close_connection();
136  reset_download_to();
137 }
138 
139 /**
140  * Returns the string as returned by the server describing the status code for
141  * humans. This may or may not be meaningful.
142  */
143 string HTTPChannel::
144 get_status_string() const {
145  switch (_status_entry._status_code) {
146  case SC_incomplete:
147  return "Connection in progress";
148 
149  case SC_internal_error:
150  return "Internal error";
151 
152  case SC_no_connection:
153  return "No connection";
154 
155  case SC_timeout:
156  return "Timeout on connection";
157 
158  case SC_lost_connection:
159  return "Lost connection";
160 
161  case SC_non_http_response:
162  return "Non-HTTP response";
163 
164  case SC_invalid_http:
165  return "Could not understand HTTP response";
166 
167  case SC_socks_invalid_version:
168  return "Unsupported SOCKS version";
169 
170  case SC_socks_no_acceptable_login_method:
171  return "No acceptable SOCKS login method";
172 
173  case SC_socks_refused:
174  return "SOCKS proxy refused connection";
175 
176  case SC_socks_no_connection:
177  return "SOCKS proxy unable to connect";
178 
179  case SC_ssl_internal_failure:
180  return "SSL internal failure";
181 
182  case SC_ssl_no_handshake:
183  return "No SSL handshake";
184 
185  case SC_http_error_watermark:
186  // This shouldn't be triggered.
187  return "Internal error";
188 
189  case SC_ssl_invalid_server_certificate:
190  return "SSL invalid server certificate";
191 
192  case SC_ssl_unexpected_server:
193  return "Unexpected SSL server";
194 
195  case SC_download_open_error:
196  return "Error opening file";
197 
198  case SC_download_write_error:
199  return "Error writing to disk";
200 
201  case SC_download_invalid_range:
202  return "Invalid subrange requested";
203  }
204 
205  return _status_entry._status_string;
206 }
207 
208 /**
209  * Returns the HTML header value associated with the indicated key, or empty
210  * string if the key was not defined in the message returned by the server.
211  */
212 string HTTPChannel::
213 get_header_value(const string &key) const {
214  Headers::const_iterator hi = _headers.find(downcase(key));
215  if (hi != _headers.end()) {
216  return (*hi).second;
217  }
218  return string();
219 }
220 
221 /**
222  * Returns true if the server has indicated it will close the connection after
223  * this document has been read, or false if it will remain open (and future
224  * documents may be requested on the same connection).
225  */
226 bool HTTPChannel::
227 will_close_connection() const {
228  if (get_http_version() < HTTPEnum::HV_11) {
229  // pre-HTTP 1.1 always closes.
230  return true;
231  }
232 
233  string connection = get_header_value("Connection");
234  if (downcase(connection) == "close") {
235  // The server says it will close.
236  return true;
237  }
238 
239  if (connection.empty() && !get_persistent_connection()) {
240  // The server didn't say, but we asked it to close.
241  return true;
242  }
243 
244  // Assume the server will keep it open.
245  return false;
246 }
247 
248 /**
249  * Returns the size of the file, if it is known. Returns the value set by
250  * set_expected_file_size() if the file size is not known, or 0 if this value
251  * was not set.
252  *
253  * If the file is dynamically generated, the size may not be available until a
254  * read has started (e.g. open_read_body() has been called); and even then it
255  * may increase as more of the file is read due to the nature of HTTP/1.1
256  * requests which can change their minds midstream about how much data they're
257  * sending you.
258  */
259 std::streamsize HTTPChannel::
260 get_file_size() const {
261  if (_got_file_size) {
262  return _file_size;
263  } else if (_got_transfer_file_size) {
264  return _transfer_file_size;
265  } else if (_got_expected_file_size) {
266  return _expected_file_size;
267  } else {
268  return 0;
269  }
270 }
271 
272 /**
273  * Outputs a list of all headers defined by the server to the indicated output
274  * stream.
275  */
276 void HTTPChannel::
277 write_headers(ostream &out) const {
278  Headers::const_iterator hi;
279  for (hi = _headers.begin(); hi != _headers.end(); ++hi) {
280  out << (*hi).first << ": " << (*hi).second << "\n";
281  }
282 }
283 
284 /**
285  * This must be called from time to time when non-blocking I/O is in use. It
286  * checks for data coming in on the socket and writes data out to the socket
287  * when possible, and does whatever processing is required towards completing
288  * the current task.
289  *
290  * The return value is true if the task is still pending (and run() will need
291  * to be called again in the future), or false if the current task is
292  * complete.
293  */
294 bool HTTPChannel::
295 run() {
296  if (downloader_cat.is_spam()) {
297  downloader_cat.spam()
298  << _NOTIFY_HTTP_CHANNEL_ID
299  << "run().\n";
300  }
301 
302  if (_state == _done_state || _state == S_failure) {
303  clear_extra_headers();
304  if (!reached_done_state()) {
305  return false;
306  }
307  }
308 
309  if (_started_download) {
310  if (_wanted_nonblocking && _download_throttle) {
311  double now = TrueClock::get_global_ptr()->get_short_time();
312  double elapsed = now - _last_run_time;
313  if (elapsed < _seconds_per_update) {
314  // Come back later.
315  thread_yield();
316  return true;
317  }
318  int num_potential_updates = (int)(elapsed / _seconds_per_update);
319  _last_run_time = now;
320  _bytes_requested += _bytes_per_update * num_potential_updates;
321  if (downloader_cat.is_spam()) {
322  downloader_cat.spam()
323  << _NOTIFY_HTTP_CHANNEL_ID
324  << "elapsed = " << elapsed << " num_potential_updates = "
325  << num_potential_updates << " bytes_requested = "
326  << _bytes_requested << "\n";
327  }
328  }
329 
330  bool repeat_later = false;
331  switch (_download_dest) {
332  case DD_none:
333  // We're done.
334  break;
335 
336  case DD_file:
337  repeat_later = run_download_to_file();
338  break;
339 
340  case DD_ram:
341  repeat_later = run_download_to_ram();
342  break;
343 
344  case DD_stream:
345  repeat_later = run_download_to_stream();
346  break;
347  }
348  if (repeat_later) {
349  thread_yield();
350  }
351  return repeat_later;
352  }
353 
354  /*
355  if (downloader_cat.is_spam()) {
356  downloader_cat.spam()
357  << _NOTIFY_HTTP_CHANNEL_ID
358  << "begin run(), _state = " << _state << ", _done_state = "
359  << _done_state << "\n";
360  }
361  */
362 
363  if (_state == _done_state) {
364  return reached_done_state();
365  }
366 
367  bool repeat_later;
368  do {
369  // If we're in a state that expects to have a connection already (that is,
370  // any state other that S_try_next_proxy), then reestablish the connection
371  // if it has been dropped.
372  if (_bio.is_null() && _state != S_try_next_proxy) {
373  if (_connect_count > http_max_connect_count) {
374  // Too many connection attempts; just give up. We should never
375  // trigger this failsafe, since the code in each individual case has
376  // similar logic to prevent more than two consecutive lost
377  // connections.
378  downloader_cat.warning()
379  << _NOTIFY_HTTP_CHANNEL_ID
380  << "Too many lost connections, giving up.\n";
381  _status_entry._status_code = SC_lost_connection;
382  _state = S_failure;
383  return false;
384  }
385 
386  // No connection. Attempt to establish one.
387  URLSpec url;
388  if (_proxy.empty()) {
389  url = _request.get_url();
390  } else {
391  url = _proxy;
392  }
393  _bio = new BioPtr(url);
394  _source = new BioStreamPtr(new BioStream(_bio));
395  if (_nonblocking) {
396  _bio->set_nbio(true);
397  }
398 
399  if (downloader_cat.is_debug()) {
400  if (_connect_count > 0) {
401  downloader_cat.debug()
402  << _NOTIFY_HTTP_CHANNEL_ID
403  << "Reconnecting to " << _bio->get_server_name() << " port "
404  << _bio->get_port() << "\n";
405  } else {
406  downloader_cat.debug()
407  << _NOTIFY_HTTP_CHANNEL_ID
408  << "Connecting to " << _bio->get_server_name() << " port "
409  << _bio->get_port() << "\n";
410  }
411  }
412 
413  _state = S_connecting;
414  _started_connecting_time =
415  TrueClock::get_global_ptr()->get_short_time();
416  _connect_count++;
417  }
418 
419  /*
420  if (downloader_cat.is_spam()) {
421  downloader_cat.spam()
422  << _NOTIFY_HTTP_CHANNEL_ID
423  << "continue run(), _state = " << _state << "\n";
424  }
425  */
426 
427  switch (_state) {
428  case S_try_next_proxy:
429  repeat_later = run_try_next_proxy();
430  break;
431 
432  case S_connecting:
433  repeat_later = run_connecting();
434  break;
435 
436  case S_connecting_wait:
437  repeat_later = run_connecting_wait();
438  break;
439 
440  case S_http_proxy_ready:
441  repeat_later = run_http_proxy_ready();
442  break;
443 
444  case S_http_proxy_request_sent:
445  repeat_later = run_http_proxy_request_sent();
446  break;
447 
448  case S_http_proxy_reading_header:
449  repeat_later = run_http_proxy_reading_header();
450  break;
451 
452  case S_socks_proxy_greet:
453  repeat_later = run_socks_proxy_greet();
454  break;
455 
456  case S_socks_proxy_greet_reply:
457  repeat_later = run_socks_proxy_greet_reply();
458  break;
459 
460  case S_socks_proxy_connect:
461  repeat_later = run_socks_proxy_connect();
462  break;
463 
464  case S_socks_proxy_connect_reply:
465  repeat_later = run_socks_proxy_connect_reply();
466  break;
467 
468  case S_setup_ssl:
469  repeat_later = run_setup_ssl();
470  break;
471 
472  case S_ssl_handshake:
473  repeat_later = run_ssl_handshake();
474  break;
475 
476  case S_ready:
477  repeat_later = run_ready();
478  break;
479 
480  case S_request_sent:
481  repeat_later = run_request_sent();
482  break;
483 
484  case S_reading_header:
485  repeat_later = run_reading_header();
486  break;
487 
488  case S_start_direct_file_read:
489  repeat_later = run_start_direct_file_read();
490  break;
491 
492  case S_read_header:
493  repeat_later = run_read_header();
494  break;
495 
496  case S_begin_body:
497  repeat_later = run_begin_body();
498  break;
499 
500  case S_reading_body:
501  repeat_later = run_reading_body();
502  break;
503 
504  case S_read_body:
505  repeat_later = run_read_body();
506  break;
507 
508  case S_read_trailer:
509  repeat_later = run_read_trailer();
510  break;
511 
512  default:
513  downloader_cat.warning()
514  << _NOTIFY_HTTP_CHANNEL_ID
515  << "Unhandled state " << _state << "\n";
516  return false;
517  }
518 
519  if (_state == _done_state || _state == S_failure) {
520  clear_extra_headers();
521  // We've reached our terminal state.
522  return reached_done_state();
523  }
524  thread_consider_yield();
525  } while (!repeat_later || _bio.is_null());
526 
527  /*
528  if (downloader_cat.is_spam()) {
529  downloader_cat.spam()
530  << _NOTIFY_HTTP_CHANNEL_ID
531  << "later run(), _state = " << _state
532  << ", _done_state = " << _done_state << "\n";
533  }
534  */
535 
536  thread_yield();
537  return true;
538 }
539 
540 /**
541  * Returns a newly-allocated istream suitable for reading the body of the
542  * document. This may only be called immediately after a call to
543  * get_document() or post_form(), or after a call to run() has returned false.
544  *
545  * Note that, in nonblocking mode, the returned stream may report an early
546  * EOF, even before the actual end of file. When this happens, you should
547  * call stream->is_closed() to determine whether you should attempt to read
548  * some more later.
549  *
550  * The user is responsible for passing the returned istream to
551  * close_read_body() later.
552  */
553 ISocketStream *HTTPChannel::
554 open_read_body() {
555  reset_body_stream();
556 
557  if ((_state != S_read_header && _state != S_begin_body) || _source.is_null()) {
558  return nullptr;
559  }
560 
561  string transfer_coding = downcase(get_header_value("Transfer-Encoding"));
562 
563  ISocketStream *result;
564  if (transfer_coding == "chunked") {
565  // "chunked" transfer encoding. This means we will have to decode the
566  // length of the file as we read it in chunks. The IChunkedStream does
567  // this.
568  _state = S_reading_body;
569  _read_index++;
570  result = new IChunkedStream(_source, this);
571 
572  } else {
573  // If the transfer encoding is anything else, assume "identity". This is
574  // just the literal characters following the header, up until _file_size
575  // bytes have been read (if content-length was specified), or till end of
576  // file otherwise.
577  _state = S_reading_body;
578  _read_index++;
579  result = new IIdentityStream(_source, this, _got_file_size, _file_size);
580  }
581 
582  result->_channel = this;
583  _body_stream = result;
584  _owns_body_stream = false;
585 
586  return result;
587 }
588 
589 /**
590  * Closes a file opened by a previous call to open_read_body(). This really
591  * just deletes the istream pointer, but it is recommended to use this
592  * interface instead of deleting it explicitly, to help work around compiler
593  * issues.
594  */
595 void HTTPChannel::
596 close_read_body(istream *stream) const {
597  if (stream != nullptr) {
598  // For some reason--compiler bug in gcc 3.2?--explicitly deleting the
599  // stream pointer does not call the appropriate global delete function;
600  // instead apparently calling the system delete function. So we call the
601  // delete function by hand instead.
602 #if !defined(USE_MEMORY_NOWRAPPERS) && defined(REDEFINE_GLOBAL_OPERATOR_NEW)
603  stream->~istream();
604  (*global_operator_delete)(stream);
605 #else
606  delete stream;
607 #endif
608  }
609 }
610 
611 /**
612  * Specifies the name of a file to download the resulting document to. This
613  * should be called immediately after get_document() or begin_get_document()
614  * or related functions.
615  *
616  * In the case of the blocking I/O methods like get_document(), this function
617  * will download the entire document to the file and return true if it was
618  * successfully downloaded, false otherwise.
619  *
620  * In the case of non-blocking I/O methods like begin_get_document(), this
621  * function simply indicates an intention to download to the indicated file.
622  * It returns true if the file can be opened for writing, false otherwise, but
623  * the contents will not be completely downloaded until run() has returned
624  * false. At this time, it is possible that a communications error will have
625  * left a partial file, so is_download_complete() may be called to test this.
626  *
627  * If subdocument_resumes is true and the document in question was previously
628  * requested as a subdocument (i.e. get_subdocument() with a first_byte value
629  * greater than zero), this will automatically seek to the appropriate byte
630  * within the file for writing the output. In this case, the file must
631  * already exist and must have at least first_byte bytes in it. If
632  * subdocument_resumes is false, a subdocument will always be downloaded
633  * beginning at the first byte of the file.
634  */
635 bool HTTPChannel::
636 download_to_file(const Filename &filename, bool subdocument_resumes) {
637  reset_download_to();
638  _download_to_filename = filename;
639  _download_to_filename.set_binary();
640  _subdocument_resumes = subdocument_resumes;
641 
642  _download_dest = DD_file;
643 
644  if (_wanted_nonblocking && _state != S_read_header) {
645  // In nonblocking mode, we can't start the download yet; that will be done
646  // later as run() is called.
647  return true;
648  }
649 
650  // In normal, blocking mode, go ahead and do the download.
651  if (!open_download_file()) {
652  reset_download_to();
653  return false;
654  }
655 
656  while (run()) {
657  }
658  return is_download_complete() && is_valid();
659 }
660 
661 /**
662  * Specifies a Ramfile object to download the resulting document to. This
663  * should be called immediately after get_document() or begin_get_document()
664  * or related functions.
665  *
666  * In the case of the blocking I/O methods like get_document(), this function
667  * will download the entire document to the Ramfile and return true if it was
668  * successfully downloaded, false otherwise.
669  *
670  * In the case of non-blocking I/O methods like begin_get_document(), this
671  * function simply indicates an intention to download to the indicated
672  * Ramfile. It returns true if the file can be opened for writing, false
673  * otherwise, but the contents will not be completely downloaded until run()
674  * has returned false. At this time, it is possible that a communications
675  * error will have left a partial file, so is_download_complete() may be
676  * called to test this.
677  *
678  * If subdocument_resumes is true and the document in question was previously
679  * requested as a subdocument (i.e. get_subdocument() with a first_byte value
680  * greater than zero), this will automatically seek to the appropriate byte
681  * within the Ramfile for writing the output. In this case, the Ramfile must
682  * already have at least first_byte bytes in it.
683  */
684 bool HTTPChannel::
685 download_to_ram(Ramfile *ramfile, bool subdocument_resumes) {
686  nassertr(ramfile != nullptr, false);
687  reset_download_to();
688  ramfile->_pos = 0;
689  _download_to_ramfile = ramfile;
690  _download_dest = DD_ram;
691  _subdocument_resumes = (subdocument_resumes && _first_byte_delivered != 0);
692 
693  if (_wanted_nonblocking && _state != S_read_header) {
694  // In nonblocking mode, we can't start the download yet; that will be done
695  // later as run() is called.
696  return true;
697  }
698 
699  // In normal, blocking mode, go ahead and do the download.
700  if (!open_download_file()) {
701  reset_download_to();
702  return false;
703  }
704 
705  while (run()) {
706  }
707  return is_download_complete() && is_valid();
708 }
709 
710 /**
711  * Specifies the name of an ostream to download the resulting document to.
712  * This should be called immediately after get_document() or
713  * begin_get_document() or related functions.
714  *
715  * In the case of the blocking I/O methods like get_document(), this function
716  * will download the entire document to the file and return true if it was
717  * successfully downloaded, false otherwise.
718  *
719  * In the case of non-blocking I/O methods like begin_get_document(), this
720  * function simply indicates an intention to download to the indicated file.
721  * It returns true if the file can be opened for writing, false otherwise, but
722  * the contents will not be completely downloaded until run() has returned
723  * false. At this time, it is possible that a communications error will have
724  * left a partial file, so is_download_complete() may be called to test this.
725  *
726  * If subdocument_resumes is true and the document in question was previously
727  * requested as a subdocument (i.e. get_subdocument() with a first_byte value
728  * greater than zero), this will automatically seek to the appropriate byte
729  * within the file for writing the output. In this case, the file must
730  * already exist and must have at least first_byte bytes in it. If
731  * subdocument_resumes is false, a subdocument will always be downloaded
732  * beginning at the first byte of the file.
733  */
734 bool HTTPChannel::
735 download_to_stream(ostream *strm, bool subdocument_resumes) {
736  reset_download_to();
737  _download_to_stream = strm;
738  _download_to_stream->clear();
739  _subdocument_resumes = subdocument_resumes;
740 
741  _download_dest = DD_stream;
742 
743  if (_wanted_nonblocking && _state != S_read_header) {
744  // In nonblocking mode, we can't start the download yet; that will be done
745  // later as run() is called.
746  return true;
747  }
748 
749  // In normal, blocking mode, go ahead and do the download.
750  if (!open_download_file()) {
751  reset_download_to();
752  return false;
753  }
754 
755  while (run()) {
756  }
757  return is_download_complete() && is_valid();
758 }
759 
760 /**
761  * Returns the connection that was established via a previous call to
762  * connect_to() or begin_connect_to(), or NULL if the connection attempt
763  * failed or if those methods have not recently been called.
764  *
765  * This stream has been allocated from the free store. It is the user's
766  * responsibility to delete this pointer when finished with it.
767  */
768 SocketStream *HTTPChannel::
769 get_connection() {
770  if (!is_connection_ready()) {
771  return nullptr;
772  }
773 
774  BioStream *stream = _source->get_stream();
775  _source->set_stream(nullptr);
776 
777  // We're now passing ownership of the connection to the caller.
778  if (downloader_cat.is_debug()) {
779  downloader_cat.debug()
780  << _NOTIFY_HTTP_CHANNEL_ID
781  << "passing ownership of connection to caller.\n";
782  }
783  reset_to_new();
784 
785  return stream;
786 }
787 
788 /**
789  * Returns the input string with all uppercase letters converted to lowercase.
790  */
791 string HTTPChannel::
792 downcase(const string &s) {
793  string result;
794  result.reserve(s.size());
795  string::const_iterator p;
796  for (p = s.begin(); p != s.end(); ++p) {
797  result += tolower(*p);
798  }
799  return result;
800 }
801 
802 /**
803  * Called by ISocketStream destructor when _body_stream is destructing.
804  */
805 void HTTPChannel::
806 body_stream_destructs(ISocketStream *stream) {
807  if (stream == _body_stream) {
808  if (_state == S_reading_body) {
809  switch (_body_stream->get_read_state()) {
810  case ISocketStream::RS_complete:
811  finished_body(false);
812  break;
813 
814  case ISocketStream::RS_error:
815  _state = HTTPChannel::S_failure;
816  _status_entry._status_code = HTTPChannel::SC_lost_connection;
817  break;
818 
819  default:
820  break;
821  }
822  }
823  _body_stream = nullptr;
824  _owns_body_stream = false;
825  }
826 }
827 
828 
829 /**
830  * Called by run() after it reaches the done state, this simply checks to see
831  * if a download was requested, and begins the download if it has been.
832  */
833 bool HTTPChannel::
834 reached_done_state() {
835  /*
836  if (downloader_cat.is_spam()) {
837  downloader_cat.spam()
838  << _NOTIFY_HTTP_CHANNEL_ID
839  << "terminating run(), _state = " << _state
840  << ", _done_state = " << _done_state << "\n";
841  }
842  */
843 
844  if (_state == S_failure) {
845  // We had to give up. Each proxy we tried, in sequence, failed. But
846  // maybe the last attempt didn't give us the most informative response; go
847  // back and find the best one.
848  if (!_status_list.empty()) {
849  _status_list.push_back(_status_entry);
850  if (downloader_cat.is_debug()) {
851  downloader_cat.debug()
852  << _NOTIFY_HTTP_CHANNEL_ID
853  << "Reexamining failure responses.\n";
854  }
855  size_t best_i = 0;
856  if (downloader_cat.is_debug()) {
857  downloader_cat.debug()
858  << _NOTIFY_HTTP_CHANNEL_ID
859  << " " << 0 << ". " << _status_list[0]._status_code << " "
860  << _status_list[0]._status_string << "\n";
861  }
862  for (size_t i = 1; i < _status_list.size(); i++) {
863  if (downloader_cat.is_debug()) {
864  downloader_cat.debug()
865  << _NOTIFY_HTTP_CHANNEL_ID
866  << " " << i << ". " << _status_list[i]._status_code << " "
867  << _status_list[i]._status_string << "\n";
868  }
869  if (more_useful_status_code(_status_list[i]._status_code,
870  _status_list[best_i]._status_code)) {
871  best_i = i;
872  }
873  }
874  if (downloader_cat.is_debug()) {
875  downloader_cat.debug()
876  << _NOTIFY_HTTP_CHANNEL_ID
877  << "chose index " << best_i << ", above.\n";
878  }
879  _status_entry = _status_list[best_i];
880  _status_list.clear();
881  }
882 
883  return false;
884  }
885 
886  // We don't need the list of previous failures any more--we've connected.
887  _status_list.clear();
888 
889  if (_download_dest == DD_none) {
890  // All done.
891  return false;
892 
893  } else {
894  // Oops, we have to download the body now.
895  open_read_body();
896  if (_body_stream == nullptr) {
897  if (downloader_cat.is_debug()) {
898  downloader_cat.debug()
899  << _NOTIFY_HTTP_CHANNEL_ID
900  << "Unable to download body: " << _request.get_url() << "\n";
901  }
902  return false;
903 
904  } else {
905  _owns_body_stream = true;
906  if (_state != S_reading_body) {
907  reset_body_stream();
908  }
909  _started_download = true;
910 
911  _done_state = S_read_trailer;
912  _last_run_time = TrueClock::get_global_ptr()->get_short_time();
913  return true;
914  }
915  }
916 }
917 
918 /**
919  * This state is reached when a previous connection attempt fails. If we have
920  * multiple proxies in line to try, it sets us up for the next proxy and tries
921  * to connect again; otherwise, it sets the state to S_failure.
922  */
923 bool HTTPChannel::
924 run_try_next_proxy() {
925  if (_proxy_next_index < _proxies.size()) {
926  // Record the previous proxy's status entry, so we can come back to it
927  // later if we get nonsense from the remaining proxies.
928  _status_list.push_back(_status_entry);
929  _status_entry = StatusEntry();
930 
931  // Now try the next proxy in sequence.
932  _proxy = _proxies[_proxy_next_index];
933  _proxy_auth = nullptr;
934  _proxy_next_index++;
935  close_connection();
936  reconsider_proxy();
937  _state = S_connecting;
938 
939  return false;
940  }
941 
942  // No more proxies to try, or we're not using a proxy.
943  _state = S_failure;
944  return false;
945 }
946 
947 /**
948  * In this state, we have not yet established a network connection to the
949  * server (or proxy).
950  */
951 bool HTTPChannel::
952 run_connecting() {
953  _status_entry = StatusEntry();
954 
955  if (!_bio->connect()) {
956  if (_bio->should_retry()) {
957  _state = S_connecting_wait;
958  return false;
959  }
960  downloader_cat.info()
961  << _NOTIFY_HTTP_CHANNEL_ID
962  << "Could not connect to " << _bio->get_server_name() << " port "
963  << _bio->get_port() << "\n";
964  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
965  _status_entry._status_code = SC_no_connection;
966  _state = S_try_next_proxy;
967  return false;
968  }
969 
970  if (downloader_cat.is_debug()) {
971  downloader_cat.debug()
972  << _NOTIFY_HTTP_CHANNEL_ID
973  << "Connected to " << _bio->get_server_name() << " port "
974  << _bio->get_port() << "\n";
975  }
976 
977  if (_proxy_tunnel_now) {
978  if (_proxy.get_scheme() == "socks") {
979  _state = S_socks_proxy_greet;
980  } else {
981  _state = S_http_proxy_ready;
982  }
983 
984  } else {
985  if (_want_ssl) {
986  _state = S_setup_ssl;
987  } else {
988  _state = S_ready;
989  }
990  }
991  return false;
992 }
993 
994 /**
995  * Here we have begun to establish a nonblocking connection, but we got a
996  * come-back-later message, so we are waiting for the socket to finish
997  * connecting.
998  */
999 bool HTTPChannel::
1000 run_connecting_wait() {
1001  int fd = -1;
1002  BIO_get_fd(*_bio, &fd);
1003  if (fd < 0) {
1004  downloader_cat.warning()
1005  << _NOTIFY_HTTP_CHANNEL_ID
1006  << "nonblocking socket BIO has no file descriptor.\n";
1007  // This shouldn't be possible.
1008  _status_entry._status_code = SC_internal_error;
1009  _state = S_try_next_proxy;
1010  return false;
1011  }
1012 
1013  if (downloader_cat.is_spam()) {
1014  downloader_cat.spam()
1015  << _NOTIFY_HTTP_CHANNEL_ID
1016  << "waiting to connect to " << _request.get_url().get_server_and_port() << ".\n";
1017  }
1018  fd_set wset;
1019  FD_ZERO(&wset);
1020  FD_SET(fd, &wset);
1021  struct timeval tv;
1022  if (get_blocking_connect()) {
1023  // Since we'll be blocking on this connect, fill in the timeout into the
1024  // structure.
1025  tv.tv_sec = (int)_connect_timeout;
1026  tv.tv_usec = (int)((_connect_timeout - tv.tv_sec) * 1000000.0);
1027  } else {
1028  // We won't block on this connect, so select() for 0 time.
1029  tv.tv_sec = 0;
1030  tv.tv_usec = 0;
1031  }
1032  int errcode = select(fd + 1, nullptr, &wset, nullptr, &tv);
1033  if (errcode < 0) {
1034  downloader_cat.warning()
1035  << _NOTIFY_HTTP_CHANNEL_ID
1036  << "Error in select.\n";
1037  // This shouldn't be possible.
1038  _status_entry._status_code = SC_internal_error;
1039  _state = S_try_next_proxy;
1040  return false;
1041  }
1042 
1043  if (errcode == 0) {
1044  // Nothing's happened so far; come back later.
1045  if (get_blocking_connect() ||
1046  (TrueClock::get_global_ptr()->get_short_time() -
1047  _started_connecting_time > get_connect_timeout())) {
1048  // Time to give up.
1049  downloader_cat.info()
1050  << _NOTIFY_HTTP_CHANNEL_ID
1051  << "Timeout connecting to "
1052  << _request.get_url().get_server_and_port()
1053  << " for " << _request.get_url()
1054  << ".\n";
1055  _status_entry._status_code = SC_timeout;
1056  _state = S_try_next_proxy;
1057  return false;
1058  }
1059  return true;
1060  }
1061 
1062  // The socket is now ready for writing.
1063  _state = S_connecting;
1064  return false;
1065 }
1066 
1067 
1068 /**
1069  * This state is reached only after first establishing a connection to the
1070  * proxy, if a proxy is in use and we are tunneling through it via a CONNECT
1071  * command.
1072  */
1073 bool HTTPChannel::
1074 run_http_proxy_ready() {
1075  // If there's a request to be sent to the proxy, send it now.
1076  nassertr(!_proxy_request_text.empty(), false);
1077  if (!server_send(_proxy_request_text, false)) {
1078  return true;
1079  }
1080 
1081  // All done sending request.
1082  _state = S_http_proxy_request_sent;
1083  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1084  return false;
1085 }
1086 
1087 /**
1088  * This state is reached only after we have sent a special message to the
1089  * proxy and we are waiting for the proxy's response. It is not used in the
1090  * normal http-over-proxy case, which does not require a special message to
1091  * the proxy.
1092  */
1093 bool HTTPChannel::
1094 run_http_proxy_request_sent() {
1095  // Wait for the first line to come back from the server.
1096  string line;
1097  if (!server_getline_failsafe(line)) {
1098  return true;
1099  }
1100 
1101  // Skip unexpected blank lines. We're just being generous here.
1102  while (line.empty()) {
1103  if (!server_getline_failsafe(line)) {
1104  return true;
1105  }
1106  }
1107 
1108  if (!parse_http_response(line)) {
1109  return false;
1110  }
1111 
1112  _state = S_http_proxy_reading_header;
1113  _current_field_name = string();
1114  _current_field_value = string();
1115  _headers.clear();
1116  _got_file_size = false;
1117  _got_transfer_file_size = false;
1118  return false;
1119 }
1120 
1121 /**
1122  * In this state we are reading the header lines from the proxy's response to
1123  * our special message.
1124  */
1125 bool HTTPChannel::
1126 run_http_proxy_reading_header() {
1127  if (parse_http_header()) {
1128  return true;
1129  }
1130 
1131  _redirect = get_header_value("Location");
1132  // We can take the proxy's word for it that this is the actual URL for the
1133  // redirect.
1134 
1135  _server_response_has_no_body =
1136  (get_status_code() / 100 == 1 ||
1137  get_status_code() == 204 ||
1138  get_status_code() == 304);
1139 
1140  int last_status = _last_status_code;
1141  _last_status_code = get_status_code();
1142 
1143  if (get_status_code() == 407 && last_status != 407 && !_proxy.empty()) {
1144  // 407: not authorized to proxy. Try to get the authorization.
1145  string authenticate_request = get_header_value("Proxy-Authenticate");
1146  _proxy_auth = _client->generate_auth(_proxy, true, authenticate_request);
1147  if (_proxy_auth != nullptr) {
1148  _proxy_realm = _proxy_auth->get_realm();
1149  _proxy_username = _client->select_username(_proxy, true, _proxy_realm);
1150  if (!_proxy_username.empty()) {
1151  make_proxy_request_text();
1152 
1153  // Roll the state forward to force a new request.
1154  _state = S_begin_body;
1155  return false;
1156  }
1157  }
1158  }
1159 
1160  if (!is_valid()) {
1161  // Proxy wouldn't open connection.
1162 
1163  // Change some of the status codes a proxy might return to differentiate
1164  // them from similar status codes the destination server might have
1165  // returned.
1166  if (get_status_code() != 407) {
1167  _status_entry._status_code += 1000;
1168  }
1169 
1170  _state = S_try_next_proxy;
1171  return false;
1172  }
1173 
1174  // Now we have a tunnel opened through the proxy.
1175  make_request_text();
1176 
1177  if (_want_ssl) {
1178  _state = S_setup_ssl;
1179  } else {
1180  _state = S_ready;
1181  }
1182 
1183  return false;
1184 }
1185 
1186 /**
1187  * This state is reached only after first establishing a connection to a SOCKS
1188  * proxy, with which we now have to negotiate a connection.
1189  */
1190 bool HTTPChannel::
1191 run_socks_proxy_greet() {
1192  static const char socks_greeting[] = {
1193  0x05, // Socks version 5
1194  0x01, // Number of supported login methods
1195  0x00, // Login method 0: no authentication
1196  /*
1197  0x01, // Login method 1: GSSAPI
1198  0x02 // Login method 2: username/password
1199  */
1200  };
1201  static const int socks_greeting_len = sizeof(socks_greeting);
1202  if (!server_send(string(socks_greeting, socks_greeting_len), true)) {
1203  return true;
1204  }
1205  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1206 
1207  // All done sending request.
1208  _state = S_socks_proxy_greet_reply;
1209  return false;
1210 }
1211 
1212 /**
1213  * We are waiting for the SOCKS proxy to respond to our greeting.
1214  */
1215 bool HTTPChannel::
1216 run_socks_proxy_greet_reply() {
1217  string reply;
1218 
1219  // Get the two-byte reply from the SOCKS server.
1220  if (!server_get_failsafe(reply, 2)) {
1221  return true;
1222  }
1223 
1224  if (reply[0] != 0x05) {
1225  // We only speak Socks5.
1226  downloader_cat.info()
1227  << _NOTIFY_HTTP_CHANNEL_ID
1228  << "Rejecting Socks version " << (int)reply[0] << "\n";
1229  _status_entry._status_code = SC_socks_invalid_version;
1230  _state = S_try_next_proxy;
1231  return false;
1232  }
1233 
1234  if (reply[1] == (char)0xff) {
1235  downloader_cat.info()
1236  << _NOTIFY_HTTP_CHANNEL_ID
1237  << "Socks server does not accept our available login methods.\n";
1238  _status_entry._status_code = SC_socks_no_acceptable_login_method;
1239  _state = S_try_next_proxy;
1240  return false;
1241  }
1242 
1243  if (reply[1] == 0x00) {
1244  // No login method required. Proceed directly to the connect message.
1245  _state = S_socks_proxy_connect;
1246  return false;
1247  }
1248 
1249  // The server accepted a login method we didn't offer!
1250  downloader_cat.info()
1251  << _NOTIFY_HTTP_CHANNEL_ID
1252  << "Socks server accepted unrequested login method "
1253  << (int)reply[1] << "\n";
1254  _status_entry._status_code = SC_socks_no_acceptable_login_method;
1255  _state = S_try_next_proxy;
1256  return false;
1257 }
1258 
1259 /**
1260  * The SOCKS proxy has accepted us, and now we may issue the connect request.
1261  */
1262 bool HTTPChannel::
1263 run_socks_proxy_connect() {
1264  static const char socks_connect[] = {
1265  0x05, // Socks version 5
1266  0x01, // Command 1: connect
1267  0x00, // reserved
1268  0x03, // DNS name
1269  };
1270  static const int socks_connect_len = sizeof(socks_connect);
1271 
1272  string hostname = _request.get_url().get_server();
1273  int port = _request.get_url().get_port();
1274 
1275  if (downloader_cat.is_debug()) {
1276  downloader_cat.debug()
1277  << _NOTIFY_HTTP_CHANNEL_ID
1278  << "Requesting SOCKS5 connection to "
1279  << _request.get_url().get_server_and_port() << "\n";
1280  }
1281 
1282  string connect =
1283  string(socks_connect, socks_connect_len) +
1284  string(1, (char)hostname.length()) +
1285  hostname +
1286  string(1, (char)((port >> 8) & 0xff)) +
1287  string(1, (char)(port & 0xff));
1288 
1289  if (!server_send(connect, true)) {
1290  return true;
1291  }
1292  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1293 
1294  _state = S_socks_proxy_connect_reply;
1295  return false;
1296 }
1297 
1298 /**
1299  * We are waiting for the SOCKS proxy to honor our connect request.
1300  */
1301 bool HTTPChannel::
1302 run_socks_proxy_connect_reply() {
1303  string reply;
1304 
1305  // Get the first two bytes of the connect reply.
1306  if (!server_get_failsafe(reply, 2)) {
1307  return true;
1308  }
1309 
1310  if (reply[0] != 0x05) {
1311  // We only speak Socks5.
1312  downloader_cat.info()
1313  << _NOTIFY_HTTP_CHANNEL_ID
1314  << "Rejecting Socks version " << (int)reply[0] << "\n";
1315  close_connection(); // connection is now bad.
1316  _status_entry._status_code = SC_socks_invalid_version;
1317  _state = S_try_next_proxy;
1318  return false;
1319  }
1320 
1321  if (reply[1] != 0x00) {
1322  downloader_cat.info()
1323  << _NOTIFY_HTTP_CHANNEL_ID
1324  << "Connection refused, SOCKS code " << (int)reply[1] << "\n";
1325  /*
1326  Socks error codes (from RFC1928):
1327  o X'00' succeeded
1328  o X'01' general SOCKS server failure
1329  o X'02' connection not allowed by ruleset
1330  o X'03' Network unreachable
1331  o X'04' Host unreachable
1332  o X'05' Connection refused
1333  o X'06' TTL expired
1334  o X'07' Command not supported
1335  o X'08' Address type not supported
1336  o X'09' to X'FF' unassigned
1337  */
1338 
1339  switch (reply[1]) {
1340  case 0x03:
1341  case 0x04:
1342  case 0x05:
1343  case 0x06:
1344  // These generally mean the same thing: the SOCKS proxy tried, but
1345  // couldn't reach the host.
1346  _status_entry._status_code = SC_socks_no_connection;
1347  break;
1348 
1349  default:
1350  _status_entry._status_code = SC_socks_refused;
1351  }
1352 
1353  close_connection(); // connection is now bad.
1354  _state = S_try_next_proxy;
1355  return false;
1356  }
1357 
1358  // Now put those bytes back, and get five bytes of the reply.
1359  _working_get = reply;
1360  if (!server_get_failsafe(reply, 5)) {
1361  return true;
1362  }
1363 
1364  // Figure out how many bytes total we will expect for the reply.
1365  int total_bytes = 6;
1366 
1367  switch (reply[3]) {
1368  case 0x01: // IPv4
1369  total_bytes += 4;
1370  break;
1371 
1372  case 0x03: // DNS
1373  total_bytes += (unsigned int)reply[4];
1374  break;
1375 
1376  case 0x04: // IPv6
1377  total_bytes += 16;
1378  break;
1379 
1380  default:
1381  downloader_cat.info()
1382  << _NOTIFY_HTTP_CHANNEL_ID
1383  << "Unsupported SOCKS address type: " << (int)reply[3] << "\n";
1384  _status_entry._status_code = SC_socks_invalid_version;
1385  _state = S_try_next_proxy;
1386  return false;
1387  }
1388 
1389  // Now put back the bytes we've read so far, and get the rest of them.
1390  _working_get = reply;
1391  if (!server_get_failsafe(reply, total_bytes)) {
1392  return true;
1393  }
1394 
1395  if (downloader_cat.is_debug()) {
1396  // Finally, we can decode the whole thing.
1397  string connect_host;
1398 
1399  switch (reply[3]) {
1400  case 0x01: // IPv4
1401  {
1402  ostringstream strm;
1403  strm << (unsigned int)(unsigned char)reply[4] << "."
1404  << (unsigned int)(unsigned char)reply[5] << "."
1405  << (unsigned int)(unsigned char)reply[6] << "."
1406  << (unsigned int)(unsigned char)reply[7];
1407  connect_host = strm.str();
1408  }
1409  break;
1410 
1411  case 0x03: // DNS
1412  connect_host = string(&reply[5], (unsigned int)reply[4]);
1413  break;
1414 
1415  case 0x04: // IPv6
1416  {
1417  char buf[48];
1418  sprintf(buf, "[%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx"
1419  ":%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx]",
1420  reply[4], reply[5], reply[6], reply[7], reply[8], reply[9],
1421  reply[10], reply[11], reply[12], reply[13], reply[14],
1422  reply[15], reply[16], reply[17], reply[18], reply[19]);
1423  total_bytes += 16;
1424  }
1425  break;
1426  }
1427 
1428  int connect_port =
1429  (((unsigned int)(unsigned char)reply[total_bytes - 2]) << 8) |
1430  ((unsigned int)(unsigned char)reply[total_bytes - 1]);
1431 
1432  downloader_cat.debug()
1433  << _NOTIFY_HTTP_CHANNEL_ID
1434  << _proxy << " directed us to " << connect_host << ":"
1435  << connect_port << "\n";
1436  }
1437 
1438  if (_want_ssl) {
1439  _state = S_setup_ssl;
1440  } else {
1441  _state = S_ready;
1442  }
1443 
1444  return false;
1445 }
1446 
1447 /**
1448  * This state begins elevating our existing, unsecure connection to a secure,
1449  * SSL connection.
1450  */
1451 bool HTTPChannel::
1452 run_setup_ssl() {
1453  _sbio = BIO_new_ssl(_client->get_ssl_ctx(), true);
1454  BIO_push(_sbio, *_bio);
1455 
1456  SSL *ssl = nullptr;
1457  BIO_get_ssl(_sbio, &ssl);
1458  nassertr(ssl != nullptr, false);
1459 
1460  // We only take one word at a time from the _cipher_list. If that
1461  // connection fails, then we take the next word.
1462  string cipher_list = _cipher_list;
1463  if (!cipher_list.empty()) {
1464  size_t space = cipher_list.find(" ");
1465  if (space != string::npos) {
1466  cipher_list = cipher_list.substr(0, space);
1467  }
1468  }
1469 
1470  if (downloader_cat.is_debug()) {
1471  downloader_cat.debug()
1472  << _NOTIFY_HTTP_CHANNEL_ID
1473  << "Setting ssl-cipher-list '" << cipher_list << "'\n";
1474  }
1475  int result = SSL_set_cipher_list(ssl, cipher_list.c_str());
1476  if (result == 0) {
1477  downloader_cat.error()
1478  << _NOTIFY_HTTP_CHANNEL_ID
1479  << "Invalid cipher list: '" << cipher_list << "'\n";
1480  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
1481  _status_entry._status_code = SC_ssl_internal_failure;
1482  _state = S_failure;
1483  return false;
1484  }
1485 
1486  string hostname = _request.get_url().get_server();
1487  result = SSL_set_tlsext_host_name(ssl, hostname.c_str());
1488  if (result == 0) {
1489  downloader_cat.error()
1490  << _NOTIFY_HTTP_CHANNEL_ID
1491  << "Could not set TLS SNI hostname to '" << hostname << "'\n";
1492  }
1493 
1494 /*
1495  * It would be nice to use something like SSL_set_client_cert_cb() here to set
1496  * a callback to provide the certificate should it be requested, or even to
1497  * potentially provide any of a number of certificates according to the
1498  * server's CA presented, but that interface as provided by OpenSSL is broken
1499  * since there's no way to pass additional data to the callback function (and
1500  * hence no way to tie it back to the HTTPChannel object, other than by
1501  * building a messy mapping of SSL pointers back to HTTPChannel pointers).
1502  */
1503  if (_client->load_client_certificate()) {
1504  SSL_use_certificate(ssl, _client->_client_certificate_pub);
1505  SSL_use_PrivateKey(ssl, _client->_client_certificate_priv);
1506  if (!SSL_check_private_key(ssl)) {
1507  downloader_cat.warning()
1508  << _NOTIFY_HTTP_CHANNEL_ID
1509  << "Client private key does not match public key!\n";
1510  }
1511  }
1512 
1513  if (downloader_cat.is_spam()) {
1514  downloader_cat.spam()
1515  << _NOTIFY_HTTP_CHANNEL_ID
1516  << "SSL Ciphers available:\n";
1517  const char *name;
1518  int pri = 0;
1519  name = SSL_get_cipher_list(ssl, pri);
1520  while (name != nullptr) {
1521  downloader_cat.spam()
1522  << _NOTIFY_HTTP_CHANNEL_ID
1523  << " " << pri + 1 << ". " << name << "\n";
1524  pri++;
1525  name = SSL_get_cipher_list(ssl, pri);
1526  }
1527  }
1528 
1529  if (downloader_cat.is_debug()) {
1530  downloader_cat.debug()
1531  << _NOTIFY_HTTP_CHANNEL_ID
1532  << "performing SSL handshake\n";
1533  }
1534  _state = S_ssl_handshake;
1535 
1536  // We start the connect timer over again when we reach the SSL handshake.
1537  _started_connecting_time =
1538  TrueClock::get_global_ptr()->get_short_time();
1539 
1540  return false;
1541 }
1542 
1543 /**
1544  * This state performs the SSL handshake with the server, and also verifies
1545  * the server's identity when the handshake has successfully completed.
1546  */
1547 bool HTTPChannel::
1548 run_ssl_handshake() {
1549  if (BIO_do_handshake(_sbio) <= 0) {
1550  if (BIO_should_retry(_sbio)) {
1551  double elapsed =
1552  TrueClock::get_global_ptr()->get_short_time() -
1553  _started_connecting_time;
1554  if (elapsed <= get_connect_timeout() + _extra_ssl_handshake_time) {
1555  // Keep trying.
1556  return true;
1557  }
1558  // Time to give up on the handshake.
1559  }
1560 
1561  downloader_cat.info()
1562  << _NOTIFY_HTTP_CHANNEL_ID
1563  << "Could not establish SSL handshake with "
1564  << _request.get_url().get_server_and_port() << "\n";
1565  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
1566 
1567  // It seems to be an error to free sbio at this point; perhaps it's
1568  // already been freed?
1569 
1570  if (!_cipher_list.empty()) {
1571  // If we've got another cipher to try, do so.
1572  size_t space = _cipher_list.find(" ");
1573  if (space != string::npos) {
1574  while (space < _cipher_list.length() && _cipher_list[space] == ' ') {
1575  ++space;
1576  }
1577  _cipher_list = _cipher_list.substr(space);
1578  if (!_cipher_list.empty()) {
1579  close_connection();
1580  reconsider_proxy();
1581  _state = S_connecting;
1582  return false;
1583  }
1584  }
1585  }
1586 
1587  // All done trying ciphers; they all failed.
1588  _cipher_list = _client->get_cipher_list();
1589  _status_entry._status_code = SC_ssl_no_handshake;
1590  _state = S_failure;
1591  return false;
1592  }
1593 
1594  SSL *ssl = nullptr;
1595  BIO_get_ssl(_sbio, &ssl);
1596  nassertr(ssl != nullptr, false);
1597 
1598  if (!_nonblocking) {
1599  SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
1600  }
1601 
1602  const SSL_CIPHER *cipher = SSL_get_current_cipher(ssl);
1603  if (cipher == nullptr) {
1604  downloader_cat.warning()
1605  << _NOTIFY_HTTP_CHANNEL_ID
1606  << "No current cipher on SSL connection.\n";
1607  } else {
1608  if (downloader_cat.is_debug()) {
1609  downloader_cat.debug()
1610  << _NOTIFY_HTTP_CHANNEL_ID
1611  << "Using cipher " << SSL_CIPHER_get_name((SSL_CIPHER *) cipher) << "\n";
1612  }
1613  }
1614 
1615  // Now that we've made an SSL handshake, we can use the SSL bio to do all of
1616  // our communication henceforth.
1617  _bio->set_bio(_sbio);
1618  _sbio = nullptr;
1619 
1620  X509 *cert = SSL_get_peer_certificate(ssl);
1621  if (cert == nullptr) {
1622  downloader_cat.info()
1623  << _NOTIFY_HTTP_CHANNEL_ID
1624  << "No certificate was presented by server.\n";
1625 
1626  // This shouldn't be possible, per the SSL specs.
1627  _status_entry._status_code = SC_ssl_invalid_server_certificate;
1628  _state = S_failure;
1629  return false;
1630  }
1631 
1632  X509_NAME *subject = X509_get_subject_name(cert);
1633  if (downloader_cat.is_debug()) {
1634  string org_name = get_x509_name_component(subject, NID_organizationName);
1635  string org_unit_name = get_x509_name_component(subject, NID_organizationalUnitName);
1636  string common_name = get_x509_name_component(subject, NID_commonName);
1637 
1638  downloader_cat.debug()
1639  << _NOTIFY_HTTP_CHANNEL_ID
1640  << "Server is " << common_name << " from " << org_unit_name
1641  << " / " << org_name << "\n";
1642 
1643  if (downloader_cat.is_spam()) {
1644  downloader_cat.spam()
1645  << _NOTIFY_HTTP_CHANNEL_ID
1646  << "Received certificate from server:\n" << std::flush;
1647  X509_print_fp(stderr, cert);
1648  fflush(stderr);
1649  }
1650  }
1651 
1652  bool cert_preapproved = false;
1653  bool cert_name_preapproved = false;
1654  check_preapproved_server_certificate(cert, cert_preapproved, cert_name_preapproved);
1655 
1656  // Now verify the server certificate is valid.
1657  long verify_result = SSL_get_verify_result(ssl);
1658  bool cert_valid = true;
1659 
1660  if (verify_result == X509_V_ERR_CERT_HAS_EXPIRED) {
1661  downloader_cat.info()
1662  << _NOTIFY_HTTP_CHANNEL_ID
1663  << "Expired certificate from " << _request.get_url().get_server_and_port() << "\n";
1664  if (_client->get_verify_ssl() == HTTPClient::VS_normal && !cert_preapproved) {
1665  cert_valid = false;
1666  }
1667 
1668  } else if (verify_result == X509_V_ERR_CERT_NOT_YET_VALID) {
1669  downloader_cat.info()
1670  << _NOTIFY_HTTP_CHANNEL_ID
1671  << "Premature certificate from " << _request.get_url().get_server_and_port() << "\n";
1672  if (_client->get_verify_ssl() == HTTPClient::VS_normal && !cert_preapproved) {
1673  cert_valid = false;
1674  }
1675 
1676  } else if (verify_result == X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT ||
1677  verify_result == X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN) {
1678  downloader_cat.info()
1679  << _NOTIFY_HTTP_CHANNEL_ID
1680  << "Self-signed certificate from " << _request.get_url().get_server_and_port() << "\n";
1681  if (_client->get_verify_ssl() != HTTPClient::VS_no_verify && !cert_preapproved) {
1682  cert_valid = false;
1683  }
1684 
1685  } else if (verify_result != X509_V_OK) {
1686  downloader_cat.info()
1687  << _NOTIFY_HTTP_CHANNEL_ID
1688  << "Unable to verify identity of " << _request.get_url().get_server_and_port()
1689  << ", verify error code " << verify_result << "\n";
1690  if (_client->get_verify_ssl() != HTTPClient::VS_no_verify && !cert_preapproved) {
1691  cert_valid = false;
1692  }
1693  }
1694 
1695  if (!cert_valid) {
1696  _status_entry._status_code = SC_ssl_invalid_server_certificate;
1697  _state = S_failure;
1698  return false;
1699  }
1700 
1701  if (_client->get_verify_ssl() != HTTPClient::VS_no_verify && !cert_name_preapproved) {
1702  // Check that the server is someone we expected to be talking to.
1703  if (!validate_server_name(cert)) {
1704  _status_entry._status_code = SC_ssl_unexpected_server;
1705  _state = S_failure;
1706  return false;
1707  }
1708  }
1709 
1710  X509_free(cert);
1711 
1712  _state = S_ready;
1713  return false;
1714 }
1715 
1716 /**
1717  * This is the main "ready" state. In this state, we have established a
1718  * (possibly secure) connection to the server (or proxy), and the server (or
1719  * proxy) is idle and waiting for us to send a request.
1720  *
1721  * If persistent_connection is true, we will generally come back to this state
1722  * after finishing each request on a given connection.
1723  */
1724 bool HTTPChannel::
1725 run_ready() {
1726  // If there's a request to be sent upstream, send it now.
1727  if (!_request_text.empty()) {
1728  if (!server_send(_request_text, false)) {
1729  return true;
1730  }
1731  }
1732 
1733  // All done sending request.
1734  _state = S_request_sent;
1735  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1736  return false;
1737 }
1738 
1739 /**
1740  * In this state we have sent our request to the server (or proxy) and we are
1741  * waiting for a response.
1742  */
1743 bool HTTPChannel::
1744 run_request_sent() {
1745  // Wait for the first line to come back from the server.
1746  string line;
1747  if (!server_getline_failsafe(line)) {
1748  return true;
1749  }
1750 
1751  // Skip unexpected blank lines. We're just being generous here.
1752  while (line.empty()) {
1753  if (!server_getline_failsafe(line)) {
1754  return true;
1755  }
1756  }
1757 
1758  if (!parse_http_response(line)) {
1759  // Not an HTTP response. _state is already set appropriately.
1760  return false;
1761  }
1762 
1763  _state = S_reading_header;
1764  _current_field_name = string();
1765  _current_field_value = string();
1766  _headers.clear();
1767  _got_file_size = false;
1768  _got_transfer_file_size = false;
1769  return false;
1770 }
1771 
1772 /**
1773  * In this state we have received the first response to our request from the
1774  * server (or proxy) and we are reading the set of header lines preceding the
1775  * requested document.
1776  */
1777 bool HTTPChannel::
1778 run_reading_header() {
1779  if (parse_http_header()) {
1780  if (_bio.is_null()) {
1781  downloader_cat.info()
1782  << _NOTIFY_HTTP_CHANNEL_ID
1783  << "Connection lost while reading HTTP response.\n";
1784  if (_response_type == RT_http_hangup) {
1785  // This was our second hangup in a row. Give up.
1786  _status_entry._status_code = SC_lost_connection;
1787  _state = S_try_next_proxy;
1788 
1789  } else {
1790  // Try again, once.
1791  _response_type = RT_http_hangup;
1792  }
1793 
1794  } else {
1795  double elapsed =
1796  TrueClock::get_global_ptr()->get_short_time() -
1797  _sent_request_time;
1798  if (elapsed > get_http_timeout()) {
1799  // Time to give up.
1800  downloader_cat.info()
1801  << _NOTIFY_HTTP_CHANNEL_ID
1802  << "Timeout waiting for "
1803  << _request.get_url().get_server_and_port()
1804  << " in run_reading_header (" << elapsed
1805  << " seconds elapsed).\n";
1806  _status_entry._status_code = SC_timeout;
1807  _state = S_try_next_proxy;
1808  }
1809  }
1810  return true;
1811  }
1812  _response_type = RT_http_complete;
1813 
1814  // Ok, we've established an HTTP connection to the server. Our extra send
1815  // headers have done their job; clear them for next time.
1816  clear_extra_headers();
1817 
1818  _server_response_has_no_body =
1819  (get_status_code() / 100 == 1 ||
1820  get_status_code() == 204 ||
1821  get_status_code() == 304 ||
1822  _method == HTTPEnum::M_head);
1823 
1824  // Look for key properties in the header fields.
1825  if (get_status_code() == 206) {
1826  string content_range = get_header_value("Content-Range");
1827  if (content_range.empty()) {
1828  downloader_cat.warning()
1829  << _NOTIFY_HTTP_CHANNEL_ID
1830  << "Got 206 response without Content-Range header!\n";
1831  _status_entry._status_code = SC_invalid_http;
1832  _state = S_failure;
1833  return false;
1834 
1835  } else {
1836  if (!parse_content_range(content_range)) {
1837  downloader_cat.warning()
1838  << _NOTIFY_HTTP_CHANNEL_ID
1839  << "Couldn't parse Content-Range: " << content_range << "\n";
1840  _status_entry._status_code = SC_invalid_http;
1841  _state = S_failure;
1842  return false;
1843  }
1844  }
1845 
1846  } else {
1847  _first_byte_delivered = 0;
1848  _last_byte_delivered = 0;
1849  }
1850  if (downloader_cat.is_debug()) {
1851  if (_first_byte_requested != 0 || _last_byte_requested != 0 ||
1852  _first_byte_delivered != 0 || _last_byte_delivered != 0) {
1853  downloader_cat.debug()
1854  << _NOTIFY_HTTP_CHANNEL_ID
1855  << "Requested byte range " << _first_byte_requested
1856  << " to " << _last_byte_delivered
1857  << "; server delivers range " << _first_byte_delivered
1858  << " to " << _last_byte_delivered
1859  << "\n";
1860  }
1861  }
1862 
1863  // Set the _document_spec to reflect what we just retrieved.
1864  _document_spec = DocumentSpec(_request.get_url());
1865  string tag = get_header_value("ETag");
1866  if (!tag.empty()) {
1867  _document_spec.set_tag(HTTPEntityTag(tag));
1868  }
1869  string date = get_header_value("Last-Modified");
1870  if (!date.empty()) {
1871  _document_spec.set_date(HTTPDate(date));
1872  }
1873 
1874  // In case we've got a download in effect, now we know what the first byte
1875  // of the subdocument request will be, so we can open the file and position
1876  // it.
1877  if (_server_response_has_no_body) {
1878  // Never mind on the download.
1879  reset_download_to();
1880  }
1881 
1882  if (!open_download_file()) {
1883  return false;
1884  }
1885 
1886  _got_expected_file_size = false;
1887  _got_file_size = false;
1888  _got_transfer_file_size = false;
1889 
1890  string content_length = get_header_value("Content-Length");
1891  if (!content_length.empty()) {
1892  _file_size = atoi(content_length.c_str());
1893  _got_file_size = true;
1894 
1895  } else if (get_status_code() == 206) {
1896  // Well, we didn't get a content-length from the server, but we can infer
1897  // the number of bytes based on the range we're given.
1898  _file_size = _last_byte_delivered - _first_byte_delivered + 1;
1899  _got_file_size = true;
1900  }
1901  _redirect = get_header_value("Location");
1902 
1903  // The server might have given us just a filename for the redirect. In that
1904  // case, it's relative to the same server. If it's a relative path, it's
1905  // relative to the same directory.
1906  if (_redirect.has_path() && !_redirect.has_authority()) {
1907  URLSpec url = _document_spec.get_url();
1908  Filename path = _redirect.get_path();
1909  if (path.is_local()) {
1910  Filename rel_to = Filename(url.get_path()).get_dirname();
1911  _redirect.set_path(Filename(rel_to, path));
1912  }
1913  _redirect.set_scheme(url.get_scheme());
1914  _redirect.set_authority(url.get_authority());
1915  }
1916 
1917  _state = S_read_header;
1918 
1919  if (_server_response_has_no_body && will_close_connection()) {
1920  // If the server said it will close the connection, we should close it
1921  // too.
1922  close_connection();
1923  }
1924 
1925  // Handle automatic retries and redirects.
1926  int last_status = _last_status_code;
1927  _last_status_code = get_status_code();
1928 
1929  if (get_status_code() == 407 && last_status != 407 && !_proxy.empty()) {
1930  // 407: not authorized to proxy. Try to get the authorization.
1931  string authenticate_request = get_header_value("Proxy-Authenticate");
1932  _proxy_auth =
1933  _client->generate_auth(_proxy, true, authenticate_request);
1934  if (_proxy_auth != nullptr) {
1935  _proxy_realm = _proxy_auth->get_realm();
1936  _proxy_username = _client->select_username(_proxy, true, _proxy_realm);
1937  if (!_proxy_username.empty()) {
1938  make_request_text();
1939 
1940  // Roll the state forward to force a new request.
1941  _state = S_begin_body;
1942  return false;
1943  }
1944  }
1945  }
1946 
1947  if (get_status_code() == 401 && last_status != 401) {
1948  // 401: not authorized to remote server. Try to get the authorization.
1949  string authenticate_request = get_header_value("WWW-Authenticate");
1950  _www_auth = _client->generate_auth(_request.get_url(), false, authenticate_request);
1951  if (_www_auth != nullptr) {
1952  _www_realm = _www_auth->get_realm();
1953  _www_username = _client->select_username(_request.get_url(), false, _www_realm);
1954  if (!_www_username.empty()) {
1955  make_request_text();
1956 
1957  // Roll the state forward to force a new request.
1958  _state = S_begin_body;
1959  return false;
1960  }
1961  }
1962  }
1963 
1964  if ((get_status_code() == 300 ||
1965  get_status_code() == 301 ||
1966  get_status_code() == 302 ||
1967  get_status_code() == 303 ||
1968  get_status_code() == 307) && !get_redirect().empty()) {
1969  // Redirect. Should we handle it automatically?
1970 
1971  // According to the letter of RFC 2616, 301 and 302 responses to POST
1972  // requests must not be automatically redirected without confirmation by
1973  // the user. In reality, browsers do allow automatic redirection of these
1974  // responses, changing the POST to a GET, and we reproduce this behavior
1975  // here.
1976  if (_method == HTTPEnum::M_post) {
1977  _method = HTTPEnum::M_get;
1978  _body = string();
1979  }
1980 
1981  if (_method == HTTPEnum::M_get || _method == HTTPEnum::M_head) {
1982  // Sure!
1983  URLSpec new_url = get_redirect();
1984  if (find(_redirect_trail.begin(), _redirect_trail.end(),
1985  new_url) != _redirect_trail.end()) {
1986  downloader_cat.warning()
1987  << _NOTIFY_HTTP_CHANNEL_ID
1988  << "cycle detected in redirect to " << new_url << "\n";
1989 
1990  } else {
1991  _redirect_trail.push_back(new_url);
1992 
1993  if (downloader_cat.is_debug()) {
1994  downloader_cat.debug()
1995  << _NOTIFY_HTTP_CHANNEL_ID
1996  << "following redirect to " << new_url << "\n";
1997  }
1998  if (_request.get_url().has_username()) {
1999  new_url.set_username(_request.get_url().get_username());
2000  }
2001  reset_url(_request.get_url(), new_url);
2002  _request.set_url(new_url);
2003  _want_ssl = _request.get_url().is_ssl();
2004  reconsider_proxy();
2005  make_header();
2006  make_request_text();
2007 
2008  // Roll the state forward to force a new request.
2009  _state = S_begin_body;
2010  return false;
2011  }
2012  }
2013  }
2014 
2015  if (_state == S_read_header &&
2016  ((get_status_code() / 100) == 4 || (get_status_code() / 100) == 5) &&
2017  _proxy_serves_document && _proxy_next_index < _proxies.size()) {
2018  // If we were using a proxy (but not tunneling through the proxy) and we
2019  // got some kind of a server error, try the next proxy in sequence (if we
2020  // have one). This handles the case of a working proxy that cannot see
2021  // the host (and so returns 504 or something along those lines). Some
2022  // proxies are so broken they return a 404 in this case, so we have to
2023  // consider that along the same lines.
2024  _state = S_try_next_proxy;
2025  return false;
2026  }
2027 
2028  // Otherwise, we're good to go.
2029  return false;
2030 }
2031 
2032 /**
2033  * This is the first state when reading a file:// URL. All it does is skip
2034  * past the non-existent "header".
2035  */
2036 bool HTTPChannel::
2037 run_start_direct_file_read() {
2038  _state = S_read_header;
2039  if (!open_download_file()) {
2040  return false;
2041  }
2042  return false;
2043 }
2044 
2045 /**
2046  * In this state we have completely read the header lines returned by the
2047  * server (or proxy) in response to our request. This state represents the
2048  * normal stopping point of a call to get_document(), etc.; further reads will
2049  * return the body of the request, the requested document.
2050  *
2051  * Normally run_read_header() is not called unless the user has elected not to
2052  * read the returned document himself. In fact, the state itself only exists
2053  * so we can make a distinction between S_read_header and S_begin_body, where
2054  * S_read_header is safe to return to the user and S_begin_body means we need
2055  * to start skipping the document.
2056  */
2057 bool HTTPChannel::
2058 run_read_header() {
2059  _state = S_begin_body;
2060  return false;
2061 }
2062 
2063 /**
2064  * This state begins to skip over the body in preparation for making a new
2065  * request.
2066  */
2067 bool HTTPChannel::
2068 run_begin_body() {
2069  if (will_close_connection()) {
2070  // If the socket will close anyway, no point in skipping past the previous
2071  // body; just reset.
2072  if (downloader_cat.is_debug()) {
2073  downloader_cat.debug()
2074  << _NOTIFY_HTTP_CHANNEL_ID
2075  << "resetting to begin body; server would close anyway.\n";
2076  }
2077  reset_to_new();
2078  return false;
2079  }
2080 
2081  if (_server_response_has_no_body) {
2082  // We have already "read" the nonexistent body.
2083  _state = S_read_trailer;
2084 
2085  } else if (get_file_size() > (int)_skip_body_size) {
2086  // If we know the size of the body we are about to skip and it's too
2087  // large, then don't bother skipping it--just drop the connection and get
2088  // a new one.
2089  if (downloader_cat.is_debug()) {
2090  downloader_cat.debug()
2091  << _NOTIFY_HTTP_CHANNEL_ID
2092  << "Dropping connection rather than skipping past "
2093  << get_file_size() << " bytes.\n";
2094  }
2095  reset_to_new();
2096 
2097  } else {
2098  open_read_body();
2099  if (_body_stream == nullptr) {
2100  if (downloader_cat.is_debug()) {
2101  downloader_cat.debug()
2102  << _NOTIFY_HTTP_CHANNEL_ID
2103  << "Unable to skip body.\n";
2104  }
2105  reset_to_new();
2106 
2107  } else {
2108  _owns_body_stream = true;
2109  if (_state != S_reading_body) {
2110  reset_body_stream();
2111  }
2112  }
2113  }
2114 
2115  return false;
2116 }
2117 
2118 /**
2119  * In this state we are in the process of reading the response's body. We
2120  * will only come to this function if the user did not choose to read the
2121  * entire body himself (by calling open_read_body()).
2122  *
2123  * In this case we should skip past the body to reset the connection for
2124  * making a new request.
2125  */
2126 bool HTTPChannel::
2127 run_reading_body() {
2128  if (will_close_connection()) {
2129  // If the socket will close anyway, no point in skipping past the previous
2130  // body; just reset.
2131  if (downloader_cat.is_debug()) {
2132  downloader_cat.debug()
2133  << _NOTIFY_HTTP_CHANNEL_ID
2134  << "resetting to read body; server would close anyway.\n";
2135  }
2136  reset_to_new();
2137  return false;
2138  }
2139 
2140  // Skip the body we've already started.
2141  if (_body_stream == nullptr || !_owns_body_stream) {
2142  // Whoops, we're not in skip-body mode. Better reset.
2143  if (downloader_cat.is_debug()) {
2144  downloader_cat.debug()
2145  << _NOTIFY_HTTP_CHANNEL_ID
2146  << "resetting, not in skip-body mode.\n";
2147  }
2148  reset_to_new();
2149  return false;
2150  }
2151 
2152  string line;
2153  std::getline(*_body_stream, line);
2154  while (!_body_stream->fail() && !_body_stream->eof()) {
2155  if (downloader_cat.is_spam()) {
2156  downloader_cat.spam()
2157  << _NOTIFY_HTTP_CHANNEL_ID
2158  << "skip: " << line << "\n";
2159  }
2160  std::getline(*_body_stream, line);
2161  }
2162 
2163  if (!_body_stream->is_closed()) {
2164  // There's more to come later.
2165  return true;
2166  }
2167 
2168  reset_body_stream();
2169 
2170  // This should have been set by the call to finished_body(), above.
2171  nassertr(_state != S_reading_body, false);
2172  return false;
2173 }
2174 
2175 /**
2176  * In this state we have completely read (or skipped over) the body of the
2177  * response. We should continue skipping past the trailer following the body.
2178  *
2179  * Not all bodies come with trailers; in particular, the "identity" transfer
2180  * encoding does not include a trailer. It is therefore the responsibility of
2181  * the IdentityStreamBuf or ChunkedStreamBuf to set the state appropriately to
2182  * either S_read_body or S_read_trailer following the completion of the body.
2183  */
2184 bool HTTPChannel::
2185 run_read_body() {
2186  if (will_close_connection()) {
2187  // If the socket will close anyway, no point in skipping past the previous
2188  // body; just reset.
2189  if (downloader_cat.is_debug()) {
2190  downloader_cat.debug()
2191  << _NOTIFY_HTTP_CHANNEL_ID
2192  << "resetting to read body; server would close anyway.\n";
2193  }
2194  reset_to_new();
2195  return false;
2196  }
2197  // Skip the trailer following the recently-read body.
2198 
2199  string line;
2200  if (!server_getline(line)) {
2201  return true;
2202  }
2203  while (!line.empty()) {
2204  if (!server_getline(line)) {
2205  return true;
2206  }
2207  }
2208 
2209  _state = S_read_trailer;
2210  return false;
2211 }
2212 
2213 /**
2214  * In this state we have completely read the body and the trailer. This state
2215  * is simply a pass-through back to S_ready.
2216  */
2217 bool HTTPChannel::
2218 run_read_trailer() {
2219  if (will_close_connection()) {
2220  // If the socket will close anyway, no point in skipping past the previous
2221  // body; just reset.
2222  if (downloader_cat.is_debug()) {
2223  downloader_cat.debug()
2224  << _NOTIFY_HTTP_CHANNEL_ID
2225  << "resetting to read trailer; server would close anyway.\n";
2226  }
2227  reset_to_new();
2228  return false;
2229  }
2230 
2231  _state = S_ready;
2232  return false;
2233 }
2234 
2235 /**
2236  * After the headers, etc. have been read, this streams the download to the
2237  * named file.
2238  */
2239 bool HTTPChannel::
2240 run_download_to_file() {
2241  nassertr(_body_stream != nullptr && _owns_body_stream, false);
2242 
2243  bool do_throttle = _wanted_nonblocking && _download_throttle;
2244 
2245  static const size_t buffer_size = 4096;
2246  char buffer[buffer_size];
2247 
2248  size_t remaining_this_pass = buffer_size;
2249  if (do_throttle) {
2250  remaining_this_pass = _bytes_per_update;
2251  }
2252 
2253  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2254  size_t count = _body_stream->gcount();
2255  while (count != 0) {
2256  _download_to_stream->write(buffer, count);
2257  _bytes_downloaded += count;
2258  if (do_throttle) {
2259  nassertr(count <= remaining_this_pass, false);
2260  remaining_this_pass -= count;
2261  if (remaining_this_pass == 0) {
2262  // That's enough for now.
2263  return true;
2264  }
2265  }
2266 
2267  thread_consider_yield();
2268  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2269  count = _body_stream->gcount();
2270  }
2271 
2272  if (_download_to_stream->fail()) {
2273  downloader_cat.warning()
2274  << _NOTIFY_HTTP_CHANNEL_ID
2275  << "Error writing to " << _download_to_filename << "\n";
2276  _status_entry._status_code = SC_download_write_error;
2277  _state = S_failure;
2278  reset_download_to();
2279  return false;
2280  }
2281 
2282  _download_to_stream->flush();
2283 
2284  if (_body_stream->is_closed()) {
2285  // Done.
2286  reset_body_stream();
2287  close_download_stream();
2288  _started_download = false;
2289  return false;
2290  } else {
2291  // More to come.
2292  return true;
2293  }
2294 }
2295 
2296 /**
2297  * After the headers, etc. have been read, this streams the download to the
2298  * specified Ramfile object.
2299  */
2300 bool HTTPChannel::
2301 run_download_to_ram() {
2302  nassertr(_body_stream != nullptr && _owns_body_stream, false);
2303  nassertr(_download_to_ramfile != nullptr, false);
2304 
2305  bool do_throttle = _wanted_nonblocking && _download_throttle;
2306 
2307  static const size_t buffer_size = 4096;
2308  char buffer[buffer_size];
2309 
2310  size_t remaining_this_pass = buffer_size;
2311  if (do_throttle) {
2312  remaining_this_pass = _bytes_per_update;
2313  }
2314 
2315  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2316  size_t count = _body_stream->gcount();
2317  while (count != 0) {
2318  _download_to_ramfile->_data += string(buffer, count);
2319  _bytes_downloaded += count;
2320  if (do_throttle) {
2321  nassertr(count <= remaining_this_pass, false);
2322  remaining_this_pass -= count;
2323  if (remaining_this_pass == 0) {
2324  // That's enough for now.
2325  return true;
2326  }
2327  }
2328 
2329  thread_consider_yield();
2330  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2331  count = _body_stream->gcount();
2332  }
2333 
2334  if (_body_stream->is_closed()) {
2335  // Done.
2336  reset_body_stream();
2337  close_download_stream();
2338  _started_download = false;
2339  return false;
2340  } else {
2341  // More to come.
2342  return true;
2343  }
2344 }
2345 
2346 /**
2347  * After the headers, etc. have been read, this streams the download to the
2348  * named file.
2349  */
2350 bool HTTPChannel::
2351 run_download_to_stream() {
2352  nassertr(_body_stream != nullptr && _owns_body_stream, false);
2353 
2354  bool do_throttle = _wanted_nonblocking && _download_throttle;
2355 
2356  static const size_t buffer_size = 4096;
2357  char buffer[buffer_size];
2358 
2359  size_t remaining_this_pass = buffer_size;
2360  if (do_throttle) {
2361  remaining_this_pass = _bytes_per_update;
2362  }
2363 
2364  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2365  size_t count = _body_stream->gcount();
2366  while (count != 0) {
2367  _download_to_stream->write(buffer, count);
2368  _bytes_downloaded += count;
2369  if (do_throttle) {
2370  nassertr(count <= remaining_this_pass, false);
2371  remaining_this_pass -= count;
2372  if (remaining_this_pass == 0) {
2373  // That's enough for now.
2374  return true;
2375  }
2376  }
2377 
2378  thread_consider_yield();
2379  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2380  count = _body_stream->gcount();
2381  }
2382 
2383  if (_download_to_stream->fail()) {
2384  downloader_cat.warning()
2385  << _NOTIFY_HTTP_CHANNEL_ID
2386  << "Error writing to stream\n";
2387  _status_entry._status_code = SC_download_write_error;
2388  _state = S_failure;
2389  reset_download_to();
2390  return false;
2391  }
2392 
2393  _download_to_stream->flush();
2394 
2395  if (_body_stream->is_closed()) {
2396  // Done.
2397  reset_body_stream();
2398  close_download_stream();
2399  _started_download = false;
2400  return false;
2401  } else {
2402  // More to come.
2403  return true;
2404  }
2405 }
2406 
2407 
2408 /**
2409  * Begins a new document request to the server, throwing away whatever request
2410  * was currently pending if necessary.
2411  */
2412 void HTTPChannel::
2413 begin_request(HTTPEnum::Method method, const DocumentSpec &url,
2414  const string &body, bool nonblocking,
2415  size_t first_byte, size_t last_byte) {
2416 
2417  downloader_cat.info()
2418  << _NOTIFY_HTTP_CHANNEL_ID
2419  << "begin " << method << " " << url << "\n";
2420 
2421  reset_for_new_request();
2422 
2423  _wanted_nonblocking = nonblocking;
2424 #if defined(HAVE_THREADS) && defined(SIMPLE_THREADS)
2425  // In the presence of SIMPLE_THREADS, we always use non-blocking IO. We
2426  // simulate blocking by yielding the thread.
2427  nonblocking = true;
2428 #endif
2429 
2430  // Get the set of proxies that are appropriate for this URL.
2431  _proxies.clear();
2432  _proxy_next_index = 0;
2433  if (get_allow_proxy()) {
2434  _client->get_proxies_for_url(url.get_url(), _proxies);
2435  }
2436 
2437  // If we still have a live connection to a proxy that is on the list, that
2438  // proxy should be moved immediately to the front of the list (to minimize
2439  // restarting connections unnecessarily).
2440  if (!_bio.is_null() && !_proxies.empty() && !_proxy.empty()) {
2441  Proxies::iterator pi = find(_proxies.begin(), _proxies.end(), _proxy);
2442  if (pi != _proxies.end()) {
2443  _proxies.erase(pi);
2444  _proxies.insert(_proxies.begin(), _proxy);
2445  }
2446  }
2447 
2448  URLSpec new_proxy;
2449  if (_proxy_next_index < _proxies.size()) {
2450  new_proxy = _proxies[_proxy_next_index];
2451  _proxy_next_index++;
2452  }
2453 
2454  // Changing the proxy is grounds for dropping the old connection, if any.
2455  if (_proxy != new_proxy) {
2456  _proxy = new_proxy;
2457  _proxy_auth = nullptr;
2458  if (downloader_cat.is_debug()) {
2459  downloader_cat.debug()
2460  << _NOTIFY_HTTP_CHANNEL_ID
2461  << "resetting to change proxy to " << _proxy << "\n";
2462  }
2463  reset_to_new();
2464  }
2465 
2466  // Ditto with changing the nonblocking state.
2467  if (_nonblocking != nonblocking) {
2468  _nonblocking = nonblocking;
2469  if (downloader_cat.is_debug()) {
2470  downloader_cat.debug()
2471  << _NOTIFY_HTTP_CHANNEL_ID
2472  << "resetting to change nonblocking state to " << _nonblocking << ".\n";
2473  }
2474  reset_to_new();
2475  }
2476 
2477  reset_url(_request.get_url(), url.get_url());
2478  _request = url;
2479  _document_spec = DocumentSpec();
2480  _method = method;
2481  _body = body;
2482 
2483  // An https-style request means we'll need to establish an SSL connection.
2484  _want_ssl = _request.get_url().is_ssl();
2485 
2486  _first_byte_requested = first_byte;
2487  _last_byte_requested = last_byte;
2488  _connect_count = 0;
2489 
2490  reconsider_proxy();
2491 
2492  // Reset from whatever previous request might still be pending.
2493  if (_request.get_url().get_scheme() == "file") {
2494  // A "file" URL just means we're reading a raw file. This only supports
2495  // actual disk files, not the VFS, because we use a BIO_new_file()
2496  // underneath this.
2497  reset_to_new();
2498  _bio = new BioPtr(_request.get_url());
2499  if (_bio->get_bio() != nullptr) {
2500  // Successfully opened the file.
2501  _source = new BioStreamPtr(new BioStream(_bio));
2502  _status_entry._status_code = 200;
2503  _state = S_start_direct_file_read;
2504 
2505  // Get the file size.
2506  FILE *fp = nullptr;
2507  BIO_get_fp(_bio->get_bio(), &fp);
2508  if (fp != nullptr) {
2509  if (fseek(fp, 0, SEEK_END) == 0) {
2510  _file_size = ftell(fp);
2511  _got_file_size = true;
2512  fseek(fp, 0, SEEK_SET);
2513  }
2514  }
2515 
2516  } else {
2517  // Couldn't read the file.
2518  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
2519  _status_entry._status_code = SC_no_connection;
2520  _state = S_failure;
2521  }
2522 
2523  } else {
2524  // We're reading a normal network URL.
2525  if (_state == S_failure || (_state < S_read_header && _state != S_ready)) {
2526  if (downloader_cat.is_debug()) {
2527  downloader_cat.debug()
2528  << _NOTIFY_HTTP_CHANNEL_ID
2529  << "resetting to clear previous request.\n";
2530  }
2531  reset_to_new();
2532 
2533  } else if (TrueClock::get_global_ptr()->get_short_time() - _last_run_time >= _idle_timeout) {
2534  if (downloader_cat.is_debug()) {
2535  downloader_cat.debug()
2536  << _NOTIFY_HTTP_CHANNEL_ID
2537  << "resetting old connection: "
2538  << TrueClock::get_global_ptr()->get_short_time() - _last_run_time
2539  << " s old.\n";
2540  }
2541  reset_to_new();
2542 
2543  } else if (_state == S_read_header) {
2544  // Roll one step forwards to start skipping past the previous body.
2545  _state = S_begin_body;
2546  }
2547  }
2548 
2549  if (_method == HTTPEnum::M_connect) {
2550  _done_state = S_ready;
2551  } else {
2552  _done_state = S_read_header;
2553  }
2554 }
2555 
2556 /**
2557  * Reevaluates the flags and strings that are computed based on the particular
2558  * proxy we are attempting to connect to. This should be called when we
2559  * initiate a request, and also whenever we change proxies while processing a
2560  * request.
2561  */
2562 void HTTPChannel::
2563 reconsider_proxy() {
2564  _proxy_tunnel_now = false;
2565  _proxy_serves_document = false;
2566 
2567  if (!_proxy.empty()) {
2568  // If the user insists we always tunnel through a proxy, or if we're
2569  // opening an SSL connection, or the user has explicitly asked for a
2570  // direct connection of some kind, or if we have a SOCKS-style proxy; each
2571  // of these demands a tunnel through the proxy to speak directly to the
2572  // http server.
2573  _proxy_tunnel_now =
2574  (get_proxy_tunnel() || _want_ssl ||
2575  _method == HTTPEnum::M_connect || _proxy.get_scheme() == "socks");
2576 
2577  // Otherwise (but we still have a proxy), then we ask the proxy to hand us
2578  // the document.
2579  _proxy_serves_document = !_proxy_tunnel_now;
2580  }
2581 
2582  make_header();
2583  make_request_text();
2584 
2585  if (_proxy_tunnel_now) {
2586  // Maybe we need to tunnel through the proxy to connect to the server
2587  // directly.
2588  ostringstream request;
2589  request
2590  << "CONNECT " << _request.get_url().get_server_and_port()
2591  << " " << _client->get_http_version_string() << "\r\n";
2592  if (_client->get_http_version() >= HTTPEnum::HV_11) {
2593  request
2594  << "Host: " << _request.get_url().get_server_and_port() << "\r\n";
2595  }
2596  _proxy_header = request.str();
2597  make_proxy_request_text();
2598 
2599  } else {
2600  _proxy_header = string();
2601  _proxy_request_text = string();
2602  }
2603 }
2604 
2605 
2606 /**
2607  * Resets the internal state variables in preparation for beginning a new
2608  * request.
2609  */
2610 void HTTPChannel::
2611 reset_for_new_request() {
2612  if (downloader_cat.is_spam()) {
2613  downloader_cat.spam()
2614  << _NOTIFY_HTTP_CHANNEL_ID
2615  << "reset_for_new_request.\n";
2616  }
2617 
2618  reset_download_to();
2619  reset_body_stream();
2620 
2621  _last_status_code = 0;
2622  _status_entry = StatusEntry();
2623 
2624  _response_type = RT_none;
2625  _redirect_trail.clear();
2626  _bytes_downloaded = 0;
2627  _bytes_requested = 0;
2628 }
2629 
2630 /**
2631  * This is called by the body reading classes--ChunkedStreamBuf and
2632  * IdentityStreamBuf--when they have finished reading the body. It advances
2633  * the state appropriately.
2634  *
2635  * has_trailer should be set true if the body type has an associated trailer
2636  * which should be read or skipped, or false if there is no trailer.
2637  */
2638 void HTTPChannel::
2639 finished_body(bool has_trailer) {
2640  if (will_close_connection() && _download_dest == DD_none) {
2641  if (downloader_cat.is_debug()) {
2642  downloader_cat.debug()
2643  << _NOTIFY_HTTP_CHANNEL_ID
2644  << "resetting to finish body; server would close anyway.\n";
2645  }
2646  reset_to_new();
2647 
2648  } else {
2649  if (has_trailer) {
2650  _state = HTTPChannel::S_read_body;
2651  } else {
2652  _state = HTTPChannel::S_read_trailer;
2653  }
2654  }
2655 }
2656 
2657 /**
2658  * If a download has been requested, opens the file on disk (or prepares the
2659  * RamFile or stream) and seeks within it to the appropriate
2660  * _first_byte_delivered position, so that downloaded bytes will be written to
2661  * the appropriate point within the file. Returns true if the starting
2662  * position is valid, false otherwise (in which case the state is set to
2663  * S_failure).
2664  */
2665 bool HTTPChannel::
2666 open_download_file() {
2667  _subdocument_resumes = (_subdocument_resumes && _first_byte_delivered != 0);
2668 
2669  if (_download_dest == DD_file) {
2671  _download_to_stream = vfs->open_write_file(_download_to_filename, false, !_subdocument_resumes);
2672  if (_download_to_stream == nullptr) {
2673  downloader_cat.info()
2674  << _NOTIFY_HTTP_CHANNEL_ID
2675  << "Could not open " << _download_to_filename << " for writing.\n";
2676  _status_entry._status_code = SC_download_open_error;
2677  _state = S_failure;
2678  return false;
2679  }
2680  }
2681 
2682  if (_subdocument_resumes) {
2683  if (_download_dest == DD_file) {
2684  // Windows doesn't complain if you try to seek past the end of file--it
2685  // happily appends enough zero bytes to make the difference. Blecch.
2686  // That means we need to get the file size first to check it ourselves.
2687  _download_to_stream->seekp(0, std::ios::end);
2688  if (_first_byte_delivered > (size_t)_download_to_stream->tellp()) {
2689  downloader_cat.info()
2690  << _NOTIFY_HTTP_CHANNEL_ID
2691  << "Invalid starting position of byte " << _first_byte_delivered
2692  << " within " << _download_to_filename << " (which has "
2693  << _download_to_stream->tellp() << " bytes)\n";
2694  close_download_stream();
2695  _status_entry._status_code = SC_download_invalid_range;
2696  _state = S_failure;
2697  return false;
2698  }
2699 
2700  _download_to_stream->seekp(_first_byte_delivered);
2701 
2702  } else if (_download_dest == DD_ram) {
2703  if (_first_byte_delivered > _download_to_ramfile->_data.length()) {
2704  downloader_cat.info()
2705  << _NOTIFY_HTTP_CHANNEL_ID
2706  << "Invalid starting position of byte " << _first_byte_delivered
2707  << " within Ramfile (which has "
2708  << _download_to_ramfile->_data.length() << " bytes)\n";
2709  close_download_stream();
2710  _status_entry._status_code = SC_download_invalid_range;
2711  _state = S_failure;
2712  return false;
2713  }
2714 
2715  if (_first_byte_delivered == 0) {
2716  _download_to_ramfile->_data = string();
2717  } else {
2718  _download_to_ramfile->_data =
2719  _download_to_ramfile->_data.substr(0, _first_byte_delivered);
2720  }
2721  } else if (_download_dest == DD_stream) {
2722  // Windows doesn't complain if you try to seek past the end of file--it
2723  // happily appends enough zero bytes to make the difference. Blecch.
2724  // That means we need to get the file size first to check it ourselves.
2725  _download_to_stream->seekp(0, std::ios::end);
2726  if (_first_byte_delivered > (size_t)_download_to_stream->tellp()) {
2727  downloader_cat.info()
2728  << _NOTIFY_HTTP_CHANNEL_ID
2729  << "Invalid starting position of byte " << _first_byte_delivered
2730  << " within stream (which has "
2731  << _download_to_stream->tellp() << " bytes)\n";
2732  close_download_stream();
2733  _status_entry._status_code = SC_download_invalid_range;
2734  _state = S_failure;
2735  return false;
2736  }
2737 
2738  _download_to_stream->seekp(_first_byte_delivered);
2739  }
2740 
2741  } else {
2742  // If _subdocument_resumes is false, we should be sure to reset to the
2743  // beginning of the file, regardless of the value of
2744  // _first_byte_delivered.
2745  if (_download_dest == DD_file || _download_dest == DD_stream) {
2746  _download_to_stream->seekp(0);
2747  } else if (_download_dest == DD_ram) {
2748  _download_to_ramfile->_data = string();
2749  }
2750  }
2751 
2752  return true;
2753 }
2754 
2755 
2756 /**
2757  * Reads a single line from the server's reply. Returns true if the line is
2758  * successfully retrieved, or false if a complete line has not yet been
2759  * received or if the connection has been closed.
2760  */
2761 bool HTTPChannel::
2762 server_getline(string &str) {
2763  nassertr(!_source.is_null(), false);
2764  int ch = (*_source)->get();
2765  while (ch != EOF && !(*_source)->fail()) {
2766  switch (ch) {
2767  case '\n':
2768  // end-of-line character, we're done.
2769  str = _working_get;
2770  _working_get = string();
2771  {
2772  // Trim trailing whitespace. We're not required to do this per the
2773  // HTTP spec, but let's be generous.
2774  size_t p = str.length();
2775  while (p > 0 && isspace(str[p - 1])) {
2776  --p;
2777  }
2778  str = str.substr(0, p);
2779  }
2780  if (downloader_cat.is_spam()) {
2781  downloader_cat.spam()
2782  << _NOTIFY_HTTP_CHANNEL_ID
2783  << "recv: " << str << "\n";
2784  }
2785  return true;
2786 
2787  case '\r':
2788  // Ignore CR characters.
2789  break;
2790 
2791  default:
2792  _working_get += (char)ch;
2793  }
2794  ch = (*_source)->get();
2795  }
2796 
2797  check_socket();
2798  return false;
2799 }
2800 
2801 /**
2802  * Reads a line from the server's reply. If the server disconnects or times
2803  * out before sending a reply, moves on to the next proxy server (or sets
2804  * failure mode) and returns false; otherwise, returns true.
2805  */
2806 bool HTTPChannel::
2807 server_getline_failsafe(string &str) {
2808  if (!server_getline(str)) {
2809  if (_bio.is_null()) {
2810  // Huh, the server hung up on us as soon as we tried to connect.
2811  if (_response_type == RT_hangup) {
2812  // This was our second immediate hangup in a row. Give up.
2813  _status_entry._status_code = SC_lost_connection;
2814  _state = S_try_next_proxy;
2815 
2816  } else {
2817  // Try again, once.
2818  _response_type = RT_hangup;
2819  }
2820 
2821  } else {
2822  double elapsed =
2823  TrueClock::get_global_ptr()->get_short_time() -
2824  _sent_request_time;
2825  if (elapsed > get_http_timeout()) {
2826  // Time to give up.
2827  downloader_cat.info()
2828  << _NOTIFY_HTTP_CHANNEL_ID
2829  << "Timeout waiting for "
2830  << _request.get_url().get_server_and_port()
2831  << " in server_getline_failsafe (" << elapsed
2832  << " seconds elapsed).\n";
2833  _status_entry._status_code = SC_timeout;
2834  _state = S_try_next_proxy;
2835  }
2836  }
2837 
2838  return false;
2839  }
2840  return true;
2841 }
2842 
2843 /**
2844  * Reads a fixed number of bytes from the server's reply. Returns true if the
2845  * indicated number of bytes are successfully retrieved, or false if the
2846  * complete set has not yet been received or if the connection has been
2847  * closed.
2848  */
2849 bool HTTPChannel::
2850 server_get(string &str, size_t num_bytes) {
2851  nassertr(!_source.is_null(), false);
2852  int ch = (*_source)->get();
2853  while (ch != EOF && !(*_source)->fail()) {
2854  _working_get += (char)ch;
2855  if (_working_get.length() >= num_bytes) {
2856  str = _working_get;
2857  _working_get = string();
2858  return true;
2859  }
2860 
2861  ch = (*_source)->get();
2862  }
2863 
2864  check_socket();
2865  return false;
2866 }
2867 
2868 /**
2869  * Reads a fixed number of bytes from the server. If the server disconnects
2870  * or times out before sending a reply, moves on to the next proxy server (or
2871  * sets failure mode) and returns false; otherwise, returns true.
2872  */
2873 bool HTTPChannel::
2874 server_get_failsafe(string &str, size_t num_bytes) {
2875  if (!server_get(str, num_bytes)) {
2876  if (_bio.is_null()) {
2877  // Huh, the server hung up on us as soon as we tried to connect.
2878  if (_response_type == RT_hangup) {
2879  // This was our second immediate hangup in a row. Give up.
2880  _status_entry._status_code = SC_lost_connection;
2881  _state = S_try_next_proxy;
2882 
2883  } else {
2884  // Try again, once.
2885  _response_type = RT_hangup;
2886  }
2887 
2888  } else {
2889  double elapsed =
2890  TrueClock::get_global_ptr()->get_short_time() -
2891  _sent_request_time;
2892  if (elapsed > get_http_timeout()) {
2893  // Time to give up.
2894  downloader_cat.info()
2895  << _NOTIFY_HTTP_CHANNEL_ID
2896  << "Timeout waiting for "
2897  << _request.get_url().get_server_and_port()
2898  << " in server_get_failsafe (" << elapsed
2899  << " seconds elapsed).\n";
2900  _status_entry._status_code = SC_timeout;
2901  _state = S_try_next_proxy;
2902  }
2903  }
2904 
2905  return false;
2906  }
2907  return true;
2908 }
2909 
2910 /**
2911  * Sends a series of lines to the server. Returns true if the buffer is fully
2912  * sent, or false if some of it remains. If this returns false, the function
2913  * must be called again later, passing in the exact same string, until the
2914  * return value is true.
2915  *
2916  * If the secret flag is true, the data is not echoed to the log (even in spam
2917  * mode). This may be desirable if the data may contain binary data, or if it
2918  * may contain passwords etc.
2919  */
2920 bool HTTPChannel::
2921 server_send(const string &str, bool secret) {
2922  nassertr(str.length() > _sent_so_far, true);
2923 
2924  // Use the underlying BIO to write to the server, instead of the BIOStream,
2925  // which would insist on blocking (and might furthermore delay the send due
2926  // to collect-tcp mode being enabled).
2927  size_t bytes_to_send = str.length() - _sent_so_far;
2928  int write_count =
2929  BIO_write(*_bio, str.data() + _sent_so_far, bytes_to_send);
2930 
2931  if (write_count <= 0) {
2932  if (BIO_should_retry(*_bio)) {
2933  // Temporary failure: the pipe is full. Wait till later.
2934  return false;
2935  }
2936  // Oops, the connection has been closed!
2937  if (downloader_cat.is_debug()) {
2938  downloader_cat.debug()
2939  << _NOTIFY_HTTP_CHANNEL_ID
2940  << "Lost connection to server unexpectedly during write.\n";
2941  }
2942  reset_to_new();
2943  return false;
2944  }
2945 
2946  if (downloader_cat.is_spam()) {
2947  downloader_cat.spam()
2948  << _NOTIFY_HTTP_CHANNEL_ID
2949  << "wrote " << write_count << " bytes to " << _bio << "\n";
2950  }
2951 
2952 #ifndef NDEBUG
2953  if (!secret && downloader_cat.is_spam()) {
2954  show_send(str.substr(0, write_count));
2955  }
2956 #endif
2957 
2958  if (write_count < (int)bytes_to_send) {
2959  _sent_so_far += write_count;
2960  return false;
2961  }
2962 
2963  // Buffer completely sent.
2964  _sent_so_far = 0;
2965  return true;
2966 }
2967 
2968 /**
2969  * Parses the first line sent back from an HTTP server or proxy and stores the
2970  * result in _status_code and _http_version, etc. Returns true on success,
2971  * false on invalid response.
2972  */
2973 bool HTTPChannel::
2974 parse_http_response(const string &line) {
2975  // The first line back should include the HTTP version and the result code.
2976  if (line.length() < 5 || line.substr(0, 5) != string("HTTP/")) {
2977  // Not an HTTP response.
2978  _status_entry._status_code = SC_non_http_response;
2979  if (_response_type == RT_non_http) {
2980  // This was our second non-HTTP response in a row. Give up.
2981  _state = S_try_next_proxy;
2982 
2983  } else {
2984  // Maybe we were just in some bad state. Drop the connection and try
2985  // again, once.
2986  if (downloader_cat.is_debug()) {
2987  downloader_cat.debug()
2988  << _NOTIFY_HTTP_CHANNEL_ID
2989  << "got non-HTTP response, resetting.\n";
2990  }
2991  reset_to_new();
2992  _response_type = RT_non_http;
2993  }
2994  return false;
2995  }
2996 
2997  // Split out the first line into its three components.
2998  size_t p = 5;
2999  while (p < line.length() && !isspace(line[p])) {
3000  p++;
3001  }
3002  _http_version_string = line.substr(0, p);
3003  _http_version = HTTPClient::parse_http_version_string(_http_version_string);
3004 
3005  while (p < line.length() && isspace(line[p])) {
3006  p++;
3007  }
3008  size_t q = p;
3009  while (q < line.length() && !isspace(line[q])) {
3010  q++;
3011  }
3012  string status_code = line.substr(p, q - p);
3013  _status_entry._status_code = atoi(status_code.c_str());
3014 
3015  while (q < line.length() && isspace(line[q])) {
3016  q++;
3017  }
3018  _status_entry._status_string = line.substr(q, line.length() - q);
3019 
3020  return true;
3021 }
3022 
3023 /**
3024  * Reads the series of header lines from the server and stores them in
3025  * _headers. Returns true if there is more to read, false when done.
3026  */
3027 bool HTTPChannel::
3028 parse_http_header() {
3029  string line;
3030  if (!server_getline(line)) {
3031  return true;
3032  }
3033 
3034  while (!line.empty()) {
3035  if (isspace(line[0])) {
3036  // If the line begins with a space, that continues the previous field.
3037  size_t p = 0;
3038  while (p < line.length() && isspace(line[p])) {
3039  p++;
3040  }
3041  _current_field_value += line.substr(p - 1);
3042 
3043  } else {
3044  // If the line does not begin with a space, that defines a new field.
3045  if (!_current_field_name.empty()) {
3046  store_header_field(_current_field_name, _current_field_value);
3047  _current_field_value = string();
3048  }
3049 
3050  size_t colon = line.find(':');
3051  if (colon != string::npos) {
3052  _current_field_name = downcase(line.substr(0, colon));
3053  size_t p = colon + 1;
3054  while (p < line.length() && isspace(line[p])) {
3055  p++;
3056  }
3057  _current_field_value = line.substr(p);
3058  }
3059  }
3060 
3061  if (!server_getline(line)) {
3062  return true;
3063  }
3064  }
3065 
3066  // After reading an empty line, we're done with the headers.
3067  if (!_current_field_name.empty()) {
3068  store_header_field(_current_field_name, _current_field_value);
3069  _current_field_value = string();
3070  }
3071 
3072  return false;
3073 }
3074 
3075 /**
3076  * Interprets the "Content-Range" header in the reply, and fills in
3077  * _first_byte_delivered and _last_byte_delivered appropriately if the header
3078  * response can be understood.
3079  */
3080 bool HTTPChannel::
3081 parse_content_range(const string &content_range) {
3082  // First, get the units indication.
3083  size_t p = 0;
3084  while (p < content_range.length() && !isspace(content_range[p])) {
3085  p++;
3086  }
3087 
3088  string units = content_range.substr(0, p);
3089  while (p < content_range.length() && isspace(content_range[p])) {
3090  p++;
3091  }
3092 
3093  if (units == "bytes") {
3094  const char *c_str = content_range.c_str();
3095  char *endptr;
3096  if (p < content_range.length() && isdigit(content_range[p])) {
3097  long first_byte = strtol(c_str + p, &endptr, 10);
3098  p = endptr - c_str;
3099  if (p < content_range.length() && content_range[p] == '-') {
3100  p++;
3101  if (p < content_range.length() && isdigit(content_range[p])) {
3102  long last_byte = strtol(c_str + p, &endptr, 10);
3103  p = endptr - c_str;
3104 
3105  if (last_byte >= first_byte) {
3106  _first_byte_delivered = first_byte;
3107  _last_byte_delivered = last_byte;
3108  return true;
3109  }
3110  }
3111  }
3112  }
3113  }
3114 
3115  // Invalid or unhandled response.
3116  return false;
3117 }
3118 
3119 
3120 /**
3121  * Checks whether the connection to the server has been closed after a failed
3122  * read. If it has, issues a warning and calls reset_to_new().
3123  */
3124 void HTTPChannel::
3125 check_socket() {
3126  nassertv(!_source.is_null());
3127  if ((*_source)->is_closed()) {
3128  if (downloader_cat.is_debug()) {
3129  downloader_cat.debug()
3130  << _NOTIFY_HTTP_CHANNEL_ID
3131  << "Lost connection to server unexpectedly during read.\n";
3132  }
3133  reset_to_new();
3134  }
3135 }
3136 
3137 /*
3138  Certificate verify error codes:
3139 
3140 0 X509_V_OK: ok
3141 
3142  the operation was successful.
3143 
3144 2 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT: unable to get issuer certificate
3145 
3146  the issuer certificate could not be found: this occurs if the
3147  issuer certificate of an untrusted certificate cannot be found.
3148 
3149 3 X509_V_ERR_UNABLE_TO_GET_CRL: unable to get certificate CRL
3150 
3151  the CRL of a certificate could not be found. Unused.
3152 
3153 4 X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE: unable to decrypt
3154 certificate's signature
3155 
3156  the certificate signature could not be decrypted. This means that
3157  the actual signature value could not be determined rather than it
3158  not matching the expected value, this is only meaningful for RSA
3159  keys.
3160 
3161 5 X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE: unable to decrypt CRL's signature
3162 
3163  the CRL signature could not be decrypted: this means that the
3164  actual signature value could not be determined rather than it not
3165  matching the expected value. Unused.
3166 
3167 6 X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY: unable to decode
3168 issuer public key
3169 
3170  the public key in the certificate SubjectPublicKeyInfo could not
3171  be read.
3172 
3173 7 X509_V_ERR_CERT_SIGNATURE_FAILURE: certificate signature failure
3174 
3175  the signature of the certificate is invalid.
3176 
3177 8 X509_V_ERR_CRL_SIGNATURE_FAILURE: CRL signature failure
3178 
3179  the signature of the CRL is invalid. Unused.
3180 
3181 9 X509_V_ERR_CERT_NOT_YET_VALID: certificate is not yet valid
3182 
3183  the certificate is not yet valid: the notBefore date is after the
3184  current time.
3185 
3186 10 X509_V_ERR_CERT_HAS_EXPIRED: certificate has expired
3187 
3188  the certificate has expired: that is the notAfter date is before
3189  the current time.
3190 
3191 11 X509_V_ERR_CRL_NOT_YET_VALID: CRL is not yet valid
3192 
3193  the CRL is not yet valid. Unused.
3194 
3195 12 X509_V_ERR_CRL_HAS_EXPIRED: CRL has expired
3196 
3197  the CRL has expired. Unused.
3198 
3199 13 X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD: format error in
3200 certificate's notBefore field
3201 
3202  the certificate notBefore field contains an invalid time.
3203 
3204 14 X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD: format error in
3205 certificate's notAfter field
3206 
3207  the certificate notAfter field contains an invalid time.
3208 
3209 15 X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD: format error in CRL's
3210 lastUpdate field
3211 
3212  the CRL lastUpdate field contains an invalid time. Unused.
3213 
3214 16 X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD: format error in CRL's
3215 nextUpdate field
3216 
3217  the CRL nextUpdate field contains an invalid time. Unused.
3218 
3219 17 X509_V_ERR_OUT_OF_MEM: out of memory
3220 
3221  an error occurred trying to allocate memory. This should never
3222  happen.
3223 
3224 18 X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT: self signed certificate
3225 
3226  the passed certificate is self signed and the same certificate
3227  cannot be found in the list of trusted certificates.
3228 
3229 19 X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN: self signed certificate in
3230 certificate chain
3231 
3232  the certificate chain could be built up using the untrusted
3233  certificates but the root could not be found locally.
3234 
3235 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY: unable to get local
3236 issuer certificate
3237 
3238  the issuer certificate of a locally looked up certificate could
3239  not be found. This normally means the list of trusted certificates
3240  is not complete.
3241 
3242 21 X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE: unable to verify the
3243 first certificate
3244 
3245  no signatures could be verified because the chain contains only
3246  one certificate and it is not self signed.
3247 
3248 22 X509_V_ERR_CERT_CHAIN_TOO_LONG: certificate chain too long
3249 
3250  the certificate chain length is greater than the supplied maximum
3251  depth. Unused.
3252 
3253 23 X509_V_ERR_CERT_REVOKED: certificate revoked
3254 
3255  the certificate has been revoked. Unused.
3256 
3257 24 X509_V_ERR_INVALID_CA: invalid CA certificate
3258 
3259  a CA certificate is invalid. Either it is not a CA or its
3260  extensions are not consistent with the supplied purpose.
3261 
3262 25 X509_V_ERR_PATH_LENGTH_EXCEEDED: path length constraint exceeded
3263 
3264  the basicConstraints pathlength parameter has been exceeded.
3265 
3266 26 X509_V_ERR_INVALID_PURPOSE: unsupported certificate purpose
3267 
3268  the supplied certificate cannot be used for the specified purpose.
3269 
3270 27 X509_V_ERR_CERT_UNTRUSTED: certificate not trusted
3271 
3272  the root CA is not marked as trusted for the specified purpose.
3273 
3274 28 X509_V_ERR_CERT_REJECTED: certificate rejected
3275 
3276  the root CA is marked to reject the specified purpose.
3277 
3278 29 X509_V_ERR_SUBJECT_ISSUER_MISMATCH: subject issuer mismatch
3279 
3280  the current candidate issuer certificate was rejected because its
3281  subject name did not match the issuer name of the current
3282  certificate. Only displayed when the -issuer_checks option is set.
3283 
3284 30 X509_V_ERR_AKID_SKID_MISMATCH: authority and subject key identifier
3285 mismatch
3286 
3287  the current candidate issuer certificate was rejected because its
3288  subject key identifier was present and did not match the authority
3289  key identifier current certificate. Only displayed when the
3290  -issuer_checks option is set.
3291 
3292 31 X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH: authority and issuer serial
3293 number mismatch
3294 
3295  the current candidate issuer certificate was rejected because its
3296  issuer name and serial number was present and did not match the
3297  authority key identifier of the current certificate. Only
3298  displayed when the -issuer_checks option is set.
3299 
3300 32 X509_V_ERR_KEYUSAGE_NO_CERTSIGN: key usage does not include
3301 certificate signing
3302 
3303  the current candidate issuer certificate was rejected because its
3304  keyUsage extension does not permit certificate signing.
3305 
3306 50 X509_V_ERR_APPLICATION_VERIFICATION: application verification failure
3307 
3308  an application specific error. Unused.
3309 
3310 */
3311 
3312 /**
3313  * Checks to see if the indicated certificate is on the pre-approved list for
3314  * the current server.
3315  *
3316  * If the full cert itself (including its key) is on the pre-approved list,
3317  * sets both cert_preapproved and cert_name_preapproved to true.
3318  *
3319  * If the full cert is not on the pre-approved list, but its name matches a
3320  * name on the pre-approved list, sets cert_name_preapproved to true, and
3321  * cert_preapproved to false.
3322  *
3323  * Otherwise, sets both values to false. This doesn't mean the cert is
3324  * necessarily invalid, just that it wasn't on the pre-approved list (which is
3325  * usually empty anyway).
3326  */
3327 void HTTPChannel::
3328 check_preapproved_server_certificate(X509 *cert, bool &cert_preapproved,
3329  bool &cert_name_preapproved) const {
3330  return _client->check_preapproved_server_certificate(_request.get_url(),
3331  cert, cert_preapproved,
3332  cert_name_preapproved);
3333 }
3334 
3335 /**
3336  * Returns true if the name in the cert matches the hostname of the server,
3337  * false otherwise.
3338  */
3339 bool HTTPChannel::
3340 validate_server_name(X509 *cert) {
3341  string hostname = _request.get_url().get_server();
3342 
3343  vector_string cert_names;
3344 
3345  // According to RFC 2818, we should check the DNS name(s) in the
3346  // subjectAltName extension first, if that extension exists.
3347  STACK_OF(GENERAL_NAME) *subject_alt_names =
3348  (STACK_OF(GENERAL_NAME) *)X509_get_ext_d2i(cert, NID_subject_alt_name, nullptr, nullptr);
3349  if (subject_alt_names != nullptr) {
3350  int num_alts = sk_GENERAL_NAME_num(subject_alt_names);
3351  for (int i = 0; i < num_alts; ++i) {
3352  // Get the ith alt name.
3353  const GENERAL_NAME *alt_name =
3354  sk_GENERAL_NAME_value(subject_alt_names, i);
3355 
3356  if (alt_name->type == GEN_DNS) {
3357  char *buffer = nullptr;
3358  int len = ASN1_STRING_to_UTF8((unsigned char**)&buffer,
3359  alt_name->d.ia5);
3360  if (len > 0) {
3361  cert_names.push_back(string(buffer, len));
3362  }
3363  if (buffer != nullptr) {
3364  OPENSSL_free(buffer);
3365  }
3366  }
3367  }
3368  }
3369 
3370  if (cert_names.empty()) {
3371  // If there were no DNS names, use the common name instead.
3372 
3373  X509_NAME *xname = X509_get_subject_name(cert);
3374  if (xname != nullptr) {
3375  string common_name = get_x509_name_component(xname, NID_commonName);
3376  cert_names.push_back(common_name);
3377  }
3378  }
3379 
3380  if (cert_names.empty()) {
3381  downloader_cat.info()
3382  << _NOTIFY_HTTP_CHANNEL_ID
3383  << "Server certificate from " << hostname
3384  << " provides no name.\n";
3385  return false;
3386  }
3387 
3388  if (downloader_cat.is_debug()) {
3389  downloader_cat.debug()
3390  << _NOTIFY_HTTP_CHANNEL_ID
3391  << "Server certificate from " << hostname
3392  << " provides name(s):";
3393  vector_string::const_iterator si;
3394  for (si = cert_names.begin(); si != cert_names.end(); ++si) {
3395  const string &cert_name = (*si);
3396  downloader_cat.debug(false)
3397  << " " << cert_name;
3398  }
3399  downloader_cat.debug(false)
3400  << "\n";
3401  }
3402 
3403  // Now validate the names we found. If any of them matches, the cert
3404  // matches.
3405  vector_string::const_iterator si;
3406  for (si = cert_names.begin(); si != cert_names.end(); ++si) {
3407  const string &cert_name = (*si);
3408 
3409  if (match_cert_name(cert_name, hostname)) {
3410  return true;
3411  }
3412  }
3413 
3414  downloader_cat.info()
3415  << _NOTIFY_HTTP_CHANNEL_ID
3416  << "Server certificate from " << hostname
3417  << " provides wrong name(s):";
3418  for (si = cert_names.begin(); si != cert_names.end(); ++si) {
3419  const string &cert_name = (*si);
3420  downloader_cat.info(false)
3421  << " " << cert_name;
3422  }
3423  downloader_cat.info(false)
3424  << "\n";
3425 
3426  return false;
3427 }
3428 
3429 /**
3430  * Returns true if this particular name from the certificate matches the
3431  * indicated hostname, false otherwise.
3432  */
3433 bool HTTPChannel::
3434 match_cert_name(const string &cert_name, const string &hostname) {
3435  // We use GlobPattern to match the name. This isn't quite consistent with
3436  // RFC2818, since it also accepts additional wildcard characters like "?"
3437  // and "[]", but I think it's close enough.
3438 
3439  GlobPattern pattern(cert_name);
3440  pattern.set_case_sensitive(false);
3441  pattern.set_nomatch_chars(".");
3442  return pattern.matches(hostname);
3443 }
3444 
3445 /**
3446  * Returns the indicated component of the X509 name as a string, if defined,
3447  * or empty string if it is not.
3448  */
3449 string HTTPChannel::
3450 get_x509_name_component(X509_NAME *name, int nid) {
3451  ASN1_OBJECT *obj = OBJ_nid2obj(nid);
3452 
3453  if (obj == nullptr) {
3454  // Unknown nid. See opensslobjects.h.
3455  return string();
3456  }
3457 
3458  int i = X509_NAME_get_index_by_OBJ(name, obj, -1);
3459  if (i < 0) {
3460  return string();
3461  }
3462 
3463  ASN1_STRING *data = X509_NAME_ENTRY_get_data(X509_NAME_get_entry(name, i));
3464  return string((char *)data->data, data->length);
3465 }
3466 
3467 /**
3468  * Formats the appropriate GET or POST (or whatever) request to send to the
3469  * server, based on the current _method, _document_spec, _body, and _proxy
3470  * settings.
3471  */
3472 void HTTPChannel::
3473 make_header() {
3474  _proxy_auth = _client->select_auth(_proxy, true, _proxy_realm);
3475  _proxy_username = string();
3476  if (_proxy_auth != nullptr) {
3477  _proxy_realm = _proxy_auth->get_realm();
3478  _proxy_username = _client->select_username(_proxy, true, _proxy_realm);
3479  }
3480 
3481  if (_method == HTTPEnum::M_connect) {
3482  // This method doesn't require an HTTP header at all; we'll just open a
3483  // plain connection. (Except when we're using a proxy; but in that case,
3484  // it's the proxy_header we'll need, not the regular HTTP header.)
3485  _header = string();
3486  return;
3487  }
3488 
3489  _www_auth = _client->select_auth(_request.get_url(), false, _www_realm);
3490  _www_username = string();
3491  if (_www_auth != nullptr) {
3492  _www_realm = _www_auth->get_realm();
3493  _www_username = _client->select_username(_request.get_url(), false, _www_realm);
3494  }
3495 
3496  string request_path;
3497  if (_proxy_serves_document) {
3498  // If we'll be asking the proxy for the document, we need its full URL--
3499  // but we omit the username, which is information just for us.
3500  URLSpec url_no_username = _request.get_url();
3501  url_no_username.set_username(string());
3502  request_path = url_no_username.get_url();
3503 
3504  } else {
3505  // If we'll be asking the server directly for the document, we just want
3506  // its path relative to the server.
3507  request_path = _request.get_url().get_path_and_query();
3508  }
3509 
3510  // HTTP syntax always requires something in the request path. If it is
3511  // empty, put in a star as a placeholder (OPTIONS, for instance, uses this).
3512  if (request_path.empty()) {
3513  request_path = "*";
3514  }
3515 
3516  ostringstream stream;
3517 
3518  stream
3519  << _method << " " << request_path << " "
3520  << _client->get_http_version_string() << "\r\n";
3521 
3522  if (_client->get_http_version() >= HTTPEnum::HV_11) {
3523 
3524  if (_request.get_url().has_port() && _request.get_url().is_default_port()) {
3525  // It appears that some servers (notably gstatic.com) might return a 404
3526  // if you include an explicit port number in with the Host: header, even
3527  // if it is the default port. So, don't include the port number unless
3528  // we need to.
3529  string server = _request.get_url().get_server();
3530  if (server.find(':') != string::npos) {
3531  stream << "Host: [" << server << "]";
3532  } else {
3533  stream << "Host: " << server;
3534  }
3535  } else {
3536  stream << "Host: " << _request.get_url().get_server_and_port();
3537  }
3538  stream << "\r\n";
3539  if (!get_persistent_connection()) {
3540  stream
3541  << "Connection: close\r\n";
3542  }
3543  }
3544 
3545  if (_last_byte_requested != 0) {
3546  stream
3547  << "Range: bytes=" << _first_byte_requested << "-"
3548  << _last_byte_requested << "\r\n";
3549 
3550  } else if (_first_byte_requested != 0) {
3551  stream
3552  << "Range: bytes=" << _first_byte_requested << "-\r\n";
3553  }
3554 
3555  switch (_request.get_request_mode()) {
3556  case DocumentSpec::RM_any:
3557  // No particular request; give us any document that matches the URL.
3558  if (_first_byte_requested != 0) {
3559  // Unless we're requesting a subrange, in which case if the exact
3560  // document matches, retrieve the subrange indicated; otherwise,
3561  // retrieve the entire document.
3562  if (_request.has_tag()) {
3563  stream
3564  << "If-Range: " << _request.get_tag().get_string() << "\r\n";
3565  } else if (_request.has_date()) {
3566  stream
3567  << "If-Range: " << _request.get_date().get_string() << "\r\n";
3568  }
3569  }
3570  break;
3571 
3572  case DocumentSpec::RM_equal:
3573  // Give us only this particular version of the document, or nothing.
3574  if (_request.has_tag()) {
3575  stream
3576  << "If-Match: " << _request.get_tag().get_string() << "\r\n";
3577  }
3578  if (_request.has_date()) {
3579  stream
3580  << "If-Unmodified-Since: " << _request.get_date().get_string()
3581  << "\r\n";
3582  }
3583  break;
3584 
3585  case DocumentSpec::RM_newer:
3586  // Give us anything newer than this document, or nothing.
3587  if (_request.has_tag()) {
3588  stream
3589  << "If-None-Match: " << _request.get_tag().get_string() << "\r\n";
3590  }
3591  if (_request.has_date()) {
3592  stream
3593  << "If-Modified-Since: " << _request.get_date().get_string()
3594  << "\r\n";
3595  }
3596  break;
3597 
3598  case DocumentSpec::RM_equal_or_newer:
3599  // Just don't give us anything older.
3600  if (_request.has_date()) {
3601  // This is a little unreliable: we ask for any document that's been
3602  // modified since one second before our last-modified-date. Who knows
3603  // whether the server will honor this properly.
3604  stream
3605  << "If-Modified-Since: " << (_request.get_date() - 1).get_string()
3606  << "\r\n";
3607  }
3608  break;
3609  }
3610 
3611  switch (_request.get_cache_control()) {
3612  case DocumentSpec::CC_allow_cache:
3613  // Normal, caching behavior.
3614  break;
3615 
3616  case DocumentSpec::CC_revalidate:
3617  // Request the server to revalidate its cache before returning it.
3618  stream
3619  << "Cache-Control: max-age=0\r\n";
3620  break;
3621 
3622  case DocumentSpec::CC_no_cache:
3623  // Request the server to get a fresh copy regardless of its cache.
3624  stream
3625  << "Cache-Control: no-cache\r\n"
3626  << "Pragma: no-cache\r\n";
3627  break;
3628  }
3629 
3630  _client->send_cookies(stream, _request.get_url());
3631 
3632  if (!_body.empty()) {
3633  stream
3634  << "Content-Type: " << _content_type << "\r\n"
3635  << "Content-Length: " << _body.length() << "\r\n";
3636  }
3637 
3638  _header = stream.str();
3639 }
3640 
3641 /**
3642  * Builds the _proxy_request_text string. This is a special request that will
3643  * be sent directly to the proxy prior to the request tailored for the server.
3644  * Generally this is used to open a tunnelling connection for https-over-
3645  * proxy.
3646  */
3647 void HTTPChannel::
3648 make_proxy_request_text() {
3649  _proxy_request_text = _proxy_header;
3650 
3651  if (_proxy_auth != nullptr && !_proxy_username.empty()) {
3652  _proxy_request_text += "Proxy-Authorization: ";
3653  _proxy_request_text +=
3654  _proxy_auth->generate(HTTPEnum::M_connect, _request.get_url().get_server_and_port(),
3655  _proxy_username, _body);
3656  _proxy_request_text += "\r\n";
3657  }
3658 
3659  _proxy_request_text += "\r\n";
3660 }
3661 
3662 /**
3663  * Builds the _request_text string. This is the specific request that will be
3664  * sent to the server this pass, based on the current header and body.
3665  */
3666 void HTTPChannel::
3667 make_request_text() {
3668  _request_text = _header;
3669 
3670  if (_proxy_serves_document &&
3671  _proxy_auth != nullptr && !_proxy_username.empty()) {
3672  _request_text += "Proxy-Authorization: ";
3673  _request_text +=
3674  _proxy_auth->generate(_method, _request.get_url().get_url(), _proxy_username, _body);
3675  _request_text += "\r\n";
3676  }
3677 
3678  if (_www_auth != nullptr && !_www_username.empty()) {
3679  string authorization =
3680  _request_text += "Authorization: ";
3681  _request_text +=
3682  _www_auth->generate(_method, _request.get_url().get_path_and_query(), _www_username, _body);
3683  _request_text += "\r\n";
3684  }
3685 
3686  _request_text += _send_extra_headers;
3687  _request_text += "\r\n";
3688  _request_text += _body;
3689 }
3690 
3691 /**
3692  * Redirects the next connection to the indicated URL (from the previous URL).
3693  * This resets the socket if necessary when we are about to switch servers.
3694  */
3695 void HTTPChannel::
3696 reset_url(const URLSpec &old_url, const URLSpec &new_url) {
3697  // If we change between http and https, we have to reset the connection
3698  // regardless of proxy. Otherwise, we have to drop the connection if the
3699  // server or port changes, unless we're communicating through a proxy.
3700 
3701  if (new_url.get_scheme() != old_url.get_scheme() ||
3702  (_proxy.empty() && (new_url.get_server() != old_url.get_server() ||
3703  new_url.get_port() != old_url.get_port()))) {
3704  if (downloader_cat.is_debug()) {
3705  downloader_cat.debug()
3706  << _NOTIFY_HTTP_CHANNEL_ID
3707  << "resetting for new server "
3708  << new_url.get_server_and_port() << "\n";
3709  }
3710  reset_to_new();
3711  }
3712 }
3713 
3714 /**
3715  * Stores a single name: value pair in the header list, or appends the value
3716  * to the end of the existing value, if the header has been repeated.
3717  */
3718 void HTTPChannel::
3719 store_header_field(const string &field_name, const string &field_value) {
3720  std::pair<Headers::iterator, bool> insert_result =
3721  _headers.insert(Headers::value_type(field_name, field_value));
3722 
3723  if (!insert_result.second) {
3724  // It didn't insert; thus, the field already existed. Append the new
3725  // value.
3726  Headers::iterator hi = insert_result.first;
3727  (*hi).second += ", ";
3728  (*hi).second += field_value;
3729  }
3730 
3731  if (field_name == "set-cookie") {
3732  _client->set_cookie(HTTPCookie(field_value, _request.get_url()));
3733  }
3734 }
3735 
3736 #ifndef NDEBUG
3737 /**
3738  * Writes the outgoing message, one line at a time, to the debugging log.
3739  */
3740 void HTTPChannel::
3741 show_send(const string &message) {
3742  size_t start = 0;
3743  size_t newline = message.find('\n', start);
3744  while (newline != string::npos) {
3745  // Assume every \n is preceded by a \r.
3746  downloader_cat.spam()
3747  << "send: " << message.substr(start, newline - start - 1) << "\n";
3748  start = newline + 1;
3749  newline = message.find('\n', start);
3750  }
3751 
3752  if (start < message.length()) {
3753  downloader_cat.spam()
3754  << "send: " << message.substr(start) << " (no newline)\n";
3755  }
3756 }
3757 #endif // NDEBUG
3758 
3759 /**
3760  * Resets the indication of how the document will be downloaded. This must be
3761  * re-specified after each get_document() (or related) call.
3762  */
3763 void HTTPChannel::
3764 reset_download_to() {
3765  _started_download = false;
3766  close_download_stream();
3767  _download_dest = DD_none;
3768 }
3769 
3770 /**
3771  * Ensures the file opened for receiving the download has been correctly
3772  * closed.
3773  */
3774 void HTTPChannel::
3775 close_download_stream() {
3776  if (_download_to_stream != nullptr) {
3777  _download_to_stream->flush();
3778  if (_download_dest == DD_file) {
3779  VirtualFileSystem::close_write_file(_download_to_stream);
3780  }
3781  }
3782  _download_to_ramfile = nullptr;
3783  _download_to_stream = nullptr;
3784 }
3785 
3786 
3787 /**
3788  * Closes the connection and resets the state to S_new.
3789  */
3790 void HTTPChannel::
3791 reset_to_new() {
3792  if (downloader_cat.is_spam()) {
3793  downloader_cat.spam()
3794  << _NOTIFY_HTTP_CHANNEL_ID
3795  << "reset_to_new.\n";
3796  }
3797 
3798  close_connection();
3799  _state = S_new;
3800 }
3801 
3802 /**
3803  * Clears the _body_stream pointer, if it is set.
3804  */
3805 void HTTPChannel::
3806 reset_body_stream() {
3807  if (_owns_body_stream) {
3808  if (_body_stream != nullptr) {
3809  close_read_body(_body_stream);
3810  nassertv(_body_stream == nullptr && !_owns_body_stream);
3811  }
3812  } else {
3813  _body_stream = nullptr;
3814  }
3815 }
3816 
3817 
3818 /**
3819  * Closes the connection but leaves the _state unchanged.
3820  */
3821 void HTTPChannel::
3822 close_connection() {
3823  reset_body_stream();
3824  _source.clear();
3825  _bio.clear();
3826  _working_get = string();
3827  _sent_so_far = 0;
3828  _read_index++;
3829 }
3830 
3831 /**
3832  * Returns true if status code a is a more useful value (that is, it
3833  * represents a more-nearly successfully connection attempt, or contains more
3834  * information) than b, or false otherwise.
3835  */
3836 bool HTTPChannel::
3837 more_useful_status_code(int a, int b) {
3838  if (a >= 100 && b >= 100) {
3839  // Both represent HTTP responses. Responses from a server (< 1000) are
3840  // better than those from a proxy; we take advantage of the fact that we
3841  // have already added 1000 to proxy responses. Except for 407, so let's
3842  // fix that now.
3843  if (a == 407) {
3844  a += 1000;
3845  }
3846  if (b == 407) {
3847  b += 1000;
3848  }
3849 
3850  // Now just check the series.
3851  int series_a = (a / 100);
3852  int series_b = (b / 100);
3853 
3854  // In general, a lower series is a closer success.
3855  return (series_a < series_b);
3856  }
3857 
3858  if (a < 100 && b < 100) {
3859  // Both represent non-HTTP responses. Here a larger number is better.
3860  return (a > b);
3861  }
3862 
3863  if (a < 100) {
3864  // a is a non-HTTP response, while b is an HTTP response. HTTP is
3865  // generally, better, unless we exceeded SC_http_error_watermark.
3866  return (a > SC_http_error_watermark);
3867  }
3868 
3869  // Exactly the opposite case as above.
3870  return (b < SC_http_error_watermark);
3871 }
3872 
3873 
3874 /**
3875  *
3876  */
3877 ostream &
3878 operator << (ostream &out, HTTPChannel::State state) {
3879 #ifdef NDEBUG
3880  return out << (int)state;
3881 #else
3882  switch (state) {
3883  case HTTPChannel::S_new:
3884  return out << "new";
3885 
3886  case HTTPChannel::S_try_next_proxy:
3887  return out << "try_next_proxy";
3888 
3889  case HTTPChannel::S_connecting:
3890  return out << "connecting";
3891 
3892  case HTTPChannel::S_connecting_wait:
3893  return out << "connecting_wait";
3894 
3895  case HTTPChannel::S_http_proxy_ready:
3896  return out << "http_proxy_ready";
3897 
3898  case HTTPChannel::S_http_proxy_request_sent:
3899  return out << "http_proxy_request_sent";
3900 
3901  case HTTPChannel::S_http_proxy_reading_header:
3902  return out << "http_proxy_reading_header";
3903 
3904  case HTTPChannel::S_socks_proxy_greet:
3905  return out << "socks_proxy_greet";
3906 
3907  case HTTPChannel::S_socks_proxy_greet_reply:
3908  return out << "socks_proxy_greet_reply";
3909 
3910  case HTTPChannel::S_socks_proxy_connect:
3911  return out << "socks_proxy_connect";
3912 
3913  case HTTPChannel::S_socks_proxy_connect_reply:
3914  return out << "socks_proxy_connect_reply";
3915 
3916  case HTTPChannel::S_setup_ssl:
3917  return out << "setup_ssl";
3918 
3919  case HTTPChannel::S_ssl_handshake:
3920  return out << "ssl_handshake";
3921 
3922  case HTTPChannel::S_ready:
3923  return out << "ready";
3924 
3925  case HTTPChannel::S_request_sent:
3926  return out << "request_sent";
3927 
3928  case HTTPChannel::S_reading_header:
3929  return out << "reading_header";
3930 
3931  case HTTPChannel::S_start_direct_file_read:
3932  return out << "start_direct_file_read";
3933 
3934  case HTTPChannel::S_read_header:
3935  return out << "read_header";
3936 
3937  case HTTPChannel::S_begin_body:
3938  return out << "begin_body";
3939 
3940  case HTTPChannel::S_reading_body:
3941  return out << "reading_body";
3942 
3943  case HTTPChannel::S_read_body:
3944  return out << "read_body";
3945 
3946  case HTTPChannel::S_read_trailer:
3947  return out << "read_trailer";
3948 
3949  case HTTPChannel::S_failure:
3950  return out << "failure";
3951  }
3952 
3953  return out << "invalid state(" << (int)state << ")";
3954 #endif // NDEBUG
3955 }
3956 
3957 #endif // HAVE_OPENSSL
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static TrueClock * get_global_ptr()
Returns a pointer to the one TrueClock object in the world.
Definition: trueClock.I:68
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A container for a URL, e.g.
Definition: urlSpec.h:28
A hierarchy of directories and files that appears to be one continuous file system,...
A container for an "entity tag" from an HTTP server.
Definition: httpEntityTag.h:24
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
void set_binary()
Indicates that the filename represents a binary file.
Definition: filename.I:414
string downcase(const string &s)
Returns the input string with all uppercase letters converted to lowercase.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
This is a convenience class to specialize ConfigVariable as a floating- point type.
get_scheme
Returns the scheme specified by the URL, or empty string if no scheme is specified.
Definition: urlSpec.h:93
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:39
An in-memory buffer specifically designed for downloading files to memory.
Definition: ramfile.h:25
static VirtualFileSystem * get_global_ptr()
Returns the default global VirtualFileSystem.
get_path
Returns the path specified by the URL, or "/" if no path is specified.
Definition: urlSpec.h:99
A container for an HTTP-legal time/date indication.
Definition: httpDate.h:27
get_authority
Returns the authority specified by the URL (this includes username, server, and/or port),...
Definition: urlSpec.h:94
bool is_local() const
Returns true if the filename is local, e.g.
Definition: filename.I:549
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static void close_write_file(std::ostream *stream)
Closes a file opened by a previous call to open_write_file().
set_username
Replaces the username part of the URL specification.
Definition: urlSpec.h:95
get_port
Returns the port number specified by the URL, or the default port if not specified.
Definition: urlSpec.h:97
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
std::ostream * open_write_file(const Filename &filename, bool auto_wrap, bool truncate)
Convenience function; returns a newly allocated ostream if the file exists and can be written,...
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
const std::string & get_url() const
Returns the complete URL specification.
Definition: urlSpec.I:184
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
get_server
Returns the server name specified by the URL, if any.
Definition: urlSpec.h:96
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
A descriptor that refers to a particular version of a document.
Definition: documentSpec.h:30
TypeHandle is the identifier used to differentiate C++ class types.
Definition: typeHandle.h:81
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
get_server_and_port
Returns a string consisting of the server name, followed by a colon, followed by the port number.
Definition: urlSpec.h:98
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
This class can be used to test for string matches against standard Unix-shell filename globbing conv...
Definition: globPattern.h:32