Panda3D
httpChannel.cxx
Go to the documentation of this file.
1 /**
2  * PANDA 3D SOFTWARE
3  * Copyright (c) Carnegie Mellon University. All rights reserved.
4  *
5  * All use of this software is subject to the terms of the revised BSD
6  * license. You should have received a copy of this license along
7  * with this source code in a file named "LICENSE."
8  *
9  * @file httpChannel.cxx
10  * @author drose
11  * @date 2002-09-24
12  */
13 
14 #include "httpChannel.h"
15 #include "httpClient.h"
16 #include "httpCookie.h"
17 #include "bioStream.h"
18 #include "chunkedStream.h"
19 #include "identityStream.h"
20 #include "config_downloader.h"
21 #include "virtualFileSystem.h"
22 #include "virtualFileMountHTTP.h"
23 #include "ramfile.h"
24 #include "globPattern.h"
25 
26 #include <stdio.h>
27 
28 #ifdef HAVE_OPENSSL
29 
30 #include "openSSLWrapper.h"
31 
32 #if defined(WIN32_VC) || defined(WIN64_VC)
33  #include <winsock2.h>
34  #include <windows.h> // for select()
35  #undef X509_NAME
36 #endif // WIN32_VC
37 
38 using std::istream;
39 using std::min;
40 using std::ostream;
41 using std::ostringstream;
42 using std::string;
43 
// Out-of-class definition of HTTPChannel's static _type_handle member.
44 TypeHandle HTTPChannel::_type_handle;
45 
// Stream fragment that tags a log line with the address of the current
// object in square brackets.  It expands to an expression that uses `this`,
// so it can only be used inside non-static member functions.
46 #define _NOTIFY_HTTP_CHANNEL_ID "[" << this << "] "
47 
48 /**
49  *
50  */
51 HTTPChannel::
52 HTTPChannel(HTTPClient *client) :
53  _client(client)
54 {
55  if (downloader_cat.is_debug()) {
56  downloader_cat.debug()
57  << _NOTIFY_HTTP_CHANNEL_ID
58  << "created.\n";
59  }
60 
61  ConfigVariableDouble extra_ssl_handshake_time
62  ("extra-ssl-handshake-time", 0.0,
63  PRC_DESC("This specifies how much extra time to try to establish"
64  "the ssl handshake before we bail."));
65  _extra_ssl_handshake_time = extra_ssl_handshake_time;
66  _proxy_next_index = 0;
67  _persistent_connection = false;
68  _allow_proxy = true;
69  _proxy_tunnel = http_proxy_tunnel;
70  _connect_timeout = http_connect_timeout;
71  _http_timeout = http_timeout;
72  _skip_body_size = http_skip_body_size;
73  _idle_timeout = http_idle_timeout;
74  _blocking_connect = false;
75  _download_throttle = download_throttle;
76  _max_bytes_per_second = downloader_byte_rate;
77  _seconds_per_update = downloader_frequency;
78  _max_updates_per_second = 1.0f / _seconds_per_update;
79  _bytes_per_update = int(_max_bytes_per_second * _seconds_per_update);
80 
81  // _nonblocking is true if the socket is actually in non-blocking mode.
82  _nonblocking = false;
83 
84  // _wanted_nonblocking is true if the user specifically requested one of the
85  // non-blocking interfaces. It is false if the socket is only incidentally
86  // non-blocking (for instance, because SIMPLE_THREADS is on).
87  _wanted_nonblocking = false;
88 
89  _want_ssl = false;
90  _proxy_serves_document = false;
91  _proxy_tunnel_now = false;
92  _first_byte_requested = 0;
93  _last_byte_requested = 0;
94  _first_byte_delivered = 0;
95  _last_byte_delivered = 0;
96  _read_index = 0;
97  _expected_file_size = 0;
98  _file_size = 0;
99  _transfer_file_size = 0;
100  _got_expected_file_size = false;
101  _got_file_size = false;
102  _got_transfer_file_size = false;
103  _bytes_downloaded = 0;
104  _bytes_requested = 0;
105  _status_entry = StatusEntry();
106  _response_type = RT_none;
107  _http_version = _client->get_http_version();
108  _http_version_string = _client->get_http_version_string();
109  _content_type = "application/x-www-form-urlencoded";
110  _state = S_new;
111  _done_state = S_new;
112  _started_download = false;
113  _sent_so_far = 0;
114  _body_stream = nullptr;
115  _owns_body_stream = false;
116  _sbio = nullptr;
117  _cipher_list = _client->get_cipher_list();
118  _last_status_code = 0;
119  _last_run_time = 0.0f;
120  _download_dest = DD_none;
121  _download_to_ramfile = nullptr;
122  _download_to_stream = nullptr;
123 }
124 
125 /**
126  *
127  */
128 HTTPChannel::
129 ~HTTPChannel() {
130  if (downloader_cat.is_debug()) {
131  downloader_cat.debug()
132  << _NOTIFY_HTTP_CHANNEL_ID
133  << "destroyed.\n";
134  }
135 
136  close_connection();
137  reset_download_to();
138 }
139 
140 /**
141  * Returns the string as returned by the server describing the status code for
142  * humans. This may or may not be meaningful.
143  */
144 string HTTPChannel::
145 get_status_string() const {
146  switch (_status_entry._status_code) {
147  case SC_incomplete:
148  return "Connection in progress";
149 
150  case SC_internal_error:
151  return "Internal error";
152 
153  case SC_no_connection:
154  return "No connection";
155 
156  case SC_timeout:
157  return "Timeout on connection";
158 
159  case SC_lost_connection:
160  return "Lost connection";
161 
162  case SC_non_http_response:
163  return "Non-HTTP response";
164 
165  case SC_invalid_http:
166  return "Could not understand HTTP response";
167 
168  case SC_socks_invalid_version:
169  return "Unsupported SOCKS version";
170 
171  case SC_socks_no_acceptable_login_method:
172  return "No acceptable SOCKS login method";
173 
174  case SC_socks_refused:
175  return "SOCKS proxy refused connection";
176 
177  case SC_socks_no_connection:
178  return "SOCKS proxy unable to connect";
179 
180  case SC_ssl_internal_failure:
181  return "SSL internal failure";
182 
183  case SC_ssl_no_handshake:
184  return "No SSL handshake";
185 
186  case SC_http_error_watermark:
187  // This shouldn't be triggered.
188  return "Internal error";
189 
190  case SC_ssl_invalid_server_certificate:
191  return "SSL invalid server certificate";
192 
193  case SC_ssl_unexpected_server:
194  return "Unexpected SSL server";
195 
196  case SC_download_open_error:
197  return "Error opening file";
198 
199  case SC_download_write_error:
200  return "Error writing to disk";
201 
202  case SC_download_invalid_range:
203  return "Invalid subrange requested";
204  }
205 
206  return _status_entry._status_string;
207 }
208 
209 /**
210  * Returns the HTML header value associated with the indicated key, or empty
211  * string if the key was not defined in the message returned by the server.
212  */
213 string HTTPChannel::
214 get_header_value(const string &key) const {
215  Headers::const_iterator hi = _headers.find(downcase(key));
216  if (hi != _headers.end()) {
217  return (*hi).second;
218  }
219  return string();
220 }
221 
222 /**
223  * Returns true if the server has indicated it will close the connection after
224  * this document has been read, or false if it will remain open (and future
225  * documents may be requested on the same connection).
226  */
227 bool HTTPChannel::
228 will_close_connection() const {
229  if (get_http_version() < HTTPEnum::HV_11) {
230  // pre-HTTP 1.1 always closes.
231  return true;
232  }
233 
234  string connection = get_header_value("Connection");
235  if (downcase(connection) == "close") {
236  // The server says it will close.
237  return true;
238  }
239 
240  if (connection.empty() && !get_persistent_connection()) {
241  // The server didn't say, but we asked it to close.
242  return true;
243  }
244 
245  // Assume the server will keep it open.
246  return false;
247 }
248 
249 /**
250  * Returns the size of the file, if it is known. Returns the value set by
251  * set_expected_file_size() if the file size is not known, or 0 if this value
252  * was not set.
253  *
254  * If the file is dynamically generated, the size may not be available until a
255  * read has started (e.g. open_read_body() has been called); and even then it
256  * may increase as more of the file is read due to the nature of HTTP/1.1
257  * requests which can change their minds midstream about how much data they're
258  * sending you.
259  */
260 std::streamsize HTTPChannel::
261 get_file_size() const {
262  if (_got_file_size) {
263  return _file_size;
264  } else if (_got_transfer_file_size) {
265  return _transfer_file_size;
266  } else if (_got_expected_file_size) {
267  return _expected_file_size;
268  } else {
269  return 0;
270  }
271 }
272 
273 /**
274  * Outputs a list of all headers defined by the server to the indicated output
275  * stream.
276  */
277 void HTTPChannel::
278 write_headers(ostream &out) const {
279  Headers::const_iterator hi;
280  for (hi = _headers.begin(); hi != _headers.end(); ++hi) {
281  out << (*hi).first << ": " << (*hi).second << "\n";
282  }
283 }
284 
285 /**
286  * This must be called from time to time when non-blocking I/O is in use. It
287  * checks for data coming in on the socket and writes data out to the socket
288  * when possible, and does whatever processing is required towards completing
289  * the current task.
290  *
291  * The return value is true if the task is still pending (and run() will need
292  * to be called again in the future), or false if the current task is
293  * complete.
294  */
295 bool HTTPChannel::
296 run() {
297  if (downloader_cat.is_spam()) {
298  downloader_cat.spam()
299  << _NOTIFY_HTTP_CHANNEL_ID
300  << "run().\n";
301  }
302 
303  if (_state == _done_state || _state == S_failure) {
304  clear_extra_headers();
305  if (!reached_done_state()) {
306  return false;
307  }
308  }
309 
310  if (_started_download) {
311  if (_wanted_nonblocking && _download_throttle) {
312  double now = TrueClock::get_global_ptr()->get_short_time();
313  double elapsed = now - _last_run_time;
314  if (elapsed < _seconds_per_update) {
315  // Come back later.
316  thread_yield();
317  return true;
318  }
319  int num_potential_updates = (int)(elapsed / _seconds_per_update);
320  _last_run_time = now;
321  _bytes_requested += _bytes_per_update * num_potential_updates;
322  if (downloader_cat.is_spam()) {
323  downloader_cat.spam()
324  << _NOTIFY_HTTP_CHANNEL_ID
325  << "elapsed = " << elapsed << " num_potential_updates = "
326  << num_potential_updates << " bytes_requested = "
327  << _bytes_requested << "\n";
328  }
329  }
330 
331  bool repeat_later = false;
332  switch (_download_dest) {
333  case DD_none:
334  // We're done.
335  break;
336 
337  case DD_file:
338  repeat_later = run_download_to_file();
339  break;
340 
341  case DD_ram:
342  repeat_later = run_download_to_ram();
343  break;
344 
345  case DD_stream:
346  repeat_later = run_download_to_stream();
347  break;
348  }
349  if (repeat_later) {
350  thread_yield();
351  }
352  return repeat_later;
353  }
354 
355  /*
356  if (downloader_cat.is_spam()) {
357  downloader_cat.spam()
358  << _NOTIFY_HTTP_CHANNEL_ID
359  << "begin run(), _state = " << _state << ", _done_state = "
360  << _done_state << "\n";
361  }
362  */
363 
364  if (_state == _done_state) {
365  return reached_done_state();
366  }
367 
368  bool repeat_later;
369  do {
370  // If we're in a state that expects to have a connection already (that is,
371  // any state other that S_try_next_proxy), then reestablish the connection
372  // if it has been dropped.
373  if (_bio.is_null() && _state != S_try_next_proxy) {
374  if (_connect_count > http_max_connect_count) {
375  // Too many connection attempts; just give up. We should never
376  // trigger this failsafe, since the code in each individual case has
377  // similar logic to prevent more than two consecutive lost
378  // connections.
379  downloader_cat.warning()
380  << _NOTIFY_HTTP_CHANNEL_ID
381  << "Too many lost connections, giving up.\n";
382  _status_entry._status_code = SC_lost_connection;
383  _state = S_failure;
384  return false;
385  }
386 
387  // No connection. Attempt to establish one.
388  URLSpec url;
389  if (_proxy.empty()) {
390  url = _request.get_url();
391  } else {
392  url = _proxy;
393  }
394  _bio = new BioPtr(url);
395  _source = new BioStreamPtr(new BioStream(_bio));
396  if (_nonblocking) {
397  _bio->set_nbio(true);
398  }
399 
400  if (downloader_cat.is_debug()) {
401  if (_connect_count > 0) {
402  downloader_cat.debug()
403  << _NOTIFY_HTTP_CHANNEL_ID
404  << "Reconnecting to " << _bio->get_server_name() << " port "
405  << _bio->get_port() << "\n";
406  } else {
407  downloader_cat.debug()
408  << _NOTIFY_HTTP_CHANNEL_ID
409  << "Connecting to " << _bio->get_server_name() << " port "
410  << _bio->get_port() << "\n";
411  }
412  }
413 
414  _state = S_connecting;
415  _started_connecting_time =
416  TrueClock::get_global_ptr()->get_short_time();
417  _connect_count++;
418  }
419 
420  /*
421  if (downloader_cat.is_spam()) {
422  downloader_cat.spam()
423  << _NOTIFY_HTTP_CHANNEL_ID
424  << "continue run(), _state = " << _state << "\n";
425  }
426  */
427 
428  switch (_state) {
429  case S_try_next_proxy:
430  repeat_later = run_try_next_proxy();
431  break;
432 
433  case S_connecting:
434  repeat_later = run_connecting();
435  break;
436 
437  case S_connecting_wait:
438  repeat_later = run_connecting_wait();
439  break;
440 
441  case S_http_proxy_ready:
442  repeat_later = run_http_proxy_ready();
443  break;
444 
445  case S_http_proxy_request_sent:
446  repeat_later = run_http_proxy_request_sent();
447  break;
448 
449  case S_http_proxy_reading_header:
450  repeat_later = run_http_proxy_reading_header();
451  break;
452 
453  case S_socks_proxy_greet:
454  repeat_later = run_socks_proxy_greet();
455  break;
456 
457  case S_socks_proxy_greet_reply:
458  repeat_later = run_socks_proxy_greet_reply();
459  break;
460 
461  case S_socks_proxy_connect:
462  repeat_later = run_socks_proxy_connect();
463  break;
464 
465  case S_socks_proxy_connect_reply:
466  repeat_later = run_socks_proxy_connect_reply();
467  break;
468 
469  case S_setup_ssl:
470  repeat_later = run_setup_ssl();
471  break;
472 
473  case S_ssl_handshake:
474  repeat_later = run_ssl_handshake();
475  break;
476 
477  case S_ready:
478  repeat_later = run_ready();
479  break;
480 
481  case S_request_sent:
482  repeat_later = run_request_sent();
483  break;
484 
485  case S_reading_header:
486  repeat_later = run_reading_header();
487  break;
488 
489  case S_start_direct_file_read:
490  repeat_later = run_start_direct_file_read();
491  break;
492 
493  case S_read_header:
494  repeat_later = run_read_header();
495  break;
496 
497  case S_begin_body:
498  repeat_later = run_begin_body();
499  break;
500 
501  case S_reading_body:
502  repeat_later = run_reading_body();
503  break;
504 
505  case S_read_body:
506  repeat_later = run_read_body();
507  break;
508 
509  case S_read_trailer:
510  repeat_later = run_read_trailer();
511  break;
512 
513  default:
514  downloader_cat.warning()
515  << _NOTIFY_HTTP_CHANNEL_ID
516  << "Unhandled state " << _state << "\n";
517  return false;
518  }
519 
520  if (_state == _done_state || _state == S_failure) {
521  clear_extra_headers();
522  // We've reached our terminal state.
523  return reached_done_state();
524  }
525  thread_consider_yield();
526  } while (!repeat_later || _bio.is_null());
527 
528  /*
529  if (downloader_cat.is_spam()) {
530  downloader_cat.spam()
531  << _NOTIFY_HTTP_CHANNEL_ID
532  << "later run(), _state = " << _state
533  << ", _done_state = " << _done_state << "\n";
534  }
535  */
536 
537  thread_yield();
538  return true;
539 }
540 
541 /**
542  * Returns a newly-allocated istream suitable for reading the body of the
543  * document. This may only be called immediately after a call to
544  * get_document() or post_form(), or after a call to run() has returned false.
545  *
546  * Note that, in nonblocking mode, the returned stream may report an early
547  * EOF, even before the actual end of file. When this happens, you should
548  * call stream->is_closed() to determine whether you should attempt to read
549  * some more later.
550  *
551  * The user is responsible for passing the returned istream to
552  * close_read_body() later.
553  */
554 ISocketStream *HTTPChannel::
555 open_read_body() {
556  reset_body_stream();
557 
558  if ((_state != S_read_header && _state != S_begin_body) || _source.is_null()) {
559  return nullptr;
560  }
561 
562  string transfer_coding = downcase(get_header_value("Transfer-Encoding"));
563 
564  ISocketStream *result;
565  if (transfer_coding == "chunked") {
566  // "chunked" transfer encoding. This means we will have to decode the
567  // length of the file as we read it in chunks. The IChunkedStream does
568  // this.
569  _state = S_reading_body;
570  _read_index++;
571  result = new IChunkedStream(_source, this);
572 
573  } else {
574  // If the transfer encoding is anything else, assume "identity". This is
575  // just the literal characters following the header, up until _file_size
576  // bytes have been read (if content-length was specified), or till end of
577  // file otherwise.
578  _state = S_reading_body;
579  _read_index++;
580  result = new IIdentityStream(_source, this, _got_file_size, _file_size);
581  }
582 
583  result->_channel = this;
584  _body_stream = result;
585  _owns_body_stream = false;
586 
587  return result;
588 }
589 
590 /**
591  * Closes a file opened by a previous call to open_read_body(). This really
592  * just deletes the istream pointer, but it is recommended to use this
593  * interface instead of deleting it explicitly, to help work around compiler
594  * issues.
595  */
596 void HTTPChannel::
597 close_read_body(istream *stream) const {
598  if (stream != nullptr) {
599  // For some reason--compiler bug in gcc 3.2?--explicitly deleting the
600  // stream pointer does not call the appropriate global delete function;
601  // instead apparently calling the system delete function. So we call the
602  // delete function by hand instead.
603 #if !defined(USE_MEMORY_NOWRAPPERS) && defined(REDEFINE_GLOBAL_OPERATOR_NEW)
604  stream->~istream();
605  (*global_operator_delete)(stream);
606 #else
607  delete stream;
608 #endif
609  }
610 }
611 
612 /**
613  * Specifies the name of a file to download the resulting document to. This
614  * should be called immediately after get_document() or begin_get_document()
615  * or related functions.
616  *
617  * In the case of the blocking I/O methods like get_document(), this function
618  * will download the entire document to the file and return true if it was
619  * successfully downloaded, false otherwise.
620  *
621  * In the case of non-blocking I/O methods like begin_get_document(), this
622  * function simply indicates an intention to download to the indicated file.
623  * It returns true if the file can be opened for writing, false otherwise, but
624  * the contents will not be completely downloaded until run() has returned
625  * false. At this time, it is possible that a communications error will have
626  * left a partial file, so is_download_complete() may be called to test this.
627  *
628  * If subdocument_resumes is true and the document in question was previously
629  * requested as a subdocument (i.e. get_subdocument() with a first_byte value
630  * greater than zero), this will automatically seek to the appropriate byte
631  * within the file for writing the output. In this case, the file must
632  * already exist and must have at least first_byte bytes in it. If
633  * subdocument_resumes is false, a subdocument will always be downloaded
634  * beginning at the first byte of the file.
635  */
636 bool HTTPChannel::
637 download_to_file(const Filename &filename, bool subdocument_resumes) {
638  reset_download_to();
639  _download_to_filename = filename;
640  _download_to_filename.set_binary();
641  _subdocument_resumes = subdocument_resumes;
642 
643  _download_dest = DD_file;
644 
645  if (_wanted_nonblocking && _state != S_read_header) {
646  // In nonblocking mode, we can't start the download yet; that will be done
647  // later as run() is called.
648  return true;
649  }
650 
651  // In normal, blocking mode, go ahead and do the download.
652  if (!open_download_file()) {
653  reset_download_to();
654  return false;
655  }
656 
657  while (run()) {
658  }
659  return is_download_complete() && is_valid();
660 }
661 
662 /**
663  * Specifies a Ramfile object to download the resulting document to. This
664  * should be called immediately after get_document() or begin_get_document()
665  * or related functions.
666  *
667  * In the case of the blocking I/O methods like get_document(), this function
668  * will download the entire document to the Ramfile and return true if it was
669  * successfully downloaded, false otherwise.
670  *
671  * In the case of non-blocking I/O methods like begin_get_document(), this
672  * function simply indicates an intention to download to the indicated
673  * Ramfile. It returns true if the file can be opened for writing, false
674  * otherwise, but the contents will not be completely downloaded until run()
675  * has returned false. At this time, it is possible that a communications
676  * error will have left a partial file, so is_download_complete() may be
677  * called to test this.
678  *
679  * If subdocument_resumes is true and the document in question was previously
680  * requested as a subdocument (i.e. get_subdocument() with a first_byte value
681  * greater than zero), this will automatically seek to the appropriate byte
682  * within the Ramfile for writing the output. In this case, the Ramfile must
683  * already have at least first_byte bytes in it.
684  */
685 bool HTTPChannel::
686 download_to_ram(Ramfile *ramfile, bool subdocument_resumes) {
687  nassertr(ramfile != nullptr, false);
688  reset_download_to();
689  ramfile->_pos = 0;
690  _download_to_ramfile = ramfile;
691  _download_dest = DD_ram;
692  _subdocument_resumes = (subdocument_resumes && _first_byte_delivered != 0);
693 
694  if (_wanted_nonblocking && _state != S_read_header) {
695  // In nonblocking mode, we can't start the download yet; that will be done
696  // later as run() is called.
697  return true;
698  }
699 
700  // In normal, blocking mode, go ahead and do the download.
701  if (!open_download_file()) {
702  reset_download_to();
703  return false;
704  }
705 
706  while (run()) {
707  }
708  return is_download_complete() && is_valid();
709 }
710 
711 /**
712  * Specifies the name of an ostream to download the resulting document to.
713  * This should be called immediately after get_document() or
714  * begin_get_document() or related functions.
715  *
716  * In the case of the blocking I/O methods like get_document(), this function
717  * will download the entire document to the file and return true if it was
718  * successfully downloaded, false otherwise.
719  *
720  * In the case of non-blocking I/O methods like begin_get_document(), this
721  * function simply indicates an intention to download to the indicated file.
722  * It returns true if the file can be opened for writing, false otherwise, but
723  * the contents will not be completely downloaded until run() has returned
724  * false. At this time, it is possible that a communications error will have
725  * left a partial file, so is_download_complete() may be called to test this.
726  *
727  * If subdocument_resumes is true and the document in question was previously
728  * requested as a subdocument (i.e. get_subdocument() with a first_byte value
729  * greater than zero), this will automatically seek to the appropriate byte
730  * within the file for writing the output. In this case, the file must
731  * already exist and must have at least first_byte bytes in it. If
732  * subdocument_resumes is false, a subdocument will always be downloaded
733  * beginning at the first byte of the file.
734  */
735 bool HTTPChannel::
736 download_to_stream(ostream *strm, bool subdocument_resumes) {
737  reset_download_to();
738  _download_to_stream = strm;
739  _download_to_stream->clear();
740  _subdocument_resumes = subdocument_resumes;
741 
742  _download_dest = DD_stream;
743 
744  if (_wanted_nonblocking && _state != S_read_header) {
745  // In nonblocking mode, we can't start the download yet; that will be done
746  // later as run() is called.
747  return true;
748  }
749 
750  // In normal, blocking mode, go ahead and do the download.
751  if (!open_download_file()) {
752  reset_download_to();
753  return false;
754  }
755 
756  while (run()) {
757  }
758  return is_download_complete() && is_valid();
759 }
760 
761 /**
762  * Returns the connection that was established via a previous call to
763  * connect_to() or begin_connect_to(), or NULL if the connection attempt
764  * failed or if those methods have not recently been called.
765  *
766  * This stream has been allocated from the free store. It is the user's
767  * responsibility to delete this pointer when finished with it.
768  */
769 SocketStream *HTTPChannel::
770 get_connection() {
771  if (!is_connection_ready()) {
772  return nullptr;
773  }
774 
775  BioStream *stream = _source->get_stream();
776  _source->set_stream(nullptr);
777 
778  // We're now passing ownership of the connection to the caller.
779  if (downloader_cat.is_debug()) {
780  downloader_cat.debug()
781  << _NOTIFY_HTTP_CHANNEL_ID
782  << "passing ownership of connection to caller.\n";
783  }
784  reset_to_new();
785 
786  return stream;
787 }
788 
789 /**
790  * Returns the input string with all uppercase letters converted to lowercase.
791  */
792 string HTTPChannel::
793 downcase(const string &s) {
794  string result;
795  result.reserve(s.size());
796  string::const_iterator p;
797  for (p = s.begin(); p != s.end(); ++p) {
798  result += tolower(*p);
799  }
800  return result;
801 }
802 
803 /**
804  * Called by ISocketStream destructor when _body_stream is destructing.
805  */
806 void HTTPChannel::
807 body_stream_destructs(ISocketStream *stream) {
808  if (stream == _body_stream) {
809  if (_state == S_reading_body) {
810  switch (_body_stream->get_read_state()) {
811  case ISocketStream::RS_complete:
812  finished_body(false);
813  break;
814 
815  case ISocketStream::RS_error:
816  _state = HTTPChannel::S_failure;
817  _status_entry._status_code = HTTPChannel::SC_lost_connection;
818  break;
819 
820  default:
821  break;
822  }
823  }
824  _body_stream = nullptr;
825  _owns_body_stream = false;
826  }
827 }
828 
829 
830 /**
831  * Called by run() after it reaches the done state, this simply checks to see
832  * if a download was requested, and begins the download if it has been.
833  */
834 bool HTTPChannel::
835 reached_done_state() {
836  /*
837  if (downloader_cat.is_spam()) {
838  downloader_cat.spam()
839  << _NOTIFY_HTTP_CHANNEL_ID
840  << "terminating run(), _state = " << _state
841  << ", _done_state = " << _done_state << "\n";
842  }
843  */
844 
845  if (_state == S_failure) {
846  // We had to give up. Each proxy we tried, in sequence, failed. But
847  // maybe the last attempt didn't give us the most informative response; go
848  // back and find the best one.
849  if (!_status_list.empty()) {
850  _status_list.push_back(_status_entry);
851  if (downloader_cat.is_debug()) {
852  downloader_cat.debug()
853  << _NOTIFY_HTTP_CHANNEL_ID
854  << "Reexamining failure responses.\n";
855  }
856  size_t best_i = 0;
857  if (downloader_cat.is_debug()) {
858  downloader_cat.debug()
859  << _NOTIFY_HTTP_CHANNEL_ID
860  << " " << 0 << ". " << _status_list[0]._status_code << " "
861  << _status_list[0]._status_string << "\n";
862  }
863  for (size_t i = 1; i < _status_list.size(); i++) {
864  if (downloader_cat.is_debug()) {
865  downloader_cat.debug()
866  << _NOTIFY_HTTP_CHANNEL_ID
867  << " " << i << ". " << _status_list[i]._status_code << " "
868  << _status_list[i]._status_string << "\n";
869  }
870  if (more_useful_status_code(_status_list[i]._status_code,
871  _status_list[best_i]._status_code)) {
872  best_i = i;
873  }
874  }
875  if (downloader_cat.is_debug()) {
876  downloader_cat.debug()
877  << _NOTIFY_HTTP_CHANNEL_ID
878  << "chose index " << best_i << ", above.\n";
879  }
880  _status_entry = _status_list[best_i];
881  _status_list.clear();
882  }
883 
884  return false;
885  }
886 
887  // We don't need the list of previous failures any more--we've connected.
888  _status_list.clear();
889 
890  if (_download_dest == DD_none) {
891  // All done.
892  return false;
893 
894  } else {
895  // Oops, we have to download the body now.
896  open_read_body();
897  if (_body_stream == nullptr) {
898  if (downloader_cat.is_debug()) {
899  downloader_cat.debug()
900  << _NOTIFY_HTTP_CHANNEL_ID
901  << "Unable to download body: " << _request.get_url() << "\n";
902  }
903  return false;
904 
905  } else {
906  _owns_body_stream = true;
907  if (_state != S_reading_body) {
908  reset_body_stream();
909  }
910  _started_download = true;
911 
912  _done_state = S_read_trailer;
913  _last_run_time = TrueClock::get_global_ptr()->get_short_time();
914  return true;
915  }
916  }
917 }
918 
919 /**
920  * This state is reached when a previous connection attempt fails. If we have
921  * multiple proxies in line to try, it sets us up for the next proxy and tries
922  * to connect again; otherwise, it sets the state to S_failure.
923  */
924 bool HTTPChannel::
925 run_try_next_proxy() {
926  if (_proxy_next_index < _proxies.size()) {
927  // Record the previous proxy's status entry, so we can come back to it
928  // later if we get nonsense from the remaining proxies.
929  _status_list.push_back(_status_entry);
930  _status_entry = StatusEntry();
931 
932  // Now try the next proxy in sequence.
933  _proxy = _proxies[_proxy_next_index];
934  _proxy_auth = nullptr;
935  _proxy_next_index++;
936  close_connection();
937  reconsider_proxy();
938  _state = S_connecting;
939 
940  return false;
941  }
942 
943  // No more proxies to try, or we're not using a proxy.
944  _state = S_failure;
945  return false;
946 }
947 
948 /**
949  * In this state, we have not yet established a network connection to the
950  * server (or proxy).
951  */
952 bool HTTPChannel::
953 run_connecting() {
954  _status_entry = StatusEntry();
955 
956  if (!_bio->connect()) {
957  if (_bio->should_retry()) {
958  _state = S_connecting_wait;
959  return false;
960  }
961  downloader_cat.info()
962  << _NOTIFY_HTTP_CHANNEL_ID
963  << "Could not connect to " << _bio->get_server_name() << " port "
964  << _bio->get_port() << "\n";
965  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
966  _status_entry._status_code = SC_no_connection;
967  _state = S_try_next_proxy;
968  return false;
969  }
970 
971  if (downloader_cat.is_debug()) {
972  downloader_cat.debug()
973  << _NOTIFY_HTTP_CHANNEL_ID
974  << "Connected to " << _bio->get_server_name() << " port "
975  << _bio->get_port() << "\n";
976  }
977 
978  if (_proxy_tunnel_now) {
979  if (_proxy.get_scheme() == "socks") {
980  _state = S_socks_proxy_greet;
981  } else {
982  _state = S_http_proxy_ready;
983  }
984 
985  } else {
986  if (_want_ssl) {
987  _state = S_setup_ssl;
988  } else {
989  _state = S_ready;
990  }
991  }
992  return false;
993 }
994 
995 /**
996  * Here we have begun to establish a nonblocking connection, but we got a
997  * come-back-later message, so we are waiting for the socket to finish
998  * connecting.
999  */
1000 bool HTTPChannel::
1001 run_connecting_wait() {
1002  int fd = -1;
1003  BIO_get_fd(*_bio, &fd);
1004  if (fd < 0) {
1005  downloader_cat.warning()
1006  << _NOTIFY_HTTP_CHANNEL_ID
1007  << "nonblocking socket BIO has no file descriptor.\n";
1008  // This shouldn't be possible.
1009  _status_entry._status_code = SC_internal_error;
1010  _state = S_try_next_proxy;
1011  return false;
1012  }
1013 
1014  if (downloader_cat.is_spam()) {
1015  downloader_cat.spam()
1016  << _NOTIFY_HTTP_CHANNEL_ID
1017  << "waiting to connect to " << _request.get_url().get_server_and_port() << ".\n";
1018  }
1019  fd_set wset;
1020  FD_ZERO(&wset);
1021  FD_SET(fd, &wset);
1022  struct timeval tv;
1023  if (get_blocking_connect()) {
1024  // Since we'll be blocking on this connect, fill in the timeout into the
1025  // structure.
1026  tv.tv_sec = (int)_connect_timeout;
1027  tv.tv_usec = (int)((_connect_timeout - tv.tv_sec) * 1000000.0);
1028  } else {
1029  // We won't block on this connect, so select() for 0 time.
1030  tv.tv_sec = 0;
1031  tv.tv_usec = 0;
1032  }
1033  int errcode = select(fd + 1, nullptr, &wset, nullptr, &tv);
1034  if (errcode < 0) {
1035  downloader_cat.warning()
1036  << _NOTIFY_HTTP_CHANNEL_ID
1037  << "Error in select.\n";
1038  // This shouldn't be possible.
1039  _status_entry._status_code = SC_internal_error;
1040  _state = S_try_next_proxy;
1041  return false;
1042  }
1043 
1044  if (errcode == 0) {
1045  // Nothing's happened so far; come back later.
1046  if (get_blocking_connect() ||
1047  (TrueClock::get_global_ptr()->get_short_time() -
1048  _started_connecting_time > get_connect_timeout())) {
1049  // Time to give up.
1050  downloader_cat.info()
1051  << _NOTIFY_HTTP_CHANNEL_ID
1052  << "Timeout connecting to "
1053  << _request.get_url().get_server_and_port()
1054  << " for " << _request.get_url()
1055  << ".\n";
1056  _status_entry._status_code = SC_timeout;
1057  _state = S_try_next_proxy;
1058  return false;
1059  }
1060  return true;
1061  }
1062 
1063  // The socket is now ready for writing.
1064  _state = S_connecting;
1065  return false;
1066 }
1067 
1068 
1069 /**
1070  * This state is reached only after first establishing a connection to the
1071  * proxy, if a proxy is in use and we are tunneling through it via a CONNECT
1072  * command.
1073  */
1074 bool HTTPChannel::
1075 run_http_proxy_ready() {
1076  // If there's a request to be sent to the proxy, send it now.
1077  nassertr(!_proxy_request_text.empty(), false);
1078  if (!server_send(_proxy_request_text, false)) {
1079  return true;
1080  }
1081 
1082  // All done sending request.
1083  _state = S_http_proxy_request_sent;
1084  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1085  return false;
1086 }
1087 
1088 /**
1089  * This state is reached only after we have sent a special message to the
1090  * proxy and we are waiting for the proxy's response. It is not used in the
1091  * normal http-over-proxy case, which does not require a special message to
1092  * the proxy.
1093  */
1094 bool HTTPChannel::
1095 run_http_proxy_request_sent() {
1096  // Wait for the first line to come back from the server.
1097  string line;
1098  if (!server_getline_failsafe(line)) {
1099  return true;
1100  }
1101 
1102  // Skip unexpected blank lines. We're just being generous here.
1103  while (line.empty()) {
1104  if (!server_getline_failsafe(line)) {
1105  return true;
1106  }
1107  }
1108 
1109  if (!parse_http_response(line)) {
1110  return false;
1111  }
1112 
1113  _state = S_http_proxy_reading_header;
1114  _current_field_name = string();
1115  _current_field_value = string();
1116  _headers.clear();
1117  _got_file_size = false;
1118  _got_transfer_file_size = false;
1119  return false;
1120 }
1121 
1122 /**
1123  * In this state we are reading the header lines from the proxy's response to
1124  * our special message.
1125  */
1126 bool HTTPChannel::
1127 run_http_proxy_reading_header() {
1128  if (parse_http_header()) {
1129  return true;
1130  }
1131 
1132  _redirect = get_header_value("Location");
1133  // We can take the proxy's word for it that this is the actual URL for the
1134  // redirect.
1135 
1136  _server_response_has_no_body =
1137  (get_status_code() / 100 == 1 ||
1138  get_status_code() == 204 ||
1139  get_status_code() == 304);
1140 
1141  int last_status = _last_status_code;
1142  _last_status_code = get_status_code();
1143 
1144  if (get_status_code() == 407 && last_status != 407 && !_proxy.empty()) {
1145  // 407: not authorized to proxy. Try to get the authorization.
1146  string authenticate_request = get_header_value("Proxy-Authenticate");
1147  _proxy_auth = _client->generate_auth(_proxy, true, authenticate_request);
1148  if (_proxy_auth != nullptr) {
1149  _proxy_realm = _proxy_auth->get_realm();
1150  _proxy_username = _client->select_username(_proxy, true, _proxy_realm);
1151  if (!_proxy_username.empty()) {
1152  make_proxy_request_text();
1153 
1154  // Roll the state forward to force a new request.
1155  _state = S_begin_body;
1156  return false;
1157  }
1158  }
1159  }
1160 
1161  if (!is_valid()) {
1162  // Proxy wouldn't open connection.
1163 
1164  // Change some of the status codes a proxy might return to differentiate
1165  // them from similar status codes the destination server might have
1166  // returned.
1167  if (get_status_code() != 407) {
1168  _status_entry._status_code += 1000;
1169  }
1170 
1171  _state = S_try_next_proxy;
1172  return false;
1173  }
1174 
1175  // Now we have a tunnel opened through the proxy.
1176  make_request_text();
1177 
1178  if (_want_ssl) {
1179  _state = S_setup_ssl;
1180  } else {
1181  _state = S_ready;
1182  }
1183 
1184  return false;
1185 }
1186 
1187 /**
1188  * This state is reached only after first establishing a connection to a SOCKS
1189  * proxy, with which we now have to negotiate a connection.
1190  */
1191 bool HTTPChannel::
1192 run_socks_proxy_greet() {
1193  static const char socks_greeting[] = {
1194  0x05, // Socks version 5
1195  0x01, // Number of supported login methods
1196  0x00, // Login method 0: no authentication
1197  /*
1198  0x01, // Login method 1: GSSAPI
1199  0x02 // Login method 2: username/password
1200  */
1201  };
1202  static const int socks_greeting_len = sizeof(socks_greeting);
1203  if (!server_send(string(socks_greeting, socks_greeting_len), true)) {
1204  return true;
1205  }
1206  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1207 
1208  // All done sending request.
1209  _state = S_socks_proxy_greet_reply;
1210  return false;
1211 }
1212 
1213 /**
1214  * We are waiting for the SOCKS proxy to respond to our greeting.
1215  */
1216 bool HTTPChannel::
1217 run_socks_proxy_greet_reply() {
1218  string reply;
1219 
1220  // Get the two-byte reply from the SOCKS server.
1221  if (!server_get_failsafe(reply, 2)) {
1222  return true;
1223  }
1224 
1225  if (reply[0] != 0x05) {
1226  // We only speak Socks5.
1227  downloader_cat.info()
1228  << _NOTIFY_HTTP_CHANNEL_ID
1229  << "Rejecting Socks version " << (int)reply[0] << "\n";
1230  _status_entry._status_code = SC_socks_invalid_version;
1231  _state = S_try_next_proxy;
1232  return false;
1233  }
1234 
1235  if (reply[1] == (char)0xff) {
1236  downloader_cat.info()
1237  << _NOTIFY_HTTP_CHANNEL_ID
1238  << "Socks server does not accept our available login methods.\n";
1239  _status_entry._status_code = SC_socks_no_acceptable_login_method;
1240  _state = S_try_next_proxy;
1241  return false;
1242  }
1243 
1244  if (reply[1] == 0x00) {
1245  // No login method required. Proceed directly to the connect message.
1246  _state = S_socks_proxy_connect;
1247  return false;
1248  }
1249 
1250  // The server accepted a login method we didn't offer!
1251  downloader_cat.info()
1252  << _NOTIFY_HTTP_CHANNEL_ID
1253  << "Socks server accepted unrequested login method "
1254  << (int)reply[1] << "\n";
1255  _status_entry._status_code = SC_socks_no_acceptable_login_method;
1256  _state = S_try_next_proxy;
1257  return false;
1258 }
1259 
1260 /**
1261  * The SOCKS proxy has accepted us, and now we may issue the connect request.
1262  */
1263 bool HTTPChannel::
1264 run_socks_proxy_connect() {
1265  static const char socks_connect[] = {
1266  0x05, // Socks version 5
1267  0x01, // Command 1: connect
1268  0x00, // reserved
1269  0x03, // DNS name
1270  };
1271  static const int socks_connect_len = sizeof(socks_connect);
1272 
1273  string hostname = _request.get_url().get_server();
1274  int port = _request.get_url().get_port();
1275 
1276  if (downloader_cat.is_debug()) {
1277  downloader_cat.debug()
1278  << _NOTIFY_HTTP_CHANNEL_ID
1279  << "Requesting SOCKS5 connection to "
1280  << _request.get_url().get_server_and_port() << "\n";
1281  }
1282 
1283  string connect =
1284  string(socks_connect, socks_connect_len) +
1285  string(1, (char)hostname.length()) +
1286  hostname +
1287  string(1, (char)((port >> 8) & 0xff)) +
1288  string(1, (char)(port & 0xff));
1289 
1290  if (!server_send(connect, true)) {
1291  return true;
1292  }
1293  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1294 
1295  _state = S_socks_proxy_connect_reply;
1296  return false;
1297 }
1298 
1299 /**
1300  * We are waiting for the SOCKS proxy to honor our connect request.
1301  */
1302 bool HTTPChannel::
1303 run_socks_proxy_connect_reply() {
1304  string reply;
1305 
1306  // Get the first two bytes of the connect reply.
1307  if (!server_get_failsafe(reply, 2)) {
1308  return true;
1309  }
1310 
1311  if (reply[0] != 0x05) {
1312  // We only speak Socks5.
1313  downloader_cat.info()
1314  << _NOTIFY_HTTP_CHANNEL_ID
1315  << "Rejecting Socks version " << (int)reply[0] << "\n";
1316  close_connection(); // connection is now bad.
1317  _status_entry._status_code = SC_socks_invalid_version;
1318  _state = S_try_next_proxy;
1319  return false;
1320  }
1321 
1322  if (reply[1] != 0x00) {
1323  downloader_cat.info()
1324  << _NOTIFY_HTTP_CHANNEL_ID
1325  << "Connection refused, SOCKS code " << (int)reply[1] << "\n";
1326  /*
1327  Socks error codes (from RFC1928):
1328  o X'00' succeeded
1329  o X'01' general SOCKS server failure
1330  o X'02' connection not allowed by ruleset
1331  o X'03' Network unreachable
1332  o X'04' Host unreachable
1333  o X'05' Connection refused
1334  o X'06' TTL expired
1335  o X'07' Command not supported
1336  o X'08' Address type not supported
1337  o X'09' to X'FF' unassigned
1338  */
1339 
1340  switch (reply[1]) {
1341  case 0x03:
1342  case 0x04:
1343  case 0x05:
1344  case 0x06:
1345  // These generally mean the same thing: the SOCKS proxy tried, but
1346  // couldn't reach the host.
1347  _status_entry._status_code = SC_socks_no_connection;
1348  break;
1349 
1350  default:
1351  _status_entry._status_code = SC_socks_refused;
1352  }
1353 
1354  close_connection(); // connection is now bad.
1355  _state = S_try_next_proxy;
1356  return false;
1357  }
1358 
1359  // Now put those bytes back, and get five bytes of the reply.
1360  _working_get = reply;
1361  if (!server_get_failsafe(reply, 5)) {
1362  return true;
1363  }
1364 
1365  // Figure out how many bytes total we will expect for the reply.
1366  int total_bytes = 6;
1367 
1368  switch (reply[3]) {
1369  case 0x01: // IPv4
1370  total_bytes += 4;
1371  break;
1372 
1373  case 0x03: // DNS
1374  total_bytes += (unsigned int)reply[4];
1375  break;
1376 
1377  case 0x04: // IPv6
1378  total_bytes += 16;
1379  break;
1380 
1381  default:
1382  downloader_cat.info()
1383  << _NOTIFY_HTTP_CHANNEL_ID
1384  << "Unsupported SOCKS address type: " << (int)reply[3] << "\n";
1385  _status_entry._status_code = SC_socks_invalid_version;
1386  _state = S_try_next_proxy;
1387  return false;
1388  }
1389 
1390  // Now put back the bytes we've read so far, and get the rest of them.
1391  _working_get = reply;
1392  if (!server_get_failsafe(reply, total_bytes)) {
1393  return true;
1394  }
1395 
1396  if (downloader_cat.is_debug()) {
1397  // Finally, we can decode the whole thing.
1398  string connect_host;
1399 
1400  switch (reply[3]) {
1401  case 0x01: // IPv4
1402  {
1403  ostringstream strm;
1404  strm << (unsigned int)(unsigned char)reply[4] << "."
1405  << (unsigned int)(unsigned char)reply[5] << "."
1406  << (unsigned int)(unsigned char)reply[6] << "."
1407  << (unsigned int)(unsigned char)reply[7];
1408  connect_host = strm.str();
1409  }
1410  break;
1411 
1412  case 0x03: // DNS
1413  connect_host = string(&reply[5], (unsigned int)reply[4]);
1414  break;
1415 
1416  case 0x04: // IPv6
1417  {
1418  char buf[48];
1419  sprintf(buf, "[%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx"
1420  ":%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx]",
1421  reply[4], reply[5], reply[6], reply[7], reply[8], reply[9],
1422  reply[10], reply[11], reply[12], reply[13], reply[14],
1423  reply[15], reply[16], reply[17], reply[18], reply[19]);
1424  total_bytes += 16;
1425  }
1426  break;
1427  }
1428 
1429  int connect_port =
1430  (((unsigned int)(unsigned char)reply[total_bytes - 2]) << 8) |
1431  ((unsigned int)(unsigned char)reply[total_bytes - 1]);
1432 
1433  downloader_cat.debug()
1434  << _NOTIFY_HTTP_CHANNEL_ID
1435  << _proxy << " directed us to " << connect_host << ":"
1436  << connect_port << "\n";
1437  }
1438 
1439  if (_want_ssl) {
1440  _state = S_setup_ssl;
1441  } else {
1442  _state = S_ready;
1443  }
1444 
1445  return false;
1446 }
1447 
1448 /**
1449  * This state begins elevating our existing, unsecure connection to a secure,
1450  * SSL connection.
1451  */
1452 bool HTTPChannel::
1453 run_setup_ssl() {
1454  _sbio = BIO_new_ssl(_client->get_ssl_ctx(), true);
1455  BIO_push(_sbio, *_bio);
1456 
1457  SSL *ssl = nullptr;
1458  BIO_get_ssl(_sbio, &ssl);
1459  nassertr(ssl != nullptr, false);
1460 
1461  // We only take one word at a time from the _cipher_list. If that
1462  // connection fails, then we take the next word.
1463  string cipher_list = _cipher_list;
1464  if (!cipher_list.empty()) {
1465  size_t space = cipher_list.find(" ");
1466  if (space != string::npos) {
1467  cipher_list = cipher_list.substr(0, space);
1468  }
1469  }
1470 
1471  if (downloader_cat.is_debug()) {
1472  downloader_cat.debug()
1473  << _NOTIFY_HTTP_CHANNEL_ID
1474  << "Setting ssl-cipher-list '" << cipher_list << "'\n";
1475  }
1476  int result = SSL_set_cipher_list(ssl, cipher_list.c_str());
1477  if (result == 0) {
1478  downloader_cat.error()
1479  << _NOTIFY_HTTP_CHANNEL_ID
1480  << "Invalid cipher list: '" << cipher_list << "'\n";
1481  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
1482  _status_entry._status_code = SC_ssl_internal_failure;
1483  _state = S_failure;
1484  return false;
1485  }
1486 
1487  string hostname = _request.get_url().get_server();
1488  result = SSL_set_tlsext_host_name(ssl, hostname.c_str());
1489  if (result == 0) {
1490  downloader_cat.error()
1491  << _NOTIFY_HTTP_CHANNEL_ID
1492  << "Could not set TLS SNI hostname to '" << hostname << "'\n";
1493  }
1494 
1495 /*
1496  * It would be nice to use something like SSL_set_client_cert_cb() here to set
1497  * a callback to provide the certificate should it be requested, or even to
1498  * potentially provide any of a number of certificates according to the
1499  * server's CA presented, but that interface as provided by OpenSSL is broken
1500  * since there's no way to pass additional data to the callback function (and
1501  * hence no way to tie it back to the HTTPChannel object, other than by
1502  * building a messy mapping of SSL pointers back to HTTPChannel pointers).
1503  */
1504  if (_client->load_client_certificate()) {
1505  SSL_use_certificate(ssl, _client->_client_certificate_pub);
1506  SSL_use_PrivateKey(ssl, _client->_client_certificate_priv);
1507  if (!SSL_check_private_key(ssl)) {
1508  downloader_cat.warning()
1509  << _NOTIFY_HTTP_CHANNEL_ID
1510  << "Client private key does not match public key!\n";
1511  }
1512  }
1513 
1514  if (downloader_cat.is_spam()) {
1515  downloader_cat.spam()
1516  << _NOTIFY_HTTP_CHANNEL_ID
1517  << "SSL Ciphers available:\n";
1518  const char *name;
1519  int pri = 0;
1520  name = SSL_get_cipher_list(ssl, pri);
1521  while (name != nullptr) {
1522  downloader_cat.spam()
1523  << _NOTIFY_HTTP_CHANNEL_ID
1524  << " " << pri + 1 << ". " << name << "\n";
1525  pri++;
1526  name = SSL_get_cipher_list(ssl, pri);
1527  }
1528  }
1529 
1530  if (downloader_cat.is_debug()) {
1531  downloader_cat.debug()
1532  << _NOTIFY_HTTP_CHANNEL_ID
1533  << "performing SSL handshake\n";
1534  }
1535  _state = S_ssl_handshake;
1536 
1537  // We start the connect timer over again when we reach the SSL handshake.
1538  _started_connecting_time =
1539  TrueClock::get_global_ptr()->get_short_time();
1540 
1541  return false;
1542 }
1543 
1544 /**
1545  * This state performs the SSL handshake with the server, and also verifies
1546  * the server's identity when the handshake has successfully completed.
1547  */
1548 bool HTTPChannel::
1549 run_ssl_handshake() {
1550  if (BIO_do_handshake(_sbio) <= 0) {
1551  if (BIO_should_retry(_sbio)) {
1552  double elapsed =
1553  TrueClock::get_global_ptr()->get_short_time() -
1554  _started_connecting_time;
1555  if (elapsed <= get_connect_timeout() + _extra_ssl_handshake_time) {
1556  // Keep trying.
1557  return true;
1558  }
1559  // Time to give up on the handshake.
1560  }
1561 
1562  downloader_cat.info()
1563  << _NOTIFY_HTTP_CHANNEL_ID
1564  << "Could not establish SSL handshake with "
1565  << _request.get_url().get_server_and_port() << "\n";
1566  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
1567 
1568  // It seems to be an error to free sbio at this point; perhaps it's
1569  // already been freed?
1570 
1571  if (!_cipher_list.empty()) {
1572  // If we've got another cipher to try, do so.
1573  size_t space = _cipher_list.find(" ");
1574  if (space != string::npos) {
1575  while (space < _cipher_list.length() && _cipher_list[space] == ' ') {
1576  ++space;
1577  }
1578  _cipher_list = _cipher_list.substr(space);
1579  if (!_cipher_list.empty()) {
1580  close_connection();
1581  reconsider_proxy();
1582  _state = S_connecting;
1583  return false;
1584  }
1585  }
1586  }
1587 
1588  // All done trying ciphers; they all failed.
1589  _cipher_list = _client->get_cipher_list();
1590  _status_entry._status_code = SC_ssl_no_handshake;
1591  _state = S_failure;
1592  return false;
1593  }
1594 
1595  SSL *ssl = nullptr;
1596  BIO_get_ssl(_sbio, &ssl);
1597  nassertr(ssl != nullptr, false);
1598 
1599  if (!_nonblocking) {
1600  SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
1601  }
1602 
1603  const SSL_CIPHER *cipher = SSL_get_current_cipher(ssl);
1604  if (cipher == nullptr) {
1605  downloader_cat.warning()
1606  << _NOTIFY_HTTP_CHANNEL_ID
1607  << "No current cipher on SSL connection.\n";
1608  } else {
1609  if (downloader_cat.is_debug()) {
1610  downloader_cat.debug()
1611  << _NOTIFY_HTTP_CHANNEL_ID
1612  << "Using cipher " << SSL_CIPHER_get_name((SSL_CIPHER *) cipher) << "\n";
1613  }
1614  }
1615 
1616  // Now that we've made an SSL handshake, we can use the SSL bio to do all of
1617  // our communication henceforth.
1618  _bio->set_bio(_sbio);
1619  _sbio = nullptr;
1620 
1621  X509 *cert = SSL_get_peer_certificate(ssl);
1622  if (cert == nullptr) {
1623  downloader_cat.info()
1624  << _NOTIFY_HTTP_CHANNEL_ID
1625  << "No certificate was presented by server.\n";
1626 
1627  // This shouldn't be possible, per the SSL specs.
1628  _status_entry._status_code = SC_ssl_invalid_server_certificate;
1629  _state = S_failure;
1630  return false;
1631  }
1632 
1633  X509_NAME *subject = X509_get_subject_name(cert);
1634  if (downloader_cat.is_debug()) {
1635  string org_name = get_x509_name_component(subject, NID_organizationName);
1636  string org_unit_name = get_x509_name_component(subject, NID_organizationalUnitName);
1637  string common_name = get_x509_name_component(subject, NID_commonName);
1638 
1639  downloader_cat.debug()
1640  << _NOTIFY_HTTP_CHANNEL_ID
1641  << "Server is " << common_name << " from " << org_unit_name
1642  << " / " << org_name << "\n";
1643 
1644  if (downloader_cat.is_spam()) {
1645  downloader_cat.spam()
1646  << _NOTIFY_HTTP_CHANNEL_ID
1647  << "Received certificate from server:\n" << std::flush;
1648  X509_print_fp(stderr, cert);
1649  fflush(stderr);
1650  }
1651  }
1652 
1653  bool cert_preapproved = false;
1654  bool cert_name_preapproved = false;
1655  check_preapproved_server_certificate(cert, cert_preapproved, cert_name_preapproved);
1656 
1657  // Now verify the server certificate is valid.
1658  long verify_result = SSL_get_verify_result(ssl);
1659  bool cert_valid = true;
1660 
1661  if (verify_result == X509_V_ERR_CERT_HAS_EXPIRED) {
1662  downloader_cat.info()
1663  << _NOTIFY_HTTP_CHANNEL_ID
1664  << "Expired certificate from " << _request.get_url().get_server_and_port() << "\n";
1665  if (_client->get_verify_ssl() == HTTPClient::VS_normal && !cert_preapproved) {
1666  cert_valid = false;
1667  }
1668 
1669  } else if (verify_result == X509_V_ERR_CERT_NOT_YET_VALID) {
1670  downloader_cat.info()
1671  << _NOTIFY_HTTP_CHANNEL_ID
1672  << "Premature certificate from " << _request.get_url().get_server_and_port() << "\n";
1673  if (_client->get_verify_ssl() == HTTPClient::VS_normal && !cert_preapproved) {
1674  cert_valid = false;
1675  }
1676 
1677  } else if (verify_result == X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT ||
1678  verify_result == X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN) {
1679  downloader_cat.info()
1680  << _NOTIFY_HTTP_CHANNEL_ID
1681  << "Self-signed certificate from " << _request.get_url().get_server_and_port() << "\n";
1682  if (_client->get_verify_ssl() != HTTPClient::VS_no_verify && !cert_preapproved) {
1683  cert_valid = false;
1684  }
1685 
1686  } else if (verify_result != X509_V_OK) {
1687  downloader_cat.info()
1688  << _NOTIFY_HTTP_CHANNEL_ID
1689  << "Unable to verify identity of " << _request.get_url().get_server_and_port()
1690  << ", verify error code " << verify_result << "\n";
1691  if (_client->get_verify_ssl() != HTTPClient::VS_no_verify && !cert_preapproved) {
1692  cert_valid = false;
1693  }
1694  }
1695 
1696  if (!cert_valid) {
1697  _status_entry._status_code = SC_ssl_invalid_server_certificate;
1698  _state = S_failure;
1699  return false;
1700  }
1701 
1702  if (_client->get_verify_ssl() != HTTPClient::VS_no_verify && !cert_name_preapproved) {
1703  // Check that the server is someone we expected to be talking to.
1704  if (!validate_server_name(cert)) {
1705  _status_entry._status_code = SC_ssl_unexpected_server;
1706  _state = S_failure;
1707  return false;
1708  }
1709  }
1710 
1711  X509_free(cert);
1712 
1713  _state = S_ready;
1714  return false;
1715 }
1716 
1717 /**
1718  * This is the main "ready" state. In this state, we have established a
1719  * (possibly secure) connection to the server (or proxy), and the server (or
1720  * proxy) is idle and waiting for us to send a request.
1721  *
1722  * If persistent_connection is true, we will generally come back to this state
1723  * after finishing each request on a given connection.
1724  */
1725 bool HTTPChannel::
1726 run_ready() {
1727  // If there's a request to be sent upstream, send it now.
1728  if (!_request_text.empty()) {
1729  if (!server_send(_request_text, false)) {
1730  return true;
1731  }
1732  }
1733 
1734  // All done sending request.
1735  _state = S_request_sent;
1736  _sent_request_time = TrueClock::get_global_ptr()->get_short_time();
1737  return false;
1738 }
1739 
1740 /**
1741  * In this state we have sent our request to the server (or proxy) and we are
1742  * waiting for a response.
1743  */
1744 bool HTTPChannel::
1745 run_request_sent() {
1746  // Wait for the first line to come back from the server.
1747  string line;
1748  if (!server_getline_failsafe(line)) {
1749  return true;
1750  }
1751 
1752  // Skip unexpected blank lines. We're just being generous here.
1753  while (line.empty()) {
1754  if (!server_getline_failsafe(line)) {
1755  return true;
1756  }
1757  }
1758 
1759  if (!parse_http_response(line)) {
1760  // Not an HTTP response. _state is already set appropriately.
1761  return false;
1762  }
1763 
1764  _state = S_reading_header;
1765  _current_field_name = string();
1766  _current_field_value = string();
1767  _headers.clear();
1768  _got_file_size = false;
1769  _got_transfer_file_size = false;
1770  return false;
1771 }
1772 
1773 /**
1774  * In this state we have received the first response to our request from the
1775  * server (or proxy) and we are reading the set of header lines preceding the
1776  * requested document.
1777  */
1778 bool HTTPChannel::
1779 run_reading_header() {
1780  if (parse_http_header()) {
1781  if (_bio.is_null()) {
1782  downloader_cat.info()
1783  << _NOTIFY_HTTP_CHANNEL_ID
1784  << "Connection lost while reading HTTP response.\n";
1785  if (_response_type == RT_http_hangup) {
1786  // This was our second hangup in a row. Give up.
1787  _status_entry._status_code = SC_lost_connection;
1788  _state = S_try_next_proxy;
1789 
1790  } else {
1791  // Try again, once.
1792  _response_type = RT_http_hangup;
1793  }
1794 
1795  } else {
1796  double elapsed =
1797  TrueClock::get_global_ptr()->get_short_time() -
1798  _sent_request_time;
1799  if (elapsed > get_http_timeout()) {
1800  // Time to give up.
1801  downloader_cat.info()
1802  << _NOTIFY_HTTP_CHANNEL_ID
1803  << "Timeout waiting for "
1804  << _request.get_url().get_server_and_port()
1805  << " in run_reading_header (" << elapsed
1806  << " seconds elapsed).\n";
1807  _status_entry._status_code = SC_timeout;
1808  _state = S_try_next_proxy;
1809  }
1810  }
1811  return true;
1812  }
1813  _response_type = RT_http_complete;
1814 
1815  // Ok, we've established an HTTP connection to the server. Our extra send
1816  // headers have done their job; clear them for next time.
1817  clear_extra_headers();
1818 
1819  _server_response_has_no_body =
1820  (get_status_code() / 100 == 1 ||
1821  get_status_code() == 204 ||
1822  get_status_code() == 304 ||
1823  _method == HTTPEnum::M_head);
1824 
1825  // Look for key properties in the header fields.
1826  if (get_status_code() == 206) {
1827  string content_range = get_header_value("Content-Range");
1828  if (content_range.empty()) {
1829  downloader_cat.warning()
1830  << _NOTIFY_HTTP_CHANNEL_ID
1831  << "Got 206 response without Content-Range header!\n";
1832  _status_entry._status_code = SC_invalid_http;
1833  _state = S_failure;
1834  return false;
1835 
1836  } else {
1837  if (!parse_content_range(content_range)) {
1838  downloader_cat.warning()
1839  << _NOTIFY_HTTP_CHANNEL_ID
1840  << "Couldn't parse Content-Range: " << content_range << "\n";
1841  _status_entry._status_code = SC_invalid_http;
1842  _state = S_failure;
1843  return false;
1844  }
1845  }
1846 
1847  } else {
1848  _first_byte_delivered = 0;
1849  _last_byte_delivered = 0;
1850  }
1851  if (downloader_cat.is_debug()) {
1852  if (_first_byte_requested != 0 || _last_byte_requested != 0 ||
1853  _first_byte_delivered != 0 || _last_byte_delivered != 0) {
1854  downloader_cat.debug()
1855  << _NOTIFY_HTTP_CHANNEL_ID
1856  << "Requested byte range " << _first_byte_requested
1857  << " to " << _last_byte_delivered
1858  << "; server delivers range " << _first_byte_delivered
1859  << " to " << _last_byte_delivered
1860  << "\n";
1861  }
1862  }
1863 
1864  // Set the _document_spec to reflect what we just retrieved.
1865  _document_spec = DocumentSpec(_request.get_url());
1866  string tag = get_header_value("ETag");
1867  if (!tag.empty()) {
1868  _document_spec.set_tag(HTTPEntityTag(tag));
1869  }
1870  string date = get_header_value("Last-Modified");
1871  if (!date.empty()) {
1872  _document_spec.set_date(HTTPDate(date));
1873  }
1874 
1875  // In case we've got a download in effect, now we know what the first byte
1876  // of the subdocument request will be, so we can open the file and position
1877  // it.
1878  if (_server_response_has_no_body) {
1879  // Never mind on the download.
1880  reset_download_to();
1881  }
1882 
1883  if (!open_download_file()) {
1884  return false;
1885  }
1886 
1887  _got_expected_file_size = false;
1888  _got_file_size = false;
1889  _got_transfer_file_size = false;
1890 
1891  string content_length = get_header_value("Content-Length");
1892  if (!content_length.empty()) {
1893  _file_size = atoi(content_length.c_str());
1894  _got_file_size = true;
1895 
1896  } else if (get_status_code() == 206) {
1897  // Well, we didn't get a content-length from the server, but we can infer
1898  // the number of bytes based on the range we're given.
1899  _file_size = _last_byte_delivered - _first_byte_delivered + 1;
1900  _got_file_size = true;
1901  }
1902  _redirect = get_header_value("Location");
1903 
1904  // The server might have given us just a filename for the redirect. In that
1905  // case, it's relative to the same server. If it's a relative path, it's
1906  // relative to the same directory.
1907  if (_redirect.has_path() && !_redirect.has_authority()) {
1908  URLSpec url = _document_spec.get_url();
1909  Filename path = _redirect.get_path();
1910  if (path.is_local()) {
1911  Filename rel_to = Filename(url.get_path()).get_dirname();
1912  _redirect.set_path(Filename(rel_to, path));
1913  }
1914  _redirect.set_scheme(url.get_scheme());
1915  _redirect.set_authority(url.get_authority());
1916  }
1917 
1918  _state = S_read_header;
1919 
1920  if (_server_response_has_no_body && will_close_connection()) {
1921  // If the server said it will close the connection, we should close it
1922  // too.
1923  close_connection();
1924  }
1925 
1926  // Handle automatic retries and redirects.
1927  int last_status = _last_status_code;
1928  _last_status_code = get_status_code();
1929 
1930  if (get_status_code() == 407 && last_status != 407 && !_proxy.empty()) {
1931  // 407: not authorized to proxy. Try to get the authorization.
1932  string authenticate_request = get_header_value("Proxy-Authenticate");
1933  _proxy_auth =
1934  _client->generate_auth(_proxy, true, authenticate_request);
1935  if (_proxy_auth != nullptr) {
1936  _proxy_realm = _proxy_auth->get_realm();
1937  _proxy_username = _client->select_username(_proxy, true, _proxy_realm);
1938  if (!_proxy_username.empty()) {
1939  make_request_text();
1940 
1941  // Roll the state forward to force a new request.
1942  _state = S_begin_body;
1943  return false;
1944  }
1945  }
1946  }
1947 
1948  if (get_status_code() == 401 && last_status != 401) {
1949  // 401: not authorized to remote server. Try to get the authorization.
1950  string authenticate_request = get_header_value("WWW-Authenticate");
1951  _www_auth = _client->generate_auth(_request.get_url(), false, authenticate_request);
1952  if (_www_auth != nullptr) {
1953  _www_realm = _www_auth->get_realm();
1954  _www_username = _client->select_username(_request.get_url(), false, _www_realm);
1955  if (!_www_username.empty()) {
1956  make_request_text();
1957 
1958  // Roll the state forward to force a new request.
1959  _state = S_begin_body;
1960  return false;
1961  }
1962  }
1963  }
1964 
1965  if ((get_status_code() == 300 ||
1966  get_status_code() == 301 ||
1967  get_status_code() == 302 ||
1968  get_status_code() == 303 ||
1969  get_status_code() == 307) && !get_redirect().empty()) {
1970  // Redirect. Should we handle it automatically?
1971 
1972  // According to the letter of RFC 2616, 301 and 302 responses to POST
1973  // requests must not be automatically redirected without confirmation by
1974  // the user. In reality, browsers do allow automatic redirection of these
1975  // responses, changing the POST to a GET, and we reproduce this behavior
1976  // here.
1977  if (_method == HTTPEnum::M_post) {
1978  _method = HTTPEnum::M_get;
1979  _body = string();
1980  }
1981 
1982  if (_method == HTTPEnum::M_get || _method == HTTPEnum::M_head) {
1983  // Sure!
1984  URLSpec new_url = get_redirect();
1985  if (find(_redirect_trail.begin(), _redirect_trail.end(),
1986  new_url) != _redirect_trail.end()) {
1987  downloader_cat.warning()
1988  << _NOTIFY_HTTP_CHANNEL_ID
1989  << "cycle detected in redirect to " << new_url << "\n";
1990 
1991  } else {
1992  _redirect_trail.push_back(new_url);
1993 
1994  if (downloader_cat.is_debug()) {
1995  downloader_cat.debug()
1996  << _NOTIFY_HTTP_CHANNEL_ID
1997  << "following redirect to " << new_url << "\n";
1998  }
1999  if (_request.get_url().has_username()) {
2000  new_url.set_username(_request.get_url().get_username());
2001  }
2002  reset_url(_request.get_url(), new_url);
2003  _request.set_url(new_url);
2004  _want_ssl = _request.get_url().is_ssl();
2005  reconsider_proxy();
2006  make_header();
2007  make_request_text();
2008 
2009  // Roll the state forward to force a new request.
2010  _state = S_begin_body;
2011  return false;
2012  }
2013  }
2014  }
2015 
2016  if (_state == S_read_header &&
2017  ((get_status_code() / 100) == 4 || (get_status_code() / 100) == 5) &&
2018  _proxy_serves_document && _proxy_next_index < _proxies.size()) {
2019  // If we were using a proxy (but not tunneling through the proxy) and we
2020  // got some kind of a server error, try the next proxy in sequence (if we
2021  // have one). This handles the case of a working proxy that cannot see
2022  // the host (and so returns 504 or something along those lines). Some
2023  // proxies are so broken they return a 404 in this case, so we have to
2024  // consider that along the same lines.
2025  _state = S_try_next_proxy;
2026  return false;
2027  }
2028 
2029  // Otherwise, we're good to go.
2030  return false;
2031 }
2032 
2033 /**
2034  * This is the first state when reading a file:// URL. All it does is skip
2035  * past the non-existent "header".
2036  */
2037 bool HTTPChannel::
2038 run_start_direct_file_read() {
2039  _state = S_read_header;
2040  if (!open_download_file()) {
2041  return false;
2042  }
2043  return false;
2044 }
2045 
2046 /**
2047  * In this state we have completely read the header lines returned by the
2048  * server (or proxy) in response to our request. This state represents the
2049  * normal stopping point of a call to get_document(), etc.; further reads will
2050  * return the body of the request, the requested document.
2051  *
2052  * Normally run_read_header() is not called unless the user has elected not to
2053  * read the returned document himself. In fact, the state itself only exists
2054  * so we can make a distinction between S_read_header and S_begin_body, where
2055  * S_read_header is safe to return to the user and S_begin_body means we need
2056  * to start skipping the document.
2057  */
2058 bool HTTPChannel::
2059 run_read_header() {
2060  _state = S_begin_body;
2061  return false;
2062 }
2063 
2064 /**
2065  * This state begins to skip over the body in preparation for making a new
2066  * request.
2067  */
2068 bool HTTPChannel::
2069 run_begin_body() {
2070  if (will_close_connection()) {
2071  // If the socket will close anyway, no point in skipping past the previous
2072  // body; just reset.
2073  if (downloader_cat.is_debug()) {
2074  downloader_cat.debug()
2075  << _NOTIFY_HTTP_CHANNEL_ID
2076  << "resetting to begin body; server would close anyway.\n";
2077  }
2078  reset_to_new();
2079  return false;
2080  }
2081 
2082  if (_server_response_has_no_body) {
2083  // We have already "read" the nonexistent body.
2084  _state = S_read_trailer;
2085 
2086  } else if (get_file_size() > (int)_skip_body_size) {
2087  // If we know the size of the body we are about to skip and it's too
2088  // large, then don't bother skipping it--just drop the connection and get
2089  // a new one.
2090  if (downloader_cat.is_debug()) {
2091  downloader_cat.debug()
2092  << _NOTIFY_HTTP_CHANNEL_ID
2093  << "Dropping connection rather than skipping past "
2094  << get_file_size() << " bytes.\n";
2095  }
2096  reset_to_new();
2097 
2098  } else {
2099  open_read_body();
2100  if (_body_stream == nullptr) {
2101  if (downloader_cat.is_debug()) {
2102  downloader_cat.debug()
2103  << _NOTIFY_HTTP_CHANNEL_ID
2104  << "Unable to skip body.\n";
2105  }
2106  reset_to_new();
2107 
2108  } else {
2109  _owns_body_stream = true;
2110  if (_state != S_reading_body) {
2111  reset_body_stream();
2112  }
2113  }
2114  }
2115 
2116  return false;
2117 }
2118 
2119 /**
2120  * In this state we are in the process of reading the response's body. We
2121  * will only come to this function if the user did not choose to read the
2122  * entire body himself (by calling open_read_body()).
2123  *
2124  * In this case we should skip past the body to reset the connection for
2125  * making a new request.
2126  */
2127 bool HTTPChannel::
2128 run_reading_body() {
2129  if (will_close_connection()) {
2130  // If the socket will close anyway, no point in skipping past the previous
2131  // body; just reset.
2132  if (downloader_cat.is_debug()) {
2133  downloader_cat.debug()
2134  << _NOTIFY_HTTP_CHANNEL_ID
2135  << "resetting to read body; server would close anyway.\n";
2136  }
2137  reset_to_new();
2138  return false;
2139  }
2140 
2141  // Skip the body we've already started.
2142  if (_body_stream == nullptr || !_owns_body_stream) {
2143  // Whoops, we're not in skip-body mode. Better reset.
2144  if (downloader_cat.is_debug()) {
2145  downloader_cat.debug()
2146  << _NOTIFY_HTTP_CHANNEL_ID
2147  << "resetting, not in skip-body mode.\n";
2148  }
2149  reset_to_new();
2150  return false;
2151  }
2152 
2153  string line;
2154  std::getline(*_body_stream, line);
2155  while (!_body_stream->fail() && !_body_stream->eof()) {
2156  if (downloader_cat.is_spam()) {
2157  downloader_cat.spam()
2158  << _NOTIFY_HTTP_CHANNEL_ID
2159  << "skip: " << line << "\n";
2160  }
2161  std::getline(*_body_stream, line);
2162  }
2163 
2164  if (!_body_stream->is_closed()) {
2165  // There's more to come later.
2166  return true;
2167  }
2168 
2169  reset_body_stream();
2170 
2171  // This should have been set by the call to finished_body(), above.
2172  nassertr(_state != S_reading_body, false);
2173  return false;
2174 }
2175 
2176 /**
2177  * In this state we have completely read (or skipped over) the body of the
2178  * response. We should continue skipping past the trailer following the body.
2179  *
2180  * Not all bodies come with trailers; in particular, the "identity" transfer
2181  * encoding does not include a trailer. It is therefore the responsibility of
2182  * the IdentityStreamBuf or ChunkedStreamBuf to set the state appropriately to
2183  * either S_read_body or S_read_trailer following the completion of the body.
2184  */
2185 bool HTTPChannel::
2186 run_read_body() {
2187  if (will_close_connection()) {
2188  // If the socket will close anyway, no point in skipping past the previous
2189  // body; just reset.
2190  if (downloader_cat.is_debug()) {
2191  downloader_cat.debug()
2192  << _NOTIFY_HTTP_CHANNEL_ID
2193  << "resetting to read body; server would close anyway.\n";
2194  }
2195  reset_to_new();
2196  return false;
2197  }
2198  // Skip the trailer following the recently-read body.
2199 
2200  string line;
2201  if (!server_getline(line)) {
2202  return true;
2203  }
2204  while (!line.empty()) {
2205  if (!server_getline(line)) {
2206  return true;
2207  }
2208  }
2209 
2210  _state = S_read_trailer;
2211  return false;
2212 }
2213 
2214 /**
2215  * In this state we have completely read the body and the trailer. This state
2216  * is simply a pass-through back to S_ready.
2217  */
2218 bool HTTPChannel::
2219 run_read_trailer() {
2220  if (will_close_connection()) {
2221  // If the socket will close anyway, no point in skipping past the previous
2222  // body; just reset.
2223  if (downloader_cat.is_debug()) {
2224  downloader_cat.debug()
2225  << _NOTIFY_HTTP_CHANNEL_ID
2226  << "resetting to read trailer; server would close anyway.\n";
2227  }
2228  reset_to_new();
2229  return false;
2230  }
2231 
2232  _state = S_ready;
2233  return false;
2234 }
2235 
2236 /**
2237  * After the headers, etc. have been read, this streams the download to the
2238  * named file.
2239  */
2240 bool HTTPChannel::
2241 run_download_to_file() {
2242  nassertr(_body_stream != nullptr && _owns_body_stream, false);
2243 
2244  bool do_throttle = _wanted_nonblocking && _download_throttle;
2245 
2246  static const size_t buffer_size = 4096;
2247  char buffer[buffer_size];
2248 
2249  size_t remaining_this_pass = buffer_size;
2250  if (do_throttle) {
2251  remaining_this_pass = _bytes_per_update;
2252  }
2253 
2254  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2255  size_t count = _body_stream->gcount();
2256  while (count != 0) {
2257  _download_to_stream->write(buffer, count);
2258  _bytes_downloaded += count;
2259  if (do_throttle) {
2260  nassertr(count <= remaining_this_pass, false);
2261  remaining_this_pass -= count;
2262  if (remaining_this_pass == 0) {
2263  // That's enough for now.
2264  return true;
2265  }
2266  }
2267 
2268  thread_consider_yield();
2269  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2270  count = _body_stream->gcount();
2271  }
2272 
2273  if (_download_to_stream->fail()) {
2274  downloader_cat.warning()
2275  << _NOTIFY_HTTP_CHANNEL_ID
2276  << "Error writing to " << _download_to_filename << "\n";
2277  _status_entry._status_code = SC_download_write_error;
2278  _state = S_failure;
2279  reset_download_to();
2280  return false;
2281  }
2282 
2283  _download_to_stream->flush();
2284 
2285  if (_body_stream->is_closed()) {
2286  // Done.
2287  reset_body_stream();
2288  close_download_stream();
2289  _started_download = false;
2290  return false;
2291  } else {
2292  // More to come.
2293  return true;
2294  }
2295 }
2296 
2297 /**
2298  * After the headers, etc. have been read, this streams the download to the
2299  * specified Ramfile object.
2300  */
2301 bool HTTPChannel::
2302 run_download_to_ram() {
2303  nassertr(_body_stream != nullptr && _owns_body_stream, false);
2304  nassertr(_download_to_ramfile != nullptr, false);
2305 
2306  bool do_throttle = _wanted_nonblocking && _download_throttle;
2307 
2308  static const size_t buffer_size = 4096;
2309  char buffer[buffer_size];
2310 
2311  size_t remaining_this_pass = buffer_size;
2312  if (do_throttle) {
2313  remaining_this_pass = _bytes_per_update;
2314  }
2315 
2316  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2317  size_t count = _body_stream->gcount();
2318  while (count != 0) {
2319  _download_to_ramfile->_data += string(buffer, count);
2320  _bytes_downloaded += count;
2321  if (do_throttle) {
2322  nassertr(count <= remaining_this_pass, false);
2323  remaining_this_pass -= count;
2324  if (remaining_this_pass == 0) {
2325  // That's enough for now.
2326  return true;
2327  }
2328  }
2329 
2330  thread_consider_yield();
2331  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2332  count = _body_stream->gcount();
2333  }
2334 
2335  if (_body_stream->is_closed()) {
2336  // Done.
2337  reset_body_stream();
2338  close_download_stream();
2339  _started_download = false;
2340  return false;
2341  } else {
2342  // More to come.
2343  return true;
2344  }
2345 }
2346 
2347 /**
2348  * After the headers, etc. have been read, this streams the download to the
2349  * named file.
2350  */
2351 bool HTTPChannel::
2352 run_download_to_stream() {
2353  nassertr(_body_stream != nullptr && _owns_body_stream, false);
2354 
2355  bool do_throttle = _wanted_nonblocking && _download_throttle;
2356 
2357  static const size_t buffer_size = 4096;
2358  char buffer[buffer_size];
2359 
2360  size_t remaining_this_pass = buffer_size;
2361  if (do_throttle) {
2362  remaining_this_pass = _bytes_per_update;
2363  }
2364 
2365  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2366  size_t count = _body_stream->gcount();
2367  while (count != 0) {
2368  _download_to_stream->write(buffer, count);
2369  _bytes_downloaded += count;
2370  if (do_throttle) {
2371  nassertr(count <= remaining_this_pass, false);
2372  remaining_this_pass -= count;
2373  if (remaining_this_pass == 0) {
2374  // That's enough for now.
2375  return true;
2376  }
2377  }
2378 
2379  thread_consider_yield();
2380  _body_stream->read(buffer, min(buffer_size, remaining_this_pass));
2381  count = _body_stream->gcount();
2382  }
2383 
2384  if (_download_to_stream->fail()) {
2385  downloader_cat.warning()
2386  << _NOTIFY_HTTP_CHANNEL_ID
2387  << "Error writing to stream\n";
2388  _status_entry._status_code = SC_download_write_error;
2389  _state = S_failure;
2390  reset_download_to();
2391  return false;
2392  }
2393 
2394  _download_to_stream->flush();
2395 
2396  if (_body_stream->is_closed()) {
2397  // Done.
2398  reset_body_stream();
2399  close_download_stream();
2400  _started_download = false;
2401  return false;
2402  } else {
2403  // More to come.
2404  return true;
2405  }
2406 }
2407 
2408 
2409 /**
2410  * Begins a new document request to the server, throwing away whatever request
2411  * was currently pending if necessary.
2412  */
2413 void HTTPChannel::
2414 begin_request(HTTPEnum::Method method, const DocumentSpec &url,
2415  const string &body, bool nonblocking,
2416  size_t first_byte, size_t last_byte) {
2417 
2418  downloader_cat.info()
2419  << _NOTIFY_HTTP_CHANNEL_ID
2420  << "begin " << method << " " << url << "\n";
2421 
2422  reset_for_new_request();
2423 
2424  _wanted_nonblocking = nonblocking;
2425 #if defined(HAVE_THREADS) && defined(SIMPLE_THREADS)
2426  // In the presence of SIMPLE_THREADS, we always use non-blocking IO. We
2427  // simulate blocking by yielding the thread.
2428  nonblocking = true;
2429 #endif
2430 
2431  // Get the set of proxies that are appropriate for this URL.
2432  _proxies.clear();
2433  _proxy_next_index = 0;
2434  if (get_allow_proxy()) {
2435  _client->get_proxies_for_url(url.get_url(), _proxies);
2436  }
2437 
2438  // If we still have a live connection to a proxy that is on the list, that
2439  // proxy should be moved immediately to the front of the list (to minimize
2440  // restarting connections unnecessarily).
2441  if (!_bio.is_null() && !_proxies.empty() && !_proxy.empty()) {
2442  Proxies::iterator pi = find(_proxies.begin(), _proxies.end(), _proxy);
2443  if (pi != _proxies.end()) {
2444  _proxies.erase(pi);
2445  _proxies.insert(_proxies.begin(), _proxy);
2446  }
2447  }
2448 
2449  URLSpec new_proxy;
2450  if (_proxy_next_index < _proxies.size()) {
2451  new_proxy = _proxies[_proxy_next_index];
2452  _proxy_next_index++;
2453  }
2454 
2455  // Changing the proxy is grounds for dropping the old connection, if any.
2456  if (_proxy != new_proxy) {
2457  _proxy = new_proxy;
2458  _proxy_auth = nullptr;
2459  if (downloader_cat.is_debug()) {
2460  downloader_cat.debug()
2461  << _NOTIFY_HTTP_CHANNEL_ID
2462  << "resetting to change proxy to " << _proxy << "\n";
2463  }
2464  reset_to_new();
2465  }
2466 
2467  // Ditto with changing the nonblocking state.
2468  if (_nonblocking != nonblocking) {
2469  _nonblocking = nonblocking;
2470  if (downloader_cat.is_debug()) {
2471  downloader_cat.debug()
2472  << _NOTIFY_HTTP_CHANNEL_ID
2473  << "resetting to change nonblocking state to " << _nonblocking << ".\n";
2474  }
2475  reset_to_new();
2476  }
2477 
2478  reset_url(_request.get_url(), url.get_url());
2479  _request = url;
2480  _document_spec = DocumentSpec();
2481  _method = method;
2482  _body = body;
2483 
2484  // An https-style request means we'll need to establish an SSL connection.
2485  _want_ssl = _request.get_url().is_ssl();
2486 
2487  _first_byte_requested = first_byte;
2488  _last_byte_requested = last_byte;
2489  _connect_count = 0;
2490 
2491  reconsider_proxy();
2492 
2493  // Reset from whatever previous request might still be pending.
2494  if (_request.get_url().get_scheme() == "file") {
2495  // A "file" URL just means we're reading a raw file. This only supports
2496  // actual disk files, not the VFS, because we use a BIO_new_file()
2497  // underneath this.
2498  reset_to_new();
2499  _bio = new BioPtr(_request.get_url());
2500  if (_bio->get_bio() != nullptr) {
2501  // Successfully opened the file.
2502  _source = new BioStreamPtr(new BioStream(_bio));
2503  _status_entry._status_code = 200;
2504  _state = S_start_direct_file_read;
2505 
2506  // Get the file size.
2507  FILE *fp = nullptr;
2508  BIO_get_fp(_bio->get_bio(), &fp);
2509  if (fp != nullptr) {
2510  if (fseek(fp, 0, SEEK_END) == 0) {
2511  _file_size = ftell(fp);
2512  _got_file_size = true;
2513  fseek(fp, 0, SEEK_SET);
2514  }
2515  }
2516 
2517  } else {
2518  // Couldn't read the file.
2519  OpenSSLWrapper::get_global_ptr()->notify_ssl_errors();
2520  _status_entry._status_code = SC_no_connection;
2521  _state = S_failure;
2522  }
2523 
2524  } else {
2525  // We're reading a normal network URL.
2526  if (_state == S_failure || (_state < S_read_header && _state != S_ready)) {
2527  if (downloader_cat.is_debug()) {
2528  downloader_cat.debug()
2529  << _NOTIFY_HTTP_CHANNEL_ID
2530  << "resetting to clear previous request.\n";
2531  }
2532  reset_to_new();
2533 
2534  } else if (TrueClock::get_global_ptr()->get_short_time() - _last_run_time >= _idle_timeout) {
2535  if (downloader_cat.is_debug()) {
2536  downloader_cat.debug()
2537  << _NOTIFY_HTTP_CHANNEL_ID
2538  << "resetting old connection: "
2539  << TrueClock::get_global_ptr()->get_short_time() - _last_run_time
2540  << " s old.\n";
2541  }
2542  reset_to_new();
2543 
2544  } else if (_state == S_read_header) {
2545  // Roll one step forwards to start skipping past the previous body.
2546  _state = S_begin_body;
2547  }
2548  }
2549 
2550  if (_method == HTTPEnum::M_connect) {
2551  _done_state = S_ready;
2552  } else {
2553  _done_state = S_read_header;
2554  }
2555 }
2556 
2557 /**
2558  * Reevaluates the flags and strings that are computed based on the particular
2559  * proxy we are attempting to connect to. This should be called when we
2560  * initiate a request, and also whenever we change proxies while processing a
2561  * request.
2562  */
2563 void HTTPChannel::
2564 reconsider_proxy() {
2565  _proxy_tunnel_now = false;
2566  _proxy_serves_document = false;
2567 
2568  if (!_proxy.empty()) {
2569  // If the user insists we always tunnel through a proxy, or if we're
2570  // opening an SSL connection, or the user has explicitly asked for a
2571  // direct connection of some kind, or if we have a SOCKS-style proxy; each
2572  // of these demands a tunnel through the proxy to speak directly to the
2573  // http server.
2574  _proxy_tunnel_now =
2575  (get_proxy_tunnel() || _want_ssl ||
2576  _method == HTTPEnum::M_connect || _proxy.get_scheme() == "socks");
2577 
2578  // Otherwise (but we still have a proxy), then we ask the proxy to hand us
2579  // the document.
2580  _proxy_serves_document = !_proxy_tunnel_now;
2581  }
2582 
2583  make_header();
2584  make_request_text();
2585 
2586  if (_proxy_tunnel_now) {
2587  // Maybe we need to tunnel through the proxy to connect to the server
2588  // directly.
2589  ostringstream request;
2590  request
2591  << "CONNECT " << _request.get_url().get_server_and_port()
2592  << " " << _client->get_http_version_string() << "\r\n";
2593  if (_client->get_http_version() >= HTTPEnum::HV_11) {
2594  request
2595  << "Host: " << _request.get_url().get_server_and_port() << "\r\n";
2596  }
2597  _proxy_header = request.str();
2598  make_proxy_request_text();
2599 
2600  } else {
2601  _proxy_header = string();
2602  _proxy_request_text = string();
2603  }
2604 }
2605 
2606 
2607 /**
2608  * Resets the internal state variables in preparation for beginning a new
2609  * request.
2610  */
2611 void HTTPChannel::
2612 reset_for_new_request() {
2613  if (downloader_cat.is_spam()) {
2614  downloader_cat.spam()
2615  << _NOTIFY_HTTP_CHANNEL_ID
2616  << "reset_for_new_request.\n";
2617  }
2618 
2619  reset_download_to();
2620  reset_body_stream();
2621 
2622  _last_status_code = 0;
2623  _status_entry = StatusEntry();
2624 
2625  _response_type = RT_none;
2626  _redirect_trail.clear();
2627  _bytes_downloaded = 0;
2628  _bytes_requested = 0;
2629 }
2630 
2631 /**
2632  * This is called by the body reading classes--ChunkedStreamBuf and
2633  * IdentityStreamBuf--when they have finished reading the body. It advances
2634  * the state appropriately.
2635  *
2636  * has_trailer should be set true if the body type has an associated trailer
2637  * which should be read or skipped, or false if there is no trailer.
2638  */
2639 void HTTPChannel::
2640 finished_body(bool has_trailer) {
2641  if (will_close_connection() && _download_dest == DD_none) {
2642  if (downloader_cat.is_debug()) {
2643  downloader_cat.debug()
2644  << _NOTIFY_HTTP_CHANNEL_ID
2645  << "resetting to finish body; server would close anyway.\n";
2646  }
2647  reset_to_new();
2648 
2649  } else {
2650  if (has_trailer) {
2651  _state = HTTPChannel::S_read_body;
2652  } else {
2653  _state = HTTPChannel::S_read_trailer;
2654  }
2655  }
2656 }
2657 
2658 /**
2659  * If a download has been requested, opens the file on disk (or prepares the
2660  * RamFile or stream) and seeks within it to the appropriate
2661  * _first_byte_delivered position, so that downloaded bytes will be written to
2662  * the appropriate point within the file. Returns true if the starting
2663  * position is valid, false otherwise (in which case the state is set to
2664  * S_failure).
2665  */
2666 bool HTTPChannel::
2667 open_download_file() {
2668  _subdocument_resumes = (_subdocument_resumes && _first_byte_delivered != 0);
2669 
2670  if (_download_dest == DD_file) {
2672  _download_to_stream = vfs->open_write_file(_download_to_filename, false, !_subdocument_resumes);
2673  if (_download_to_stream == nullptr) {
2674  downloader_cat.info()
2675  << _NOTIFY_HTTP_CHANNEL_ID
2676  << "Could not open " << _download_to_filename << " for writing.\n";
2677  _status_entry._status_code = SC_download_open_error;
2678  _state = S_failure;
2679  return false;
2680  }
2681  }
2682 
2683  if (_subdocument_resumes) {
2684  if (_download_dest == DD_file) {
2685  // Windows doesn't complain if you try to seek past the end of file--it
2686  // happily appends enough zero bytes to make the difference. Blecch.
2687  // That means we need to get the file size first to check it ourselves.
2688  _download_to_stream->seekp(0, std::ios::end);
2689  if (_first_byte_delivered > (size_t)_download_to_stream->tellp()) {
2690  downloader_cat.info()
2691  << _NOTIFY_HTTP_CHANNEL_ID
2692  << "Invalid starting position of byte " << _first_byte_delivered
2693  << " within " << _download_to_filename << " (which has "
2694  << _download_to_stream->tellp() << " bytes)\n";
2695  close_download_stream();
2696  _status_entry._status_code = SC_download_invalid_range;
2697  _state = S_failure;
2698  return false;
2699  }
2700 
2701  _download_to_stream->seekp(_first_byte_delivered);
2702 
2703  } else if (_download_dest == DD_ram) {
2704  if (_first_byte_delivered > _download_to_ramfile->_data.length()) {
2705  downloader_cat.info()
2706  << _NOTIFY_HTTP_CHANNEL_ID
2707  << "Invalid starting position of byte " << _first_byte_delivered
2708  << " within Ramfile (which has "
2709  << _download_to_ramfile->_data.length() << " bytes)\n";
2710  close_download_stream();
2711  _status_entry._status_code = SC_download_invalid_range;
2712  _state = S_failure;
2713  return false;
2714  }
2715 
2716  if (_first_byte_delivered == 0) {
2717  _download_to_ramfile->_data = string();
2718  } else {
2719  _download_to_ramfile->_data =
2720  _download_to_ramfile->_data.substr(0, _first_byte_delivered);
2721  }
2722  } else if (_download_dest == DD_stream) {
2723  // Windows doesn't complain if you try to seek past the end of file--it
2724  // happily appends enough zero bytes to make the difference. Blecch.
2725  // That means we need to get the file size first to check it ourselves.
2726  _download_to_stream->seekp(0, std::ios::end);
2727  if (_first_byte_delivered > (size_t)_download_to_stream->tellp()) {
2728  downloader_cat.info()
2729  << _NOTIFY_HTTP_CHANNEL_ID
2730  << "Invalid starting position of byte " << _first_byte_delivered
2731  << " within stream (which has "
2732  << _download_to_stream->tellp() << " bytes)\n";
2733  close_download_stream();
2734  _status_entry._status_code = SC_download_invalid_range;
2735  _state = S_failure;
2736  return false;
2737  }
2738 
2739  _download_to_stream->seekp(_first_byte_delivered);
2740  }
2741 
2742  } else {
2743  // If _subdocument_resumes is false, we should be sure to reset to the
2744  // beginning of the file, regardless of the value of
2745  // _first_byte_delivered.
2746  if (_download_dest == DD_file || _download_dest == DD_stream) {
2747  _download_to_stream->seekp(0);
2748  } else if (_download_dest == DD_ram) {
2749  _download_to_ramfile->_data = string();
2750  }
2751  }
2752 
2753  return true;
2754 }
2755 
2756 
2757 /**
2758  * Reads a single line from the server's reply. Returns true if the line is
2759  * successfully retrieved, or false if a complete line has not yet been
2760  * received or if the connection has been closed.
2761  */
2762 bool HTTPChannel::
2763 server_getline(string &str) {
2764  nassertr(!_source.is_null(), false);
2765  int ch = (*_source)->get();
2766  while (ch != EOF && !(*_source)->fail()) {
2767  switch (ch) {
2768  case '\n':
2769  // end-of-line character, we're done.
2770  str = _working_get;
2771  _working_get = string();
2772  {
2773  // Trim trailing whitespace. We're not required to do this per the
2774  // HTTP spec, but let's be generous.
2775  size_t p = str.length();
2776  while (p > 0 && isspace(str[p - 1])) {
2777  --p;
2778  }
2779  str = str.substr(0, p);
2780  }
2781  if (downloader_cat.is_spam()) {
2782  downloader_cat.spam()
2783  << _NOTIFY_HTTP_CHANNEL_ID
2784  << "recv: " << str << "\n";
2785  }
2786  return true;
2787 
2788  case '\r':
2789  // Ignore CR characters.
2790  break;
2791 
2792  default:
2793  _working_get += (char)ch;
2794  }
2795  ch = (*_source)->get();
2796  }
2797 
2798  check_socket();
2799  return false;
2800 }
2801 
2802 /**
2803  * Reads a line from the server's reply. If the server disconnects or times
2804  * out before sending a reply, moves on to the next proxy server (or sets
2805  * failure mode) and returns false; otherwise, returns true.
2806  */
2807 bool HTTPChannel::
2808 server_getline_failsafe(string &str) {
2809  if (!server_getline(str)) {
2810  if (_bio.is_null()) {
2811  // Huh, the server hung up on us as soon as we tried to connect.
2812  if (_response_type == RT_hangup) {
2813  // This was our second immediate hangup in a row. Give up.
2814  _status_entry._status_code = SC_lost_connection;
2815  _state = S_try_next_proxy;
2816 
2817  } else {
2818  // Try again, once.
2819  _response_type = RT_hangup;
2820  }
2821 
2822  } else {
2823  double elapsed =
2824  TrueClock::get_global_ptr()->get_short_time() -
2825  _sent_request_time;
2826  if (elapsed > get_http_timeout()) {
2827  // Time to give up.
2828  downloader_cat.info()
2829  << _NOTIFY_HTTP_CHANNEL_ID
2830  << "Timeout waiting for "
2831  << _request.get_url().get_server_and_port()
2832  << " in server_getline_failsafe (" << elapsed
2833  << " seconds elapsed).\n";
2834  _status_entry._status_code = SC_timeout;
2835  _state = S_try_next_proxy;
2836  }
2837  }
2838 
2839  return false;
2840  }
2841  return true;
2842 }
2843 
2844 /**
2845  * Reads a fixed number of bytes from the server's reply. Returns true if the
2846  * indicated number of bytes are successfully retrieved, or false if the
2847  * complete set has not yet been received or if the connection has been
2848  * closed.
2849  */
2850 bool HTTPChannel::
2851 server_get(string &str, size_t num_bytes) {
2852  nassertr(!_source.is_null(), false);
2853  int ch = (*_source)->get();
2854  while (ch != EOF && !(*_source)->fail()) {
2855  _working_get += (char)ch;
2856  if (_working_get.length() >= num_bytes) {
2857  str = _working_get;
2858  _working_get = string();
2859  return true;
2860  }
2861 
2862  ch = (*_source)->get();
2863  }
2864 
2865  check_socket();
2866  return false;
2867 }
2868 
2869 /**
2870  * Reads a fixed number of bytes from the server. If the server disconnects
2871  * or times out before sending a reply, moves on to the next proxy server (or
2872  * sets failure mode) and returns false; otherwise, returns true.
2873  */
2874 bool HTTPChannel::
2875 server_get_failsafe(string &str, size_t num_bytes) {
2876  if (!server_get(str, num_bytes)) {
2877  if (_bio.is_null()) {
2878  // Huh, the server hung up on us as soon as we tried to connect.
2879  if (_response_type == RT_hangup) {
2880  // This was our second immediate hangup in a row. Give up.
2881  _status_entry._status_code = SC_lost_connection;
2882  _state = S_try_next_proxy;
2883 
2884  } else {
2885  // Try again, once.
2886  _response_type = RT_hangup;
2887  }
2888 
2889  } else {
2890  double elapsed =
2891  TrueClock::get_global_ptr()->get_short_time() -
2892  _sent_request_time;
2893  if (elapsed > get_http_timeout()) {
2894  // Time to give up.
2895  downloader_cat.info()
2896  << _NOTIFY_HTTP_CHANNEL_ID
2897  << "Timeout waiting for "
2898  << _request.get_url().get_server_and_port()
2899  << " in server_get_failsafe (" << elapsed
2900  << " seconds elapsed).\n";
2901  _status_entry._status_code = SC_timeout;
2902  _state = S_try_next_proxy;
2903  }
2904  }
2905 
2906  return false;
2907  }
2908  return true;
2909 }
2910 
2911 /**
2912  * Sends a series of lines to the server. Returns true if the buffer is fully
2913  * sent, or false if some of it remains. If this returns false, the function
2914  * must be called again later, passing in the exact same string, until the
2915  * return value is true.
2916  *
2917  * If the secret flag is true, the data is not echoed to the log (even in spam
2918  * mode). This may be desirable if the data may contain binary data, or if it
2919  * may contain passwords etc.
2920  */
2921 bool HTTPChannel::
2922 server_send(const string &str, bool secret) {
2923  nassertr(str.length() > _sent_so_far, true);
2924 
2925  // Use the underlying BIO to write to the server, instead of the BIOStream,
2926  // which would insist on blocking (and might furthermore delay the send due
2927  // to collect-tcp mode being enabled).
2928  size_t bytes_to_send = str.length() - _sent_so_far;
2929  int write_count =
2930  BIO_write(*_bio, str.data() + _sent_so_far, bytes_to_send);
2931 
2932  if (write_count <= 0) {
2933  if (BIO_should_retry(*_bio)) {
2934  // Temporary failure: the pipe is full. Wait till later.
2935  return false;
2936  }
2937  // Oops, the connection has been closed!
2938  if (downloader_cat.is_debug()) {
2939  downloader_cat.debug()
2940  << _NOTIFY_HTTP_CHANNEL_ID
2941  << "Lost connection to server unexpectedly during write.\n";
2942  }
2943  reset_to_new();
2944  return false;
2945  }
2946 
2947  if (downloader_cat.is_spam()) {
2948  downloader_cat.spam()
2949  << _NOTIFY_HTTP_CHANNEL_ID
2950  << "wrote " << write_count << " bytes to " << _bio << "\n";
2951  }
2952 
2953 #ifndef NDEBUG
2954  if (!secret && downloader_cat.is_spam()) {
2955  show_send(str.substr(0, write_count));
2956  }
2957 #endif
2958 
2959  if (write_count < (int)bytes_to_send) {
2960  _sent_so_far += write_count;
2961  return false;
2962  }
2963 
2964  // Buffer completely sent.
2965  _sent_so_far = 0;
2966  return true;
2967 }
2968 
2969 /**
2970  * Parses the first line sent back from an HTTP server or proxy and stores the
2971  * result in _status_code and _http_version, etc. Returns true on success,
2972  * false on invalid response.
2973  */
2974 bool HTTPChannel::
2975 parse_http_response(const string &line) {
2976  // The first line back should include the HTTP version and the result code.
2977  if (line.length() < 5 || line.substr(0, 5) != string("HTTP/")) {
2978  // Not an HTTP response.
2979  _status_entry._status_code = SC_non_http_response;
2980  if (_response_type == RT_non_http) {
2981  // This was our second non-HTTP response in a row. Give up.
2982  _state = S_try_next_proxy;
2983 
2984  } else {
2985  // Maybe we were just in some bad state. Drop the connection and try
2986  // again, once.
2987  if (downloader_cat.is_debug()) {
2988  downloader_cat.debug()
2989  << _NOTIFY_HTTP_CHANNEL_ID
2990  << "got non-HTTP response, resetting.\n";
2991  }
2992  reset_to_new();
2993  _response_type = RT_non_http;
2994  }
2995  return false;
2996  }
2997 
2998  // Split out the first line into its three components.
2999  size_t p = 5;
3000  while (p < line.length() && !isspace(line[p])) {
3001  p++;
3002  }
3003  _http_version_string = line.substr(0, p);
3004  _http_version = HTTPClient::parse_http_version_string(_http_version_string);
3005 
3006  while (p < line.length() && isspace(line[p])) {
3007  p++;
3008  }
3009  size_t q = p;
3010  while (q < line.length() && !isspace(line[q])) {
3011  q++;
3012  }
3013  string status_code = line.substr(p, q - p);
3014  _status_entry._status_code = atoi(status_code.c_str());
3015 
3016  while (q < line.length() && isspace(line[q])) {
3017  q++;
3018  }
3019  _status_entry._status_string = line.substr(q, line.length() - q);
3020 
3021  return true;
3022 }
3023 
3024 /**
3025  * Reads the series of header lines from the server and stores them in
3026  * _headers. Returns true if there is more to read, false when done.
3027  */
3028 bool HTTPChannel::
3029 parse_http_header() {
3030  string line;
3031  if (!server_getline(line)) {
3032  return true;
3033  }
3034 
3035  while (!line.empty()) {
3036  if (isspace(line[0])) {
3037  // If the line begins with a space, that continues the previous field.
3038  size_t p = 0;
3039  while (p < line.length() && isspace(line[p])) {
3040  p++;
3041  }
3042  _current_field_value += line.substr(p - 1);
3043 
3044  } else {
3045  // If the line does not begin with a space, that defines a new field.
3046  if (!_current_field_name.empty()) {
3047  store_header_field(_current_field_name, _current_field_value);
3048  _current_field_value = string();
3049  }
3050 
3051  size_t colon = line.find(':');
3052  if (colon != string::npos) {
3053  _current_field_name = downcase(line.substr(0, colon));
3054  size_t p = colon + 1;
3055  while (p < line.length() && isspace(line[p])) {
3056  p++;
3057  }
3058  _current_field_value = line.substr(p);
3059  }
3060  }
3061 
3062  if (!server_getline(line)) {
3063  return true;
3064  }
3065  }
3066 
3067  // After reading an empty line, we're done with the headers.
3068  if (!_current_field_name.empty()) {
3069  store_header_field(_current_field_name, _current_field_value);
3070  _current_field_value = string();
3071  }
3072 
3073  return false;
3074 }
3075 
3076 /**
3077  * Interprets the "Content-Range" header in the reply, and fills in
3078  * _first_byte_delivered and _last_byte_delivered appropriately if the header
3079  * response can be understood.
3080  */
3081 bool HTTPChannel::
3082 parse_content_range(const string &content_range) {
3083  // First, get the units indication.
3084  size_t p = 0;
3085  while (p < content_range.length() && !isspace(content_range[p])) {
3086  p++;
3087  }
3088 
3089  string units = content_range.substr(0, p);
3090  while (p < content_range.length() && isspace(content_range[p])) {
3091  p++;
3092  }
3093 
3094  if (units == "bytes") {
3095  const char *c_str = content_range.c_str();
3096  char *endptr;
3097  if (p < content_range.length() && isdigit(content_range[p])) {
3098  long first_byte = strtol(c_str + p, &endptr, 10);
3099  p = endptr - c_str;
3100  if (p < content_range.length() && content_range[p] == '-') {
3101  p++;
3102  if (p < content_range.length() && isdigit(content_range[p])) {
3103  long last_byte = strtol(c_str + p, &endptr, 10);
3104  p = endptr - c_str;
3105 
3106  if (last_byte >= first_byte) {
3107  _first_byte_delivered = first_byte;
3108  _last_byte_delivered = last_byte;
3109  return true;
3110  }
3111  }
3112  }
3113  }
3114  }
3115 
3116  // Invalid or unhandled response.
3117  return false;
3118 }
3119 
3120 
3121 /**
3122  * Checks whether the connection to the server has been closed after a failed
3123  * read. If it has, issues a warning and calls reset_to_new().
3124  */
3125 void HTTPChannel::
3126 check_socket() {
3127  nassertv(!_source.is_null());
3128  if ((*_source)->is_closed()) {
3129  if (downloader_cat.is_debug()) {
3130  downloader_cat.debug()
3131  << _NOTIFY_HTTP_CHANNEL_ID
3132  << "Lost connection to server unexpectedly during read.\n";
3133  }
3134  reset_to_new();
3135  }
3136 }
3137 
3138 /*
3139  Certificate verify error codes:
3140 
3141 0 X509_V_OK: ok
3142 
3143  the operation was successful.
3144 
3145 2 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT: unable to get issuer certificate
3146 
3147  the issuer certificate could not be found: this occurs if the
3148  issuer certificate of an untrusted certificate cannot be found.
3149 
3150 3 X509_V_ERR_UNABLE_TO_GET_CRL: unable to get certificate CRL
3151 
3152  the CRL of a certificate could not be found. Unused.
3153 
3154 4 X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE: unable to decrypt
3155 certificate's signature
3156 
3157  the certificate signature could not be decrypted. This means that
3158  the actual signature value could not be determined rather than it
3159  not matching the expected value, this is only meaningful for RSA
3160  keys.
3161 
3162 5 X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE: unable to decrypt CRL's signature
3163 
3164  the CRL signature could not be decrypted: this means that the
3165  actual signature value could not be determined rather than it not
3166  matching the expected value. Unused.
3167 
3168 6 X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY: unable to decode
3169 issuer public key
3170 
3171  the public key in the certificate SubjectPublicKeyInfo could not
3172  be read.
3173 
3174 7 X509_V_ERR_CERT_SIGNATURE_FAILURE: certificate signature failure
3175 
3176  the signature of the certificate is invalid.
3177 
3178 8 X509_V_ERR_CRL_SIGNATURE_FAILURE: CRL signature failure
3179 
3180  the signature of the CRL is invalid. Unused.
3181 
3182 9 X509_V_ERR_CERT_NOT_YET_VALID: certificate is not yet valid
3183 
3184  the certificate is not yet valid: the notBefore date is after the
3185  current time.
3186 
3187 10 X509_V_ERR_CERT_HAS_EXPIRED: certificate has expired
3188 
3189  the certificate has expired: that is the notAfter date is before
3190  the current time.
3191 
3192 11 X509_V_ERR_CRL_NOT_YET_VALID: CRL is not yet valid
3193 
3194  the CRL is not yet valid. Unused.
3195 
3196 12 X509_V_ERR_CRL_HAS_EXPIRED: CRL has expired
3197 
3198  the CRL has expired. Unused.
3199 
3200 13 X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD: format error in
3201 certificate's notBefore field
3202 
3203  the certificate notBefore field contains an invalid time.
3204 
3205 14 X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD: format error in
3206 certificate's notAfter field
3207 
3208  the certificate notAfter field contains an invalid time.
3209 
3210 15 X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD: format error in CRL's
3211 lastUpdate field
3212 
3213  the CRL lastUpdate field contains an invalid time. Unused.
3214 
3215 16 X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD: format error in CRL's
3216 nextUpdate field
3217 
3218  the CRL nextUpdate field contains an invalid time. Unused.
3219 
3220 17 X509_V_ERR_OUT_OF_MEM: out of memory
3221 
3222  an error occurred trying to allocate memory. This should never
3223  happen.
3224 
3225 18 X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT: self signed certificate
3226 
3227  the passed certificate is self signed and the same certificate
3228  cannot be found in the list of trusted certificates.
3229 
3230 19 X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN: self signed certificate in
3231 certificate chain
3232 
3233  the certificate chain could be built up using the untrusted
3234  certificates but the root could not be found locally.
3235 
3236 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY: unable to get local
3237 issuer certificate
3238 
3239  the issuer certificate of a locally looked up certificate could
3240  not be found. This normally means the list of trusted certificates
3241  is not complete.
3242 
3243 21 X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE: unable to verify the
3244 first certificate
3245 
3246  no signatures could be verified because the chain contains only
3247  one certificate and it is not self signed.
3248 
3249 22 X509_V_ERR_CERT_CHAIN_TOO_LONG: certificate chain too long
3250 
3251  the certificate chain length is greater than the supplied maximum
3252  depth. Unused.
3253 
3254 23 X509_V_ERR_CERT_REVOKED: certificate revoked
3255 
3256  the certificate has been revoked. Unused.
3257 
3258 24 X509_V_ERR_INVALID_CA: invalid CA certificate
3259 
3260  a CA certificate is invalid. Either it is not a CA or its
3261  extensions are not consistent with the supplied purpose.
3262 
3263 25 X509_V_ERR_PATH_LENGTH_EXCEEDED: path length constraint exceeded
3264 
3265  the basicConstraints pathlength parameter has been exceeded.
3266 
3267 26 X509_V_ERR_INVALID_PURPOSE: unsupported certificate purpose
3268 
3269  the supplied certificate cannot be used for the specified purpose.
3270 
3271 27 X509_V_ERR_CERT_UNTRUSTED: certificate not trusted
3272 
3273  the root CA is not marked as trusted for the specified purpose.
3274 
3275 28 X509_V_ERR_CERT_REJECTED: certificate rejected
3276 
3277  the root CA is marked to reject the specified purpose.
3278 
3279 29 X509_V_ERR_SUBJECT_ISSUER_MISMATCH: subject issuer mismatch
3280 
3281  the current candidate issuer certificate was rejected because its
3282  subject name did not match the issuer name of the current
3283  certificate. Only displayed when the -issuer_checks option is set.
3284 
3285 30 X509_V_ERR_AKID_SKID_MISMATCH: authority and subject key identifier
3286 mismatch
3287 
3288  the current candidate issuer certificate was rejected because its
3289  subject key identifier was present and did not match the authority
3290  key identifier current certificate. Only displayed when the
3291  -issuer_checks option is set.
3292 
3293 31 X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH: authority and issuer serial
3294 number mismatch
3295 
3296  the current candidate issuer certificate was rejected because its
3297  issuer name and serial number was present and did not match the
3298  authority key identifier of the current certificate. Only
3299  displayed when the -issuer_checks option is set.
3300 
3301 32 X509_V_ERR_KEYUSAGE_NO_CERTSIGN: key usage does not include
3302 certificate signing
3303 
3304  the current candidate issuer certificate was rejected because its
3305  keyUsage extension does not permit certificate signing.
3306 
3307 50 X509_V_ERR_APPLICATION_VERIFICATION: application verification failure
3308 
3309  an application specific error. Unused.
3310 
3311 */
3312 
3313 /**
3314  * Checks to see if the indicated certificate is on the pre-approved list for
3315  * the current server.
3316  *
3317  * If the full cert itself (including its key) is on the pre-approved list,
3318  * sets both cert_preapproved and cert_name_preapproved to true.
3319  *
3320  * If the full cert is not on the pre-approved list, but its name matches a
3321  * name on the pre-approved list, sets cert_name_preapproved to true, and
3322  * cert_preapproved to false.
3323  *
3324  * Otherwise, sets both values to false. This doesn't mean the cert is
3325  * necessarily invalid, just that it wasn't on the pre-approved list (which is
3326  * usually empty anyway).
3327  */
3328 void HTTPChannel::
3329 check_preapproved_server_certificate(X509 *cert, bool &cert_preapproved,
3330  bool &cert_name_preapproved) const {
3331  return _client->check_preapproved_server_certificate(_request.get_url(),
3332  cert, cert_preapproved,
3333  cert_name_preapproved);
3334 }
3335 
3336 /**
3337  * Returns true if the name in the cert matches the hostname of the server,
3338  * false otherwise.
3339  */
3340 bool HTTPChannel::
3341 validate_server_name(X509 *cert) {
3342  string hostname = _request.get_url().get_server();
3343 
3344  vector_string cert_names;
3345 
3346  // According to RFC 2818, we should check the DNS name(s) in the
3347  // subjectAltName extension first, if that extension exists.
3348  STACK_OF(GENERAL_NAME) *subject_alt_names =
3349  (STACK_OF(GENERAL_NAME) *)X509_get_ext_d2i(cert, NID_subject_alt_name, nullptr, nullptr);
3350  if (subject_alt_names != nullptr) {
3351  int num_alts = sk_GENERAL_NAME_num(subject_alt_names);
3352  for (int i = 0; i < num_alts; ++i) {
3353  // Get the ith alt name.
3354  const GENERAL_NAME *alt_name =
3355  sk_GENERAL_NAME_value(subject_alt_names, i);
3356 
3357  if (alt_name->type == GEN_DNS) {
3358  char *buffer = nullptr;
3359  int len = ASN1_STRING_to_UTF8((unsigned char**)&buffer,
3360  alt_name->d.ia5);
3361  if (len > 0) {
3362  cert_names.push_back(string(buffer, len));
3363  }
3364  if (buffer != nullptr) {
3365  OPENSSL_free(buffer);
3366  }
3367  }
3368  }
3369  }
3370 
3371  if (cert_names.empty()) {
3372  // If there were no DNS names, use the common name instead.
3373 
3374  X509_NAME *xname = X509_get_subject_name(cert);
3375  if (xname != nullptr) {
3376  string common_name = get_x509_name_component(xname, NID_commonName);
3377  cert_names.push_back(common_name);
3378  }
3379  }
3380 
3381  if (cert_names.empty()) {
3382  downloader_cat.info()
3383  << _NOTIFY_HTTP_CHANNEL_ID
3384  << "Server certificate from " << hostname
3385  << " provides no name.\n";
3386  return false;
3387  }
3388 
3389  if (downloader_cat.is_debug()) {
3390  downloader_cat.debug()
3391  << _NOTIFY_HTTP_CHANNEL_ID
3392  << "Server certificate from " << hostname
3393  << " provides name(s):";
3394  vector_string::const_iterator si;
3395  for (si = cert_names.begin(); si != cert_names.end(); ++si) {
3396  const string &cert_name = (*si);
3397  downloader_cat.debug(false)
3398  << " " << cert_name;
3399  }
3400  downloader_cat.debug(false)
3401  << "\n";
3402  }
3403 
3404  // Now validate the names we found. If any of them matches, the cert
3405  // matches.
3406  vector_string::const_iterator si;
3407  for (si = cert_names.begin(); si != cert_names.end(); ++si) {
3408  const string &cert_name = (*si);
3409 
3410  if (match_cert_name(cert_name, hostname)) {
3411  return true;
3412  }
3413  }
3414 
3415  downloader_cat.info()
3416  << _NOTIFY_HTTP_CHANNEL_ID
3417  << "Server certificate from " << hostname
3418  << " provides wrong name(s):";
3419  for (si = cert_names.begin(); si != cert_names.end(); ++si) {
3420  const string &cert_name = (*si);
3421  downloader_cat.info(false)
3422  << " " << cert_name;
3423  }
3424  downloader_cat.info(false)
3425  << "\n";
3426 
3427  return false;
3428 }
3429 
3430 /**
3431  * Returns true if this particular name from the certificate matches the
3432  * indicated hostname, false otherwise.
3433  */
3434 bool HTTPChannel::
3435 match_cert_name(const string &cert_name, const string &hostname) {
3436  // We use GlobPattern to match the name. This isn't quite consistent with
3437  // RFC2818, since it also accepts additional wildcard characters like "?"
3438  // and "[]", but I think it's close enough.
3439 
3440  GlobPattern pattern(cert_name);
3441  pattern.set_case_sensitive(false);
3442  pattern.set_nomatch_chars(".");
3443  return pattern.matches(hostname);
3444 }
3445 
3446 /**
3447  * Returns the indicated component of the X509 name as a string, if defined,
3448  * or empty string if it is not.
3449  */
3450 string HTTPChannel::
3451 get_x509_name_component(X509_NAME *name, int nid) {
3452  ASN1_OBJECT *obj = OBJ_nid2obj(nid);
3453 
3454  if (obj == nullptr) {
3455  // Unknown nid. See opensslobjects.h.
3456  return string();
3457  }
3458 
3459  int i = X509_NAME_get_index_by_OBJ(name, obj, -1);
3460  if (i < 0) {
3461  return string();
3462  }
3463 
3464  ASN1_STRING *data = X509_NAME_ENTRY_get_data(X509_NAME_get_entry(name, i));
3465  return string((char *)data->data, data->length);
3466 }
3467 
3468 /**
3469  * Formats the appropriate GET or POST (or whatever) request to send to the
3470  * server, based on the current _method, _document_spec, _body, and _proxy
3471  * settings.
3472  */
3473 void HTTPChannel::
3474 make_header() {
3475  _proxy_auth = _client->select_auth(_proxy, true, _proxy_realm);
3476  _proxy_username = string();
3477  if (_proxy_auth != nullptr) {
3478  _proxy_realm = _proxy_auth->get_realm();
3479  _proxy_username = _client->select_username(_proxy, true, _proxy_realm);
3480  }
3481 
3482  if (_method == HTTPEnum::M_connect) {
3483  // This method doesn't require an HTTP header at all; we'll just open a
3484  // plain connection. (Except when we're using a proxy; but in that case,
3485  // it's the proxy_header we'll need, not the regular HTTP header.)
3486  _header = string();
3487  return;
3488  }
3489 
3490  _www_auth = _client->select_auth(_request.get_url(), false, _www_realm);
3491  _www_username = string();
3492  if (_www_auth != nullptr) {
3493  _www_realm = _www_auth->get_realm();
3494  _www_username = _client->select_username(_request.get_url(), false, _www_realm);
3495  }
3496 
3497  string request_path;
3498  if (_proxy_serves_document) {
3499  // If we'll be asking the proxy for the document, we need its full URL--
3500  // but we omit the username, which is information just for us.
3501  URLSpec url_no_username = _request.get_url();
3502  url_no_username.set_username(string());
3503  request_path = url_no_username.get_url();
3504 
3505  } else {
3506  // If we'll be asking the server directly for the document, we just want
3507  // its path relative to the server.
3508  request_path = _request.get_url().get_path_and_query();
3509  }
3510 
3511  // HTTP syntax always requires something in the request path. If it is
3512  // empty, put in a star as a placeholder (OPTIONS, for instance, uses this).
3513  if (request_path.empty()) {
3514  request_path = "*";
3515  }
3516 
3517  ostringstream stream;
3518 
3519  stream
3520  << _method << " " << request_path << " "
3521  << _client->get_http_version_string() << "\r\n";
3522 
3523  if (_client->get_http_version() >= HTTPEnum::HV_11) {
3524 
3525  if (_request.get_url().has_port() && _request.get_url().is_default_port()) {
3526  // It appears that some servers (notably gstatic.com) might return a 404
3527  // if you include an explicit port number in with the Host: header, even
3528  // if it is the default port. So, don't include the port number unless
3529  // we need to.
3530  string server = _request.get_url().get_server();
3531  if (server.find(':') != string::npos) {
3532  stream << "Host: [" << server << "]";
3533  } else {
3534  stream << "Host: " << server;
3535  }
3536  } else {
3537  stream << "Host: " << _request.get_url().get_server_and_port();
3538  }
3539  stream << "\r\n";
3540  if (!get_persistent_connection()) {
3541  stream
3542  << "Connection: close\r\n";
3543  }
3544  }
3545 
3546  if (_last_byte_requested != 0) {
3547  stream
3548  << "Range: bytes=" << _first_byte_requested << "-"
3549  << _last_byte_requested << "\r\n";
3550 
3551  } else if (_first_byte_requested != 0) {
3552  stream
3553  << "Range: bytes=" << _first_byte_requested << "-\r\n";
3554  }
3555 
3556  switch (_request.get_request_mode()) {
3557  case DocumentSpec::RM_any:
3558  // No particular request; give us any document that matches the URL.
3559  if (_first_byte_requested != 0) {
3560  // Unless we're requesting a subrange, in which case if the exact
3561  // document matches, retrieve the subrange indicated; otherwise,
3562  // retrieve the entire document.
3563  if (_request.has_tag()) {
3564  stream
3565  << "If-Range: " << _request.get_tag().get_string() << "\r\n";
3566  } else if (_request.has_date()) {
3567  stream
3568  << "If-Range: " << _request.get_date().get_string() << "\r\n";
3569  }
3570  }
3571  break;
3572 
3573  case DocumentSpec::RM_equal:
3574  // Give us only this particular version of the document, or nothing.
3575  if (_request.has_tag()) {
3576  stream
3577  << "If-Match: " << _request.get_tag().get_string() << "\r\n";
3578  }
3579  if (_request.has_date()) {
3580  stream
3581  << "If-Unmodified-Since: " << _request.get_date().get_string()
3582  << "\r\n";
3583  }
3584  break;
3585 
3586  case DocumentSpec::RM_newer:
3587  // Give us anything newer than this document, or nothing.
3588  if (_request.has_tag()) {
3589  stream
3590  << "If-None-Match: " << _request.get_tag().get_string() << "\r\n";
3591  }
3592  if (_request.has_date()) {
3593  stream
3594  << "If-Modified-Since: " << _request.get_date().get_string()
3595  << "\r\n";
3596  }
3597  break;
3598 
3599  case DocumentSpec::RM_equal_or_newer:
3600  // Just don't give us anything older.
3601  if (_request.has_date()) {
3602  // This is a little unreliable: we ask for any document that's been
3603  // modified since one second before our last-modified-date. Who knows
3604  // whether the server will honor this properly.
3605  stream
3606  << "If-Modified-Since: " << (_request.get_date() - 1).get_string()
3607  << "\r\n";
3608  }
3609  break;
3610  }
3611 
3612  switch (_request.get_cache_control()) {
3613  case DocumentSpec::CC_allow_cache:
3614  // Normal, caching behavior.
3615  break;
3616 
3617  case DocumentSpec::CC_revalidate:
3618  // Request the server to revalidate its cache before returning it.
3619  stream
3620  << "Cache-Control: max-age=0\r\n";
3621  break;
3622 
3623  case DocumentSpec::CC_no_cache:
3624  // Request the server to get a fresh copy regardless of its cache.
3625  stream
3626  << "Cache-Control: no-cache\r\n"
3627  << "Pragma: no-cache\r\n";
3628  break;
3629  }
3630 
3631  _client->send_cookies(stream, _request.get_url());
3632 
3633  if (!_body.empty()) {
3634  stream
3635  << "Content-Type: " << _content_type << "\r\n"
3636  << "Content-Length: " << _body.length() << "\r\n";
3637  }
3638 
3639  _header = stream.str();
3640 }
3641 
3642 /**
3643  * Builds the _proxy_request_text string. This is a special request that will
3644  * be sent directly to the proxy prior to the request tailored for the server.
3645  * Generally this is used to open a tunnelling connection for https-over-
3646  * proxy.
3647  */
3648 void HTTPChannel::
3649 make_proxy_request_text() {
3650  _proxy_request_text = _proxy_header;
3651 
3652  if (_proxy_auth != nullptr && !_proxy_username.empty()) {
3653  _proxy_request_text += "Proxy-Authorization: ";
3654  _proxy_request_text +=
3655  _proxy_auth->generate(HTTPEnum::M_connect, _request.get_url().get_server_and_port(),
3656  _proxy_username, _body);
3657  _proxy_request_text += "\r\n";
3658  }
3659 
3660  _proxy_request_text += "\r\n";
3661 }
3662 
3663 /**
3664  * Builds the _request_text string. This is the specific request that will be
3665  * sent to the server this pass, based on the current header and body.
3666  */
3667 void HTTPChannel::
3668 make_request_text() {
3669  _request_text = _header;
3670 
3671  if (_proxy_serves_document &&
3672  _proxy_auth != nullptr && !_proxy_username.empty()) {
3673  _request_text += "Proxy-Authorization: ";
3674  _request_text +=
3675  _proxy_auth->generate(_method, _request.get_url().get_url(), _proxy_username, _body);
3676  _request_text += "\r\n";
3677  }
3678 
3679  if (_www_auth != nullptr && !_www_username.empty()) {
3680  string authorization =
3681  _request_text += "Authorization: ";
3682  _request_text +=
3683  _www_auth->generate(_method, _request.get_url().get_path_and_query(), _www_username, _body);
3684  _request_text += "\r\n";
3685  }
3686 
3687  _request_text += _send_extra_headers;
3688  _request_text += "\r\n";
3689  _request_text += _body;
3690 }
3691 
3692 /**
3693  * Redirects the next connection to the indicated URL (from the previous URL).
3694  * This resets the socket if necessary when we are about to switch servers.
3695  */
3696 void HTTPChannel::
3697 reset_url(const URLSpec &old_url, const URLSpec &new_url) {
3698  // If we change between http and https, we have to reset the connection
3699  // regardless of proxy. Otherwise, we have to drop the connection if the
3700  // server or port changes, unless we're communicating through a proxy.
3701 
3702  if (new_url.get_scheme() != old_url.get_scheme() ||
3703  (_proxy.empty() && (new_url.get_server() != old_url.get_server() ||
3704  new_url.get_port() != old_url.get_port()))) {
3705  if (downloader_cat.is_debug()) {
3706  downloader_cat.debug()
3707  << _NOTIFY_HTTP_CHANNEL_ID
3708  << "resetting for new server "
3709  << new_url.get_server_and_port() << "\n";
3710  }
3711  reset_to_new();
3712  }
3713 }
3714 
3715 /**
3716  * Stores a single name: value pair in the header list, or appends the value
3717  * to the end of the existing value, if the header has been repeated.
3718  */
3719 void HTTPChannel::
3720 store_header_field(const string &field_name, const string &field_value) {
3721  std::pair<Headers::iterator, bool> insert_result =
3722  _headers.insert(Headers::value_type(field_name, field_value));
3723 
3724  if (!insert_result.second) {
3725  // It didn't insert; thus, the field already existed. Append the new
3726  // value.
3727  Headers::iterator hi = insert_result.first;
3728  (*hi).second += ", ";
3729  (*hi).second += field_value;
3730  }
3731 
3732  if (field_name == "set-cookie") {
3733  _client->set_cookie(HTTPCookie(field_value, _request.get_url()));
3734  }
3735 }
3736 
3737 #ifndef NDEBUG
3738 /**
3739  * Writes the outgoing message, one line at a time, to the debugging log.
3740  */
3741 void HTTPChannel::
3742 show_send(const string &message) {
3743  size_t start = 0;
3744  size_t newline = message.find('\n', start);
3745  while (newline != string::npos) {
3746  // Assume every \n is preceded by a \r.
3747  downloader_cat.spam()
3748  << "send: " << message.substr(start, newline - start - 1) << "\n";
3749  start = newline + 1;
3750  newline = message.find('\n', start);
3751  }
3752 
3753  if (start < message.length()) {
3754  downloader_cat.spam()
3755  << "send: " << message.substr(start) << " (no newline)\n";
3756  }
3757 }
3758 #endif // NDEBUG
3759 
3760 /**
3761  * Resets the indication of how the document will be downloaded. This must be
3762  * re-specified after each get_document() (or related) call.
3763  */
3764 void HTTPChannel::
3765 reset_download_to() {
3766  _started_download = false;
3767  close_download_stream();
3768  _download_dest = DD_none;
3769 }
3770 
3771 /**
3772  * Ensures the file opened for receiving the download has been correctly
3773  * closed.
3774  */
3775 void HTTPChannel::
3776 close_download_stream() {
3777  if (_download_to_stream != nullptr) {
3778  _download_to_stream->flush();
3779  if (_download_dest == DD_file) {
3780  VirtualFileSystem::close_write_file(_download_to_stream);
3781  }
3782  }
3783  _download_to_ramfile = nullptr;
3784  _download_to_stream = nullptr;
3785 }
3786 
3787 
3788 /**
3789  * Closes the connection and resets the state to S_new.
3790  */
3791 void HTTPChannel::
3792 reset_to_new() {
3793  if (downloader_cat.is_spam()) {
3794  downloader_cat.spam()
3795  << _NOTIFY_HTTP_CHANNEL_ID
3796  << "reset_to_new.\n";
3797  }
3798 
3799  close_connection();
3800  _state = S_new;
3801 }
3802 
3803 /**
3804  * Clears the _body_stream pointer, if it is set.
3805  */
3806 void HTTPChannel::
3807 reset_body_stream() {
3808  if (_owns_body_stream) {
3809  if (_body_stream != nullptr) {
3810  close_read_body(_body_stream);
3811  nassertv(_body_stream == nullptr && !_owns_body_stream);
3812  }
3813  } else {
3814  _body_stream = nullptr;
3815  }
3816 }
3817 
3818 
3819 /**
3820  * Closes the connection but leaves the _state unchanged.
3821  */
3822 void HTTPChannel::
3823 close_connection() {
3824  reset_body_stream();
3825  _source.clear();
3826  _bio.clear();
3827  _working_get = string();
3828  _sent_so_far = 0;
3829  _read_index++;
3830 }
3831 
3832 /**
3833  * Returns true if status code a is a more useful value (that is, it
3834  * represents a more-nearly successfully connection attempt, or contains more
3835  * information) than b, or false otherwise.
3836  */
3837 bool HTTPChannel::
3838 more_useful_status_code(int a, int b) {
3839  if (a >= 100 && b >= 100) {
3840  // Both represent HTTP responses. Responses from a server (< 1000) are
3841  // better than those from a proxy; we take advantage of the fact that we
3842  // have already added 1000 to proxy responses. Except for 407, so let's
3843  // fix that now.
3844  if (a == 407) {
3845  a += 1000;
3846  }
3847  if (b == 407) {
3848  b += 1000;
3849  }
3850 
3851  // Now just check the series.
3852  int series_a = (a / 100);
3853  int series_b = (b / 100);
3854 
3855  // In general, a lower series is a closer success.
3856  return (series_a < series_b);
3857  }
3858 
3859  if (a < 100 && b < 100) {
3860  // Both represent non-HTTP responses. Here a larger number is better.
3861  return (a > b);
3862  }
3863 
3864  if (a < 100) {
3865  // a is a non-HTTP response, while b is an HTTP response. HTTP is
3866  // generally, better, unless we exceeded SC_http_error_watermark.
3867  return (a > SC_http_error_watermark);
3868  }
3869 
3870  // Exactly the opposite case as above.
3871  return (b < SC_http_error_watermark);
3872 }
3873 
3874 
3875 /**
3876  *
3877  */
3878 ostream &
3879 operator << (ostream &out, HTTPChannel::State state) {
3880 #ifdef NDEBUG
3881  return out << (int)state;
3882 #else
3883  switch (state) {
3884  case HTTPChannel::S_new:
3885  return out << "new";
3886 
3887  case HTTPChannel::S_try_next_proxy:
3888  return out << "try_next_proxy";
3889 
3890  case HTTPChannel::S_connecting:
3891  return out << "connecting";
3892 
3893  case HTTPChannel::S_connecting_wait:
3894  return out << "connecting_wait";
3895 
3896  case HTTPChannel::S_http_proxy_ready:
3897  return out << "http_proxy_ready";
3898 
3899  case HTTPChannel::S_http_proxy_request_sent:
3900  return out << "http_proxy_request_sent";
3901 
3902  case HTTPChannel::S_http_proxy_reading_header:
3903  return out << "http_proxy_reading_header";
3904 
3905  case HTTPChannel::S_socks_proxy_greet:
3906  return out << "socks_proxy_greet";
3907 
3908  case HTTPChannel::S_socks_proxy_greet_reply:
3909  return out << "socks_proxy_greet_reply";
3910 
3911  case HTTPChannel::S_socks_proxy_connect:
3912  return out << "socks_proxy_connect";
3913 
3914  case HTTPChannel::S_socks_proxy_connect_reply:
3915  return out << "socks_proxy_connect_reply";
3916 
3917  case HTTPChannel::S_setup_ssl:
3918  return out << "setup_ssl";
3919 
3920  case HTTPChannel::S_ssl_handshake:
3921  return out << "ssl_handshake";
3922 
3923  case HTTPChannel::S_ready:
3924  return out << "ready";
3925 
3926  case HTTPChannel::S_request_sent:
3927  return out << "request_sent";
3928 
3929  case HTTPChannel::S_reading_header:
3930  return out << "reading_header";
3931 
3932  case HTTPChannel::S_start_direct_file_read:
3933  return out << "start_direct_file_read";
3934 
3935  case HTTPChannel::S_read_header:
3936  return out << "read_header";
3937 
3938  case HTTPChannel::S_begin_body:
3939  return out << "begin_body";
3940 
3941  case HTTPChannel::S_reading_body:
3942  return out << "reading_body";
3943 
3944  case HTTPChannel::S_read_body:
3945  return out << "read_body";
3946 
3947  case HTTPChannel::S_read_trailer:
3948  return out << "read_trailer";
3949 
3950  case HTTPChannel::S_failure:
3951  return out << "failure";
3952  }
3953 
3954  return out << "invalid state(" << (int)state << ")";
3955 #endif // NDEBUG
3956 }
3957 
3958 #endif // HAVE_OPENSSL
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
This is a convenience class to specialize ConfigVariable as a floating- point type.
A descriptor that refers to a particular version of a document.
Definition: documentSpec.h:30
The name of a file, such as a texture file or an Egg file.
Definition: filename.h:39
void set_binary()
Indicates that the filename represents a binary file.
Definition: filename.I:414
bool is_local() const
Returns true if the filename is local, e.g.
Definition: filename.I:549
std::string get_dirname() const
Returns the directory part of the filename.
Definition: filename.I:358
This class can be used to test for string matches against standard Unix- shell filename globbing conv...
Definition: globPattern.h:32
A container for an HTTP-legal time/date indication.
Definition: httpDate.h:27
A container for an "entity tag" from an HTTP server.
Definition: httpEntityTag.h:24
An in-memory buffer specifically designed for downloading files to memory.
Definition: ramfile.h:25
static TrueClock * get_global_ptr()
Returns a pointer to the one TrueClock object in the world.
Definition: trueClock.I:68
TypeHandle is the identifier used to differentiate C++ class types.
Definition: typeHandle.h:81
A container for a URL, e.g.
Definition: urlSpec.h:28
const std::string & get_url() const
Returns the complete URL specification.
Definition: urlSpec.I:184
get_server
Returns the server name specified by the URL, if any.
Definition: urlSpec.h:96
get_path
Returns the path specified by the URL, or "/" if no path is specified.
Definition: urlSpec.h:99
get_server_and_port
Returns a string consisting of the server name, followed by a colon, followed by the port number.
Definition: urlSpec.h:98
get_scheme
Returns the scheme specified by the URL, or empty string if no scheme is specified.
Definition: urlSpec.h:93
get_port
Returns the port number specified by the URL, or the default port if not specified.
Definition: urlSpec.h:97
set_username
Replaces the username part of the URL specification.
Definition: urlSpec.h:95
get_authority
Returns the authority specified by the URL (this includes username, server, and/or port),...
Definition: urlSpec.h:94
A hierarchy of directories and files that appears to be one continuous file system,...
static void close_write_file(std::ostream *stream)
Closes a file opened by a previous call to open_write_file().
std::ostream * open_write_file(const Filename &filename, bool auto_wrap, bool truncate)
Convenience function; returns a newly allocated ostream if the file exists and can be written,...
static VirtualFileSystem * get_global_ptr()
Returns the default global VirtualFileSystem.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
string downcase(const string &s)
Returns the input string with all uppercase letters converted to lowercase.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.