Panda3D

autorestart.c

00001 /* Filename: autorestart.c
00002  * Created by:  drose (05Sep02)
00003  *
00004  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00005  *
00006  * PANDA 3D SOFTWARE
00007  * Copyright (c) Carnegie Mellon University.  All rights reserved.
00008  *
00009  * All use of this software is subject to the terms of the revised BSD
00010  * license.  You should have received a copy of this license along
00011  * with this source code in a file named "LICENSE."
00012  *
00013  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00014 
00015 #ifdef WITHIN_PANDA
00016 #include "dtoolbase.h"
00017 #endif
00018 
#include <getopt.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>  /* for INT_MIN, INT_MAX */
#include <string.h>  /* for strerror */
#include <unistd.h>
#include <sys/types.h>
#include <sys/time.h>    /* for gettimeofday, struct timeval */
#include <sys/select.h>  /* for select */
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <signal.h>
#include <stdlib.h>
#include <assert.h>
#include <pwd.h>
#include <grp.h>
00034 
00035 #ifdef HAVE_LIBCURL
00036 #include <curl/curl.h>
00037 #endif
00038 
/* Upper bound, in seconds, on how long the watchdog waits for the
   child to disappear after each kill signal it sends (first SIGTERM,
   then SIGKILL).  Only relevant when -W is given. */
#define MAX_WAITTERM_SEC 10

/* Size of the scratch buffers handed to strftime() for log
   timestamps. */
#define TIME_BUFFER_SIZE 128

/* ---- Command to supervise ------------------------------------- */

/* NULL-terminated argv of the program to run (set from the command
   line in main()). */
char **params = NULL;

/* ---- Values taken directly from command-line switches --------- */

char *logfile_name = NULL;       /* -l */
char *pidfile_name = NULL;       /* -p */
int dont_fork = 0;               /* -f */
int stop_always = 0;             /* -n */
int stop_on_terminate = 0;       /* -t */
char *respawn_script = NULL;     /* -s */
int respawn_count_time = 0;      /* -c */
char *startup_username = NULL;   /* -U */
char *startup_groupname = NULL;  /* -G */
char *startup_chdir = NULL;      /* -D */

/* -d: number of seconds to pause between restart attempts. */
int respawn_delay_time = 5;

/* -r count,secs,sleep: we shouldn't respawn more than
   (spam_respawn_count - 1) times over spam_respawn_time seconds.  If
   that happens and spam_restart_delay_time is nonzero we sleep for
   that many seconds instead of giving up. */
int spam_respawn_count = 5;
int spam_respawn_time = 60;
int spam_restart_delay_time = 600;

/* -W url,start,cycle,timeout */
char *watchdog_url = NULL;
int watchdog_start_sec = 0;
int watchdog_cycle_sec = 0;
int watchdog_timeout_sec = 0;

/* ---- Runtime state -------------------------------------------- */

/* Descriptor of the opened log file, or -1 when no -l was given. */
int logfile_fd = -1;

/* Pid of the currently running child, or 0 when there is none; the
   signal handlers consult this. */
pid_t child_pid = 0;

/* Not referenced by any code visible in this file. */
pid_t watchdog_pid = 0;
00077 
/* History of respawn events, kept as a singly linked list so the
   respawn frequency can be reported to the respawn script.  Each node
   stores the time of one respawn and a link to the next node. */
struct respawn_record_struct {
  time_t _time;
  struct respawn_record_struct *_next;
};
typedef struct respawn_record_struct respawn_record;

/* Head of the respawn history; NULL while nothing has been recorded. */
respawn_record *respawns = NULL;
00086 
00087 int
00088 record_respawn(time_t now) {
00089   /* Records the respawning event in the respawn_record, and returns
00090      the number of respawns in the last respawn_count_time
00091      interval. */
00092   respawn_record *rec;
00093   respawn_record *next;
00094   int count;
00095 
00096   if (respawn_count_time <= 0) {
00097     /* We're not tracking respawns if respawn_count_time is 0. */
00098     return 0;
00099   }
00100 
00101   rec = (respawn_record *)malloc(sizeof(respawn_record));
00102   rec->_time = now;
00103   rec->_next = respawns;
00104   respawns = rec;
00105 
00106   /* Now walk through the rest of the list and count up the number of
00107      respawn events until we reach a record more than
00108      respawn_count_time seconds old. */
00109   count = 0;
00110   while (rec->_next != NULL &&
00111          (now - rec->_time) <= respawn_count_time) {
00112     rec = rec->_next;
00113     count++;
00114   }
00115 
00116   /* The remaining respawn records get removed. */
00117   next = rec->_next;
00118   rec->_next = NULL;
00119   while (next != NULL) {
00120     rec = next;
00121     next = rec->_next;
00122     free(rec);
00123   }
00124 
00125   return count;
00126 }
00127 
00128 void
00129 invoke_respawn_script(time_t now) {
00130   char buffer[32];
00131   char *new_command;
00132   int new_command_length;
00133 
00134   /* The process is about to be respawned; run the script that we were
00135      given on the command line. */
00136   if (respawn_count_time <= 0) {
00137     /* We're not counting respawn times, so just run the script
00138        directly. */
00139     system(respawn_script);
00140 
00141   } else {
00142     /* We are counting respawn times, so append that information as a
00143        parameter to the command. */
00144     sprintf(buffer, " %d", record_respawn(now));
00145     new_command_length = strlen(respawn_script) + strlen(buffer);
00146     new_command = (char *)malloc(new_command_length + 1);
00147     strcpy(new_command, respawn_script);
00148     strcat(new_command, buffer);
00149     assert(strlen(new_command) == new_command_length);
00150 
00151     system(new_command);
00152 
00153     free(new_command);
00154   }
00155 }
00156 
/* Write callback handed to libcurl.  The downloaded body is of no
   interest (only the HTTP status matters), so the data is ignored and
   the full byte count, size * nmemb, is reported back as consumed. */
size_t
watchdog_bitbucket(void *ptr, size_t size, size_t nmemb, void *userdata) {
  const size_t consumed = size * nmemb;

  (void)ptr;
  (void)userdata;
  return consumed;
}
00163 
/* Waits up to timeout_ms milliseconds for a particular child to
   terminate, polling it with waitpid(WNOHANG).

   Returns the child's pid once it has been reaped (with *status_ptr
   filled in by waitpid), 0 if the timeout expires first, or -1 if
   waitpid fails (the error is also reported via perror).

   Elapsed time is measured with gettimeofday(), i.e. the wall clock;
   the arithmetic is done on the *difference* between two readings so
   that it cannot overflow an int the way an absolute millisecond
   timestamp would. */
pid_t
waitpid_timeout(pid_t child_pid, int *status_ptr, int timeout_ms) {
  /* Pause between two polls.  Long enough not to spin the CPU, short
     enough to be negligible against timeouts given in seconds. */
  enum { POLL_INTERVAL_USEC = 10000 };

  struct timeval start, now, nap;
  long long elapsed_ms;
  pid_t result;

  gettimeofday(&start, NULL);

  result = waitpid(child_pid, status_ptr, WNOHANG);
  while (result == 0) {
    gettimeofday(&now, NULL);
    elapsed_ms = (long long)(now.tv_sec - start.tv_sec) * 1000
               + (long long)(now.tv_usec - start.tv_usec) / 1000;

    if (elapsed_ms < 0) {
      /* The wall clock was stepped backwards; restart the measurement
         rather than waiting for the clock to catch up again. */
      start = now;
      elapsed_ms = 0;
    }

    if (elapsed_ms > timeout_ms) {
      /* Tired of waiting. */
      return 0;
    }

    /* Sleep briefly and check again.  select() may return early if a
       signal arrives; that merely causes an extra poll. */
    nap.tv_sec = 0;
    nap.tv_usec = POLL_INTERVAL_USEC;
    select(0, NULL, NULL, NULL, &nap);
    result = waitpid(child_pid, status_ptr, WNOHANG);
  }
  if (result == -1) {
    perror("waitpid");
  }

  return result;
}
00198 
00199 
/* Poll the requested URL until a failure or timeout occurs, or until
   the child terminates on its own.

   Returns 0 when the child terminated by itself; in that case
   *status_ptr holds the status reported by waitpid().  Returns 1 when
   the URL check failed (transfer error, timeout, or a non-2xx HTTP
   status) and the child was therefore sent SIGTERM, followed by
   SIGKILL if it did not go away within MAX_WAITTERM_SEC seconds.

   If libcurl cannot be initialised, the URL cannot be checked; the
   function then simply blocks until the child exits and returns 0, so
   that the caller still receives a valid status.

   When built without HAVE_LIBCURL this only prints a message and
   returns 0 without touching *status_ptr; parse_watchdog() refuses -W
   in such builds, so watchdog_url is never set there. */
int 
do_watchdog(int *status_ptr) {
#ifndef HAVE_LIBCURL
  fprintf(stderr, "Cannot watchdog; no libcurl available.\n");
  return 0;
#else  /* HAVE_LIBCURL */

  CURL *curl;
  CURLcode res;
  char error_buffer[CURL_ERROR_SIZE];
  pid_t wresult;
  int child_exited = 0;

  /* Before we start polling the URL, give the child
     watchdog_start_sec seconds to come up. */
  wresult = waitpid_timeout(child_pid, status_ptr, watchdog_start_sec * 1000);
  if (wresult == child_pid) {
    /* The child terminated on its own before we got started. */
    return 0;
  }

  curl = curl_easy_init();
  if (!curl) {
    fprintf(stderr, "Cannot watchdog; curl failed to init.\n");

    /* Without curl we cannot poll, but we must not return while the
       child is still running and *status_ptr is unset.  Fall back to
       an ordinary blocking wait, retrying if a signal interrupts it. */
    do {
      wresult = waitpid(child_pid, status_ptr, 0);
    } while (wresult < 0 && errno == EINTR);
    if (wresult < 0) {
      perror("waitpid");
      *status_ptr = 0;
    }
    return 0;
  }

  /* Make sure the buffer holds a valid (empty) string even if curl
     never writes to it. */
  error_buffer[0] = '\0';

  /* curl_easy_setopt() is variadic: numeric options must be passed as
     long, not int. */
  curl_easy_setopt(curl, CURLOPT_URL, watchdog_url);
  /*curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);*/
  curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, (long)watchdog_timeout_sec * 1000L);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, watchdog_bitbucket);
  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer);
  curl_easy_setopt(curl, CURLOPT_USERAGENT, "autorestart");
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_FRESH_CONNECT, 1L);
  curl_easy_setopt(curl, CURLOPT_FORBID_REUSE, 1L);

  res = curl_easy_perform(curl);
  while (res == CURLE_OK) {
    /* The HTTP request finished successfully (but might or might not
       have returned an error code like a 404). */
    long http_response = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_response);
    if ((http_response / 100) != 2) {
      /* Anything in the 200 range is deemed success.  Anything else
         is deemed failure. */
      fprintf(stderr, "%s returned %ld\n", watchdog_url, http_response);
      break;
    }

    wresult = waitpid_timeout(child_pid, status_ptr, watchdog_cycle_sec * 1000);
    if (wresult == child_pid) {
      /* The process terminated on its own.  Leave the loop so the
         curl handle is released before we report this. */
      child_exited = 1;
      break;
    }

    res = curl_easy_perform(curl);
  }

  curl_easy_cleanup(curl);

  if (child_exited) {
    return 0;
  }

  /* Failed to retrieve the watchdog URL. */
  if (res != CURLE_OK) {
    fprintf(stderr, "Failed to contact %s: %s\n", watchdog_url,
            error_buffer[0] != '\0' ? error_buffer : curl_easy_strerror(res));
  }
  
  /* Kill the child process and wait for it to go away. */
  kill(child_pid, SIGTERM);

  wresult = waitpid_timeout(child_pid, status_ptr, MAX_WAITTERM_SEC * 1000);
  if (wresult != child_pid) {
    if (wresult == -1) {
      perror("waitpid");
    } else {
      /* SIGTERM didn't make the process die.  Try SIGKILL. */
      fprintf(stderr, "Force-killing child process\n");
      kill(child_pid, SIGKILL);
      wresult = waitpid_timeout(child_pid, status_ptr, MAX_WAITTERM_SEC * 1000);
      if (wresult == -1) {
        perror("waitpid");
      }
    }
  }

  /* Return 1 to indicate we killed the child due to an HTTP error. */
  return 1;
#endif  /* HAVE_LIBCURL */
}
00290 
00291 void
00292 exec_process() {
00293   /* First, output the command line to the log file. */
00294   char **p;
00295   for (p = params; *p != NULL; ++p) {
00296     fprintf(stderr, "%s ", *p);
00297   }
00298   fprintf(stderr, "\n");
00299   execvp(params[0], params);
00300   fprintf(stderr, "Cannot exec %s: %s\n", params[0], strerror(errno));
00301 
00302   /* Exit with a status of 0, to indicate to the parent process that
00303      we should stop. */
00304   exit(0); 
00305 }
00306 
00307 int
00308 spawn_process() {
00309   /* Spawns the child process.  Returns true if the process terminated
00310      by itself and should be respawned, false if it was explicitly
00311      killed (or some other error condition exists), and it should not
00312      respawn any more. */
00313   pid_t wresult;
00314   int status;
00315   int error_exit;
00316 
00317   child_pid = fork();
00318   if (child_pid < 0) {
00319     /* Fork error. */
00320     perror("fork");
00321     return 0;
00322   }
00323 
00324   if (child_pid == 0) {
00325     /* Child.  Exec the process. */
00326     fprintf(stderr, "Child pid is %d.\n", getpid());
00327     exec_process();
00328     /* Shouldn't get here. */
00329     exit(1);
00330   }
00331 
00332   /* Parent. */
00333 
00334   error_exit = 0;
00335 
00336   if (watchdog_url != NULL) {
00337     /* If we're watchdogging, then go check the URL.  This function
00338        won't return until the URL fails or the child exits. */
00339     error_exit = do_watchdog(&status);
00340 
00341   } else {
00342     /* If we're not watchdogging, then just wait for the child to
00343        terminate, and diagnose the reason. */
00344     wresult = waitpid(child_pid, &status, 0);
00345     if (wresult < 0) {
00346       perror("waitpid");
00347       return 0;
00348     }
00349   }
00350 
00351   /* Now that we've returned from waitpid, clear the child pid number
00352      so our signal handler doesn't get too confused. */
00353   child_pid = 0;
00354 
00355   if (error_exit) {
00356     /* An HTTP error exit is a reason to respawn. */
00357     return 1;
00358 
00359   } else if (WIFSIGNALED(status)) {
00360     int signal = WTERMSIG(status);
00361     fprintf(stderr, "\nprocess caught signal %d.\n\n", signal);
00362     /* A signal exit is a reason to respawn unless the signal is TERM
00363        or KILL. */
00364     return !stop_on_terminate || (signal != SIGTERM && signal != SIGKILL);
00365 
00366   } else {
00367     int exit_status = WEXITSTATUS(status);
00368     fprintf(stderr, "\nprocess exited with status %d.\n\n", WEXITSTATUS(status));
00369     /* Normal exit is a reason to respawn if the status indicates failure. */
00370     return !stop_on_terminate || (exit_status != 0);
00371   }
00372 }
00373 
00374 void
00375 sigterm_handler() {
00376   pid_t wresult;
00377   int status;
00378   time_t now;
00379   char time_buffer[TIME_BUFFER_SIZE];
00380 
00381   now = time(NULL);
00382   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00383 
00384   fprintf(stderr, "\nsigterm caught at %s; shutting down.\n", time_buffer);
00385   if (child_pid == 0) {
00386     fprintf(stderr, "no child process.\n\n");
00387 
00388   } else {
00389     kill(child_pid, SIGTERM);
00390 
00391     wresult = waitpid(child_pid, &status, 0);
00392     if (wresult < 0) {
00393       perror("waitpid");
00394     } else {
00395       fprintf(stderr, "child process terminated.\n\n");
00396     }
00397   }
00398   exit(1);
00399 }
00400 
00401 void
00402 sighup_handler() {
00403   time_t now;
00404   char time_buffer[TIME_BUFFER_SIZE];
00405 
00406   now = time(NULL);
00407   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00408 
00409   fprintf(stderr, "\nsighup caught at %s.\n", time_buffer);
00410   if (child_pid == 0) {
00411     fprintf(stderr, "no child process.\n\n");
00412 
00413   } else {
00414     kill(child_pid, SIGHUP);
00415   }
00416 }
00417 
void 
sigalarm_handler(int signum) {
  /* SIGALRM handler: only reports that the alarm went off; receiving
     the signal is what ends the pause() in the respawn loop.  The int
     parameter is required by the signal-handler signature; its value
     is not used. */
  (void)signum;
  fprintf(stderr, "sleep epoch was complete.\n");
}
00422 
00423 void
00424 do_autorestart() {
00425   char time_buffer[TIME_BUFFER_SIZE];
00426   time_t now;
00427   time_t *spam_respawn = NULL;
00428   int sri, num_sri;
00429   struct sigaction sa;
00430 
00431   if (spam_respawn_count > 1) {
00432     spam_respawn = (time_t *)malloc(sizeof(time_t) * spam_respawn_count);
00433   }
00434 
00435   /* Make our process its own process group. */
00436   setpgid(0, 0);
00437 
00438   /* Set up a signal handler to trap SIGTERM. */
00439   sa.sa_handler = sigterm_handler;
00440   sigemptyset(&sa.sa_mask);
00441   sa.sa_flags = 0;
00442   if (sigaction(SIGTERM, &sa, NULL) < 0) {
00443     perror("sigaction");
00444   }
00445 
00446   /* Set up a signal handler to trap SIGHUP.  We pass this into the
00447      child. */
00448   sa.sa_handler = sighup_handler;
00449   sigemptyset(&sa.sa_mask);
00450   sa.sa_flags = 0;
00451   if (sigaction(SIGHUP, &sa, NULL) < 0) {
00452     perror("sigaction");
00453   }
00454 
00455   if (logfile_fd >= 0) {
00456     /* If we have a logfile, dup it onto stdout and stderr. */
00457     dup2(logfile_fd, STDOUT_FILENO);
00458     dup2(logfile_fd, STDERR_FILENO);
00459     close(logfile_fd);
00460   }
00461 
00462   /* Make sure stdin is closed. */
00463   close(STDIN_FILENO);
00464 
00465   now = time(NULL);
00466   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00467   fprintf(stderr, "autorestart begun at %s.\n", time_buffer);
00468 
00469   if (pidfile_name != NULL) {
00470     unlink(pidfile_name);
00471     FILE *pidfile = fopen(pidfile_name, "w");
00472     if (pidfile == NULL) {
00473       fprintf(stderr, "Could not write pidfile %s\n", pidfile_name);
00474     } else {
00475       fprintf(pidfile, "%d\n", getpid());
00476       fclose(pidfile);
00477     }
00478   }
00479 
00480   sri = 1;
00481   num_sri = 1;
00482   if (spam_respawn_count > 1) {
00483     spam_respawn[1] = now;
00484   }
00485   
00486   while (spawn_process()) {
00487     now = time(NULL);
00488 
00489     if (respawn_script != NULL) {
00490       invoke_respawn_script(now);
00491     }
00492     
00493     if (respawn_delay_time) {
00494       sleep(respawn_delay_time);
00495     }
00496 
00497     /* Make sure we're not respawning too fast. */
00498     if (spam_respawn_count > 1) {
00499       sri = (sri + 1) % spam_respawn_count;
00500       spam_respawn[sri] = now;
00501       if (num_sri < spam_respawn_count) {
00502         num_sri++;
00503       } else {
00504         time_t last = spam_respawn[(sri + 1) % spam_respawn_count];
00505         if (now - last < spam_respawn_time) 
00506         {
00507           if(!spam_restart_delay_time) 
00508           {
00509             fprintf(stderr, "respawning too fast, giving up.\n");
00510             break;
00511           } 
00512           else 
00513           {
00514             num_sri = 1; /* reset num_sri */
00515             fprintf(stderr, "respawning too fast, will sleep for %d seconds.\n", spam_restart_delay_time);
00516             signal (SIGALRM, sigalarm_handler);
00517             alarm(spam_restart_delay_time);
00518             pause();
00519             signal (SIGALRM, SIG_IGN);
00520           }
00521         }
00522       }
00523     }
00524     
00525     if (stop_always) {
00526       fprintf(stderr, "instructed to not autorestart, exiting.\n");
00527       break;
00528     }
00529       
00530     strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00531     fprintf(stderr, "respawning at %s.\n", time_buffer);
00532   }
00533 
00534   now = time(NULL);
00535   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00536   fprintf(stderr, "autorestart terminated at %s.\n", time_buffer);
00537   exit(0);
00538 }
00539 
void
double_fork() {
  /* Detaches the monitor from the invoking shell by forking twice:
     the grandchild runs do_autorestart(), the intermediate child
     prints the grandchild's pid and exits, and the original process
     waits for the intermediate child and then returns to its caller.

     If either fork fails, or the intermediate child does not exit
     cleanly with status 0, the calling process exits with status 1. */
  pid_t child, grandchild, wresult;
  int status;

  /* Fork once, then again, to disassociate the child from the command
     shell process group. */
  child = fork();
  if (child < 0) {
    /* Failure to fork. */
    perror("fork");
    exit(1);
  }

  if (child == 0) {
    /* Child.  Fork again. */
    grandchild = fork();
    if (grandchild < 0) {
      perror("fork");
      exit(1);
    }

    if (grandchild == 0) {
      /* Grandchild.  Begin useful work. */
      do_autorestart();
      /* Shouldn't get here. */
      exit(1);
    }

    /* Child.  Report the new pid, then terminate gracefully. */
    fprintf(stderr, "Spawned, monitoring pid is %d.\n", (int)grandchild);
    exit(0);
  }

  /* Parent.  Wait for the child to terminate, then return. */
  wresult = waitpid(child, &status, 0);
  if (wresult < 0) {
    perror("waitpid");
    exit(1);
  }

  if (WIFEXITED(status)) {
    if (WEXITSTATUS(status) != 0) {
      /* The intermediate child only exits nonzero when it could not
         start the monitor (its own fork failed), so don't pretend the
         launch succeeded. */
      fprintf(stderr, "child exited with status %d.\n", WEXITSTATUS(status));
      exit(1);
    }
    return;
  }

  if (WIFSIGNALED(status)) {
    fprintf(stderr, "child caught signal %d unexpectedly.\n", WTERMSIG(status));
  } else {
    fprintf(stderr, "child ended with unexpected wait status %d.\n", status);
  }
  exit(1);
}
00590 
void
usage() {
  /* Writes the short command-line synopsis to stderr. */
  static const char synopsis[] =
    "\n"
    "autorestart [opts] program [args . . . ]\n"
    "autorestart -h\n\n";

  fputs(synopsis, stderr);
}
00598 
00599 void
00600 help() {
00601   usage();
00602   fprintf(stderr,
00603           "This program is used to run a program as a background task and\n"
00604           "automatically restart it should it terminate for any reason other\n"
00605           "than normal exit or explicit user kill.\n\n"
00606 
00607           "If the program exits with a status of 0, indicating successful\n"
00608           "completion, it is not restarted.\n\n"
00609 
00610           "If the program is terminated via a TERM or KILL signal (e.g. via\n"
00611           "kill [pid] or kill -9 [pid]), it is assumed the user meant for the\n"
00612           "process to stop, and it is not restarted.\n\n"
00613 
00614           "Options:\n\n"
00615 
00616           "  -l logfilename\n"
00617           "     Route stdout and stderr from the child process into the indicated\n"
00618           "     log file.\n\n"
00619 
00620           "  -p pidfilename\n"
00621           "     Write the pid of the monitoring process to the indicated pidfile.\n\n"
00622           "  -f\n"
00623           "     Don't fork autorestart itself; run it as a foreground process. \n"
00624           "     (Normally, autorestart forks itself to run as a background process.)\n"
00625           "     In this case, the file named by -p is not used.\n\n"
00626           
00627           "  -n\n"
00628           "     Do not attempt to restart the process under any circumstance.\n"
00629           "     The program can still be used to execute a script on abnormal\n"
00630           "     process termination.\n\n"
00631 
00632           "  -t\n"
00633           "     Stop on terminate: don't restart if the child process exits\n"
00634           "     normally or is killed with a SIGTERM.  With this flag, the\n"
00635           "     child process will be restarted only if it exits with a\n"
00636           "     non-zero exit status, or if it is killed with a signal other\n"
00637           "     than SIGTERM.  Without this flag, the default behavior is to\n"
00638           "     restart the child process if it exits for any reason.\n\n"
00639 
00640           "  -r count,secs,sleep\n"
00641           "     Sleep 'sleep' seconds if the process respawns 'count' times\n"
00642           "     within 'secs' seconds.  This is designed to prevent respawning\n"
00643           "     from using too many system resources if something is wrong with\n"
00644           "     the child process.  The default value is %d,%d,%d. Use -r 0,0,0\n"
00645           "     to disable this feature.\n\n"
00646 
00647           "  -s \"command\"\n"
00648           "     Run the indicated command or script each time the process is\n"
00649           "     respawned, using the system() call.  This may be useful, for\n"
00650           "     instance, to notify an operator via email each time a respawn\n"
00651           "     occurs.  If -c is also specified, an additional parameter will\n"
00652           "     be appended to the command, indicating the number of times the\n"
00653           "     respawn has occurred in the given time interval.\n\n"
00654 
00655           "  -c secs\n"
00656           "     Specifies the number of seconds over which to count respawn events\n"
00657           "     for the purposes of passing an argument to the script named with\n"
00658           "     -s.\n\n"
00659 
00660           "  -d secs\n"
00661           "     Specifies the number of seconds to delay for between restarts.\n"
00662           "     The default is %d.\n\n"
00663 
00664 #ifdef HAVE_LIBCURL
00665           "  -W watchdog_url,start,cycle,timeout\n"
00666           "     Specifies an optional URL to watch while waiting for the process\n"
00667           "     to terminate.  If this is specified, autorestart will start the process,\n"
00668           "     wait start seconds, and then repeatedly poll the indicated URL\n"
00669           "     every cycle seconds.  If a HTTP failure code is detected,\n"
00670           "     or no response is received within timeout seconds, then the\n"
00671           "     child is terminated and restarted.  The start, cycle, and timeout\n"
00672           "     parameters are all required.\n\n"
00673 #endif  /* HAVE_LIBCURL */
00674 
00675           "  -U username\n"
00676           "     Change to the indicated user upon startup.  The logfile is still\n"
00677           "     created as the initial user.\n\n"
00678 
00679           "  -G groupname\n"
00680           "     Change to the indicated group upon startup.\n\n"
00681 
00682           "  -D dirname\n"
00683           "     Change to the indicated working directory upon startup.  The logfile\n"
00684           "     is still created relative to the initial startup directory.\n\n"
00685 
00686           "  -h\n"
00687           "     Output this help information.\n\n",
00688           spam_respawn_count, spam_respawn_time, spam_restart_delay_time, respawn_delay_time);
00689 }
00690 
/* Converts one field of an "a,b,c" triplet to int.  The field must
   consist of a decimal integer that fits in an int (leading
   whitespace and a sign are accepted, as with strtol); otherwise an
   error is printed and the program exits with status 1. */
static int
parse_triplet_field(const char *text) {
  char *end;
  long value;

  errno = 0;
  value = strtol(text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE ||
      value < INT_MIN || value > INT_MAX) {
    fprintf(stderr, "Invalid number: '%s'\n", text);
    exit(1);
  }
  return (int)value;
}

void
parse_int_triplet(char *param, int *a, int *b, int *c) {
  /* Parses a string of the form "A,B,C" into three ints, stored
     through a, b and c.  The string is modified in place: the two
     separating commas are overwritten with NUL bytes.

     Exits with status 1 (after printing a message) if either comma is
     missing or if any of the three fields is not a valid integer.
     Unlike atoi(), a malformed field is reported instead of silently
     becoming 0. */
  char *comma;
  char *comma2;
  
  comma = strchr(param, ',');
  if (comma == NULL) {
    fprintf(stderr, "Comma required: %s\n", param);
    exit(1);
  }

  comma2 = strchr(comma+1, ',');
  if (comma2 == NULL) {
    fprintf(stderr, "Second comma required: %s\n", param);
    exit(1);
  }

  *comma = '\0';
  *comma2 = '\0';
  
  *a = parse_triplet_field(param);
  *b = parse_triplet_field(comma + 1);
  *c = parse_triplet_field(comma2 + 1);
}
00715 
00716 void 
00717 parse_watchdog(char *param) {
00718   char *comma;
00719   char *comma2;
00720   char *comma3;
00721 
00722 #ifndef HAVE_LIBCURL
00723   fprintf(stderr, "-W requires autorestart to have been compiled with libcurl support.\n");
00724   exit(1);
00725 #endif  /* HAVE_LIBCURL */
00726 
00727   comma = strrchr(param, ',');
00728   if (comma == NULL) {
00729     fprintf(stderr, "Comma required: %s\n", param);
00730     exit(1);
00731   }
00732   *comma = '\0';
00733 
00734   comma2 = strrchr(param, ',');
00735   if (comma2 == NULL) {
00736     *comma = ',';
00737     fprintf(stderr, "Second comma required: %s\n", param);
00738     exit(1);
00739   }
00740   *comma2 = '\0';
00741 
00742   comma3 = strrchr(param, ',');
00743   if (comma3 == NULL) {
00744     *comma = ',';
00745     *comma2 = ',';
00746     fprintf(stderr, "Third comma required: %s\n", param);
00747     exit(1);
00748   }
00749   *comma3 = '\0';
00750 
00751   watchdog_url = param;
00752   watchdog_start_sec = atoi(comma3 + 1);
00753   watchdog_cycle_sec = atoi(comma2 + 1);
00754   watchdog_timeout_sec = atoi(comma + 1);
00755 }
00756 
00757 
00758 int 
00759 main(int argc, char *argv[]) {
00760   extern char *optarg;
00761   extern int optind;
00762   /* The initial '+' instructs GNU getopt not to reorder switches. */
00763   static const char *optflags = "+l:p:fntr:s:c:d:W:U:G:D:h";
00764   int flag;
00765 
00766   flag = getopt(argc, argv, optflags);
00767   while (flag != EOF) {
00768     switch (flag) {
00769     case 'l':
00770       logfile_name = optarg;
00771       break;
00772 
00773     case 'p':
00774       pidfile_name = optarg;
00775       break;
00776 
00777     case 'f':
00778       dont_fork = 1;
00779       break;
00780 
00781     case 'n':
00782       stop_always = 1;
00783       break;
00784 
00785     case 't':
00786       stop_on_terminate = 1;
00787       break;
00788 
00789     case 'r':
00790       parse_int_triplet(optarg, &spam_respawn_count, &spam_respawn_time, &spam_restart_delay_time);
00791       break;
00792 
00793     case 's':
00794       respawn_script = optarg;
00795       break;
00796 
00797     case 'c':
00798       respawn_count_time = atoi(optarg);
00799       break;
00800 
00801     case 'd':
00802       respawn_delay_time = atoi(optarg);
00803       break;
00804 
00805     case 'W':
00806       parse_watchdog(optarg);
00807       break;
00808 
00809     case 'U':
00810       startup_username = optarg;
00811       break;
00812 
00813     case 'G':
00814       startup_groupname = optarg;
00815       break;
00816 
00817     case 'D':
00818       startup_chdir = optarg;
00819       break;
00820       
00821     case 'h':
00822       help();
00823       return 1;
00824 
00825     case '?':
00826     case '+':
00827       usage();
00828       return 1;
00829 
00830     default:
00831       fprintf(stderr, "Unhandled switch: -%c\n", flag);
00832       return 1;
00833     }
00834     flag = getopt(argc, argv, optflags);
00835   }
00836 
00837   argc -= (optind - 1);
00838   argv += (optind - 1);
00839 
00840   if (argc < 2) {
00841     fprintf(stderr, "No program to execute given.\n");
00842     usage();
00843     return 1;
00844   }
00845 
00846   params = &argv[1];
00847 
00848   if (logfile_name != NULL) {
00849     logfile_fd = open(logfile_name, O_WRONLY | O_CREAT | O_TRUNC, 0666);
00850     if (logfile_fd < 0) {
00851       fprintf(stderr, "Cannot write to logfile %s: %s\n", 
00852               logfile_name, strerror(errno));
00853       return 1;
00854     }
00855     fprintf(stderr, "Generating output to %s.\n", logfile_name);
00856   }
00857 
00858   if (startup_chdir != NULL) {
00859     if (chdir(startup_chdir) != 0) {
00860       perror(startup_chdir);
00861       return 1;
00862     }
00863   }
00864 
00865   if (startup_groupname != NULL) {
00866     struct group *grp;
00867     grp = getgrnam(startup_groupname);
00868     if (grp == NULL) {
00869       perror(startup_groupname);
00870       return 1;
00871     }
00872 
00873     if (setgid(grp->gr_gid) != 0) {
00874       perror(startup_groupname);
00875       return 1;
00876     }
00877   }
00878 
00879   if (startup_username != NULL) {
00880     struct passwd *pwd;
00881     pwd = getpwnam(startup_username);
00882     if (pwd == NULL) {
00883       perror(startup_username);
00884       return 1;
00885     }
00886 
00887     if (setuid(pwd->pw_uid) != 0) {
00888       perror(startup_username);
00889       return 1;
00890     }
00891   }
00892 
00893   if (dont_fork) {
00894     do_autorestart();
00895   } else {
00896     double_fork();
00897   }
00898 
00899   return 0;
00900 }
00901 
 All Classes Functions Variables Enumerations