/[Apache-SVN]/httpd/httpd/branches/2.2.x/server/mpm/worker/worker.c
ViewVC logotype

Contents of /httpd/httpd/branches/2.2.x/server/mpm/worker/worker.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 292949 - (hide annotations)
Sat Oct 1 11:02:10 2005 UTC (4 years, 1 month ago) by trawick
File MIME type: text/plain
File size: 80650 byte(s)
merge from trunk:

*) SECURITY: CAN-2005-2970 (cve.mitre.org)
     worker MPM: Fix a memory leak which can occur after an aborted
     connection in some limited circumstances.  [Greg Ames, Jeff Trawick]

1 jerenkrantz 151408 /* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
2     * applicable.
3 rbb 89781 *
4 nd 102525 * Licensed under the Apache License, Version 2.0 (the "License");
5     * you may not use this file except in compliance with the License.
6     * You may obtain a copy of the License at
7 rbb 89781 *
8 nd 102525 * http://www.apache.org/licenses/LICENSE-2.0
9 rbb 89781 *
10 nd 102525 * Unless required by applicable law or agreed to in writing, software
11     * distributed under the License is distributed on an "AS IS" BASIS,
12     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     * See the License for the specific language governing permissions and
14     * limitations under the License.
15 rbb 89781 */
16    
17 rbb 89938 /* The purpose of this MPM is to fix the design flaws in the threaded
18     * model. Because of the way that pthreads and mutex locks interact,
19     * it is basically impossible to cleanly gracefully shutdown a child
20     * process if multiple threads are all blocked in accept. This model
21     * fixes those problems.
22     */
23    
24 rbb 89781 #include "apr.h"
25     #include "apr_portable.h"
26     #include "apr_strings.h"
27     #include "apr_file_io.h"
28     #include "apr_thread_proc.h"
29     #include "apr_signal.h"
30 aaron 91518 #include "apr_thread_mutex.h"
31 aaron 91582 #include "apr_proc_mutex.h"
32 rbb 96005 #include "apr_poll.h"
33 rbb 89781 #define APR_WANT_STRFUNC
34     #include "apr_want.h"
35    
36     #if APR_HAVE_UNISTD_H
37     #include <unistd.h>
38     #endif
39     #if APR_HAVE_SYS_SOCKET_H
40     #include <sys/socket.h>
41     #endif
42     #if APR_HAVE_SYS_WAIT_H
43     #include <sys/wait.h>
44     #endif
45     #ifdef HAVE_SYS_PROCESSOR_H
46     #include <sys/processor.h> /* for bindprocessor() */
47     #endif
48    
49     #if !APR_HAS_THREADS
50     #error The Worker MPM requires APR threads, but they are unavailable.
51     #endif
52    
53     #define CORE_PRIVATE
54    
55     #include "ap_config.h"
56     #include "httpd.h"
57     #include "http_main.h"
58     #include "http_log.h"
59 jerenkrantz 92473 #include "http_config.h" /* for read_config */
60     #include "http_core.h" /* for get_remote_host */
61 rbb 89781 #include "http_connection.h"
62     #include "ap_mpm.h"
63 rbb 93358 #include "pod.h"
64 rbb 89781 #include "mpm_common.h"
65     #include "ap_listen.h"
66     #include "scoreboard.h"
67     #include "fdqueue.h"
68 trawick 92512 #include "mpm_default.h"
69 rbb 89781
70     #include <signal.h>
71     #include <limits.h> /* for INT_MAX */
72    
/* Default ceiling on the number of child server processes.  Clients are
 * locked out once more servers than this would be needed; the cap exists
 * solely to keep the server from crashing when things get out of hand.
 *
 * A hard maximum is kept for two reasons.  First, if something goes
 * seriously wrong, the fork bomb is stopped short of crashing the machine
 * by filling some kernel table.  Second, it keeps the scoreboard file small
 * enough that the whole thing can be read without worrying too much about
 * the overhead.
 */
#if !defined(DEFAULT_SERVER_LIMIT)
#define DEFAULT_SERVER_LIMIT 16
#endif

/* The admin cannot tune ServerLimit beyond MAX_SERVER_LIMIT.  Some sort of
 * compile-time limit is wanted to help catch typos.
 */
#if !defined(MAX_SERVER_LIMIT)
#define MAX_SERVER_LIMIT 20000
#endif

/* Default ceiling on the threads per process.  Clients are locked out if
 * more than this * server_limit are needed.
 *
 * This is kept for one reason: it keeps the scoreboard file small enough
 * that the whole thing can be read without worrying too much about the
 * overhead.
 */
#if !defined(DEFAULT_THREAD_LIMIT)
#define DEFAULT_THREAD_LIMIT 64
#endif

/* The admin cannot tune ThreadLimit beyond MAX_THREAD_LIMIT.  Some sort of
 * compile-time limit is wanted to help catch typos.
 */
#if !defined(MAX_THREAD_LIMIT)
#define MAX_THREAD_LIMIT 20000
#endif
112    
/*
 * Actual definitions of config globals
 */

int ap_threads_per_child = 0; /* Worker threads per child */
static int ap_daemons_to_start = 0;
/* Reported by ap_mpm_query() for AP_MPMQ_MIN_SPARE_THREADS. */
static int min_spare_threads = 0;
/* Reported by ap_mpm_query() for AP_MPMQ_MAX_SPARE_THREADS. */
static int max_spare_threads = 0;
/* Reported by ap_mpm_query() for AP_MPMQ_MAX_DAEMONS. */
static int ap_daemons_limit = 0;
/* Reported by ap_mpm_query() for AP_MPMQ_HARD_LIMIT_DAEMONS. */
static int server_limit = DEFAULT_SERVER_LIMIT;
static int first_server_limit;
/* Reported by ap_mpm_query() for AP_MPMQ_HARD_LIMIT_THREADS; also the
 * per-child stride used by ID_FROM_CHILD_THREAD(). */
static int thread_limit = DEFAULT_THREAD_LIMIT;
static int first_thread_limit;
static int changed_limit_at_restart;
/* Set to 1 by the listener thread once it has left its accept loop. */
static int dying = 0;
/* Set by signal_threads() for an ungraceful stop; polled by worker threads. */
static int workers_may_exit = 0;
static int start_thread_may_exit = 0;
/* Set by wakeup_listener(); polled by the listener thread's accept loop. */
static int listener_may_exit = 0;
/* Checked by the listener loop; when it drops to zero or below,
 * check_infinite_requests() decides whether the child should wind down. */
static int requests_this_child;
static int num_listensocks = 0;
/* Set when a listener's accept function returns APR_EGENERAL. */
static int resource_shortage = 0;
/* Queue through which the listener hands accepted sockets to workers. */
static fd_queue_t *worker_queue;
/* Idle-worker bookkeeping shared by the listener and the workers. */
static fd_queue_info_t *worker_queue_info;
/* Reported by ap_mpm_query() for AP_MPMQ_MPM_STATE. */
static int mpm_state = AP_MPMQ_STARTING;
/* NOTE(review): not referenced in the visible part of the file. */
static int sick_child_detected;

/* The structure used to pass unique initialization info to each thread.
 * The thread functions read pid as the process (child) slot and tid as the
 * thread slot, then free() the structure.
 */
typedef struct {
    int pid;
    int tid;
    int sd;
} proc_info;

/* Structure used to pass information to the thread responsible for
 * creating the rest of the threads.
 */
typedef struct {
    apr_thread_t **threads;
    apr_thread_t *listener;
    int child_num_arg;
    apr_threadattr_t *threadattr;
} thread_starter;
155    
/* Flatten a (child slot, thread slot) pair into a single id:
 * c * thread_limit + t.  Both arguments are parenthesized so that passing
 * an expression (e.g. "a + b") cannot change the precedence of the
 * multiplication or the addition.
 */
#define ID_FROM_CHILD_THREAD(c, t) (((c) * thread_limit) + (t))
157 trawick 92512
/*
 * The max child slot ever assigned, preserved across restarts.  Necessary
 * to deal with MaxClients changes across AP_SIG_GRACEFUL restarts.  We
 * use this value to optimize routines that have to scan the entire
 * scoreboard.  Reported by ap_mpm_query() for AP_MPMQ_MAX_DAEMON_USED.
 */
int ap_max_daemons_limit = -1;

static ap_pod_t *pod;

/* *Non*-shared http_main globals... */

server_rec *ap_server_conf;

/* The worker MPM respects a couple of runtime flags that can aid
 * in debugging.  Setting the -DNO_DETACH flag will prevent the root process
 * from detaching from its controlling terminal.  Additionally, setting
 * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 * child_main loop running in the process which originally started up.
 * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 * early in standalone_main; just continue through.  This is the server
 * trying to kill off any child processes which it might have lying
 * around --- Apache doesn't keep track of their pids, it just sends
 * SIGHUP to the process group, ignoring it in the root process.
 * Continue through and you'll be fine.).
 */

static int one_process = 0;

#ifdef DEBUG_SIGSTOP
int raise_sigstop_flags;
#endif

static apr_pool_t *pconf; /* Pool for config stuff */
static apr_pool_t *pchild; /* Pool for httpd child stuff */

static pid_t ap_my_pid; /* Linux getpid() doesn't work except in main
                           thread. Use this instead */
static pid_t parent_pid;
/* Target of the wake-up signal sent by wakeup_listener(); while it is still
 * NULL, wakeup_listener() only sets the exit flag. */
static apr_os_thread_t *listener_os_thread;

/* Locks for accept serialization */
static apr_proc_mutex_t *accept_mutex;

/* Wrapper for the accept-mutex operations.  With
 * SINGLE_LISTEN_UNSERIALIZED_ACCEPT defined, stmt is evaluated only when
 * there is more than one listener; otherwise the expression is APR_SUCCESS.
 */
#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
#define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
#else
#define SAFE_ACCEPT(stmt) (stmt)
#endif

/* The LISTENER_SIGNAL signal will be sent from the main thread to the
 * listener thread to wake it up for graceful termination (what a child
 * process from an old generation does when the admin does "apachectl
 * graceful").  This signal will be blocked in all threads of a child
 * process except for the listener thread.
 */
#define LISTENER_SIGNAL SIGHUP

/* An array of socket descriptors in use by each thread used to
 * perform a non-graceful (forced) shutdown of the server. */
static apr_socket_t **worker_sockets;
219    
220     static void close_worker_sockets(void)
221     {
222     int i;
223     for (i = 0; i < ap_threads_per_child; i++) {
224     if (worker_sockets[i]) {
225     apr_socket_close(worker_sockets[i]);
226     worker_sockets[i] = NULL;
227     }
228     }
229     }
230    
231 trawick 94031 static void wakeup_listener(void)
232     {
233 trawick 94095 listener_may_exit = 1;
234 trawick 94420 if (!listener_os_thread) {
235     /* XXX there is an obscure path that this doesn't handle perfectly:
236     * right after listener thread is created but before
237     * listener_os_thread is set, the first worker thread hits an
238     * error and starts graceful termination
239     */
240     return;
241     }
242 trawick 94031 /*
243 trawick 94417 * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
244     * platforms and wake up the listener thread since it is the only thread
245     * with SIGHUP unblocked, but that doesn't work on Linux
246 trawick 94031 */
247 trawick 94417 #ifdef HAVE_PTHREAD_KILL
248 trawick 94031 pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
249 trawick 94417 #else
250     kill(ap_my_pid, LISTENER_SIGNAL);
251     #endif
252 trawick 94031 }
253    
254 trawick 94232 #define ST_INIT 0
255 trawick 94095 #define ST_GRACEFUL 1
256     #define ST_UNGRACEFUL 2
257    
258 trawick 94232 static int terminate_mode = ST_INIT;
259    
260 trawick 94095 static void signal_threads(int mode)
261 rbb 89827 {
262 trawick 94232 if (terminate_mode == mode) {
263 trawick 94106 return;
264     }
265 trawick 94232 terminate_mode = mode;
266 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
267 trawick 94106
268 trawick 94031 /* in case we weren't called from the listener thread, wake up the
269     * listener thread
270     */
271     wakeup_listener();
272    
273 trawick 94106 /* for ungraceful termination, let the workers exit now;
274     * for graceful termination, the listener thread will notify the
275     * workers to exit once it has stopped accepting new connections
276     */
277     if (mode == ST_UNGRACEFUL) {
278     workers_may_exit = 1;
279     ap_queue_interrupt_all(worker_queue);
280 aaron 94824 ap_queue_info_term(worker_queue_info);
281 jerenkrantz 94886 close_worker_sockets(); /* forcefully kill all current connections */
282 trawick 94106 }
283 rbb 89827 }
284    
285 rbb 89781 AP_DECLARE(apr_status_t) ap_mpm_query(int query_code, int *result)
286     {
287     switch(query_code){
288     case AP_MPMQ_MAX_DAEMON_USED:
289     *result = ap_max_daemons_limit;
290     return APR_SUCCESS;
291     case AP_MPMQ_IS_THREADED:
292     *result = AP_MPMQ_STATIC;
293     return APR_SUCCESS;
294     case AP_MPMQ_IS_FORKED:
295     *result = AP_MPMQ_DYNAMIC;
296     return APR_SUCCESS;
297     case AP_MPMQ_HARD_LIMIT_DAEMONS:
298 trawick 92530 *result = server_limit;
299 rbb 89781 return APR_SUCCESS;
300     case AP_MPMQ_HARD_LIMIT_THREADS:
301 trawick 92530 *result = thread_limit;
302 rbb 89781 return APR_SUCCESS;
303     case AP_MPMQ_MAX_THREADS:
304     *result = ap_threads_per_child;
305     return APR_SUCCESS;
306 jwoolley 91777 case AP_MPMQ_MIN_SPARE_DAEMONS:
307 rbb 89781 *result = 0;
308     return APR_SUCCESS;
309     case AP_MPMQ_MIN_SPARE_THREADS:
310     *result = min_spare_threads;
311     return APR_SUCCESS;
312     case AP_MPMQ_MAX_SPARE_DAEMONS:
313     *result = 0;
314     return APR_SUCCESS;
315     case AP_MPMQ_MAX_SPARE_THREADS:
316     *result = max_spare_threads;
317     return APR_SUCCESS;
318 jwoolley 91777 case AP_MPMQ_MAX_REQUESTS_DAEMON:
319 rbb 89781 *result = ap_max_requests_per_child;
320     return APR_SUCCESS;
321     case AP_MPMQ_MAX_DAEMONS:
322     *result = ap_daemons_limit;
323     return APR_SUCCESS;
324 trawick 102045 case AP_MPMQ_MPM_STATE:
325     *result = mpm_state;
326     return APR_SUCCESS;
327 rbb 89781 }
328     return APR_ENOTIMPL;
329     }
330    
331     /* a clean exit from a child with proper cleanup */
332     static void clean_child_exit(int code) __attribute__ ((noreturn));
333     static void clean_child_exit(int code)
334     {
335 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
336 rbb 89781 if (pchild) {
337 jerenkrantz 92473 apr_pool_destroy(pchild);
338 rbb 89781 }
339     exit(code);
340     }
341    
/* Signal-handler-shaped wrapper: exit the child cleanly with status 0.
 * The signal number is not used.
 */
static void just_die(int sig)
{
    (void) sig;
    clean_child_exit(0);
}
346    
347     /*****************************************************************
348     * Connection structures and accounting...
349     */
350    
/* volatile just in case */
/* Set to 1 by ap_start_shutdown(). */
static int volatile shutdown_pending;
/* Set to 1 by ap_start_restart(). */
static int volatile restart_pending;
/* Written by both ap_start_shutdown() and ap_start_restart() with the
 * caller's "graceful" argument. */
static int volatile is_graceful;
/* NOTE(review): not referenced in the visible part of the file. */
static volatile int child_fatal;
/* Copied into each worker thread's scoreboard slot when the thread starts. */
ap_generation_t volatile ap_my_generation;
357    
358     /*
359     * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
360     * functions to initiate shutdown or restart without relying on signals.
361     * Previously this was initiated in sig_term() and restart() signal handlers,
362     * but we want to be able to start a shutdown/restart from other sources --
363     * e.g. on Win32, from the service manager. Now the service manager can
364     * call ap_start_shutdown() or ap_start_restart() as appropriate. Note that
365     * these functions can also be called by the child processes, since global
366     * variables are no longer used to pass on the required action to the parent.
367     *
368     * These should only be called from the parent process itself, since the
369     * parent process will use the shutdown_pending and restart_pending variables
370     * to determine whether to shutdown or restart. The child process should
371     * call signal_parent() directly to tell the parent to die -- this will
372     * cause neither of those variables to be set, which the parent will
373     * assume means something serious is wrong (which it will be, for the
374     * child to force an exit) and so do an exit anyway.
375     */
376    
377 colm 290189 static void ap_start_shutdown(int graceful)
378 rbb 89781 {
379 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
380 rbb 89781 if (shutdown_pending == 1) {
381 jerenkrantz 92473 /* Um, is this _probably_ not an error, if the user has
382     * tried to do a shutdown twice quickly, so we won't
383     * worry about reporting it.
384     */
385     return;
386 rbb 89781 }
387     shutdown_pending = 1;
388 colm 290189 is_graceful = graceful;
389 rbb 89781 }
390    
391     /* do a graceful restart if graceful == 1 */
392     static void ap_start_restart(int graceful)
393     {
394 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
395 rbb 89781 if (restart_pending == 1) {
396 jerenkrantz 92473 /* Probably not an error - don't bother reporting it */
397     return;
398 rbb 89781 }
399     restart_pending = 1;
400     is_graceful = graceful;
401     }
402    
403     static void sig_term(int sig)
404     {
405 colm 290189 ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
406 rbb 89781 }
407    
408     static void restart(int sig)
409     {
410 jerenkrantz 91076 ap_start_restart(sig == AP_SIG_GRACEFUL);
411 rbb 89781 }
412    
413     static void set_signals(void)
414     {
415     #ifndef NO_USE_SIGACTION
416     struct sigaction sa;
417 trawick 99312 #endif
418 rbb 89781
419 trawick 99312 if (!one_process) {
420     ap_fatal_signal_setup(ap_server_conf, pconf);
421     }
422    
423     #ifndef NO_USE_SIGACTION
424 rbb 89781 sigemptyset(&sa.sa_mask);
425     sa.sa_flags = 0;
426    
427     sa.sa_handler = sig_term;
428     if (sigaction(SIGTERM, &sa, NULL) < 0)
429 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
430     "sigaction(SIGTERM)");
431 colm 290189 #ifdef AP_SIG_GRACEFUL_STOP
432     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
433     ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
434     "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
435     #endif
436 rbb 89781 #ifdef SIGINT
437     if (sigaction(SIGINT, &sa, NULL) < 0)
438 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
439     "sigaction(SIGINT)");
440 rbb 89781 #endif
441     #ifdef SIGXCPU
442     sa.sa_handler = SIG_DFL;
443     if (sigaction(SIGXCPU, &sa, NULL) < 0)
444 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
445     "sigaction(SIGXCPU)");
446 rbb 89781 #endif
447     #ifdef SIGXFSZ
448     sa.sa_handler = SIG_DFL;
449     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
450 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
451     "sigaction(SIGXFSZ)");
452 rbb 89781 #endif
453     #ifdef SIGPIPE
454     sa.sa_handler = SIG_IGN;
455     if (sigaction(SIGPIPE, &sa, NULL) < 0)
456 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
457     "sigaction(SIGPIPE)");
458 rbb 89781 #endif
459    
460 jerenkrantz 91076 /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
461     * processing one */
462 rbb 89781 sigaddset(&sa.sa_mask, SIGHUP);
463 jerenkrantz 91076 sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
464 rbb 89781 sa.sa_handler = restart;
465     if (sigaction(SIGHUP, &sa, NULL) < 0)
466 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
467     "sigaction(SIGHUP)");
468 jerenkrantz 91076 if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
469 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
470     "sigaction(" AP_SIG_GRACEFUL_STRING ")");
471 rbb 89781 #else
472     if (!one_process) {
473     #ifdef SIGXCPU
474 jerenkrantz 92473 apr_signal(SIGXCPU, SIG_DFL);
475 rbb 89781 #endif /* SIGXCPU */
476     #ifdef SIGXFSZ
477 jerenkrantz 92473 apr_signal(SIGXFSZ, SIG_DFL);
478 rbb 89781 #endif /* SIGXFSZ */
479     }
480    
481     apr_signal(SIGTERM, sig_term);
482     #ifdef SIGHUP
483     apr_signal(SIGHUP, restart);
484     #endif /* SIGHUP */
485 jerenkrantz 91076 #ifdef AP_SIG_GRACEFUL
486     apr_signal(AP_SIG_GRACEFUL, restart);
487     #endif /* AP_SIG_GRACEFUL */
488 colm 290189 #ifdef AP_SIG_GRACEFUL_STOP
489     apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
490     #endif /* AP_SIG_GRACEFUL_STOP */
491 rbb 89781 #ifdef SIGPIPE
492     apr_signal(SIGPIPE, SIG_IGN);
493     #endif /* SIGPIPE */
494    
495     #endif
496     }
497    
498     /*****************************************************************
499     * Here follows a long bunch of generic server bookkeeping stuff...
500     */
501    
502     int ap_graceful_stop_signalled(void)
503 gregames 89881 /* XXX this is really a bad confusing obsolete name
504     * maybe it should be ap_mpm_process_exiting?
505     */
506 rbb 89781 {
507 trawick 94095 /* note: for a graceful termination, listener_may_exit will be set before
508     * workers_may_exit, so check listener_may_exit
509     */
510     return listener_may_exit;
511 rbb 89781 }
512    
513     /*****************************************************************
514     * Child process main loop.
515     */
516    
517 jerenkrantz 92473 static void process_socket(apr_pool_t *p, apr_socket_t *sock, int my_child_num,
518 jwoolley 94304 int my_thread_num, apr_bucket_alloc_t *bucket_alloc)
519 rbb 89781 {
520     conn_rec *current_conn;
521 trawick 92512 long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
522 rbb 89781 int csd;
523 wrowe 92791 ap_sb_handle_t *sbh;
524 rbb 89781
525 trawick 92512 ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
526 jwoolley 91082 apr_os_sock_get(&csd, sock);
527 rbb 89781
528 jwoolley 94304 current_conn = ap_run_create_connection(p, ap_server_conf, sock,
529     conn_id, sbh, bucket_alloc);
530 rbb 89781 if (current_conn) {
531 stoddard 93087 ap_process_connection(current_conn, sock);
532 rbb 91968 ap_lingering_close(current_conn);
533 rbb 89781 }
534     }
535    
536     /* requests_this_child has gone to zero or below. See if the admin coded
537     "MaxRequestsPerChild 0", and keep going in that case. Doing it this way
538     simplifies the hot path in worker_thread */
539     static void check_infinite_requests(void)
540     {
541     if (ap_max_requests_per_child) {
542 trawick 94095 signal_threads(ST_GRACEFUL);
543 rbb 89781 }
544     else {
545     /* wow! if you're executing this code, you may have set a record.
546     * either this child process has served over 2 billion requests, or
547     * you're running a threaded 2.0 on a 16 bit machine.
548     *
549     * I'll buy pizza and beers at Apachecon for the first person to do
550     * the former without cheating (dorking with INT_MAX, or running with
551     * uncommitted performance patches, for example).
552     *
553     * for the latter case, you probably deserve a beer too. Greg Ames
554     */
555    
556     requests_this_child = INT_MAX; /* keep going */
557     }
558     }
559    
560 trawick 94232 static void unblock_signal(int sig)
561 trawick 94031 {
562 trawick 94232 sigset_t sig_mask;
563    
564     sigemptyset(&sig_mask);
565     sigaddset(&sig_mask, sig);
566     #if defined(SIGPROCMASK_SETS_THREAD_MASK)
567     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
568     #else
569     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
570     #endif
571     }
572    
/* Deliberately empty signal handler.
 *
 * XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 * then we don't need this goofy function.
 */
static void dummy_signal_handler(int sig)
{
    (void) sig; /* nothing to do; the handler only has to exist */
}
579    
580 rbb 89781 static void *listener_thread(apr_thread_t *thd, void * dummy)
581     {
582     proc_info * ti = dummy;
583     int process_slot = ti->pid;
584 rbb 90635 apr_pool_t *tpool = apr_thread_pool_get(thd);
585 rbb 91955 void *csd = NULL;
586 jerenkrantz 92473 apr_pool_t *ptrans; /* Pool for per-transaction stuff */
587 brianp 93386 apr_pool_t *recycled_pool = NULL;
588 gstein 101801 apr_pollset_t *pollset;
589 rbb 89781 apr_status_t rv;
590 gstein 101801 ap_listen_rec *lr;
591 brianp 95270 int have_idle_worker = 0;
592 gstein 101801 int last_poll_idx = 0;
593 rbb 89781
594     free(ti);
595    
596 gstein 101801 /* ### check the status */
597     (void) apr_pollset_create(&pollset, num_listensocks, tpool, 0);
598 rbb 89781
599 gstein 101801 for (lr = ap_listeners; lr != NULL; lr = lr->next) {
600     apr_pollfd_t pfd = { 0 };
601    
602     pfd.desc_type = APR_POLL_SOCKET;
603     pfd.desc.s = lr->sd;
604     pfd.reqevents = APR_POLLIN;
605     pfd.client_data = lr;
606    
607     /* ### check the status */
608     (void) apr_pollset_add(pollset, &pfd);
609     }
610    
611 trawick 94031 /* Unblock the signal used to wake this thread up, and set a handler for
612     * it.
613     */
614 trawick 94232 unblock_signal(LISTENER_SIGNAL);
615     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
616 trawick 94031
617 rbb 89781 /* TODO: Switch to a system where threads reuse the results from earlier
618     poll calls - manoj */
619     while (1) {
620 rbb 90635 /* TODO: requests_this_child should be synchronized - aaron */
621 rbb 89781 if (requests_this_child <= 0) {
622     check_infinite_requests();
623     }
624 trawick 94095 if (listener_may_exit) break;
625 rbb 89781
626 brianp 95270 if (!have_idle_worker) {
627     rv = ap_queue_info_wait_for_idler(worker_queue_info,
628     &recycled_pool);
629     if (APR_STATUS_IS_EOF(rv)) {
630     break; /* we've been signaled to die now */
631     }
632     else if (rv != APR_SUCCESS) {
633     ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
634     "apr_queue_info_wait failed. Attempting to "
635     " shutdown process gracefully.");
636     signal_threads(ST_GRACEFUL);
637     break;
638     }
639     have_idle_worker = 1;
640 aaron 94824 }
641 brianp 95270
642 aaron 94824 /* We've already decremented the idle worker count inside
643     * ap_queue_info_wait_for_idler. */
644    
645 aaron 91580 if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(accept_mutex)))
646 rbb 89781 != APR_SUCCESS) {
647 trawick 93720 int level = APLOG_EMERG;
648    
649 trawick 94095 if (listener_may_exit) {
650 trawick 94031 break;
651     }
652 trawick 93720 if (ap_scoreboard_image->parent[process_slot].generation !=
653     ap_scoreboard_image->global->running_generation) {
654     level = APLOG_DEBUG; /* common to get these at restart time */
655     }
656     ap_log_error(APLOG_MARK, level, rv, ap_server_conf,
657 aaron 91582 "apr_proc_mutex_lock failed. Attempting to shutdown "
658 rbb 89781 "process gracefully.");
659 trawick 94095 signal_threads(ST_GRACEFUL);
660 trawick 93718 break; /* skip the lock release */
661 rbb 89781 }
662    
663 brianp 93096 if (!ap_listeners->next) {
664     /* Only one listener, so skip the poll */
665     lr = ap_listeners;
666     }
667     else {
668 trawick 94095 while (!listener_may_exit) {
669 gstein 101801 apr_int32_t numdesc;
670     const apr_pollfd_t *pdesc;
671 rbb 89781
672 gstein 101801 rv = apr_pollset_poll(pollset, -1, &numdesc, &pdesc);
673     if (rv != APR_SUCCESS) {
674     if (APR_STATUS_IS_EINTR(rv)) {
675 brianp 93096 continue;
676     }
677    
678 trawick 102045 /* apr_pollset_poll() will only return errors in catastrophic
679 brianp 93096 * circumstances. Let's try exiting gracefully, for now. */
680 gstein 101801 ap_log_error(APLOG_MARK, APLOG_ERR, rv,
681     (const server_rec *) ap_server_conf,
682     "apr_pollset_poll: (listen)");
683 trawick 94095 signal_threads(ST_GRACEFUL);
684 rbb 89781 }
685    
686 trawick 94095 if (listener_may_exit) break;
687 rbb 89781
688 gstein 101801 /* We can always use pdesc[0], but sockets at position N
689     * could end up completely starved of attention in a very
690     * busy server. Therefore, we round-robin across the
691     * returned set of descriptors. While it is possible that
692     * the returned set of descriptors might flip around and
693     * continue to starve some sockets, we happen to know the
694     * internal pollset implementation retains ordering
695     * stability of the sockets. Thus, the round-robin should
696     * ensure that a socket will eventually be serviced.
697     */
698     if (last_poll_idx >= numdesc)
699     last_poll_idx = 0;
700    
701     /* Grab a listener record from the client_data of the poll
702     * descriptor, and advance our saved index to round-robin
703     * the next fetch.
704     *
705     * ### hmm... this descriptor might have POLLERR rather
706     * ### than POLLIN
707     */
708     lr = pdesc[last_poll_idx++].client_data;
709     break;
710    
711     } /* while */
712    
713     } /* if/else */
714    
715 trawick 94095 if (!listener_may_exit) {
716 rbb 91075 /* create a new transaction pool for each accepted socket */
717 brianp 93386 if (recycled_pool == NULL) {
718 striker 93943 apr_allocator_t *allocator;
719    
720     apr_allocator_create(&allocator);
721 striker 95954 apr_allocator_max_free_set(allocator, ap_max_mem_free);
722 jorton 170896 apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
723 striker 95373 apr_allocator_owner_set(allocator, ptrans);
724 brianp 93386 }
725     else {
726     ptrans = recycled_pool;
727 trawick 292949 recycled_pool = NULL;
728 brianp 93386 }
729 brianp 92482 apr_pool_tag(ptrans, "transaction");
730 rbb 91960 rv = lr->accept_func(&csd, lr, ptrans);
731 trawick 94625 /* later we trash rv and rely on csd to indicate success/failure */
732     AP_DEBUG_ASSERT(rv == APR_SUCCESS || !csd);
733 rbb 91955
734     if (rv == APR_EGENERAL) {
735 gregames 93366 /* E[NM]FILE, ENOMEM, etc */
736     resource_shortage = 1;
737 trawick 94095 signal_threads(ST_GRACEFUL);
738 rbb 89781 }
739 aaron 91580 if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
740 rbb 89781 != APR_SUCCESS) {
741 trawick 93720 int level = APLOG_EMERG;
742    
743 trawick 94095 if (listener_may_exit) {
744 trawick 94031 break;
745     }
746 trawick 93720 if (ap_scoreboard_image->parent[process_slot].generation !=
747     ap_scoreboard_image->global->running_generation) {
748     level = APLOG_DEBUG; /* common to get these at restart time */
749     }
750     ap_log_error(APLOG_MARK, level, rv, ap_server_conf,
751 trawick 93719 "apr_proc_mutex_unlock failed. Attempting to "
752 aaron 91582 "shutdown process gracefully.");
753 trawick 94095 signal_threads(ST_GRACEFUL);
754 rbb 89781 }
755     if (csd != NULL) {
756 brianp 94830 rv = ap_queue_push(worker_queue, csd, ptrans);
757 trawick 91089 if (rv) {
758     /* trash the connection; we couldn't queue the connected
759     * socket to a worker
760     */
761     apr_socket_close(csd);
762 trawick 93530 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
763     "ap_queue_push failed");
764 trawick 292949 recycled_pool = ptrans;
765 trawick 91089 }
766 brianp 95270 else {
767     have_idle_worker = 0;
768     }
769 rbb 89781 }
770 trawick 292949 else {
771     recycled_pool = ptrans;
772     }
773 rbb 89781 }
774     else {
775 aaron 91580 if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
776 rbb 89781 != APR_SUCCESS) {
777 trawick 179317 int level = APLOG_EMERG;
778    
779     if (ap_scoreboard_image->parent[process_slot].generation !=
780     ap_scoreboard_image->global->running_generation) {
781     level = APLOG_DEBUG; /* common to get these at restart time */
782     }
783     ap_log_error(APLOG_MARK, level, rv, ap_server_conf,
784 aaron 91582 "apr_proc_mutex_unlock failed. Attempting to "
785     "shutdown process gracefully.");
786 trawick 94095 signal_threads(ST_GRACEFUL);
787 rbb 89781 }
788     break;
789     }
790     }
791    
792 colm 290179 ap_close_listeners();
793 trawick 94106 ap_queue_term(worker_queue);
794 rbb 89781 dying = 1;
795 rbb 90065 ap_scoreboard_image->parent[process_slot].quiescing = 1;
796 trawick 94112
797 trawick 94232 /* wake up the main thread */
798 rbb 90065 kill(ap_my_pid, SIGTERM);
799 rbb 89781
800 rbb 90635 apr_thread_exit(thd, APR_SUCCESS);
801 rbb 89781 return NULL;
802     }
803    
804 trawick 94068 /* XXX For ungraceful termination/restart, we definitely don't want to
805     * wait for active connections to finish but we may want to wait
806     * for idle workers to get out of the queue code and release mutexes,
807     * since those mutexes are cleaned up pretty soon and some systems
808     * may not react favorably (i.e., segfault) if operations are attempted
809     * on cleaned-up mutexes.
810     */
811 wrowe 93264 static void * APR_THREAD_FUNC worker_thread(apr_thread_t *thd, void * dummy)
812 rbb 89781 {
813     proc_info * ti = dummy;
814     int process_slot = ti->pid;
815     int thread_slot = ti->tid;
816     apr_socket_t *csd = NULL;
817 jwoolley 94304 apr_bucket_alloc_t *bucket_alloc;
818 brianp 93386 apr_pool_t *last_ptrans = NULL;
819 jerenkrantz 92473 apr_pool_t *ptrans; /* Pool for per-transaction stuff */
820 rbb 90635 apr_status_t rv;
821 brianp 95270 int is_idle = 0;
822 rbb 89781
823     free(ti);
824    
825 trawick 156274 ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
826     ap_scoreboard_image->servers[process_slot][thread_slot].generation = ap_my_generation;
827 trawick 92512 ap_update_child_status_from_indexes(process_slot, thread_slot, SERVER_STARTING, NULL);
828 jwoolley 94304
829 rbb 89781 while (!workers_may_exit) {
830 brianp 95270 if (!is_idle) {
831     rv = ap_queue_info_set_idle(worker_queue_info, last_ptrans);
832     last_ptrans = NULL;
833     if (rv != APR_SUCCESS) {
834     ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
835     "ap_queue_info_set_idle failed. Attempting to "
836     "shutdown process gracefully.");
837     signal_threads(ST_GRACEFUL);
838     break;
839     }
840     is_idle = 1;
841 aaron 94824 }
842    
843 trawick 92512 ap_update_child_status_from_indexes(process_slot, thread_slot, SERVER_READY, NULL);
844 aaron 94840 worker_pop:
845     if (workers_may_exit) {
846     break;
847     }
848 brianp 94830 rv = ap_queue_pop(worker_queue, &csd, &ptrans);
849 brianp 93386
850 trawick 94067 if (rv != APR_SUCCESS) {
851 trawick 94106 /* We get APR_EOF during a graceful shutdown once all the connections
852     * accepted by this server process have been handled.
853     */
854 aaron 94840 if (APR_STATUS_IS_EOF(rv)) {
855 trawick 94106 break;
856     }
857 trawick 94067 /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
858     * from an explicit call to ap_queue_interrupt_all(). This allows
859     * us to unblock threads stuck in ap_queue_pop() when a shutdown
860     * is pending.
861 trawick 94068 *
862     * If workers_may_exit is set and this is ungraceful termination/
863     * restart, we are bound to get an error on some systems (e.g.,
864     * AIX, which sanity-checks mutex operations) since the queue
865     * may have already been cleaned up. Don't log the "error" if
866     * workers_may_exit is set.
867 trawick 94067 */
868 aaron 94840 else if (APR_STATUS_IS_EINTR(rv)) {
869     goto worker_pop;
870     }
871     /* We got some other error. */
872     else if (!workers_may_exit) {
873 trawick 94067 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
874     "ap_queue_pop failed");
875     }
876 rbb 90065 continue;
877     }
878 brianp 95270 is_idle = 0;
879 jerenkrantz 94886 worker_sockets[thread_slot] = csd;
880 jwoolley 101122 bucket_alloc = apr_bucket_alloc_create(ptrans);
881 jwoolley 94304 process_socket(ptrans, csd, process_slot, thread_slot, bucket_alloc);
882 jerenkrantz 94886 worker_sockets[thread_slot] = NULL;
883 rbb 90635 requests_this_child--; /* FIXME: should be synchronized - aaron */
884 brianp 93386 apr_pool_clear(ptrans);
885     last_ptrans = ptrans;
886 rbb 89781 }
887    
888 trawick 92512 ap_update_child_status_from_indexes(process_slot, thread_slot,
889 rbb 90771 (dying) ? SERVER_DEAD : SERVER_GRACEFUL, (request_rec *) NULL);
890 rbb 89781
891 aaron 92622 apr_thread_exit(thd, APR_SUCCESS);
892 rbb 89781 return NULL;
893     }
894    
/* Callback for apr_signal_thread(): returns 1 when signum is one of the
 * signals that should end the wait (SIGTERM or SIGINT), 0 otherwise.
 */
static int check_signal(int signum)
{
    if (signum == SIGTERM || signum == SIGINT) {
        return 1;
    }
    return 0;
}
904    
905 trawick 94420 static void create_listener_thread(thread_starter *ts)
906     {
907     int my_child_num = ts->child_num_arg;
908     apr_threadattr_t *thread_attr = ts->threadattr;
909     proc_info *my_info;
910     apr_status_t rv;
911    
912     my_info = (proc_info *)malloc(sizeof(proc_info));
913     my_info->pid = my_child_num;
914     my_info->tid = -1; /* listener thread doesn't have a thread slot */
915     my_info->sd = 0;
916     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
917 trawick 94700 my_info, pchild);
918 trawick 94420 if (rv != APR_SUCCESS) {
919     ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
920     "apr_thread_create: unable to create listener thread");
921 gregames 168182 /* let the parent decide how bad this really is */
922     clean_child_exit(APEXIT_CHILDSICK);
923 trawick 94420 }
924     apr_os_thread_get(&listener_os_thread, ts->listener);
925     }
926    
927 trawick 94059 /* XXX under some circumstances not understood, children can get stuck
928     * in start_threads forever trying to take over slots which will
929     * never be cleaned up; for now there is an APLOG_DEBUG message issued
930     * every so often when this condition occurs
931 trawick 94030 */
932 wrowe 93264 static void * APR_THREAD_FUNC start_threads(apr_thread_t *thd, void *dummy)
933 rbb 89781 {
934     thread_starter *ts = dummy;
935     apr_thread_t **threads = ts->threads;
936     apr_threadattr_t *thread_attr = ts->threadattr;
937     int child_num_arg = ts->child_num_arg;
938     int my_child_num = child_num_arg;
939 trawick 94420 proc_info *my_info;
940 rbb 89781 apr_status_t rv;
941 trawick 94420 int i;
942 rbb 89781 int threads_created = 0;
943 stoddard 94889 int listener_started = 0;
944 trawick 94059 int loops;
945     int prev_threads_created;
946 rbb 89781
947 rbb 90771 /* We must create the fd queues before we start up the listener
948 rbb 90635 * and worker threads. */
949 rbb 91075 worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
950 trawick 93530 rv = ap_queue_init(worker_queue, ap_threads_per_child, pchild);
951     if (rv != APR_SUCCESS) {
952     ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
953     "ap_queue_init() failed");
954     clean_child_exit(APEXIT_CHILDFATAL);
955     }
956 rbb 90635
957 brianp 94830 rv = ap_queue_info_create(&worker_queue_info, pchild,
958     ap_threads_per_child);
959 aaron 94824 if (rv != APR_SUCCESS) {
960     ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
961     "ap_queue_info_create() failed");
962     clean_child_exit(APEXIT_CHILDFATAL);
963     }
964    
965 jerenkrantz 94886 worker_sockets = apr_pcalloc(pchild, ap_threads_per_child
966     * sizeof(apr_socket_t *));
967    
968 trawick 94059 loops = prev_threads_created = 0;
969 rbb 89781 while (1) {
970 dougm 90673 /* ap_threads_per_child does not include the listener thread */
971 dougm 90670 for (i = 0; i < ap_threads_per_child; i++) {
972 rbb 89781 int status = ap_scoreboard_image->servers[child_num_arg][i].status;
973    
974     if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
975     continue;
976     }
977    
978 aaron 92632 my_info = (proc_info *)malloc(sizeof(proc_info));
979 rbb 89781 if (my_info == NULL) {
980     ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
981 jerenkrantz 92473 "malloc: out of memory");
982 rbb 89781 clean_child_exit(APEXIT_CHILDFATAL);
983     }
984 jerenkrantz 92473 my_info->pid = my_child_num;
985 rbb 89781 my_info->tid = i;
986 jerenkrantz 92473 my_info->sd = 0;
987    
988 aaron 92632 /* We are creating threads right now */
989     ap_update_child_status_from_indexes(my_child_num, i,
990     SERVER_STARTING, NULL);
991 rbb 89781 /* We let each thread update its own scoreboard entry. This is
992     * done because it lets us deal with tid better.
993 jerenkrantz 92473 */
994 aaron 92632 rv = apr_thread_create(&threads[i], thread_attr,
995 trawick 94700 worker_thread, my_info, pchild);
996 aaron 92632 if (rv != APR_SUCCESS) {
997 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
998 aaron 92632 "apr_thread_create: unable to create worker thread");
999 gregames 168182 /* let the parent decide how bad this really is */
1000     clean_child_exit(APEXIT_CHILDSICK);
1001 jerenkrantz 92473 }
1002 rbb 89781 threads_created++;
1003     }
1004 stoddard 94889 /* Start the listener only when there are workers available */
1005     if (!listener_started && threads_created) {
1006     create_listener_thread(ts);
1007     listener_started = 1;
1008     }
1009 trawick 94095 if (start_thread_may_exit || threads_created == ap_threads_per_child) {
1010 rbb 89781 break;
1011     }
1012 wrowe 93264 /* wait for previous generation to clean up an entry */
1013 brianp 95959 apr_sleep(apr_time_from_sec(1));
1014 trawick 94059 ++loops;
1015     if (loops % 120 == 0) { /* every couple of minutes */
1016     if (prev_threads_created == threads_created) {
1017     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1018     "child %" APR_PID_T_FMT " isn't taking over "
1019     "slots very quickly (%d of %d)",
1020     ap_my_pid, threads_created, ap_threads_per_child);
1021     }
1022     prev_threads_created = threads_created;
1023     }
1024 rbb 89781 }
1025    
1026 jerenkrantz 92473 /* What state should this child_main process be listed as in the
1027     * scoreboard...?
1028 trawick 92512 * ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
1029     * (request_rec *) NULL);
1030 rbb 89781 *
1031     * This state should be listed separately in the scoreboard, in some kind
1032     * of process_status, not mixed in with the worker threads' status.
1033     * "life_status" is almost right, but it's in the worker's structure, and
1034     * the name could be clearer. gla
1035     */
1036 aaron 92622 apr_thread_exit(thd, APR_SUCCESS);
1037 rbb 89781 return NULL;
1038     }
1039    
1040 trawick 94031 static void join_workers(apr_thread_t *listener, apr_thread_t **threads)
1041 trawick 93561 {
1042     int i;
1043     apr_status_t rv, thread_rv;
1044    
1045 trawick 94031 if (listener) {
1046     int iter;
1047    
1048     /* deal with a rare timing window which affects waking up the
1049     * listener thread... if the signal sent to the listener thread
1050     * is delivered between the time it verifies that the
1051 trawick 94095 * listener_may_exit flag is clear and the time it enters a
1052 trawick 94031 * blocking syscall, the signal didn't do any good... work around
1053     * that by sleeping briefly and sending it again
1054     */
1055    
1056     iter = 0;
1057 trawick 94417 while (iter < 10 &&
1058     #ifdef HAVE_PTHREAD_KILL
1059     pthread_kill(*listener_os_thread, 0)
1060     #else
1061     kill(ap_my_pid, 0)
1062     #endif
1063     == 0) {
1064 trawick 94031 /* listener not dead yet */
1065 brianp 95959 apr_sleep(apr_time_make(0, 500000));
1066 trawick 94031 wakeup_listener();
1067     ++iter;
1068     }
1069     if (iter >= 10) {
1070 trawick 101165 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1071 trawick 94031 "the listener thread didn't exit");
1072     }
1073     else {
1074     rv = apr_thread_join(&thread_rv, listener);
1075     if (rv != APR_SUCCESS) {
1076     ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1077     "apr_thread_join: unable to join listener thread");
1078     }
1079     }
1080     }
1081    
1082 trawick 93561 for (i = 0; i < ap_threads_per_child; i++) {
1083     if (threads[i]) { /* if we ever created this thread */
1084     rv = apr_thread_join(&thread_rv, threads[i]);
1085     if (rv != APR_SUCCESS) {
1086     ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1087     "apr_thread_join: unable to join worker "
1088     "thread %d",
1089     i);
1090     }
1091     }
1092     }
1093     }
1094    
1095     static void join_start_thread(apr_thread_t *start_thread_id)
1096     {
1097     apr_status_t rv, thread_rv;
1098    
1099 trawick 94095 start_thread_may_exit = 1; /* tell it to give up in case it is still
1100     * trying to take over slots from a
1101     * previous generation
1102     */
1103 trawick 93561 rv = apr_thread_join(&thread_rv, start_thread_id);
1104     if (rv != APR_SUCCESS) {
1105     ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1106     "apr_thread_join: unable to join the start "
1107     "thread");
1108     }
1109     }
1110    
1111 rbb 89781 static void child_main(int child_num_arg)
1112     {
1113     apr_thread_t **threads;
1114     apr_status_t rv;
1115     thread_starter *ts;
1116     apr_threadattr_t *thread_attr;
1117     apr_thread_t *start_thread_id;
1118    
1119 trawick 102045 mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this
1120     * child initializes
1121     */
1122 rbb 89781 ap_my_pid = getpid();
1123 trawick 99312 ap_fatal_signal_child_setup(ap_server_conf);
1124 rbb 89781 apr_pool_create(&pchild, pconf);
1125    
1126     /*stuff to do before we switch id's, so we have permissions.*/
1127 rbb 93119 ap_reopen_scoreboard(pchild, NULL, 0);
1128 rbb 89781
1129 trawick 92412 rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex, ap_lock_fname,
1130 aaron 91580 pchild));
1131 rbb 89781 if (rv != APR_SUCCESS) {
1132     ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1133     "Couldn't initialize cross-process lock in child");
1134     clean_child_exit(APEXIT_CHILDFATAL);
1135     }
1136    
1137     if (unixd_setup_child()) {
1138 jerenkrantz 92473 clean_child_exit(APEXIT_CHILDFATAL);
1139 rbb 89781 }
1140    
1141     ap_run_child_init(pchild, ap_server_conf);
1142    
1143 rbb 90771 /* done with init critical section */
1144 rbb 89781
1145 rbb 93358 /* Just use the standard apr_setup_signal_thread to block all signals
1146     * from being received. The child processes no longer use signals for
1147     * any communication with the parent process.
1148     */
1149 rbb 89781 rv = apr_setup_signal_thread();
1150     if (rv != APR_SUCCESS) {
1151     ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1152     "Couldn't initialize signal thread");
1153     clean_child_exit(APEXIT_CHILDFATAL);
1154     }
1155    
1156     if (ap_max_requests_per_child) {
1157     requests_this_child = ap_max_requests_per_child;
1158     }
1159     else {
1160     /* coding a value of zero means infinity */
1161     requests_this_child = INT_MAX;
1162     }
1163    
1164     /* Setup worker threads */
1165    
1166 jerenkrantz 92473 /* clear the storage; we may not create all our threads immediately,
1167     * and we want a 0 entry to indicate a thread which was not created
1168 rbb 89781 */
1169 jerenkrantz 92473 threads = (apr_thread_t **)calloc(1,
1170     sizeof(apr_thread_t *) * ap_threads_per_child);
1171 rbb 89781 if (threads == NULL) {
1172     ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1173     "malloc: out of memory");
1174     clean_child_exit(APEXIT_CHILDFATAL);
1175     }
1176    
1177 rbb 90635 ts = (thread_starter *)apr_palloc(pchild, sizeof(*ts));
1178 rbb 89781
1179     apr_threadattr_create(&thread_attr, pchild);
1180 jerenkrantz 92473 /* 0 means PTHREAD_CREATE_JOINABLE */
1181     apr_threadattr_detach_set(thread_attr, 0);
1182 rbb 89781
1183 trawick 102975 if (ap_thread_stacksize != 0) {
1184     apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
1185 trawick 102931 }
1186    
1187 rbb 89781 ts->threads = threads;
1188 trawick 94031 ts->listener = NULL;
1189 rbb 89781 ts->child_num_arg = child_num_arg;
1190     ts->threadattr = thread_attr;
1191    
1192 aaron 92632 rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
1193 trawick 94700 ts, pchild);
1194 aaron 92632 if (rv != APR_SUCCESS) {
1195 rbb 89781 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1196     "apr_thread_create: unable to create worker thread");
1197 gregames 168649 /* let the parent decide how bad this really is */
1198     clean_child_exit(APEXIT_CHILDSICK);
1199 rbb 89781 }
1200    
1201 trawick 102045 mpm_state = AP_MPMQ_RUNNING;
1202    
1203 aaron 93403 /* If we are only running in one_process mode, we will want to
1204     * still handle signals. */
1205     if (one_process) {
1206 trawick 94091 /* Block until we get a terminating signal. */
1207 aaron 93403 apr_signal_thread(check_signal);
1208 trawick 94095 /* make sure the start thread has finished; signal_threads()
1209 trawick 93561 * and join_workers() depend on that
1210     */
1211 trawick 94094 /* XXX join_start_thread() won't be awakened if one of our
1212     * threads encounters a critical error and attempts to
1213     * shutdown this child
1214     */
1215 trawick 93561 join_start_thread(start_thread_id);
1216 trawick 94095 signal_threads(ST_UNGRACEFUL); /* helps us terminate a little more
1217     * quickly than the dispatch of the signal thread
1218 aaron 93403 * beats the Pipe of Death and the browsers
1219     */
1220     /* A terminating signal was received. Now join each of the
1221     * workers to clean them up.
1222     * If the worker already exited, then the join frees
1223     * their resources and returns.
1224     * If the worker hasn't exited, then this blocks until
1225     * they have (then cleans up).
1226 rbb 93358 */
1227 trawick 94031 join_workers(ts->listener, threads);
1228 rbb 89781 }
1229 aaron 93403 else { /* !one_process */
1230 trawick 94232 /* remove SIGTERM from the set of blocked signals... if one of
1231     * the other threads in the process needs to take us down
1232     * (e.g., for MaxRequestsPerChild) it will send us SIGTERM
1233     */
1234     unblock_signal(SIGTERM);
1235     apr_signal(SIGTERM, dummy_signal_handler);
1236 aaron 93403 /* Watch for any messages from the parent over the POD */
1237     while (1) {
1238     rv = ap_mpm_pod_check(pod);
1239 trawick 94232 if (rv == AP_NORESTART) {
1240     /* see if termination was triggered while we slept */
1241     switch(terminate_mode) {
1242     case ST_GRACEFUL:
1243     rv = AP_GRACEFUL;
1244     break;
1245     case ST_UNGRACEFUL:
1246     rv = AP_RESTART;
1247     break;
1248     }
1249     }
1250 aaron 93403 if (rv == AP_GRACEFUL || rv == AP_RESTART) {
1251 trawick 93561 /* make sure the start thread has finished;
1252 trawick 94095 * signal_threads() and join_workers depend on that
1253 trawick 93561 */
1254     join_start_thread(start_thread_id);
1255 trawick 94095 signal_threads(rv == AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
1256 aaron 93403 break;
1257     }
1258     }
1259 rbb 89781
1260 trawick 94892 /* A terminating signal was received. Now join each of the
1261     * workers to clean them up.
1262     * If the worker already exited, then the join frees
1263     * their resources and returns.
1264     * If the worker hasn't exited, then this blocks until
1265     * they have (then cleans up).
1266     */
1267     join_workers(ts->listener, threads);
1268 aaron 93403 }
1269    
1270 rbb 89781 free(threads);
1271    
1272 gregames 93366 clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
1273 rbb 89781 }
1274    
1275     static int make_child(server_rec *s, int slot)
1276     {
1277     int pid;
1278    
1279     if (slot + 1 > ap_max_daemons_limit) {
1280 jerenkrantz 92473 ap_max_daemons_limit = slot + 1;
1281 rbb 89781 }
1282    
1283     if (one_process) {
1284 jerenkrantz 92473 set_signals();
1285 rbb 89781 ap_scoreboard_image->parent[slot].pid = getpid();
1286 jerenkrantz 92473 child_main(slot);
1287 rbb 89781 }
1288    
1289     if ((pid = fork()) == -1) {
1290 jerenkrantz 92473 ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
1291     "fork: Unable to fork new process");
1292 rbb 89781
1293     /* fork didn't succeed. Fix the scoreboard or else
1294     * it will say SERVER_STARTING forever and ever
1295     */
1296 trawick 92512 ap_update_child_status_from_indexes(slot, 0, SERVER_DEAD, NULL);
1297 rbb 89781
1298 jerenkrantz 92473 /* In case system resources are maxxed out, we don't want
1299     Apache running away with the CPU trying to fork over and
1300     over and over again. */
1301 brianp 95959 apr_sleep(apr_time_from_sec(10));
1302 rbb 89781
1303 jerenkrantz 92473 return -1;
1304 rbb 89781 }
1305    
1306     if (!pid) {
1307     #ifdef HAVE_BINDPROCESSOR
1308     /* By default, AIX binds to a single processor. This bit unbinds
1309 jerenkrantz 92473 * children which will then bind to another CPU.
1310 rbb 89781 */
1311     int status = bindprocessor(BINDPROCESS, (int)getpid(),
1312 jerenkrantz 92473 PROCESSOR_CLASS_ANY);
1313     if (status != OK)
1314 trawick 95149 ap_log_error(APLOG_MARK, APLOG_WARNING, errno,
1315 jerenkrantz 92473 ap_server_conf,
1316     "processor unbind failed %d", status);
1317 rbb 89781 #endif
1318     RAISE_SIGSTOP(MAKE_CHILD);
1319    
1320     apr_signal(SIGTERM, just_die);
1321     child_main(slot);
1322    
1323     clean_child_exit(0);
1324     }
1325     /* else */
1326 trawick 109510 if (ap_scoreboard_image->parent[slot].pid != 0) {
1327     /* This new child process is squatting on the scoreboard
1328     * entry owned by an exiting child process, which cannot
1329     * exit until all active requests complete.
1330     * Don't forget about this exiting child process, or we
1331     * won't be able to kill it if it doesn't exit by the
1332     * time the server is shut down.
1333     */
1334     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1335     "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
1336     ap_scoreboard_image->parent[slot].pid,
1337     ap_scoreboard_image->parent[slot].quiescing ?
1338     " (quiescing)" : "");
1339     ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
1340     }
1341 rbb 89781 ap_scoreboard_image->parent[slot].quiescing = 0;
1342     ap_scoreboard_image->parent[slot].pid = pid;
1343     return 0;
1344     }
1345    
1346     /* start up a bunch of children */
1347     static void startup_children(int number_to_start)
1348     {
1349     int i;
1350    
1351     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
1352 jerenkrantz 92473 if (ap_scoreboard_image->parent[i].pid != 0) {
1353     continue;
1354     }
1355     if (make_child(ap_server_conf, i) < 0) {
1356     break;
1357     }
1358     --number_to_start;
1359 rbb 89781 }
1360     }
1361    
1362    
1363     /*
1364     * idle_spawn_rate is the number of children that will be spawned on the
1365     * next maintenance cycle if there aren't enough idle servers. It is
1366     * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
1367     * without the need to spawn.
1368     */
1369     static int idle_spawn_rate = 1;
1370     #ifndef MAX_SPAWN_RATE
1371 jerenkrantz 92473 #define MAX_SPAWN_RATE (32)
1372 rbb 89781 #endif
1373     static int hold_off_on_exponential_spawning;
1374    
1375     static void perform_idle_server_maintenance(void)
1376     {
1377     int i, j;
1378     int idle_thread_count;
1379     worker_score *ws;
1380     process_score *ps;
1381     int free_length;
1382 gregames 89928 int totally_free_length = 0;
1383 rbb 89781 int free_slots[MAX_SPAWN_RATE];
1384     int last_non_dead;
1385     int total_non_dead;
1386 gregames 168182 int active_thread_count = 0;
1387 rbb 89781
1388     /* initialize the free_list */
1389     free_length = 0;
1390    
1391     idle_thread_count = 0;
1392     last_non_dead = -1;
1393     total_non_dead = 0;
1394    
1395     for (i = 0; i < ap_daemons_limit; ++i) {
1396 jerenkrantz 92473 /* Initialization to satisfy the compiler. It doesn't know
1397     * that ap_threads_per_child is always > 0 */
1398     int status = SERVER_DEAD;
1399     int any_dying_threads = 0;
1400     int any_dead_threads = 0;
1401     int all_dead_threads = 1;
1402 rbb 89781
1403 jerenkrantz 92473 if (i >= ap_max_daemons_limit && totally_free_length == idle_spawn_rate)
1404     break;
1405 rbb 89781 ps = &ap_scoreboard_image->parent[i];
1406 jerenkrantz 92473 for (j = 0; j < ap_threads_per_child; j++) {
1407 rbb 89781 ws = &ap_scoreboard_image->servers[i][j];
1408 jerenkrantz 92473 status = ws->status;
1409 rbb 89781
1410 gregames 89928 /* XXX any_dying_threads is probably no longer needed GLA */
1411 jerenkrantz 92473 any_dying_threads = any_dying_threads ||
1412     (status == SERVER_GRACEFUL);
1413     any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
1414     all_dead_threads = all_dead_threads &&
1415 gregames 89928 (status == SERVER_DEAD ||
1416     status == SERVER_GRACEFUL);
1417 rbb 89781
1418 jerenkrantz 92473 /* We consider a starting server as idle because we started it
1419     * at least a cycle ago, and if it still hasn't finished starting
1420     * then we're just going to swamp things worse by forking more.
1421     * So we hopefully won't need to fork more if we count it.
1422     * This depends on the ordering of SERVER_READY and SERVER_STARTING.
1423     */
1424 gregames 168182 if (ps->pid != 0) { /* XXX just set all_dead_threads in outer for
1425     loop if no pid? not much else matters */
1426     if (status <= SERVER_READY && status != SERVER_DEAD &&
1427     !ps->quiescing &&
1428     ps->generation == ap_my_generation) {
1429     ++idle_thread_count;
1430     }
1431     if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
1432     ++active_thread_count;
1433     }
1434 jerenkrantz 92473 }
1435     }
1436 trawick 102425 if (any_dead_threads && totally_free_length < idle_spawn_rate
1437     && free_length < MAX_SPAWN_RATE
1438 rbb 89781 && (!ps->pid /* no process in the slot */
1439     || ps->quiescing)) { /* or at least one is going away */
1440 gregames 89928 if (all_dead_threads) {
1441     /* great! we prefer these, because the new process can
1442     * start more threads sooner. So prioritize this slot
1443     * by putting it ahead of any slots with active threads.
1444     *
1445     * first, make room by moving a slot that's potentially still
1446     * in use to the end of the array
1447     */
1448     free_slots[free_length] = free_slots[totally_free_length];
1449     free_slots[totally_free_length++] = i;
1450     }
1451     else {
1452     /* slot is still in use - back of the bus
1453     */
1454 gregames 160211 free_slots[free_length] = i;
1455 gregames 89928 }
1456 jerenkrantz 92473 ++free_length;
1457     }
1458 gregames 89928 /* XXX if (!ps->quiescing) is probably more reliable GLA */
1459 jerenkrantz 92473 if (!any_dying_threads) {
1460 rbb 89781 last_non_dead = i;
1461     ++total_non_dead;
1462     }
1463     }
1464 gregames 168182
1465     if (sick_child_detected) {
1466     if (active_thread_count > 0) {
1467     /* some child processes appear to be working. don't kill the
1468     * whole server.
1469     */
1470     sick_child_detected = 0;
1471     }
1472     else {
1473     /* looks like a basket case. give up.
1474     */
1475     shutdown_pending = 1;
1476     child_fatal = 1;
1477     ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
1478     ap_server_conf,
1479     "No active workers found..."
1480     " Apache is exiting!");
1481     /* the child already logged the failure details */
1482     return;
1483     }
1484     }
1485    
1486 rbb 89781 ap_max_daemons_limit = last_non_dead + 1;
1487    
1488     if (idle_thread_count > max_spare_threads) {
1489     /* Kill off one child */
1490 rbb 93358 ap_mpm_pod_signal(pod, TRUE);
1491 rbb 89781 idle_spawn_rate = 1;
1492     }
1493     else if (idle_thread_count < min_spare_threads) {
1494     /* terminate the free list */
1495     if (free_length == 0) {
1496 jerenkrantz 92473 /* only report this condition once */
1497     static int reported = 0;
1498    
1499     if (!reported) {
1500 trawick 95149 ap_log_error(APLOG_MARK, APLOG_ERR, 0,
1501 jerenkrantz 92473 ap_server_conf,
1502     "server reached MaxClients setting, consider"
1503     " raising the MaxClients setting");
1504     reported = 1;
1505     }
1506     idle_spawn_rate = 1;
1507     }
1508     else {
1509 gregames 89928 if (free_length > idle_spawn_rate) {
1510     free_length = idle_spawn_rate;
1511     }
1512 jerenkrantz 92473 if (idle_spawn_rate >= 8) {
1513 trawick 95149 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
1514 jerenkrantz 92473 ap_server_conf,
1515     "server seems busy, (you may need "
1516     "to increase StartServers, ThreadsPerChild "
1517 rbb 89781 "or Min/MaxSpareThreads), "
1518 jerenkrantz 92473 "spawning %d children, there are around %d idle "
1519 rbb 89781 "threads, and %d total children", free_length,
1520 jerenkrantz 92473 idle_thread_count, total_non_dead);
1521     }
1522     for (i = 0; i < free_length; ++i) {
1523     make_child(ap_server_conf, free_slots[i]);
1524     }
1525     /* the next time around we want to spawn twice as many if this
1526     * wasn't good enough, but not if we've just done a graceful
1527     */
1528     if (hold_off_on_exponential_spawning) {
1529     --hold_off_on_exponential_spawning;
1530     }
1531     else if (idle_spawn_rate < MAX_SPAWN_RATE) {
1532     idle_spawn_rate *= 2;
1533     }
1534     }
1535 rbb 89781 }
1536     else {
1537     idle_spawn_rate = 1;
1538     }
1539     }
1540    
1541     static void server_main_loop(int remaining_children_to_start)
1542     {
1543     int child_slot;
1544 rbb 91648 apr_exit_why_e exitwhy;
1545 gregames 93366 int status, processed_status;
1546 rbb 89781 apr_proc_t pid;
1547     int i;
1548    
1549     while (!restart_pending && !shutdown_pending) {
1550 rbb 91648 ap_wait_or_timeout(&exitwhy, &status, &pid, pconf);
1551 rbb 89781
1552     if (pid.pid != -1) {
1553 gregames 93366 processed_status = ap_process_child_status(&pid, exitwhy, status);
1554     if (processed_status == APEXIT_CHILDFATAL) {
1555 trawick 92019 shutdown_pending = 1;
1556     child_fatal = 1;
1557     return;
1558     }
1559 gregames 168182 else if (processed_status == APEXIT_CHILDSICK) {
1560     /* tell perform_idle_server_maintenance to check into this
1561     * on the next timer pop
1562     */
1563     sick_child_detected = 1;
1564     }
1565 rbb 89781 /* non-fatal death... note that it's gone in the scoreboard. */
1566     child_slot = find_child_by_pid(&pid);
1567     if (child_slot >= 0) {
1568     for (i = 0; i < ap_threads_per_child; i++)
1569 trawick 92512 ap_update_child_status_from_indexes(child_slot, i, SERVER_DEAD,
1570     (request_rec *) NULL);
1571 rbb 89781
1572     ap_scoreboard_image->parent[child_slot].pid = 0;
1573     ap_scoreboard_image->parent[child_slot].quiescing = 0;
1574 gregames 93366 if (processed_status == APEXIT_CHILDSICK) {
1575     /* resource shortage, minimize the fork rate */
1576     idle_spawn_rate = 1;
1577     }
1578     else if (remaining_children_to_start
1579 jerenkrantz 92473 && child_slot < ap_daemons_limit) {
1580     /* we're still doing a 1-for-1 replacement of dead
1581 rbb 89781 * children with new children
1582     */
1583 jerenkrantz 92473 make_child(ap_server_conf, child_slot);
1584     --remaining_children_to_start;
1585     }
1586 trawick 109510 }
1587     else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
1588     /* handled */
1589 rbb 89781 #if APR_HAS_OTHER_CHILD
1590 jerenkrantz 92473 }
1591 brianp 101858 else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
1592     status) == 0) {
1593 jerenkrantz 92473 /* handled */
1594 rbb 89781 #endif
1595 jerenkrantz 92473 }
1596     else if (is_graceful) {
1597     /* Great, we've probably just lost a slot in the
1598     * scoreboard. Somehow we don't know about this child.
1599     */
1600 trawick 95149 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
1601 jerenkrantz 92473 ap_server_conf,
1602     "long lost child came home! (pid %ld)",
1603     (long)pid.pid);
1604     }
1605     /* Don't perform idle maintenance when a child dies,
1606 rbb 89781 * only do it when there's a timeout. Remember only a
1607     * finite number of children can die, and it's pretty
1608     * pathological for a lot to die suddenly.
1609     */
1610 jerenkrantz 92473 continue;
1611     }
1612     else if (remaining_children_to_start) {
1613     /* we hit a 1 second timeout in which none of the previous
1614     * generation of children needed to be reaped... so assume
1615     * they're all done, and pick up the slack if any is left.
1616     */
1617     startup_children(remaining_children_to_start);
1618     remaining_children_to_start = 0;
1619     /* In any event we really shouldn't do the code below because
1620     * few of the servers we just started are in the IDLE state
1621     * yet, so we'd mistakenly create an extra server.
1622     */
1623     continue;
1624     }
1625 rbb 89781
1626 jerenkrantz 92473 perform_idle_server_maintenance();
1627 rbb 89781 }
1628     }
1629    
1630 rbb 91955 int ap_mpm_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
1631     {
1632     int remaining_children_to_start;
1633     apr_status_t rv;
1634    
1635 rbb 93227 ap_log_pid(pconf, ap_pid_fname);
1636    
1637 trawick 92530 first_server_limit = server_limit;
1638     first_thread_limit = thread_limit;
1639     if (changed_limit_at_restart) {
1640 trawick 95149 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
1641 trawick 92530 "WARNING: Attempt to change ServerLimit or ThreadLimit "
1642     "ignored during restart");
1643     changed_limit_at_restart = 0;
1644     }
1645    
1646 rbb 89781 /* Initialize cross-process accept lock */
1647 wrowe 93969 ap_lock_fname = apr_psprintf(_pconf, "%s.%" APR_PID_T_FMT,
1648 trawick 92412 ap_server_root_relative(_pconf, ap_lock_fname),
1649     ap_my_pid);
1650 dreid 92596
1651 trawick 92653 rv = apr_proc_mutex_create(&accept_mutex, ap_lock_fname,
1652     ap_accept_lock_mech, _pconf);
1653 rbb 89781 if (rv != APR_SUCCESS) {
1654     ap_log_error(APLOG_MARK, APLOG_EMERG, rv, s,
1655     "Couldn't create accept lock");
1656 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
1657 rbb 89781 return 1;
1658     }
1659    
1660 trawick 90213 #if APR_USE_SYSVSEM_SERIALIZE
1661     if (ap_accept_lock_mech == APR_LOCK_DEFAULT ||
1662     ap_accept_lock_mech == APR_LOCK_SYSVSEM) {
1663     #else
1664     if (ap_accept_lock_mech == APR_LOCK_SYSVSEM) {
1665     #endif
1666 aaron 91580 rv = unixd_set_proc_mutex_perms(accept_mutex);
1667 trawick 90213 if (rv != APR_SUCCESS) {
1668     ap_log_error(APLOG_MARK, APLOG_EMERG, rv, s,
1669 trawick 94541 "Couldn't set permissions on cross-process lock; "
1670     "check User and Group directives");
1671 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
1672 trawick 90213 return 1;
1673     }
1674     }
1675    
1676 rbb 89781 if (!is_graceful) {
1677 wrowe 94039 if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
1678 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
1679 trawick 93055 return 1;
1680     }
1681 trawick 93998 /* fix the generation number in the global score; we just got a new,
1682     * cleared scoreboard
1683     */
1684     ap_scoreboard_image->global->running_generation = ap_my_generation;
1685 rbb 89781 }
1686    
1687     set_signals();
1688     /* Don't thrash... */
1689     if (max_spare_threads < min_spare_threads + ap_threads_per_child)
1690 jerenkrantz 92473 max_spare_threads = min_spare_threads + ap_threads_per_child;
1691 rbb 89781
1692     /* If we're doing a graceful_restart then we're going to see a lot
1693     * of children exiting immediately when we get into the main loop
1694 jerenkrantz 91076 * below (because we just sent them AP_SIG_GRACEFUL). This happens pretty
1695 rbb 89781 * rapidly... and for each one that exits we'll start a new one until
1696     * we reach at least daemons_min_free. But we may be permitted to
1697     * start more than that, so we'll just keep track of how many we're
1698     * supposed to start up without the 1 second penalty between each fork.
1699     */
1700     remaining_children_to_start = ap_daemons_to_start;
1701     if (remaining_children_to_start > ap_daemons_limit) {
1702 jerenkrantz 92473 remaining_children_to_start = ap_daemons_limit;
1703 rbb 89781 }
1704     if (!is_graceful) {
1705 jerenkrantz 92473 startup_children(remaining_children_to_start);
1706     remaining_children_to_start = 0;
1707 rbb 89781 }
1708     else {
1709 jerenkrantz 92473 /* give the system some time to recover before kicking into
1710     * exponential mode */
1711     hold_off_on_exponential_spawning = 10;
1712 rbb 89781 }
1713    
1714 trawick 95149 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
1715 jerenkrantz 92473 "%s configured -- resuming normal operations",
1716     ap_get_server_version());
1717 trawick 95149 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
1718 jerenkrantz 92473 "Server built: %s", ap_get_server_built());
1719 jim 94055 #ifdef AP_MPM_WANT_SET_ACCEPT_LOCK_MECH
1720 trawick 95149 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1721 jim 95568 "AcceptMutex: %s (default: %s)",
1722     apr_proc_mutex_name(accept_mutex),
1723     apr_proc_mutex_defname());
1724 jim 94055 #endif
1725 rbb 89781 restart_pending = shutdown_pending = 0;
1726 trawick 102045 mpm_state = AP_MPMQ_RUNNING;
1727    
1728 rbb 89781 server_main_loop(remaining_children_to_start);
1729 trawick 102045 mpm_state = AP_MPMQ_STOPPING;
1730 rbb 89781
1731 colm 290189 if (shutdown_pending && !is_graceful) {
1732     /* Time to shut down:
1733 rbb 89781 * Kill child processes, tell them to call child_exit, etc...
1734     */
1735 trawick 94024 ap_mpm_pod_killpg(pod, ap_daemons_limit, FALSE);
1736 jerenkrantz 92473 ap_reclaim_child_processes(1); /* Start with SIGTERM */
1737 trawick 92019
1738     if (!child_fatal) {
1739     /* cleanup pid file on normal shutdown */
1740 rbb 89781 const char *pidfile = NULL;
1741     pidfile = ap_server_root_relative (pconf, ap_pid_fname);
1742     if ( pidfile != NULL && unlink(pidfile) == 0)
1743 trawick 95149 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
1744 jerenkrantz 92473 ap_server_conf,
1745 colm 290189 "removed PID file %s (pid=%" APR_PID_T_FMT ")",
1746     pidfile, getpid());
1747 trawick 92019
1748 trawick 95149 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
1749 trawick 92019 ap_server_conf, "caught SIGTERM, shutting down");
1750 rbb 89781 }
1751 jerenkrantz 92473 return 1;
1752 colm 290189 } else if (shutdown_pending) {
1753     /* Time to gracefully shut down:
1754     * Kill child processes, tell them to call child_exit, etc...
1755     */
1756     int active_children;
1757     int index;
1758     apr_time_t cutoff = 0;
1759    
1760     /* Close our listeners, and then ask our children to do same */
1761     ap_close_listeners();
1762     ap_mpm_pod_killpg(pod, ap_daemons_limit, TRUE);
1763     ap_relieve_child_processes();
1764    
1765     if (!child_fatal) {
1766     /* cleanup pid file on normal shutdown */
1767     const char *pidfile = NULL;
1768     pidfile = ap_server_root_relative (pconf, ap_pid_fname);
1769     if ( pidfile != NULL && unlink(pidfile) == 0)
1770     ap_log_error(APLOG_MARK, APLOG_INFO, 0,
1771     ap_server_conf,
1772     "removed PID file %s (pid=%" APR_PID_T_FMT ")",
1773     pidfile, getpid());
1774    
1775     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
1776     ap_server_conf, "caught SIGTERM, shutting down");
1777     }
1778    
1779     /* Don't really exit until each child has finished */
1780     shutdown_pending = 0;
1781     do {
1782     /* Pause for a second */
1783     apr_sleep(apr_time_from_sec(1));
1784    
1785     /* Relieve any children which have now exited */
1786     ap_relieve_child_processes();
1787    
1788     active_children = 0;
1789     for (index = 0; index < ap_daemons_limit; ++index) {
1790     if (MPM_CHILD_PID(index) != 0) {
1791     if (kill(MPM_CHILD_PID(index), 0) == 0) {
1792     active_children = 1;
1793     /* Having just one child is enough to stay around */
1794     break;
1795     }
1796     }
1797     }
1798     } while (!shutdown_pending && active_children &&
1799     (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
1800    
1801     /* We might be here because we received SIGTERM, either
1802     * way, try and make sure that all of our processes are
1803     * really dead.
1804     */
1805     ap_mpm_pod_killpg(pod, ap_daemons_limit, FALSE);
1806     ap_reclaim_child_processes(1);
1807    
1808     return 1;
1809 rbb 89781 }
1810    
1811     /* we've been told to restart */
1812     apr_signal(SIGHUP, SIG_IGN);
1813    
1814     if (one_process) {
1815 jerenkrantz 92473 /* not worth thinking about */
1816     return 1;
1817 rbb 89781 }
1818    
1819     /* advance to the next generation */
1820     /* XXX: we really need to make sure this new generation number isn't in
1821     * use by any of the children.
1822     */
1823     ++ap_my_generation;
1824 wrowe 92791 ap_scoreboard_image->global->running_generation = ap_my_generation;
1825 rbb 89781