Coverage Report

Created: 2025-07-11 06:40

/src/httpd/server/mpm/event/event.c
Line
Count
Source
1
/* Licensed to the Apache Software Foundation (ASF) under one or more
2
 * contributor license agreements.  See the NOTICE file distributed with
3
 * this work for additional information regarding copyright ownership.
4
 * The ASF licenses this file to You under the Apache License, Version 2.0
5
 * (the "License"); you may not use this file except in compliance with
6
 * the License.  You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
/**
18
 * This MPM tries to fix the 'keep alive problem' in HTTP.
19
 *
20
 * After a client completes the first request, the client can keep the
21
 * connection open to send more requests with the same socket.  This can save
22
 * significant overhead in creating TCP connections.  However, the major
23
 * disadvantage is that Apache traditionally keeps an entire child
24
 * process/thread waiting for data from the client.  To solve this problem,
25
 * this MPM has a dedicated thread for handling both the listening sockets
26
 * and all sockets that are in a keep-alive state.
27
 *
28
 * The MPM assumes the underlying apr_pollset implementation is somewhat
29
 * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
30
 * enables the MPM to avoid extra high level locking or having to wake up the
31
 * listener thread when a keep-alive socket needs to be sent to it.
32
 *
33
 * This MPM does not perform well on older platforms that do not have very good
34
 * threading, like Linux with a 2.4 kernel, but this does not matter, since we
35
 * require EPoll or KQueue.
36
 *
37
 * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
38
 * you use libkse (see `man libmap.conf`).
39
 *
40
 * For NetBSD, use at least 2.0.
41
 *
42
 * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
43
 * support compiled in.
44
 *
45
 */
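The pollset requirements described above can be made concrete with a short sketch (an editorial illustration, not part of event.c): a thread-safe, wakeable pollset is requested from APR with an explicit epoll or kqueue backend. The helper name create_threadsafe_pollset and the fallback order are assumptions.

#include "apr_poll.h"

/* Hypothetical helper: ask APR for a pollset that several threads may add
 * to or remove from concurrently, and that apr_pollset_wakeup() can unblock. */
static apr_status_t create_threadsafe_pollset(apr_pollset_t **out,
                                              apr_uint32_t size, apr_pool_t *p)
{
    apr_uint32_t flags = APR_POLLSET_THREADSAFE | APR_POLLSET_WAKEABLE;
    apr_status_t rv;

    /* Prefer the epoll backend, then kqueue, as the comment above requires;
     * apr_pollset_create_ex() reports an error if the method is unsupported. */
    rv = apr_pollset_create_ex(out, size, p, flags, APR_POLLSET_EPOLL);
    if (rv != APR_SUCCESS) {
        rv = apr_pollset_create_ex(out, size, p, flags, APR_POLLSET_KQUEUE);
    }
    return rv;
}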
46
47
#include "apr.h"
48
#include "apr_portable.h"
49
#include "apr_strings.h"
50
#include "apr_file_io.h"
51
#include "apr_thread_proc.h"
52
#include "apr_signal.h"
53
#include "apr_thread_mutex.h"
54
#include "apr_poll.h"
55
#include "apr_ring.h"
56
#include "apr_queue.h"
57
#include "apr_atomic.h"
58
#define APR_WANT_STRFUNC
59
#include "apr_want.h"
60
#include "apr_version.h"
61
62
#include <stdlib.h>
63
64
#if APR_HAVE_UNISTD_H
65
#include <unistd.h>
66
#endif
67
#if APR_HAVE_SYS_SOCKET_H
68
#include <sys/socket.h>
69
#endif
70
#if APR_HAVE_SYS_WAIT_H
71
#include <sys/wait.h>
72
#endif
73
#ifdef HAVE_SYS_PROCESSOR_H
74
#include <sys/processor.h>      /* for bindprocessor() */
75
#endif
76
77
#if !APR_HAS_THREADS
78
#error The Event MPM requires APR threads, but they are unavailable.
79
#endif
80
81
#include "ap_config.h"
82
#include "httpd.h"
83
#include "http_main.h"
84
#include "http_log.h"
85
#include "http_config.h"        /* for read_config */
86
#include "http_core.h"          /* for get_remote_host */
87
#include "http_connection.h"
88
#include "http_protocol.h"
89
#include "ap_mpm.h"
90
#include "mpm_common.h"
91
#include "ap_listen.h"
92
#include "scoreboard.h"
93
#include "mpm_fdqueue.h"
94
#include "mpm_default.h"
95
#include "http_vhost.h"
96
#include "unixd.h"
97
#include "apr_skiplist.h"
98
#include "util_time.h"
99
100
#include <signal.h>
101
#include <limits.h>             /* for INT_MAX */
102
103
104
#if HAVE_SERF
105
#include "mod_serf.h"
106
#include "serf.h"
107
#endif
108
109
/* Limit on the total --- clients will be locked out if more servers than
110
 * this are needed.  It is intended solely to keep the server from crashing
111
 * when things get out of hand.
112
 *
113
 * We keep a hard maximum number of servers, for two reasons --- first off,
114
 * in case something goes seriously wrong, we want to stop the fork bomb
115
 * short of actually crashing the machine we're running on by filling some
116
 * kernel table.  Secondly, it keeps the size of the scoreboard file small
117
 * enough that we can read the whole thing without worrying too much about
118
 * the overhead.
119
 */
120
#ifndef DEFAULT_SERVER_LIMIT
121
0
#define DEFAULT_SERVER_LIMIT 16
122
#endif
123
124
/* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
125
 * some sort of compile-time limit to help catch typos.
126
 */
127
#ifndef MAX_SERVER_LIMIT
128
0
#define MAX_SERVER_LIMIT 20000
129
#endif
130
131
/* Limit on the threads per process.  Clients will be locked out if more than
132
 * this are needed.
133
 *
134
 * We keep this for one reason: it keeps the size of the scoreboard file small
135
 * enough that we can read the whole thing without worrying too much about
136
 * the overhead.
137
 */
138
#ifndef DEFAULT_THREAD_LIMIT
139
0
#define DEFAULT_THREAD_LIMIT 64
140
#endif
141
142
/* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
143
 * some sort of compile-time limit to help catch typos.
144
 */
145
#ifndef MAX_THREAD_LIMIT
146
0
#define MAX_THREAD_LIMIT 100000
147
#endif
148
149
0
#define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
150
151
#if !APR_VERSION_AT_LEAST(1,4,0)
152
#define apr_time_from_msec(x) (x * 1000)
153
#endif
154
155
0
#define CONN_STATE_IS_LINGERING_CLOSE(s) ((s) >= CONN_STATE_LINGER && \
156
0
                                          (s) <= CONN_STATE_LINGER_SHORT)
157
#ifndef MAX_SECS_TO_LINGER
158
#define MAX_SECS_TO_LINGER 30
159
#endif
160
#define SECONDS_TO_LINGER  2
161
162
/*
163
 * Actual definitions of config globals
164
 */
165
166
#ifndef DEFAULT_WORKER_FACTOR
167
#define DEFAULT_WORKER_FACTOR 2
168
#endif
169
0
#define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
170
static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
171
    /* AsyncRequestWorkerFactor * 16 */
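A brief worked example of the scaling (editorial note, not from the source): because the stored value is pre-multiplied by WORKER_FACTOR_SCALE, fractional directive values survive integer storage.

/* AsyncRequestWorkerFactor 2    ->  worker_factor = 2   * 16 = 32
 * AsyncRequestWorkerFactor 1.5  ->  worker_factor = 1.5 * 16 = 24
 * connections_above_limit() below divides by WORKER_FACTOR_SCALE again
 * when computing how many extra connections each idle worker may carry. */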
172
173
static int threads_per_child = 0;           /* ThreadsPerChild */
174
static int ap_daemons_to_start = 0;         /* StartServers */
175
static int min_spare_threads = 0;           /* MinSpareThreads */
176
static int max_spare_threads = 0;           /* MaxSpareThreads */
177
static int active_daemons_limit = 0;        /* MaxRequestWorkers / ThreadsPerChild */
178
static int max_workers = 0;                 /* MaxRequestWorkers */
179
static int server_limit = 0;                /* ServerLimit */
180
static int thread_limit = 0;                /* ThreadLimit */
181
static int had_healthy_child = 0;
182
static volatile int dying = 0;
183
static volatile int workers_may_exit = 0;
184
static volatile int start_thread_may_exit = 0;
185
static volatile int listener_may_exit = 0;
186
static int listener_is_wakeable = 0;        /* Pollset supports APR_POLLSET_WAKEABLE */
187
static int num_listensocks = 0;
188
static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
189
                                               in listener thread */
190
static apr_uint32_t connection_count = 0;   /* Number of open connections */
191
static apr_uint32_t lingering_count = 0;    /* Number of connections in lingering close */
192
static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
193
static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
194
static apr_uint32_t threads_shutdown = 0;   /* Number of threads that have shutdown
195
                                               early during graceful termination */
196
static int resource_shortage = 0;
197
static fd_queue_t *worker_queue;
198
static fd_queue_info_t *worker_queue_info;
199
200
static apr_thread_mutex_t *timeout_mutex;
201
202
module AP_MODULE_DECLARE_DATA mpm_event_module;
203
204
/* forward declare */
205
struct event_srv_cfg_s;
206
typedef struct event_srv_cfg_s event_srv_cfg;
207
208
static apr_pollfd_t *listener_pollfd;
209
210
/*
211
 * The pollset for sockets that are in any of the timeout queues. Currently
212
 * we use the timeout_mutex to make sure that connections are added/removed
213
 * atomically to/from both event_pollset and a timeout queue. Otherwise
214
 * some confusion can happen under high load if timeout queues and pollset
215
 * get out of sync.
216
 * XXX: It should be possible to make the lock unnecessary in many or even all
217
 * XXX: cases.
218
 */
219
static apr_pollset_t *event_pollset;
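A minimal sketch of the add-side protocol the comment above describes (an illustrative fragment only; the real call sites appear later in this file, e.g. in process_socket()): both the timeout-queue append and the pollset add happen under timeout_mutex.

/* Sketch: q and cs stand for some timeout queue and connection state. */
apr_thread_mutex_lock(timeout_mutex);
TO_QUEUE_APPEND(q, cs);
rv = apr_pollset_add(event_pollset, &cs->pfd);
apr_thread_mutex_unlock(timeout_mutex);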
220
221
typedef struct event_conn_state_t event_conn_state_t;
222
223
/*
224
 * The chain of connections to be shutdown by a worker thread (deferred),
225
 * linked list updated atomically.
226
 */
227
static event_conn_state_t *volatile defer_linger_chain;
228
229
struct event_conn_state_t {
230
    /** APR_RING of expiration timeouts */
231
    APR_RING_ENTRY(event_conn_state_t) timeout_list;
232
    /** the time when the entry was queued */
233
    apr_time_t queue_timestamp;
234
    /** connection record this struct refers to */
235
    conn_rec *c;
236
    /** request record (if any) this struct refers to */
237
    request_rec *r;
238
    /** server config this struct refers to */
239
    event_srv_cfg *sc;
240
    /** scoreboard handle for the conn_rec */
241
    ap_sb_handle_t *sbh;
242
    /** is the current conn_rec suspended?  (disassociated with
243
     * a particular MPM thread; for suspend_/resume_connection
244
     * hooks)
245
     */
246
    int suspended;
247
    /** memory pool to allocate from */
248
    apr_pool_t *p;
249
    /** bucket allocator */
250
    apr_bucket_alloc_t *bucket_alloc;
251
    /** poll file descriptor information */
252
    apr_pollfd_t pfd;
253
    /** public parts of the connection state */
254
    conn_state_t pub;
255
    /** chaining in defer_linger_chain */
256
    struct event_conn_state_t *chain;
257
    unsigned int 
258
        /** Is lingering close from defer_lingering_close()? */
259
        deferred_linger :1,
260
        /** Has ap_start_lingering_close() been called? */
261
        linger_started  :1;
262
};
263
264
APR_RING_HEAD(timeout_head_t, event_conn_state_t);
265
266
struct timeout_queue {
267
    struct timeout_head_t head;
268
    apr_interval_time_t timeout;
269
    apr_uint32_t count;         /* for this queue */
270
    apr_uint32_t *total;        /* for all chained/related queues */
271
    struct timeout_queue *next; /* chaining */
272
};
273
/*
274
 * Several timeout queues that use different timeouts, so that we can always
275
 * simply append to the end.
276
 *   waitio_q           uses vhost's TimeOut
277
 *   write_completion_q uses vhost's TimeOut
278
 *   keepalive_q        uses vhost's KeepAliveTimeOut
279
 *   linger_q           uses MAX_SECS_TO_LINGER
280
 *   short_linger_q     uses SECONDS_TO_LINGER
281
 */
282
static struct timeout_queue *waitio_q,
283
                            *write_completion_q,
284
                            *keepalive_q,
285
                            *linger_q,
286
                            *short_linger_q;
287
static volatile apr_time_t  queues_next_expiry;
288
289
/* Prevent extra poll/wakeup calls for timeouts close in the future (queues
290
 * have the granularity of a second anyway).
291
 * XXX: Wouldn't 0.5s (instead of 0.1s) be "enough"?
292
 */
293
0
#define TIMEOUT_FUDGE_FACTOR apr_time_from_msec(100)
294
295
/*
296
 * Macros for accessing struct timeout_queue.
297
 * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
298
 */
299
static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *el)
300
0
{
301
0
    apr_time_t elem_expiry;
302
0
    apr_time_t next_expiry;
303
304
0
    APR_RING_INSERT_TAIL(&q->head, el, event_conn_state_t, timeout_list);
305
0
    ++*q->total;
306
0
    ++q->count;
307
308
    /* Cheaply update the global queues_next_expiry with that of the
309
     * first entry of this queue (oldest) if it expires before.
310
     */
311
0
    el = APR_RING_FIRST(&q->head);
312
0
    elem_expiry = el->queue_timestamp + q->timeout;
313
0
    next_expiry = queues_next_expiry;
314
0
    if (!next_expiry || next_expiry > elem_expiry + TIMEOUT_FUDGE_FACTOR) {
315
0
        queues_next_expiry = elem_expiry;
316
        /* Unblock the poll()ing listener for it to update its timeout. */
317
0
        if (listener_is_wakeable) {
318
0
            apr_pollset_wakeup(event_pollset);
319
0
        }
320
0
    }
321
0
}
322
323
static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *el)
324
0
{
325
0
    APR_RING_REMOVE(el, timeout_list);
326
0
    APR_RING_ELEM_INIT(el, timeout_list);
327
0
    --*q->total;
328
0
    --q->count;
329
0
}
330
331
static struct timeout_queue *TO_QUEUE_MAKE(apr_pool_t *p, apr_time_t t,
332
                                           struct timeout_queue *ref)
333
0
{
334
0
    struct timeout_queue *q;
335
                                           
336
0
    q = apr_pcalloc(p, sizeof *q);
337
0
    APR_RING_INIT(&q->head, event_conn_state_t, timeout_list);
338
0
    q->total = (ref) ? ref->total : apr_pcalloc(p, sizeof *q->total);
339
0
    q->timeout = t;
340
341
0
    return q;
342
0
}
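An illustrative (hypothetical) use of the ref parameter: chaining a new queue to an existing one makes both share the same *total counter, while each keeps its own count. The pool p and the queue names below are assumptions.

struct timeout_queue *base_q  = TO_QUEUE_MAKE(p, apr_time_from_sec(30), NULL);
struct timeout_queue *vhost_q = TO_QUEUE_MAKE(p, apr_time_from_sec(10), base_q);
/* base_q->total == vhost_q->total: an append to either queue increments the
 * shared counter, so related queues can be accounted for together. */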
343
344
#define TO_QUEUE_ELEM_INIT(el) \
345
0
    APR_RING_ELEM_INIT((el), timeout_list)
346
347
#if HAVE_SERF
348
typedef struct {
349
    apr_pollset_t *pollset;
350
    apr_pool_t *pool;
351
} s_baton_t;
352
353
static serf_context_t *g_serf;
354
#endif
355
356
/* The structure used to pass unique initialization info to each thread */
357
typedef struct
358
{
359
    int pslot;  /* process slot */
360
    int tslot;  /* worker slot of the thread */
361
} proc_info;
362
363
/* Structure used to pass information to the thread responsible for
364
 * creating the rest of the threads.
365
 */
366
typedef struct
367
{
368
    apr_thread_t **threads;
369
    apr_thread_t *listener;
370
    int child_num_arg;
371
    apr_threadattr_t *threadattr;
372
} thread_starter;
373
374
typedef enum
375
{
376
    PT_CSD,
377
    PT_ACCEPT
378
#if HAVE_SERF
379
    , PT_SERF
380
#endif
381
    , PT_USER
382
} poll_type_e;
383
384
typedef struct
385
{
386
    poll_type_e type;
387
    void *baton;
388
} listener_poll_type;
389
390
typedef struct socket_callback_baton
391
{
392
    ap_mpm_callback_fn_t *cbfunc;
393
    void *user_baton;
394
    apr_array_header_t *pfds;
395
    timer_event_t *cancel_event; /* If a timeout was requested, a pointer to the timer event */
396
    struct socket_callback_baton *next;
397
    unsigned int signaled :1;
398
} socket_callback_baton_t;
399
400
typedef struct event_child_bucket {
401
    ap_pod_t *pod;
402
    ap_listen_rec *listeners;
403
} event_child_bucket;
404
static event_child_bucket *my_bucket;  /* Current child bucket */
405
406
/* data retained by event across load/unload of the module
407
 * allocated on the first call to the pre-config hook; retrieved on
408
 * subsequent calls to the pre-config hook
409
 */
410
typedef struct event_retained_data {
411
    ap_unixd_mpm_retained_data *mpm;
412
413
    apr_pool_t *gen_pool; /* generation pool (children start->stop lifetime) */
414
    event_child_bucket *buckets; /* children buckets (reset per generation) */
415
416
    int first_server_limit;
417
    int first_thread_limit;
418
    int sick_child_detected;
419
    int maxclients_reported;
420
    int near_maxclients_reported;
421
    /*
422
     * The max child slot ever assigned, preserved across restarts.  Necessary
423
     * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
424
     * We use this value to optimize routines that have to scan the entire
425
     * scoreboard.
426
     */
427
    int max_daemon_used;
428
429
    /*
430
     * All running workers, active and shutting down, including those that
431
     * may be left from before a graceful restart.
432
     * Not kept up-to-date when shutdown is pending.
433
     */
434
    int total_daemons;
435
    /*
436
     * Workers that are still active, i.e. not shutting down gracefully.
437
     */
438
    int active_daemons;
439
    /*
440
     * idle_spawn_rate is the number of children that will be spawned on the
441
     * next maintenance cycle if there aren't enough idle servers.  It is
442
     * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
443
     * reset only when a cycle goes by without the need to spawn.
444
     */
445
    int *idle_spawn_rate;
446
    int hold_off_on_exponential_spawning;
447
} event_retained_data;
448
static event_retained_data *retained;
449
450
#ifndef MAX_SPAWN_RATE
451
0
#define MAX_SPAWN_RATE 32
452
#endif
453
static int max_spawn_rate_per_bucket = MAX_SPAWN_RATE / 1;
454
455
struct event_srv_cfg_s {
456
    struct timeout_queue *io_q,
457
                         *wc_q,
458
                         *ka_q;
459
};
460
461
0
#define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
462
463
/* The event MPM respects a couple of runtime flags that can aid
464
 * in debugging. Setting the -DNO_DETACH flag will prevent the root process
465
 * from detaching from its controlling terminal. Additionally, setting
466
 * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
467
 * child_main loop running in the process which originally started up.
468
 * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
469
 * early in standalone_main; just continue through.  This is the server
470
 * trying to kill off any child processes which it might have lying
471
 * around --- Apache doesn't keep track of their pids; it just sends
472
 * SIGHUP to the process group, ignoring it in the root process.
473
 * Continue through and you'll be fine.).
474
 */
475
476
static int one_process = 0;
477
478
#ifdef DEBUG_SIGSTOP
479
int raise_sigstop_flags;
480
#endif
481
482
static apr_pool_t *pconf;       /* Pool for config stuff */
483
static apr_pool_t *pchild;      /* Pool for httpd child stuff */
484
static apr_pool_t *pruntime;    /* Pool for MPM threads stuff */
485
486
static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
487
                                   thread. Use this instead */
488
static pid_t parent_pid;
489
static apr_os_thread_t *listener_os_thread;
490
491
static int ap_child_slot;       /* Current child process slot in scoreboard */
492
493
/* The LISTENER_SIGNAL signal will be sent from the main thread to the
494
 * listener thread to wake it up for graceful termination (what a child
495
 * process from an old generation does when the admin does "apachectl
496
 * graceful").  This signal will be blocked in all threads of a child
497
 * process except for the listener thread.
498
 */
499
0
#define LISTENER_SIGNAL     SIGHUP
500
501
/* An array of socket descriptors in use by each thread used to
502
 * perform a non-graceful (forced) shutdown of the server.
503
 */
504
static apr_socket_t **worker_sockets;
505
506
static volatile apr_uint32_t listensocks_disabled;
507
508
static void disable_listensocks(void)
509
0
{
510
0
    int i;
511
0
    if (apr_atomic_cas32(&listensocks_disabled, 1, 0) != 0) {
512
0
        return;
513
0
    }
514
0
    if (event_pollset) {
515
0
        for (i = 0; i < num_listensocks; i++) {
516
0
            apr_pollset_remove(event_pollset, &listener_pollfd[i]);
517
0
        }
518
0
    }
519
0
    ap_scoreboard_image->parent[ap_child_slot].not_accepting = 1;
520
0
}
521
522
static void enable_listensocks(void)
523
0
{
524
0
    int i;
525
0
    if (listener_may_exit
526
0
            || apr_atomic_cas32(&listensocks_disabled, 0, 1) != 1) {
527
0
        return;
528
0
    }
529
0
    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
530
0
                 "Accepting new connections again: "
531
0
                 "%u active conns (%u lingering/%u clogged/%u suspended), "
532
0
                 "%u idle workers",
533
0
                 apr_atomic_read32(&connection_count),
534
0
                 apr_atomic_read32(&lingering_count),
535
0
                 apr_atomic_read32(&clogged_count),
536
0
                 apr_atomic_read32(&suspended_count),
537
0
                 ap_queue_info_num_idlers(worker_queue_info));
538
0
    for (i = 0; i < num_listensocks; i++)
539
0
        apr_pollset_add(event_pollset, &listener_pollfd[i]);
540
    /*
541
     * XXX: This is not yet optimal. If many workers suddenly become available,
542
     * XXX: the parent may kill some processes off too soon.
543
     */
544
0
    ap_scoreboard_image->parent[ap_child_slot].not_accepting = 0;
545
0
}
546
547
static APR_INLINE apr_uint32_t listeners_disabled(void)
548
0
{
549
0
    return apr_atomic_read32(&listensocks_disabled);
550
0
}
551
552
static APR_INLINE int connections_above_limit(int *busy)
553
0
{
554
0
    apr_uint32_t i_count = ap_queue_info_num_idlers(worker_queue_info);
555
0
    if (i_count > 0) {
556
0
        apr_uint32_t c_count = apr_atomic_read32(&connection_count);
557
0
        apr_uint32_t l_count = apr_atomic_read32(&lingering_count);
558
0
        if (c_count <= l_count
559
                /* Off by 'listeners_disabled()' to avoid flip flop */
560
0
                || c_count - l_count < (apr_uint32_t)threads_per_child +
561
0
                                       (i_count - listeners_disabled()) *
562
0
                                       (worker_factor / WORKER_FACTOR_SCALE)) {
563
0
            return 0;
564
0
        }
565
0
    }
566
0
    else if (busy) {
567
0
        *busy = 1;
568
0
    }
569
0
    return 1;
570
0
}
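A worked example of the threshold above, with assumed numbers (editorial, not from the source): threads_per_child = 25, i_count = 10 idle workers, listeners not disabled, and worker_factor / WORKER_FACTOR_SCALE = 2 (AsyncRequestWorkerFactor 2). Then new connections are still considered acceptable while

    c_count - l_count < threads_per_child + (i_count - 0) * 2
                      = 25 + 10 * 2
                      = 45

so once the child has 45 or more non-lingering connections, connections_above_limit() returns 1.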
571
572
static APR_INLINE int should_enable_listensocks(void)
573
0
{
574
0
    return !dying && listeners_disabled() && !connections_above_limit(NULL);
575
0
}
576
577
static void close_socket_nonblocking_(apr_socket_t *csd,
578
                                      const char *from, int line)
579
0
{
580
0
    apr_status_t rv;
581
0
    apr_os_sock_t fd = -1;
582
583
    /* close_worker_sockets() may have closed it already */
584
0
    rv = apr_os_sock_get(&fd, csd);
585
0
    ap_log_error(APLOG_MARK, APLOG_TRACE8, 0, ap_server_conf,
586
0
                "closing socket %i/%pp from %s:%i", (int)fd, csd, from, line);
587
0
    if (rv == APR_SUCCESS && fd == -1) {
588
0
        return;
589
0
    }
590
591
0
    apr_socket_timeout_set(csd, 0);
592
0
    rv = apr_socket_close(csd);
593
0
    if (rv != APR_SUCCESS) {
594
0
        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468)
595
0
                     "error closing socket");
596
0
        AP_DEBUG_ASSERT(0);
597
0
    }
598
0
}
599
#define close_socket_nonblocking(csd) \
600
0
    close_socket_nonblocking_(csd, __FUNCTION__, __LINE__)
601
602
static void close_worker_sockets(void)
603
0
{
604
0
    int i;
605
0
    for (i = 0; i < threads_per_child; i++) {
606
0
        apr_socket_t *csd = worker_sockets[i];
607
0
        if (csd) {
608
0
            worker_sockets[i] = NULL;
609
0
            close_socket_nonblocking(csd);
610
0
        }
611
0
    }
612
0
}
613
614
static void wakeup_listener(void)
615
0
{
616
0
    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
617
0
                 "wake up listener%s", listener_may_exit ? " again" : "");
618
619
0
    listener_may_exit = 1;
620
0
    disable_listensocks();
621
622
    /* Unblock the listener if it's poll()ing */
623
0
    if (event_pollset && listener_is_wakeable) {
624
0
        apr_pollset_wakeup(event_pollset);
625
0
    }
626
627
    /* unblock the listener if it's waiting for a worker */
628
0
    if (worker_queue_info) {
629
0
        ap_queue_info_term(worker_queue_info);
630
0
    }
631
632
0
    if (!listener_os_thread) {
633
        /* XXX there is an obscure path that this doesn't handle perfectly:
634
         *     right after listener thread is created but before
635
         *     listener_os_thread is set, the first worker thread hits an
636
         *     error and starts graceful termination
637
         */
638
0
        return;
639
0
    }
640
    /*
641
     * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
642
     * platforms and wake up the listener thread since it is the only thread
643
     * with SIGHUP unblocked, but that doesn't work on Linux
644
     */
645
0
#ifdef HAVE_PTHREAD_KILL
646
0
    pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
647
#else
648
    kill(ap_my_pid, LISTENER_SIGNAL);
649
#endif
650
0
}
651
652
0
#define ST_INIT              0
653
0
#define ST_GRACEFUL          1
654
0
#define ST_UNGRACEFUL        2
655
656
static int terminate_mode = ST_INIT;
657
658
static void signal_threads(int mode)
659
0
{
660
0
    if (terminate_mode >= mode) {
661
0
        return;
662
0
    }
663
0
    terminate_mode = mode;
664
0
    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
665
666
    /* in case we weren't called from the listener thread, wake up the
667
     * listener thread
668
     */
669
0
    wakeup_listener();
670
671
    /* for ungraceful termination, let the workers exit now;
672
     * for graceful termination, the listener thread will notify the
673
     * workers to exit once it has stopped accepting new connections
674
     */
675
0
    if (mode == ST_UNGRACEFUL) {
676
0
        workers_may_exit = 1;
677
0
        ap_queue_interrupt_all(worker_queue);
678
0
        close_worker_sockets(); /* forcefully kill all current connections */
679
0
    }
680
681
0
    ap_run_child_stopping(pchild, mode == ST_GRACEFUL);
682
0
}
683
684
static int event_query(int query_code, int *result, apr_status_t *rv)
685
0
{
686
0
    *rv = APR_SUCCESS;
687
0
    switch (query_code) {
688
0
    case AP_MPMQ_MAX_DAEMON_USED:
689
0
        *result = retained->max_daemon_used;
690
0
        break;
691
0
    case AP_MPMQ_IS_THREADED:
692
0
        *result = AP_MPMQ_STATIC;
693
0
        break;
694
0
    case AP_MPMQ_IS_FORKED:
695
0
        *result = AP_MPMQ_DYNAMIC;
696
0
        break;
697
0
    case AP_MPMQ_IS_ASYNC:
698
0
        *result = 1;
699
0
        break;
700
0
    case AP_MPMQ_HAS_SERF:
701
0
        *result = 1;
702
0
        break;
703
0
    case AP_MPMQ_HARD_LIMIT_DAEMONS:
704
0
        *result = server_limit;
705
0
        break;
706
0
    case AP_MPMQ_HARD_LIMIT_THREADS:
707
0
        *result = thread_limit;
708
0
        break;
709
0
    case AP_MPMQ_MAX_THREADS:
710
0
        *result = threads_per_child;
711
0
        break;
712
0
    case AP_MPMQ_MIN_SPARE_DAEMONS:
713
0
        *result = 0;
714
0
        break;
715
0
    case AP_MPMQ_MIN_SPARE_THREADS:
716
0
        *result = min_spare_threads;
717
0
        break;
718
0
    case AP_MPMQ_MAX_SPARE_DAEMONS:
719
0
        *result = 0;
720
0
        break;
721
0
    case AP_MPMQ_MAX_SPARE_THREADS:
722
0
        *result = max_spare_threads;
723
0
        break;
724
0
    case AP_MPMQ_MAX_REQUESTS_DAEMON:
725
0
        *result = ap_max_requests_per_child;
726
0
        break;
727
0
    case AP_MPMQ_MAX_DAEMONS:
728
0
        *result = active_daemons_limit;
729
0
        break;
730
0
    case AP_MPMQ_MPM_STATE:
731
0
        *result = retained->mpm->mpm_state;
732
0
        break;
733
0
    case AP_MPMQ_GENERATION:
734
0
        *result = retained->mpm->my_generation;
735
0
        break;
736
0
    case AP_MPMQ_CAN_SUSPEND:
737
0
        *result = 1;
738
0
        break;
739
0
    case AP_MPMQ_CAN_POLL:
740
0
        *result = 1;
741
0
        break;
742
0
    case AP_MPMQ_CAN_WAITIO:
743
0
        *result = 1;
744
0
        break;
745
0
    default:
746
0
        *rv = APR_ENOTIMPL;
747
0
        break;
748
0
    }
749
0
    return OK;
750
0
}
751
752
static void event_note_child_stopped(int slot, pid_t pid, ap_generation_t gen)
753
0
{
754
0
    if (slot != -1) { /* child had a scoreboard slot? */
755
0
        process_score *ps = &ap_scoreboard_image->parent[slot];
756
0
        int i;
757
758
0
        pid = ps->pid;
759
0
        gen = ps->generation;
760
0
        for (i = 0; i < threads_per_child; i++) {
761
0
            ap_update_child_status_from_indexes(slot, i, SERVER_DEAD, NULL);
762
0
        }
763
0
        ap_run_child_status(ap_server_conf, pid, gen, slot, MPM_CHILD_EXITED);
764
0
        if (ps->quiescing != 2) { /* vs perform_idle_server_maintenance() */
765
0
            retained->active_daemons--;
766
0
        }
767
0
        retained->total_daemons--;
768
0
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
769
0
                     "Child %d stopped: pid %d, gen %d, "
770
0
                     "active %d/%d, total %d/%d/%d, quiescing %d",
771
0
                     slot, (int)pid, (int)gen,
772
0
                     retained->active_daemons, active_daemons_limit,
773
0
                     retained->total_daemons, retained->max_daemon_used,
774
0
                     server_limit, ps->quiescing);
775
0
        ps->not_accepting = 0;
776
0
        ps->quiescing = 0;
777
0
        ps->pid = 0;
778
0
    }
779
0
    else {
780
0
        ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
781
0
    }
782
0
}
783
784
static void event_note_child_started(int slot, pid_t pid)
785
0
{
786
0
    ap_generation_t gen = retained->mpm->my_generation;
787
788
0
    retained->total_daemons++;
789
0
    retained->active_daemons++;
790
0
    ap_scoreboard_image->parent[slot].pid = pid;
791
0
    ap_scoreboard_image->parent[slot].generation = gen;
792
0
    ap_run_child_status(ap_server_conf, pid, gen, slot, MPM_CHILD_STARTED);
793
0
    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
794
0
                 "Child %d started: pid %d, gen %d, "
795
0
                 "active %d/%d, total %d/%d/%d",
796
0
                 slot, (int)pid, (int)gen,
797
0
                 retained->active_daemons, active_daemons_limit,
798
0
                 retained->total_daemons, retained->max_daemon_used,
799
0
                 server_limit);
800
0
}
801
802
static const char *event_get_name(void)
803
0
{
804
0
    return "event";
805
0
}
806
807
/* a clean exit from a child with proper cleanup */
808
static void clean_child_exit(int code) __attribute__ ((noreturn));
809
static void clean_child_exit(int code)
810
0
{
811
0
    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
812
0
    if (terminate_mode == ST_INIT) {
813
0
        ap_run_child_stopping(pchild, 0);
814
0
    }
815
816
0
    if (pchild) {
817
0
        ap_run_child_stopped(pchild, terminate_mode == ST_GRACEFUL);
818
0
        apr_pool_destroy(pchild);
819
0
    }
820
821
0
    if (one_process) {
822
0
        event_note_child_stopped(/* slot */ 0, 0, 0);
823
0
    }
824
825
0
    exit(code);
826
0
}
827
828
static void just_die(int sig)
829
0
{
830
0
    clean_child_exit(0);
831
0
}
832
833
/*****************************************************************
834
 * Connection structures and accounting...
835
 */
836
837
static int child_fatal;
838
839
static apr_status_t decrement_connection_count(void *cs_)
840
0
{
841
0
    int is_last_connection;
842
0
    event_conn_state_t *cs = cs_;
843
0
    ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, cs->c,
844
0
                  "cleanup connection from state %i", (int)cs->pub.state);
845
0
    switch (cs->pub.state) {
846
0
        case CONN_STATE_LINGER:
847
0
        case CONN_STATE_LINGER_NORMAL:
848
0
        case CONN_STATE_LINGER_SHORT:
849
0
            apr_atomic_dec32(&lingering_count);
850
0
            break;
851
0
        case CONN_STATE_SUSPENDED:
852
0
            apr_atomic_dec32(&suspended_count);
853
0
            break;
854
0
        default:
855
0
            break;
856
0
    }
857
    /* Unblock the listener if it's waiting for connection_count = 0,
858
     * or if the listening sockets were disabled due to limits and can
859
     * now accept new connections.
860
     */
861
0
    is_last_connection = !apr_atomic_dec32(&connection_count);
862
0
    if (listener_is_wakeable
863
0
            && ((is_last_connection && listener_may_exit)
864
0
                || should_enable_listensocks())) {
865
0
        apr_pollset_wakeup(event_pollset);
866
0
    }
867
0
    if (dying) {
868
        /* Help worker_thread_should_exit_early() */
869
0
        ap_queue_interrupt_one(worker_queue);
870
0
    }
871
0
    return APR_SUCCESS;
872
0
}
873
874
static void notify_suspend(event_conn_state_t *cs)
875
0
{
876
0
    ap_run_suspend_connection(cs->c, cs->r);
877
0
    cs->c->sbh = NULL;
878
0
    cs->suspended = 1;
879
0
}
880
881
static void notify_resume(event_conn_state_t *cs, int cleanup)
882
0
{
883
0
    cs->suspended = 0;
884
0
    cs->c->sbh = cleanup ? NULL : cs->sbh;
885
0
    ap_run_resume_connection(cs->c, cs->r);
886
0
}
887
888
/*
889
 * Defer flush and close of the connection by adding it to defer_linger_chain,
890
 * for a worker to grab it and do the job (should that be blocking).
891
 * Pre-condition: nonblocking, can be called from anywhere provided cs is not
892
 *                in any timeout queue or in the pollset.
893
 */
894
static int defer_lingering_close(event_conn_state_t *cs)
895
0
{
896
0
    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
897
0
                  "deferring close from state %i", (int)cs->pub.state);
898
899
    /* The connection is not shutdown() yet strictly speaking, but it's not
900
     * in any queue nor handled by a worker either (will be very soon), so
901
     * to account for it somewhere we bump lingering_count now (and set
902
     * deferred_linger for process_lingering_close() to know).
903
     */
904
0
    cs->pub.state = CONN_STATE_LINGER;
905
0
    apr_atomic_inc32(&lingering_count);
906
0
    cs->deferred_linger = 1;
907
0
    for (;;) {
908
0
        event_conn_state_t *chain = cs->chain = defer_linger_chain;
909
0
        if (apr_atomic_casptr((void *)&defer_linger_chain, cs,
910
0
                              chain) != chain) {
911
            /* Race lost, try again */
912
0
            continue;
913
0
        }
914
0
        return 1;
915
0
    }
916
0
}
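For illustration only (the consuming side lives elsewhere in this file and is not shown in this excerpt), a worker could detach the whole deferred chain with the same CAS primitive; this is a hypothetical sketch, not the MPM's actual consumer loop.

event_conn_state_t *head;
do {
    head = defer_linger_chain;
} while (head && apr_atomic_casptr((void *)&defer_linger_chain, NULL, head) != head);

while (head) {                       /* walk the detached list */
    event_conn_state_t *next = head->chain;
    head->chain = NULL;
    /* ... flush and close 'head', e.g. via process_lingering_close() ... */
    head = next;
}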
917
918
/* Close the connection and release its resources (ptrans), either because an
919
 * unrecoverable error occurred (queues or pollset add/remove) or, more usually,
920
 * if lingering close timed out.
921
 * Pre-condition: nonblocking, can be called from anywhere provided cs is not
922
 *                in any timeout queue or in the pollset.
923
 */
924
static void close_connection(event_conn_state_t *cs)
925
0
{
926
0
    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
927
0
                  "closing connection from state %i", (int)cs->pub.state);
928
929
0
    close_socket_nonblocking(cs->pfd.desc.s);
930
0
    ap_queue_info_push_pool(worker_queue_info, cs->p);
931
0
}
932
933
/* Shut down the connection in case of timeout, error, or resource shortage.
934
 * This starts short lingering close if not already there, or directly closes
935
 * the connection otherwise.
936
 * Pre-condition: nonblocking, can be called from anywhere provided cs is not
937
 *                in any timeout queue or in the pollset.
938
 */
939
static int shutdown_connection(event_conn_state_t *cs)
940
0
{
941
0
    if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
942
0
        apr_table_setn(cs->c->notes, "short-lingering-close", "1");
943
0
        defer_lingering_close(cs);
944
0
    }
945
0
    else {
946
0
        close_connection(cs);
947
0
    }
948
0
    return 1;
949
0
}
950
951
/*
952
 * This runs before any non-MPM cleanup code on the connection;
953
 * if the connection is currently suspended as far as modules
954
 * know, provide notification of resumption.
955
 */
956
static apr_status_t ptrans_pre_cleanup(void *dummy)
957
0
{
958
0
    event_conn_state_t *cs = dummy;
959
960
0
    if (cs->suspended) {
961
0
        notify_resume(cs, 1);
962
0
    }
963
0
    return APR_SUCCESS;
964
0
}
965
966
/*
967
 * event_pre_read_request() and event_request_cleanup() track the
968
 * current r for a given connection.
969
 */
970
static apr_status_t event_request_cleanup(void *dummy)
971
0
{
972
0
    conn_rec *c = dummy;
973
0
    event_conn_state_t *cs = ap_get_module_config(c->conn_config,
974
0
                                                  &mpm_event_module);
975
976
0
    cs->r = NULL;
977
0
    return APR_SUCCESS;
978
0
}
979
980
static void event_pre_read_request(request_rec *r, conn_rec *c)
981
0
{
982
0
    event_conn_state_t *cs = ap_get_module_config(c->conn_config,
983
0
                                                  &mpm_event_module);
984
985
0
    cs->r = r;
986
0
    cs->sc = ap_get_module_config(ap_server_conf->module_config,
987
0
                                  &mpm_event_module);
988
0
    apr_pool_cleanup_register(r->pool, c, event_request_cleanup,
989
0
                              apr_pool_cleanup_null);
990
0
}
991
992
/*
993
 * event_post_read_request() tracks the current server config for a
994
 * given request.
995
 */
996
static int event_post_read_request(request_rec *r)
997
0
{
998
0
    conn_rec *c = r->connection;
999
0
    event_conn_state_t *cs = ap_get_module_config(c->conn_config,
1000
0
                                                  &mpm_event_module);
1001
1002
    /* To preserve legacy behaviour (consistent with other MPMs), use
1003
     * the keepalive timeout from the base server (first on this IP:port)
1004
     * when none is explicitly configured on this server.
1005
     */
1006
0
    if (r->server->keep_alive_timeout_set) {
1007
0
        cs->sc = ap_get_module_config(r->server->module_config,
1008
0
                                      &mpm_event_module);
1009
0
    }
1010
0
    else {
1011
0
        cs->sc = ap_get_module_config(c->base_server->module_config,
1012
0
                                      &mpm_event_module);
1013
0
    }
1014
0
    return OK;
1015
0
}
1016
1017
/* Forward declare */
1018
static void process_lingering_close(event_conn_state_t *cs);
1019
1020
static void update_reqevents_from_sense(event_conn_state_t *cs,
1021
                                        int default_sense)
1022
0
{
1023
0
    int sense = default_sense;
1024
1025
0
    if (cs->pub.sense != CONN_SENSE_DEFAULT) {
1026
0
        sense = cs->pub.sense;
1027
1028
        /* Reset to default for the next round */
1029
0
        cs->pub.sense = CONN_SENSE_DEFAULT;
1030
0
    }
1031
1032
0
    if (sense == CONN_SENSE_WANT_READ) {
1033
0
        cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP;
1034
0
    }
1035
0
    else {
1036
0
        cs->pfd.reqevents = APR_POLLOUT;
1037
0
    }
1038
    /* POLLERR is usually only reported in returned events, but some pollset
1039
     * backends may require it in reqevents to do the right thing,
1040
     * so it shouldn't hurt (ignored otherwise).
1041
     */
1042
0
    cs->pfd.reqevents |= APR_POLLERR;
1043
0
}
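Hypothetical module-side usage of the sense field handled above: a handler that must write before it can read again would set the sense before returning to the MPM, so the next poll registration waits for writability instead of readability.

/* Hypothetical module code, run before handing the connection back: */
c->cs->sense = CONN_SENSE_WANT_WRITE;   /* next registration polls for write */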
1044
1045
/*
1046
 * process one connection in the worker
1047
 */
1048
static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
1049
                          event_conn_state_t * cs, int my_child_num,
1050
                          int my_thread_num)
1051
0
{
1052
0
    conn_rec *c;
1053
0
    long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
1054
0
    int clogging = 0, from_wc_q = 0;
1055
0
    apr_status_t rv;
1056
0
    int rc = OK;
1057
1058
0
    if (cs == NULL) {           /* This is a new connection */
1059
0
        listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
1060
0
        cs = apr_pcalloc(p, sizeof(event_conn_state_t));
1061
0
        cs->bucket_alloc = apr_bucket_alloc_create(p);
1062
0
        ap_create_sb_handle(&cs->sbh, p, my_child_num, my_thread_num);
1063
0
        c = ap_run_create_connection(p, ap_server_conf, sock,
1064
0
                                     conn_id, cs->sbh, cs->bucket_alloc);
1065
0
        if (!c) {
1066
0
            ap_queue_info_push_pool(worker_queue_info, p);
1067
0
            return;
1068
0
        }
1069
0
        apr_atomic_inc32(&connection_count);
1070
0
        apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
1071
0
                                  apr_pool_cleanup_null);
1072
0
        ap_set_module_config(c->conn_config, &mpm_event_module, cs);
1073
0
        c->current_thread = thd;
1074
0
        c->cs = &cs->pub;
1075
0
        cs->c = c;
1076
0
        cs->p = p;
1077
0
        cs->sc = ap_get_module_config(ap_server_conf->module_config,
1078
0
                                      &mpm_event_module);
1079
0
        cs->pfd.desc_type = APR_POLL_SOCKET;
1080
0
        cs->pfd.desc.s = sock;
1081
0
        pt->type = PT_CSD;
1082
0
        pt->baton = cs;
1083
0
        cs->pfd.client_data = pt;
1084
0
        apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
1085
0
        TO_QUEUE_ELEM_INIT(cs);
1086
1087
0
        ap_update_vhost_given_ip(c);
1088
1089
0
        rc = ap_pre_connection(c, sock);
1090
0
        if (rc != OK && rc != DONE) {
1091
0
            ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(00469)
1092
0
                          "process_socket: connection aborted");
1093
0
            close_connection(cs);
1094
0
            return;
1095
0
        }
1096
1097
        /**
1098
         * XXX If the platform does not have a usable way of bundling
1099
         * accept() with a socket readability check, like Win32,
1100
         * and there are measurable delays before the
1101
         * socket is readable due to the first data packet arriving,
1102
         * it might be better to create the cs on the listener thread
1103
         * with the state set to CONN_STATE_KEEPALIVE
1104
         *
1105
         * FreeBSD users will want to enable the HTTP accept filter
1106
         * module in their kernel for the highest performance.
1107
         * When the accept filter is active, sockets are kept in the
1108
         * kernel until an HTTP request is received.
1109
         */
1110
0
        cs->pub.state = CONN_STATE_PROCESSING;
1111
0
        cs->pub.sense = CONN_SENSE_DEFAULT;
1112
0
    }
1113
0
    else {
1114
0
        c = cs->c;
1115
0
        ap_update_sb_handle(cs->sbh, my_child_num, my_thread_num);
1116
0
        notify_resume(cs, 0);
1117
0
        c->current_thread = thd;
1118
        /* Subsequent request on a conn, and thread number is part of ID */
1119
0
        c->id = conn_id;
1120
0
    }
1121
1122
0
    if (CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
1123
0
        goto lingering_close;
1124
0
    }
1125
1126
0
    if (cs->pub.state == CONN_STATE_PROCESSING
1127
        /* If we have an input filter which 'clogs' the input stream,
1128
         * like mod_ssl used to, let's just do the normal read from input
1129
         * filters, like the Worker MPM does. Filters that need to write
1130
         * where they would otherwise read, or read where they would
1131
         * otherwise write, should set the sense appropriately.
1132
         */
1133
0
         || c->clogging_input_filters) {
1134
0
 process_connection:
1135
0
        cs->pub.state = CONN_STATE_PROCESSING;
1136
1137
0
        clogging = c->clogging_input_filters;
1138
0
        if (clogging) {
1139
0
            apr_atomic_inc32(&clogged_count);
1140
0
        }
1141
0
        rc = ap_run_process_connection(c);
1142
0
        if (clogging) {
1143
0
            apr_atomic_dec32(&clogged_count);
1144
0
        }
1145
        /*
1146
         * The process_connection hooks should set the appropriate connection
1147
         * state upon return, for event MPM to either:
1148
         * - CONN_STATE_LINGER: do lingering close;
1149
         * - CONN_STATE_WRITE_COMPLETION: flush pending outputs using Timeout
1150
         *   and wait for next incoming data using KeepAliveTimeout, then come
1151
         *   back to process_connection() hooks;
1152
         * - CONN_STATE_SUSPENDED: suspend the connection such that it now
1153
         *   interacts with the MPM through suspend/resume_connection() hooks,
1154
         *   and/or registered poll callbacks (PT_USER), and/or registered
1155
         *   timed callbacks triggered by timer events;
1156
         * - CONN_STATE_ASYNC_WAITIO: wait for read/write-ability of the underlying
1157
         *   socket using Timeout and come back to process_connection() hooks when
1158
         *   ready;
1159
         * - CONN_STATE_KEEPALIVE: now handled by CONN_STATE_WRITE_COMPLETION
1160
         *   to flush before waiting for next data (that might depend on it).
1161
         * If a process_connection hook returns an error or no hook sets the state
1162
         * to one of the above expected values, forcibly close the connection w/
1163
         * CONN_STATE_LINGER.  This covers the cases where no process_connection
1164
         * hook executes (DECLINED), or one returns OK w/o touching the state (i.e.
1165
         * CONN_STATE_PROCESSING remains after the call) which can happen with
1166
         * third-party modules not updated to work specifically with event MPM
1167
         * while this was expected to do lingering close unconditionally with
1168
         * worker or prefork MPMs for instance.
1169
         */
1170
0
        switch (rc) {
1171
0
        case DONE:
1172
0
            rc = OK; /* same as OK, fall through */
1173
0
        case OK:
1174
0
            if (cs->pub.state == CONN_STATE_PROCESSING) {
1175
0
                cs->pub.state = CONN_STATE_LINGER;
1176
0
            }
1177
0
            else if (cs->pub.state == CONN_STATE_KEEPALIVE) {
1178
0
                cs->pub.state = CONN_STATE_WRITE_COMPLETION;
1179
0
            }
1180
0
            break;
1181
0
        }
1182
0
        if (rc != OK || (cs->pub.state != CONN_STATE_LINGER
1183
0
                         && cs->pub.state != CONN_STATE_ASYNC_WAITIO
1184
0
                         && cs->pub.state != CONN_STATE_WRITE_COMPLETION
1185
0
                         && cs->pub.state != CONN_STATE_SUSPENDED)) {
1186
0
            ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(10111)
1187
0
                          "process_socket: connection processing returned %i "
1188
0
                          "(%sstate %i): closing",
1189
0
                          rc, rc ? "" : "unexpected ", (int)cs->pub.state);
1190
0
            cs->pub.state = CONN_STATE_LINGER;
1191
0
        }
1192
0
        else if (c->aborted) {
1193
0
            cs->pub.state = CONN_STATE_LINGER;
1194
0
        }
1195
0
        if (cs->pub.state == CONN_STATE_LINGER) {
1196
0
            goto lingering_close;
1197
0
        }
1198
0
    }
1199
0
    else if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
1200
0
        from_wc_q = 1;
1201
0
    }
1202
1203
0
    if (cs->pub.state == CONN_STATE_ASYNC_WAITIO) {
1204
        /* Set a read/write timeout for this connection, and let the
1205
         * event thread poll for read/writeability.
1206
         */
1207
0
        cs->queue_timestamp = apr_time_now();
1208
0
        notify_suspend(cs);
1209
1210
0
        ap_update_child_status(cs->sbh, SERVER_BUSY_READ, NULL);
1211
1212
        /* Modules might set c->cs->sense to CONN_SENSE_WANT_WRITE,
1213
         * the default is CONN_SENSE_WANT_READ still.
1214
         */
1215
0
        update_reqevents_from_sense(cs, CONN_SENSE_WANT_READ);
1216
0
        apr_thread_mutex_lock(timeout_mutex);
1217
0
        TO_QUEUE_APPEND(cs->sc->io_q, cs);
1218
0
        rv = apr_pollset_add(event_pollset, &cs->pfd);
1219
0
        if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
1220
0
            AP_DEBUG_ASSERT(0);
1221
0
            TO_QUEUE_REMOVE(cs->sc->io_q, cs);
1222
0
            apr_thread_mutex_unlock(timeout_mutex);
1223
0
            ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(10503)
1224
0
                         "process_socket: apr_pollset_add failure in "
1225
0
                         "CONN_STATE_ASYNC_WAITIO");
1226
0
            close_connection(cs);
1227
0
            signal_threads(ST_GRACEFUL);
1228
0
        }
1229
0
        else {
1230
0
            apr_thread_mutex_unlock(timeout_mutex);
1231
0
        }
1232
0
        return;
1233
0
    }
1234
1235
0
    if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
1236
0
        int pending = DECLINED;
1237
1238
        /* Flush all pending outputs before going to CONN_STATE_KEEPALIVE or
1239
         * straight to CONN_STATE_PROCESSING if inputs are pending already.
1240
         */
1241
        
1242
0
        ap_update_child_status(cs->sbh, SERVER_BUSY_WRITE, NULL);
1243
1244
0
        if (from_wc_q) {
1245
0
            from_wc_q = 0; /* one shot */
1246
0
            pending = ap_run_output_pending(c);
1247
0
        }
1248
0
        else if (ap_filter_should_yield(c->output_filters)) {
1249
0
            pending = OK;
1250
0
        }
1251
0
        if (pending == OK) {
1252
            /* Let the event thread poll for write */
1253
0
            cs->queue_timestamp = apr_time_now();
1254
0
            notify_suspend(cs);
1255
1256
            /* Add work to pollset. */
1257
0
            cs->pub.sense = CONN_SENSE_DEFAULT;
1258
0
            update_reqevents_from_sense(cs, CONN_SENSE_WANT_WRITE);
1259
0
            apr_thread_mutex_lock(timeout_mutex);
1260
0
            TO_QUEUE_APPEND(cs->sc->wc_q, cs);
1261
0
            rv = apr_pollset_add(event_pollset, &cs->pfd);
1262
0
            if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
1263
0
                AP_DEBUG_ASSERT(0);
1264
0
                TO_QUEUE_REMOVE(cs->sc->wc_q, cs);
1265
0
                apr_thread_mutex_unlock(timeout_mutex);
1266
0
                ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03465)
1267
0
                             "process_socket: apr_pollset_add failure in "
1268
0
                             "CONN_STATE_WRITE_COMPLETION");
1269
0
                close_connection(cs);
1270
0
                signal_threads(ST_GRACEFUL);
1271
0
            }
1272
0
            else {
1273
0
                apr_thread_mutex_unlock(timeout_mutex);
1274
0
            }
1275
0
            return;
1276
0
        }
1277
0
        if (pending != DECLINED || c->aborted || c->keepalive != AP_CONN_KEEPALIVE) {
1278
0
            cs->pub.state = CONN_STATE_LINGER;
1279
0
            goto lingering_close;
1280
0
        }
1281
0
        if (ap_run_input_pending(c) == OK) {
1282
0
            goto process_connection;
1283
0
        }
1284
0
        if (listener_may_exit) {
1285
0
            cs->pub.state = CONN_STATE_LINGER;
1286
0
            goto lingering_close;
1287
0
        }
1288
1289
        /* Fall through */
1290
0
        cs->pub.state = CONN_STATE_KEEPALIVE;
1291
0
    }
1292
1293
0
    if (cs->pub.state == CONN_STATE_KEEPALIVE) {
1294
0
        ap_update_child_status(cs->sbh, SERVER_BUSY_KEEPALIVE, NULL);
1295
1296
        /* It greatly simplifies the logic to use a single timeout value per q
1297
         * because the new element can just be added to the end of the list and
1298
         * it will stay sorted in expiration time sequence.  If brand new
1299
         * sockets are sent to the event thread for a readability check, this
1300
         * will be a slight behavior change - they use the non-keepalive
1301
         * timeout today.  With a normal client, the socket will be readable in
1302
         * a few milliseconds anyway.
1303
         */
1304
0
        cs->queue_timestamp = apr_time_now();
1305
0
        notify_suspend(cs);
1306
1307
        /* Add work to pollset. */
1308
0
        cs->pub.sense = CONN_SENSE_DEFAULT;
1309
0
        update_reqevents_from_sense(cs, CONN_SENSE_WANT_READ);
1310
0
        apr_thread_mutex_lock(timeout_mutex);
1311
0
        TO_QUEUE_APPEND(cs->sc->ka_q, cs);
1312
0
        rv = apr_pollset_add(event_pollset, &cs->pfd);
1313
0
        if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
1314
0
            AP_DEBUG_ASSERT(0);
1315
0
            TO_QUEUE_REMOVE(cs->sc->ka_q, cs);
1316
0
            apr_thread_mutex_unlock(timeout_mutex);
1317
0
            ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03093)
1318
0
                         "process_socket: apr_pollset_add failure for "
1319
0
                         "keep alive");
1320
0
            close_connection(cs);
1321
0
            signal_threads(ST_GRACEFUL);
1322
0
        }
1323
0
        else {
1324
0
            apr_thread_mutex_unlock(timeout_mutex);
1325
0
        }
1326
0
        return;
1327
0
    }
1328
1329
0
    if (cs->pub.state == CONN_STATE_SUSPENDED) {
1330
0
        cs->c->suspended_baton = cs;
1331
0
        apr_atomic_inc32(&suspended_count);
1332
0
        notify_suspend(cs);
1333
0
        return;
1334
0
    }
1335
1336
0
 lingering_close:
1337
    /* CONN_STATE_LINGER[_*] falls through to process_lingering_close() */
1338
0
    process_lingering_close(cs);
1339
0
}
1340
1341
/* Put a SUSPENDED connection back into a queue. */
1342
static apr_status_t event_resume_suspended (conn_rec *c)
1343
0
{
1344
0
    event_conn_state_t* cs = (event_conn_state_t*) c->suspended_baton;
1345
0
    if (cs == NULL) {
1346
0
        ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02615)
1347
0
                "event_resume_suspended: suspended_baton is NULL");
1348
0
        return APR_EGENERAL;
1349
0
    } else if (!cs->suspended) {
1350
0
        ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02616)
1351
0
                "event_resume_suspended: Thread isn't suspended");
1352
0
        return APR_EGENERAL;
1353
0
    }
1354
0
    apr_atomic_dec32(&suspended_count);
1355
0
    c->suspended_baton = NULL;
1356
1357
0
    if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
1358
0
        cs->queue_timestamp = apr_time_now();
1359
0
        notify_suspend(cs);
1360
1361
0
        cs->pub.sense = CONN_SENSE_DEFAULT;
1362
0
        cs->pub.state = CONN_STATE_WRITE_COMPLETION;
1363
0
        update_reqevents_from_sense(cs, CONN_SENSE_WANT_WRITE);
1364
0
        apr_thread_mutex_lock(timeout_mutex);
1365
0
        TO_QUEUE_APPEND(cs->sc->wc_q, cs);
1366
0
        apr_pollset_add(event_pollset, &cs->pfd);
1367
0
        apr_thread_mutex_unlock(timeout_mutex);
1368
0
    }
1369
0
    else {
1370
0
        process_lingering_close(cs);
1371
0
    }
1372
1373
0
    return OK;
1374
0
}
1375
1376
/* conns_this_child has gone to zero or below.  See if the admin coded
1377
   "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
1378
   simplifies the hot path in worker_thread. */
1379
static void check_infinite_requests(void)
1380
0
{
1381
0
    if (ap_max_requests_per_child) {
1382
0
        ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1383
0
                     "Stopping process due to MaxConnectionsPerChild");
1384
0
        signal_threads(ST_GRACEFUL);
1385
0
    }
1386
    /* keep going */
1387
0
    conns_this_child = APR_INT32_MAX;
1388
0
}
1389
1390
static int close_listeners(int *closed)
1391
0
{
1392
0
    if (!*closed) {
1393
0
        int i;
1394
1395
0
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1396
0
                     "closing listeners (connection_count=%u)",
1397
0
                     apr_atomic_read32(&connection_count));
1398
0
        ap_close_listeners_ex(my_bucket->listeners);
1399
1400
0
        dying = 1;
1401
0
        ap_scoreboard_image->parent[ap_child_slot].quiescing = 1;
1402
0
        for (i = 0; i < threads_per_child; ++i) {
1403
0
            ap_update_child_status_from_indexes(ap_child_slot, i,
1404
0
                                                SERVER_GRACEFUL, NULL);
1405
0
        }
1406
        /* wake up the main thread */
1407
0
        kill(ap_my_pid, SIGTERM);
1408
1409
0
        ap_queue_info_free_idle_pools(worker_queue_info);
1410
0
        ap_queue_interrupt_all(worker_queue);
1411
1412
0
        *closed = 1; /* once */
1413
0
        return 1;
1414
0
    }
1415
1416
0
    ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1417
0
                 "closed listeners (connection_count=%u)",
1418
0
                 apr_atomic_read32(&connection_count));
1419
0
    return 0;
1420
0
}
1421
1422
static void unblock_signal(int sig)
1423
0
{
1424
0
    sigset_t sig_mask;
1425
1426
0
    sigemptyset(&sig_mask);
1427
0
    sigaddset(&sig_mask, sig);
1428
#if defined(SIGPROCMASK_SETS_THREAD_MASK)
1429
    sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1430
#else
1431
0
    pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1432
0
#endif
1433
0
}
1434
1435
static void dummy_signal_handler(int sig)
1436
0
{
1437
    /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
1438
     *     then we don't need this goofy function.
1439
     */
1440
0
}
1441
1442
1443
#if HAVE_SERF
1444
static apr_status_t s_socket_add(void *user_baton,
1445
                                 apr_pollfd_t *pfd,
1446
                                 void *serf_baton)
1447
{
1448
    s_baton_t *s = (s_baton_t*)user_baton;
1449
    /* XXXXX: recycle listener_poll_types */
1450
    listener_poll_type *pt = ap_malloc(sizeof(*pt));
1451
    pt->type = PT_SERF;
1452
    pt->baton = serf_baton;
1453
    pfd->client_data = pt;
1454
    return apr_pollset_add(s->pollset, pfd);
1455
}
1456
1457
static apr_status_t s_socket_remove(void *user_baton,
1458
                                    apr_pollfd_t *pfd,
1459
                                    void *serf_baton)
1460
{
1461
    s_baton_t *s = (s_baton_t*)user_baton;
1462
    listener_poll_type *pt = pfd->client_data;
1463
    free(pt);
1464
    return apr_pollset_remove(s->pollset, pfd);
1465
}
1466
#endif
1467
1468
#if HAVE_SERF
1469
static void init_serf(apr_pool_t *p)
1470
{
1471
    s_baton_t *baton = NULL;
1472
1473
    baton = apr_pcalloc(p, sizeof(*baton));
1474
    baton->pollset = event_pollset;
1475
    /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
1476
    baton->pool = p;
1477
1478
    g_serf = serf_context_create_ex(baton,
1479
                                    s_socket_add,
1480
                                    s_socket_remove, p);
1481
1482
    ap_register_provider(p, "mpm_serf",
1483
                         "instance", "0", g_serf);
1484
}
1485
#endif
1486
1487
static apr_status_t push_timer2worker(timer_event_t* te)
1488
0
{
1489
0
    return ap_queue_push_timer(worker_queue, te);
1490
0
}
1491
1492
/*
1493
 * Pre-condition: cs is neither in event_pollset nor a timeout queue
1494
 * this function may only be called by the listener
1495
 */
1496
static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
1497
                                apr_pool_t *ptrans)
1498
0
{
1499
0
    apr_status_t rc;
1500
1501
0
    if (cs) {
1502
0
        csd = cs->pfd.desc.s;
1503
0
        ptrans = cs->p;
1504
0
    }
1505
0
    rc = ap_queue_push_socket(worker_queue, csd, cs, ptrans);
1506
0
    if (rc != APR_SUCCESS) {
1507
0
        ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
1508
0
                     "push2worker: ap_queue_push_socket failed");
1509
        /* trash the connection; we couldn't queue the connected
1510
         * socket to a worker
1511
         */
1512
0
        if (cs) {
1513
0
            shutdown_connection(cs);
1514
0
        }
1515
0
        else {
1516
0
            if (csd) {
1517
0
                close_socket_nonblocking(csd);
1518
0
            }
1519
0
            if (ptrans) {
1520
0
                ap_queue_info_push_pool(worker_queue_info, ptrans);
1521
0
            }
1522
0
        }
1523
0
        signal_threads(ST_GRACEFUL);
1524
0
    }
1525
1526
0
    return rc;
1527
0
}
1528
1529
/* get_worker:
1530
 *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1531
 *     *have_idle_worker_p = 1.
1532
 *     If *have_idle_worker_p is already 1, will do nothing.
1533
 *     If blocking == 1, block if all workers are currently busy.
1534
 *     If no worker was available immediately, will set *all_busy to 1.
1535
 *     XXX: If there are no workers, we should not block immediately but
1536
 *     XXX: close all keep-alive connections first.
1537
 */
1538
static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1539
0
{
1540
0
    apr_status_t rc;
1541
1542
0
    if (*have_idle_worker_p) {
1543
        /* already reserved a worker thread - must have hit a
1544
         * transient error on a previous pass
1545
         */
1546
0
        return;
1547
0
    }
1548
1549
0
    if (blocking)
1550
0
        rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1551
0
    else
1552
0
        rc = ap_queue_info_try_get_idler(worker_queue_info);
1553
1554
0
    if (rc == APR_SUCCESS || APR_STATUS_IS_EOF(rc)) {
1555
0
        *have_idle_worker_p = 1;
1556
0
    }
1557
0
    else if (!blocking && rc == APR_EAGAIN) {
1558
0
        *all_busy = 1;
1559
0
    }
1560
0
    else {
1561
0
        ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(00472)
1562
0
                     "ap_queue_info_wait_for_idler failed.  "
1563
0
                     "Attempting to shutdown process gracefully");
1564
0
        signal_threads(ST_GRACEFUL);
1565
0
    }
1566
0
}
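/* Illustrative sketch (not part of event.c): the reserve/consume contract of
 * get_worker() as the listener applies it further below -- reserve at most one
 * idler, consume the reservation only when a push actually succeeds, and keep
 * it across transient failures.  Variable names mirror listener_thread().
 */
#if 0
    get_worker(&have_idle_worker, 0 /* non-blocking */, &workers_were_busy);
    if (!have_idle_worker) {
        shutdown_connection(cs);           /* all busy: shed this connection */
    }
    else if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
        have_idle_worker = 0;              /* the reservation was consumed */
    }
    /* else: keep have_idle_worker == 1 and retry on the next pass */
#endif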
1567
1568
/* Structures to reuse */
1569
static timer_event_t timer_free_ring;
1570
1571
static apr_skiplist *timer_skiplist;
1572
static volatile apr_time_t timers_next_expiry;
1573
1574
/* Same goal as for TIMEOUT_FUDGE_FACTOR (avoid extra poll calls), but applied
1575
 * to timers. Since their timeouts are custom (user defined), we can't be too
1576
 * approximate here (hence using 0.01s).
1577
 */
1578
0
#define EVENT_FUDGE_FACTOR apr_time_from_msec(10)
1579
1580
/* The following compare function is used by apr_skiplist_insert() to keep the
1581
 * elements (timers) sorted and provide O(log n) complexity (this is also true
1582
 * for apr_skiplist_{find,remove}(), but those are not used in MPM event where
1583
 * inserted timers are not searched nor removed, but with apr_skiplist_pop()
1584
 * which does not use any compare function).  It is meant to return 0 when a == b,
1585
 * <0 when a < b, and >0 when a > b.  However apr_skiplist_insert() will not
1586
 * add duplicates (i.e. a == b), and apr_skiplist_add() is only available in
1587
 * APR 1.6, yet multiple timers could possibly be created in the same micro-
1588
 * second (duplicates with regard to apr_time_t); therefore we implement the
1589
 * compare function to return +1 instead of 0 when compared timers are equal,
1590
 * thus duplicates are still added after each other (in order of insertion).
1591
 */
1592
static int timer_comp(void *a, void *b)
1593
0
{
1594
0
    apr_time_t t1 = (apr_time_t) ((timer_event_t *)a)->when;
1595
0
    apr_time_t t2 = (apr_time_t) ((timer_event_t *)b)->when;
1596
0
    AP_DEBUG_ASSERT(t1);
1597
0
    AP_DEBUG_ASSERT(t2);
1598
0
    return ((t1 < t2) ? -1 : 1);
1599
0
}
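/* Illustrative sketch (not part of event.c): what returning +1 for equal keys
 * buys us.  Because timer_comp() never returns 0, apr_skiplist_insert()
 * accepts two timers created in the same microsecond instead of rejecting the
 * second as a duplicate, and they pop back out in insertion order.  Minimal
 * demonstration, assuming an initialized pool 'p'.
 */
#if 0
static void timer_comp_duplicates_demo(apr_pool_t *p)
{
    apr_skiplist *sl;
    timer_event_t a, b;

    a.when = b.when = apr_time_now();       /* same microsecond */

    apr_skiplist_init(&sl, p);
    apr_skiplist_set_compare(sl, timer_comp, timer_comp);

    apr_skiplist_insert(sl, &a);
    apr_skiplist_insert(sl, &b);            /* kept, not dropped as duplicate */

    AP_DEBUG_ASSERT(apr_skiplist_pop(sl, NULL) == &a);  /* insertion order */
    AP_DEBUG_ASSERT(apr_skiplist_pop(sl, NULL) == &b);
}
#endif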
1600
1601
static apr_thread_mutex_t *g_timer_skiplist_mtx;
1602
1603
static timer_event_t * event_get_timer_event(apr_time_t t,
1604
                                             ap_mpm_callback_fn_t *cbfn,
1605
                                             void *baton,
1606
                                             int insert, 
1607
                                             apr_array_header_t *pfds)
1608
0
{
1609
0
    timer_event_t *te;
1610
0
    apr_time_t now = (t < 0) ? 0 : apr_time_now();
1611
1612
    /* oh yeah, and make locking smarter/fine grained. */
1613
1614
0
    apr_thread_mutex_lock(g_timer_skiplist_mtx);
1615
1616
0
    if (!APR_RING_EMPTY(&timer_free_ring.link, timer_event_t, link)) {
1617
0
        te = APR_RING_FIRST(&timer_free_ring.link);
1618
0
        APR_RING_REMOVE(te, link);
1619
0
    }
1620
0
    else {
1621
0
        te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
1622
0
        APR_RING_ELEM_INIT(te, link);
1623
0
    }
1624
1625
0
    te->cbfunc = cbfn;
1626
0
    te->baton = baton;
1627
0
    te->canceled = 0;
1628
0
    te->when = now + t;
1629
0
    te->pfds = pfds;
1630
1631
0
    if (insert) { 
1632
0
        apr_time_t next_expiry;
1633
1634
        /* Okay, add sorted by when.. */
1635
0
        apr_skiplist_insert(timer_skiplist, te);
1636
1637
        /* Cheaply update the global timers_next_expiry with this event's
1638
         * if it expires before.
1639
         */
1640
0
        next_expiry = timers_next_expiry;
1641
0
        if (!next_expiry || next_expiry > te->when + EVENT_FUDGE_FACTOR) {
1642
0
            timers_next_expiry = te->when;
1643
            /* Unblock the poll()ing listener for it to update its timeout. */
1644
0
            if (listener_is_wakeable) {
1645
0
                apr_pollset_wakeup(event_pollset);
1646
0
            }
1647
0
        }
1648
0
    }
1649
0
    apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1650
1651
0
    return te;
1652
0
}
1653
1654
static apr_status_t event_register_timed_callback_ex(apr_time_t t,
1655
                                                  ap_mpm_callback_fn_t *cbfn,
1656
                                                  void *baton, 
1657
                                                  apr_array_header_t *pfds)
1658
0
{
1659
0
    event_get_timer_event(t, cbfn, baton, 1, pfds);
1660
0
    return APR_SUCCESS;
1661
0
}
1662
1663
static apr_status_t event_register_timed_callback(apr_time_t t,
1664
                                                  ap_mpm_callback_fn_t *cbfn,
1665
                                                  void *baton)
1666
0
{
1667
0
    event_register_timed_callback_ex(t, cbfn, baton, NULL);
1668
0
    return APR_SUCCESS;
1669
0
}
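/* Illustrative sketch (not part of event.c): how a module would typically
 * reach event_register_timed_callback() above, through the public
 * ap_mpm_register_timed_callback() wrapper declared in ap_mpm.h.  The
 * my_resume() callback and the 2 second delay are hypothetical.
 */
#if 0
static void my_resume(void *baton)
{
    conn_rec *c = baton;
    /* ... pick the suspended connection back up ... */
}

static void my_defer_processing(conn_rec *c)
{
    ap_mpm_register_timed_callback(apr_time_from_sec(2), my_resume, c);
}
#endif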
1670
1671
static apr_status_t event_cleanup_poll_callback(void *data)
1672
0
{
1673
0
    apr_status_t final_rc = APR_SUCCESS;
1674
0
    apr_array_header_t *pfds = data;
1675
0
    int i;
1676
1677
0
    for (i = 0; i < pfds->nelts; i++) {
1678
0
        apr_pollfd_t *pfd = (apr_pollfd_t *)pfds->elts + i;
1679
0
        if (pfd->client_data) {
1680
0
            apr_status_t rc;
1681
0
            rc = apr_pollset_remove(event_pollset, pfd);
1682
0
            if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1683
0
                final_rc = rc;
1684
0
            }
1685
0
            pfd->client_data = NULL;
1686
0
        }
1687
0
    }
1688
1689
0
    return final_rc;
1690
0
}
1691
1692
static apr_status_t event_register_poll_callback_ex(apr_pool_t *p,
1693
                                                const apr_array_header_t *pfds,
1694
                                                ap_mpm_callback_fn_t *cbfn,
1695
                                                ap_mpm_callback_fn_t *tofn,
1696
                                                void *baton,
1697
                                                apr_time_t timeout)
1698
0
{
1699
0
    socket_callback_baton_t *scb = apr_pcalloc(p, sizeof(*scb));
1700
0
    listener_poll_type *pt = apr_palloc(p, sizeof(*pt));
1701
0
    apr_status_t rc, final_rc = APR_SUCCESS;
1702
0
    int i;
1703
1704
0
    pt->type = PT_USER;
1705
0
    pt->baton = scb;
1706
1707
0
    scb->cbfunc = cbfn;
1708
0
    scb->user_baton = baton;
1709
0
    scb->pfds = apr_array_copy(p, pfds);
1710
1711
0
    apr_pool_pre_cleanup_register(p, scb->pfds, event_cleanup_poll_callback);
1712
1713
0
    for (i = 0; i < scb->pfds->nelts; i++) {
1714
0
        apr_pollfd_t *pfd = (apr_pollfd_t *)scb->pfds->elts + i;
1715
0
        if (pfd->reqevents) {
1716
0
            if (pfd->reqevents & APR_POLLIN) {
1717
0
                pfd->reqevents |= APR_POLLHUP;
1718
0
            }
1719
0
            pfd->reqevents |= APR_POLLERR;
1720
0
            pfd->client_data = pt;
1721
0
        }
1722
0
        else {
1723
0
            pfd->client_data = NULL;
1724
0
        }
1725
0
    }
1726
1727
0
    if (timeout > 0) { 
1728
        /* XXX:  This cancel timer event can fire before the pollset is updated */
1729
0
        scb->cancel_event = event_get_timer_event(timeout, tofn, baton, 1, scb->pfds);
1730
0
    }
1731
0
    for (i = 0; i < scb->pfds->nelts; i++) {
1732
0
        apr_pollfd_t *pfd = (apr_pollfd_t *)scb->pfds->elts + i;
1733
0
        if (pfd->client_data) {
1734
0
            rc = apr_pollset_add(event_pollset, pfd);
1735
0
            if (rc != APR_SUCCESS) {
1736
0
                final_rc = rc;
1737
0
            }
1738
0
        }
1739
0
    }
1740
0
    return final_rc;
1741
0
}
1742
1743
static apr_status_t event_register_poll_callback(apr_pool_t *p,
1744
                                                 const apr_array_header_t *pfds,
1745
                                                 ap_mpm_callback_fn_t *cbfn,
1746
                                                 void *baton)
1747
0
{
1748
0
    return event_register_poll_callback_ex(p,
1749
0
                                           pfds,
1750
0
                                           cbfn,
1751
0
                                           NULL, /* no timeout function */
1752
0
                                           baton,
1753
0
                                           0     /* no timeout */);
1754
0
}
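/* Illustrative sketch (not part of event.c): suspending a request until one of
 * its sockets becomes readable or a timeout fires, via the public
 * ap_mpm_register_poll_callback_timeout() wrapper (assumed here to mirror the
 * parameters of event_register_poll_callback_ex() above).  my_on_readable(),
 * my_on_timeout(), the backend socket and the 30s timeout are hypothetical.
 */
#if 0
static void my_on_readable(void *baton) { /* data arrived, resume */ }
static void my_on_timeout(void *baton)  { /* give up on the backend */ }

static apr_status_t my_wait_for_io(request_rec *r, apr_socket_t *backend)
{
    apr_array_header_t *pfds = apr_array_make(r->pool, 1, sizeof(apr_pollfd_t));
    apr_pollfd_t *pfd = apr_array_push(pfds);

    memset(pfd, 0, sizeof(*pfd));
    pfd->p = r->pool;
    pfd->desc_type = APR_POLL_SOCKET;
    pfd->desc.s = backend;
    pfd->reqevents = APR_POLLIN;   /* POLLHUP/POLLERR are added by the MPM */

    return ap_mpm_register_poll_callback_timeout(r->pool, pfds,
                                                 my_on_readable, my_on_timeout,
                                                 r, apr_time_from_sec(30));
}
#endif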
1755
1756
/*
1757
 * Flush data and close our side of the connection, then drain incoming data.
1758
 * If the latter would block put the connection in one of the linger timeout
1759
 * queues to be called back when ready, and repeat until it's closed by peer.
1760
 * Only to be called in the worker thread, and since it's in the immediate call
1761
 * stack, we can afford a comfortable buffer size to consume data quickly.
1762
 * Pre-condition: cs is not in any timeout queue and not in the pollset,
1763
 *                timeout_mutex is not locked
1764
 */
1765
#define LINGERING_BUF_SIZE (32 * 1024)
1766
static void process_lingering_close(event_conn_state_t *cs)
1767
0
{
1768
0
    apr_socket_t *csd = ap_get_conn_socket(cs->c);
1769
0
    char dummybuf[LINGERING_BUF_SIZE];
1770
0
    apr_size_t nbytes;
1771
0
    apr_status_t rv;
1772
0
    struct timeout_queue *q;
1773
1774
0
    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
1775
0
                  "lingering close from state %i", (int)cs->pub.state);
1776
0
    AP_DEBUG_ASSERT(CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state));
1777
1778
0
    if (!cs->linger_started) {
1779
0
        cs->pub.state = CONN_STATE_LINGER;
1780
0
        cs->linger_started = 1;
1781
1782
        /* defer_lingering_close() may have bumped lingering_count already */
1783
0
        if (!cs->deferred_linger) {
1784
0
            apr_atomic_inc32(&lingering_count);
1785
0
        }
1786
1787
0
        apr_socket_timeout_set(csd, apr_time_from_sec(SECONDS_TO_LINGER));
1788
0
        if (ap_start_lingering_close(cs->c)) {
1789
0
            notify_suspend(cs);
1790
0
            close_connection(cs);
1791
0
            return;
1792
0
        }
1793
        
1794
        /* All nonblocking from now, no need for APR_INCOMPLETE_READ either */
1795
0
        apr_socket_timeout_set(csd, 0);
1796
0
        apr_socket_opt_set(csd, APR_INCOMPLETE_READ, 0);
1797
1798
        /*
1799
         * If some module requested a shortened waiting period, only wait for
1800
         * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
1801
         * DoS attacks.
1802
         */
1803
0
        if (apr_table_get(cs->c->notes, "short-lingering-close")) {
1804
0
            cs->pub.state = CONN_STATE_LINGER_SHORT;
1805
0
        }
1806
0
        else {
1807
0
            cs->pub.state = CONN_STATE_LINGER_NORMAL;
1808
0
        }
1809
0
        cs->pub.sense = CONN_SENSE_DEFAULT;
1810
0
        notify_suspend(cs);
1811
1812
        /* One timestamp/duration for the whole lingering close time.
1813
         * XXX: This makes the (short_)linger_q not sorted/ordered by expiring
1814
         * timeouts whenever multiple schedules are necessary (EAGAIN below),
1815
         * but we probably don't care since these connections do not count
1816
         * for connections_above_limit() and all of them will be killed when
1817
         * busy or gracefully stopping anyway.
1818
         */
1819
0
        cs->queue_timestamp = apr_time_now();
1820
0
    }
1821
1822
0
    do {
1823
0
        nbytes = sizeof(dummybuf);
1824
0
        rv = apr_socket_recv(csd, dummybuf, &nbytes);
1825
0
    } while (rv == APR_SUCCESS);
1826
1827
0
    if (!APR_STATUS_IS_EAGAIN(rv)) {
1828
0
        close_connection(cs);
1829
0
        return;
1830
0
    }
1831
1832
    /* (Re)queue the connection to come back when readable */
1833
0
    update_reqevents_from_sense(cs, CONN_SENSE_WANT_READ);
1834
0
    q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
1835
0
    apr_thread_mutex_lock(timeout_mutex);
1836
0
    TO_QUEUE_APPEND(q, cs);
1837
0
    rv = apr_pollset_add(event_pollset, &cs->pfd);
1838
0
    if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
1839
0
        AP_DEBUG_ASSERT(0);
1840
0
        TO_QUEUE_REMOVE(q, cs);
1841
0
        apr_thread_mutex_unlock(timeout_mutex);
1842
0
        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
1843
0
                     "process_lingering_close: apr_pollset_add failure");
1844
0
        close_connection(cs);
1845
0
        signal_threads(ST_GRACEFUL);
1846
0
        return;
1847
0
    }
1848
0
    apr_thread_mutex_unlock(timeout_mutex);
1849
0
}
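/* Illustrative sketch (not part of event.c): the nonblocking drain pattern
 * used above, reduced to its essentials -- read and discard until either the
 * peer closes (or errors), or the socket would block (EAGAIN), in which case
 * the caller re-polls the socket for readability and tries again.
 */
#if 0
static int drain_or_repoll(apr_socket_t *csd)
{
    char buf[LINGERING_BUF_SIZE];
    apr_size_t len;
    apr_status_t rv;

    apr_socket_timeout_set(csd, 0);          /* nonblocking */
    do {
        len = sizeof(buf);
        rv = apr_socket_recv(csd, buf, &len);
    } while (rv == APR_SUCCESS);

    return APR_STATUS_IS_EAGAIN(rv);         /* 1: re-poll, 0: close now */
}
#endif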
1850
1851
/* call 'func' for all elements of 'q' above 'expiry'.
1852
 * Pre-condition: timeout_mutex must already be locked
1853
 * Post-condition: timeout_mutex will be locked again
1854
 */
1855
static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
1856
                                  int (*func)(event_conn_state_t *))
1857
0
{
1858
0
    apr_uint32_t total = 0, count;
1859
0
    event_conn_state_t *first, *cs, *last;
1860
0
    struct event_conn_state_t trash;
1861
0
    struct timeout_queue *qp;
1862
0
    apr_status_t rv;
1863
1864
0
    if (!*q->total) {
1865
0
        return;
1866
0
    }
1867
1868
0
    APR_RING_INIT(&trash.timeout_list, event_conn_state_t, timeout_list);
1869
0
    for (qp = q; qp; qp = qp->next) {
1870
0
        count = 0;
1871
0
        cs = first = last = APR_RING_FIRST(&qp->head);
1872
0
        while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
1873
0
                                       timeout_list)) {
1874
            /* Trash the entry if:
1875
             * - no expiry was given (zero means all), or
1876
             * - it expired (according to the queue timeout), or
1877
             * - the system clock skewed in the past: no entry should be
1878
             *   registered above the given expiry (~now) + the queue
1879
             *   timeout, we won't keep any here (eg. for centuries).
1880
             *
1881
             * Otherwise stop, no following entry will match thanks to the
1882
             * single timeout per queue (entries are added to the end!).
1883
             * This allows maintenance in O(1).
1884
             */
1885
0
            if (expiry && cs->queue_timestamp + qp->timeout > expiry
1886
0
                       && cs->queue_timestamp < expiry + qp->timeout) {
1887
                /* Since this is the next expiring entry of this queue, update
1888
                 * the global queues_next_expiry if it's later than this one.
1889
                 */
1890
0
                apr_time_t elem_expiry = cs->queue_timestamp + qp->timeout;
1891
0
                apr_time_t next_expiry = queues_next_expiry;
1892
0
                if (!next_expiry
1893
0
                        || next_expiry > elem_expiry + TIMEOUT_FUDGE_FACTOR) {
1894
0
                    queues_next_expiry = elem_expiry;
1895
0
                }
1896
0
                break;
1897
0
            }
1898
1899
0
            last = cs;
1900
0
            rv = apr_pollset_remove(event_pollset, &cs->pfd);
1901
0
            if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1902
0
                AP_DEBUG_ASSERT(0);
1903
0
                ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(00473)
1904
0
                              "apr_pollset_remove failed");
1905
0
            }
1906
0
            cs = APR_RING_NEXT(cs, timeout_list);
1907
0
            count++;
1908
0
        }
1909
0
        if (!count)
1910
0
            continue;
1911
1912
0
        APR_RING_UNSPLICE(first, last, timeout_list);
1913
0
        APR_RING_SPLICE_TAIL(&trash.timeout_list, first, last, event_conn_state_t,
1914
0
                             timeout_list);
1915
0
        AP_DEBUG_ASSERT(*q->total >= count && qp->count >= count);
1916
0
        *q->total -= count;
1917
0
        qp->count -= count;
1918
0
        total += count;
1919
0
    }
1920
0
    if (!total)
1921
0
        return;
1922
1923
0
    apr_thread_mutex_unlock(timeout_mutex);
1924
0
    first = APR_RING_FIRST(&trash.timeout_list);
1925
0
    do {
1926
0
        cs = APR_RING_NEXT(first, timeout_list);
1927
0
        TO_QUEUE_ELEM_INIT(first);
1928
0
        func(first);
1929
0
        first = cs;
1930
0
    } while (--total);
1931
0
    apr_thread_mutex_lock(timeout_mutex);
1932
0
}
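/* Worked example of the early-break logic above: with qp->timeout = 5s and
 * entries appended at t = 0s, 2s and 4s, a maintenance pass with expiry at
 * t = 6s trashes the t = 0s entry (0 + 5 <= 6), then stops at the t = 2s
 * entry (2 + 5 > 6), records its expiry (~7s) as queues_next_expiry, and
 * never needs to look at the t = 4s entry since it cannot expire earlier.
 */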
1933
1934
static void process_keepalive_queue(apr_time_t expiry)
1935
0
{
1936
    /* If all workers are busy, we kill older keep-alive connections so
1937
     * that they may connect to another process.
1938
     */
1939
0
    if (!expiry && *keepalive_q->total) {
1940
0
        ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1941
0
                     "All workers are busy or dying, will shutdown %u "
1942
0
                     "keep-alive connections", *keepalive_q->total);
1943
0
    }
1944
0
    process_timeout_queue(keepalive_q, expiry, shutdown_connection);
1945
0
}
1946
1947
static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1948
0
{
1949
0
    apr_status_t rc;
1950
0
    proc_info *ti = dummy;
1951
0
    int process_slot = ti->pslot;
1952
0
    struct process_score *ps = ap_get_scoreboard_process(process_slot);
1953
0
    int closed = 0;
1954
0
    int have_idle_worker = 0;
1955
0
    apr_time_t last_log;
1956
1957
0
    last_log = apr_time_now();
1958
0
    free(ti);
1959
1960
#if HAVE_SERF
1961
    init_serf(apr_thread_pool_get(thd));
1962
#endif
1963
1964
    /* Unblock the signal used to wake this thread up, and set a handler for
1965
     * it.
1966
     */
1967
0
    apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1968
0
    unblock_signal(LISTENER_SIGNAL);
1969
1970
0
    for (;;) {
1971
0
        timer_event_t *te;
1972
0
        const apr_pollfd_t *out_pfd;
1973
0
        apr_int32_t num = 0;
1974
0
        apr_interval_time_t timeout;
1975
0
        socket_callback_baton_t *user_chain;
1976
0
        apr_time_t now, expiry = -1;
1977
0
        int workers_were_busy = 0;
1978
1979
0
        if (conns_this_child <= 0)
1980
0
            check_infinite_requests();
1981
1982
0
        if (listener_may_exit) {
1983
0
            int first_close = close_listeners(&closed);
1984
1985
0
            if (terminate_mode == ST_UNGRACEFUL
1986
0
                || apr_atomic_read32(&connection_count) == 0)
1987
0
                break;
1988
1989
            /* Don't wait in poll() for the first close (i.e. dying now), we
1990
             * want to maintain the queues and schedule defer_linger_chain ASAP
1991
             * to kill kept-alive connections and shut down the workers and child
1992
             * faster.
1993
             */
1994
0
            if (first_close) {
1995
0
                goto do_maintenance; /* with expiry == -1 */
1996
0
            }
1997
0
        }
1998
1999
0
        if (APLOGtrace6(ap_server_conf)) {
2000
0
            now = apr_time_now();
2001
            /* trace log status every second */
2002
0
            if (now - last_log > apr_time_from_sec(1)) {
2003
0
                ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
2004
0
                             "connections: %u (waitio:%u write-completion:%u"
2005
0
                             "keep-alive:%u lingering:%u suspended:%u clogged:%u), "
2006
0
                             "workers: %u/%u shutdown",
2007
0
                             apr_atomic_read32(&connection_count),
2008
0
                             apr_atomic_read32(waitio_q->total),
2009
0
                             apr_atomic_read32(write_completion_q->total),
2010
0
                             apr_atomic_read32(keepalive_q->total),
2011
0
                             apr_atomic_read32(&lingering_count),
2012
0
                             apr_atomic_read32(&suspended_count),
2013
0
                             apr_atomic_read32(&clogged_count),
2014
0
                             apr_atomic_read32(&threads_shutdown),
2015
0
                             threads_per_child);
2016
0
                last_log = now;
2017
0
            }
2018
0
        }
2019
2020
#if HAVE_SERF
2021
        rc = serf_context_prerun(g_serf);
2022
        if (rc != APR_SUCCESS) {
2023
            /* TODO: what should we do here? ugh. */
2024
        }
2025
#endif
2026
2027
        /* Start with an infinite poll() timeout and update it according to
2028
         * the next expiring timer or queue entry. If there are none, either
2029
         * the listener is wakeable and it can poll() indefinitely until a wake
2030
         * up occurs, otherwise periodic checks (maintenance, shutdown, ...)
2031
         * must be performed.
2032
         */
2033
0
        now = apr_time_now();
2034
0
        timeout = -1;
2035
2036
        /* Push expired timers to a worker; the first remaining one determines
2037
         * the maximum time to poll() below, if any.
2038
         */
2039
0
        expiry = timers_next_expiry;
2040
0
        if (expiry && expiry < now) {
2041
0
            apr_thread_mutex_lock(g_timer_skiplist_mtx);
2042
0
            while ((te = apr_skiplist_peek(timer_skiplist))) {
2043
0
                if (te->when > now) {
2044
0
                    timers_next_expiry = te->when;
2045
0
                    timeout = te->when - now;
2046
0
                    break;
2047
0
                }
2048
0
                apr_skiplist_pop(timer_skiplist, NULL);
2049
0
                if (!te->canceled) { 
2050
0
                    if (te->pfds) {
2051
                        /* remove all sockets from the pollset */
2052
0
                        apr_pool_cleanup_run(te->pfds->pool, te->pfds,
2053
0
                                             event_cleanup_poll_callback);
2054
0
                    }
2055
0
                    push_timer2worker(te);
2056
0
                }
2057
0
                else {
2058
0
                    APR_RING_INSERT_TAIL(&timer_free_ring.link, te,
2059
0
                                         timer_event_t, link);
2060
0
                }
2061
0
            }
2062
0
            if (!te) {
2063
0
                timers_next_expiry = 0;
2064
0
            }
2065
0
            apr_thread_mutex_unlock(g_timer_skiplist_mtx);
2066
0
        }
2067
2068
        /* Same for queues, use their next expiry, if any. */
2069
0
        expiry = queues_next_expiry;
2070
0
        if (expiry
2071
0
                && (timeout < 0
2072
0
                    || expiry <= now
2073
0
                    || timeout > expiry - now)) {
2074
0
            timeout = expiry > now ? expiry - now : 0;
2075
0
        }
2076
2077
        /* When non-wakeable, don't wait more than 100 ms, in any case. */
2078
0
#define NON_WAKEABLE_POLL_TIMEOUT apr_time_from_msec(100)
2079
0
        if (!listener_is_wakeable
2080
0
                && (timeout < 0
2081
0
                    || timeout > NON_WAKEABLE_POLL_TIMEOUT)) {
2082
0
            timeout = NON_WAKEABLE_POLL_TIMEOUT;
2083
0
        }
2084
0
        else if (timeout > 0) {
2085
            /* apr_pollset_poll() might round down the timeout to milliseconds,
2086
             * let's forcibly round up here to never return before the timeout.
2087
             */
2088
0
            timeout = apr_time_from_msec(
2089
0
                apr_time_as_msec(timeout + apr_time_from_msec(1) - 1)
2090
0
            );
2091
0
        }
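        /* Worked example for the round-up above (times in microseconds):
         * timeout = 1500 gives 1500 + 1000 - 1 = 2499, apr_time_as_msec()
         * truncates to 2ms and apr_time_from_msec() yields 2000 >= 1500, so
         * poll() cannot return before the deadline; a plain as_msec() would
         * have produced 1ms and an early return.
         */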
2092
2093
0
        ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
2094
0
                     "polling with timeout=%" APR_TIME_T_FMT
2095
0
                     " queues_timeout=%" APR_TIME_T_FMT
2096
0
                     " timers_timeout=%" APR_TIME_T_FMT,
2097
0
                     timeout, queues_next_expiry - now,
2098
0
                     timers_next_expiry - now);
2099
2100
0
        rc = apr_pollset_poll(event_pollset, timeout, &num, &out_pfd);
2101
0
        if (rc != APR_SUCCESS) {
2102
0
            if (!APR_STATUS_IS_EINTR(rc) && !APR_STATUS_IS_TIMEUP(rc)) {
2103
0
                ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
2104
0
                             APLOGNO(03267)
2105
0
                             "apr_pollset_poll failed.  Attempting to "
2106
0
                             "shutdown process gracefully");
2107
0
                signal_threads(ST_GRACEFUL);
2108
0
            }
2109
0
            num = 0;
2110
0
        }
2111
2112
0
        if (APLOGtrace7(ap_server_conf)) {
2113
0
            now = apr_time_now();
2114
0
            ap_log_error(APLOG_MARK, APLOG_TRACE7, rc, ap_server_conf,
2115
0
                         "polled with num=%u exit=%d/%d conns=%d"
2116
0
                         " queues_timeout=%" APR_TIME_T_FMT
2117
0
                         " timers_timeout=%" APR_TIME_T_FMT,
2118
0
                         num, listener_may_exit, dying,
2119
0
                         apr_atomic_read32(&connection_count),
2120
0
                         queues_next_expiry - now, timers_next_expiry - now);
2121
0
        }
2122
2123
        /* XXX possible optimization: stash the current time for use as
2124
         * r->request_time for new requests or queues maintenance
2125
         */
2126
2127
0
        for (user_chain = NULL; num; --num, ++out_pfd) {
2128
0
            listener_poll_type *pt = (listener_poll_type *) out_pfd->client_data;
2129
0
            if (pt->type == PT_CSD) {
2130
                /* one of the sockets is readable */
2131
0
                event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
2132
0
                struct timeout_queue *remove_from_q = NULL;
2133
                /* don't wait for a worker for a keepalive request or
2134
                 * lingering close processing. */
2135
0
                int blocking = 0;
2136
2137
0
                switch (cs->pub.state) {
2138
0
                case CONN_STATE_WRITE_COMPLETION:
2139
0
                    remove_from_q = cs->sc->wc_q;
2140
0
                    blocking = 1;
2141
0
                    break;
2142
2143
0
                case CONN_STATE_ASYNC_WAITIO:
2144
0
                    cs->pub.state = CONN_STATE_PROCESSING;
2145
0
                    remove_from_q = cs->sc->io_q;
2146
0
                    blocking = 1;
2147
0
                    break;
2148
2149
0
                case CONN_STATE_KEEPALIVE:
2150
0
                    cs->pub.state = CONN_STATE_PROCESSING;
2151
0
                    remove_from_q = cs->sc->ka_q;
2152
0
                    break;
2153
2154
0
                case CONN_STATE_LINGER_NORMAL:
2155
0
                    remove_from_q = linger_q;
2156
0
                    break;
2157
2158
0
                case CONN_STATE_LINGER_SHORT:
2159
0
                    remove_from_q = short_linger_q;
2160
0
                    break;
2161
2162
0
                default:
2163
0
                    ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
2164
0
                                 ap_server_conf, APLOGNO(03096)
2165
0
                                 "event_loop: unexpected state %d",
2166
0
                                 cs->pub.state);
2167
0
                    ap_assert(0);
2168
0
                }
2169
2170
0
                if (remove_from_q) {
2171
0
                    apr_thread_mutex_lock(timeout_mutex);
2172
0
                    TO_QUEUE_REMOVE(remove_from_q, cs);
2173
0
                    rc = apr_pollset_remove(event_pollset, &cs->pfd);
2174
0
                    apr_thread_mutex_unlock(timeout_mutex);
2175
                    /*
2176
                     * Some of the pollset backends, like KQueue or Epoll
2177
                     * automagically remove the FD if the socket is closed,
2178
                     * therefore, we can accept _SUCCESS or _NOTFOUND,
2179
                     * and we still want to keep going
2180
                     */
2181
0
                    if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
2182
0
                        AP_DEBUG_ASSERT(0);
2183
0
                        ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
2184
0
                                     APLOGNO(03094) "pollset remove failed");
2185
0
                        close_connection(cs);
2186
0
                        signal_threads(ST_GRACEFUL);
2187
0
                        break;
2188
0
                    }
2189
2190
                    /* If we don't get a worker immediately (nonblocking), we
2191
                     * close the connection; the client can re-connect to a
2192
                     * different process for keepalive, and for lingering close
2193
                     * the connection will be shutdown so the choice is to favor
2194
                     * incoming/alive connections.
2195
                     */
2196
0
                    get_worker(&have_idle_worker, blocking,
2197
0
                               &workers_were_busy);
2198
0
                    if (!have_idle_worker) {
2199
0
                        shutdown_connection(cs);
2200
0
                    }
2201
0
                    else if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
2202
0
                        have_idle_worker = 0;
2203
0
                    }
2204
0
                }
2205
0
            }
2206
0
            else if (pt->type == PT_ACCEPT && !listeners_disabled()) {
2207
                /* A Listener Socket is ready for an accept() */
2208
0
                if (workers_were_busy) {
2209
0
                    disable_listensocks();
2210
0
                    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2211
0
                                 APLOGNO(03268)
2212
0
                                 "All workers busy, not accepting new conns "
2213
0
                                 "in this process");
2214
0
                }
2215
0
                else if (connections_above_limit(&workers_were_busy)) {
2216
0
                    disable_listensocks();
2217
0
                    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2218
0
                                 APLOGNO(03269)
2219
0
                                 "Too many open connections (%u, idlers %u), "
2220
0
                                 "not accepting new conns in this process",
2221
0
                                 apr_atomic_read32(&connection_count),
2222
0
                                 ap_queue_info_num_idlers(worker_queue_info));
2223
0
                }
2224
0
                else if (!listener_may_exit) {
2225
0
                    void *csd = NULL;
2226
0
                    ap_listen_rec *lr = (ap_listen_rec *) pt->baton;
2227
0
                    apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
2228
0
                    ap_queue_info_pop_pool(worker_queue_info, &ptrans);
2229
2230
0
                    if (ptrans == NULL) {
2231
                        /* create a new transaction pool for each accepted socket */
2232
0
                        apr_allocator_t *allocator = NULL;
2233
2234
0
                        rc = apr_allocator_create(&allocator);
2235
0
                        if (rc == APR_SUCCESS) {
2236
0
                            apr_allocator_max_free_set(allocator,
2237
0
                                                       ap_max_mem_free);
2238
0
                            rc = apr_pool_create_ex(&ptrans, pconf, NULL,
2239
0
                                                    allocator);
2240
0
                            if (rc == APR_SUCCESS) {
2241
0
                                apr_pool_tag(ptrans, "transaction");
2242
0
                                apr_allocator_owner_set(allocator, ptrans);
2243
0
                            }
2244
0
                        }
2245
0
                        if (rc != APR_SUCCESS) {
2246
0
                            ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
2247
0
                                         ap_server_conf, APLOGNO(03097)
2248
0
                                         "Failed to create transaction pool");
2249
0
                            if (allocator) {
2250
0
                                apr_allocator_destroy(allocator);
2251
0
                            }
2252
0
                            resource_shortage = 1;
2253
0
                            signal_threads(ST_GRACEFUL);
2254
0
                            continue;
2255
0
                        }
2256
0
                    }
2257
2258
0
                    get_worker(&have_idle_worker, 1, &workers_were_busy);
2259
0
                    rc = lr->accept_func(&csd, lr, ptrans);
2260
2261
                    /* later we trash rv and rely on csd to indicate
2262
                     * success/failure
2263
                     */
2264
0
                    AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
2265
2266
0
                    if (rc == APR_EGENERAL) {
2267
                        /* E[NM]FILE, ENOMEM, etc */
2268
0
                        resource_shortage = 1;
2269
0
                        signal_threads(ST_GRACEFUL);
2270
0
                    }
2271
0
                    else if (ap_accept_error_is_nonfatal(rc)) { 
2272
0
                        ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, ap_server_conf, 
2273
0
                                     "accept() on client socket failed");
2274
0
                    }
2275
2276
0
                    if (csd != NULL) {
2277
0
                        conns_this_child--;
2278
0
                        if (push2worker(NULL, csd, ptrans) == APR_SUCCESS) {
2279
0
                            have_idle_worker = 0;
2280
0
                        }
2281
0
                    }
2282
0
                    else {
2283
0
                        ap_queue_info_push_pool(worker_queue_info, ptrans);
2284
0
                    }
2285
0
                }
2286
0
            }               /* if:else on pt->type */
2287
#if HAVE_SERF
2288
            else if (pt->type == PT_SERF) {
2289
                /* send socket to serf. */
2290
                /* XXXX: this doesn't require get_worker() */
2291
                serf_event_trigger(g_serf, pt->baton, out_pfd);
2292
            }
2293
2294
#endif
2295
0
            else if (pt->type == PT_USER) {
2296
0
                socket_callback_baton_t *baton = pt->baton;
2297
0
                if (baton->cancel_event) {
2298
0
                    baton->cancel_event->canceled = 1;
2299
0
                }
2300
2301
                /* We only signal once per N sockets with this baton,
2302
                 * and after this loop to avoid any race/lifetime issue
2303
                 * with the user callback being called while we handle
2304
                 * the same baton multiple times here.
2305
                 */
2306
0
                if (!baton->signaled) { 
2307
0
                    baton->signaled = 1;
2308
0
                    baton->next = user_chain;
2309
0
                    user_chain = baton;
2310
0
                }
2311
0
            }
2312
0
        } /* for processing poll */
2313
2314
        /* Time to handle user callbacks chained above */
2315
0
        while (user_chain) {
2316
0
            socket_callback_baton_t *baton = user_chain;
2317
0
            user_chain = user_chain->next;
2318
0
            baton->next = NULL;
2319
2320
            /* remove all sockets from the pollset */
2321
0
            apr_pool_cleanup_run(baton->pfds->pool, baton->pfds,
2322
0
                                 event_cleanup_poll_callback);
2323
2324
            /* masquerade as a timer event that is firing */
2325
0
            te = event_get_timer_event(-1 /* fake timer */, 
2326
0
                                       baton->cbfunc, 
2327
0
                                       baton->user_baton, 
2328
0
                                       0, /* don't insert it */
2329
0
                                       NULL /* no associated socket callback */);
2330
0
            push_timer2worker(te);
2331
0
        }
2332
2333
        /* We process the timeout queues here only when the global
2334
         * queues_next_expiry is passed. This happens accurately since
2335
         * adding to the queues (in workers) can only decrease this expiry,
2336
         * while latest ones are only taken into account here (in listener)
2337
         * during queues' processing, with the lock held. This works both
2338
         * with and without wake-ability.
2339
         */
2340
0
        expiry = queues_next_expiry;
2341
0
do_maintenance:
2342
0
        if (expiry && expiry < (now = apr_time_now())) {
2343
0
            ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
2344
0
                         "queues maintenance with timeout=%" APR_TIME_T_FMT,
2345
0
                         expiry > 0 ? expiry - now : -1);
2346
0
            apr_thread_mutex_lock(timeout_mutex);
2347
2348
            /* Steps below will recompute this. */
2349
0
            queues_next_expiry = 0;
2350
2351
            /* Step 1: keepalive queue timeouts are closed */
2352
0
            if (workers_were_busy || dying) {
2353
0
                process_keepalive_queue(0); /* kill'em all \m/ */
2354
0
            }
2355
0
            else {
2356
0
                process_keepalive_queue(now);
2357
0
            }
2358
2359
            /* Step 2: waitio queue timeouts are flushed */
2360
0
            process_timeout_queue(waitio_q, now, defer_lingering_close);
2361
2362
            /* Step 3: write completion queue timeouts are flushed */
2363
0
            process_timeout_queue(write_completion_q, now, defer_lingering_close);
2364
2365
            /* Step 4: normal lingering close queue timeouts are closed */
2366
0
            if (dying && linger_q->timeout > short_linger_q->timeout) {
2367
                /* Dying, force short timeout for normal lingering close */
2368
0
                linger_q->timeout = short_linger_q->timeout;
2369
0
            }
2370
0
            process_timeout_queue(linger_q, now, shutdown_connection);
2371
2372
            /* Step 5: short lingering close queue timeouts are closed */
2373
0
            process_timeout_queue(short_linger_q, now, shutdown_connection);
2374
2375
0
            apr_thread_mutex_unlock(timeout_mutex);
2376
0
            ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
2377
0
                         "queues maintained with timeout=%" APR_TIME_T_FMT,
2378
0
                         queues_next_expiry > now ? queues_next_expiry - now
2379
0
                                                  : -1);
2380
2381
0
            ps->wait_io = apr_atomic_read32(waitio_q->total);
2382
0
            ps->write_completion = apr_atomic_read32(write_completion_q->total);
2383
0
            ps->keep_alive = apr_atomic_read32(keepalive_q->total);
2384
0
            ps->lingering_close = apr_atomic_read32(&lingering_count);
2385
0
            ps->suspended = apr_atomic_read32(&suspended_count);
2386
0
            ps->connections = apr_atomic_read32(&connection_count);
2387
0
        }
2388
0
        else if ((workers_were_busy || dying)
2389
0
                 && apr_atomic_read32(keepalive_q->total)) {
2390
0
            apr_thread_mutex_lock(timeout_mutex);
2391
0
            process_keepalive_queue(0); /* kill'em all \m/ */
2392
0
            apr_thread_mutex_unlock(timeout_mutex);
2393
0
            ps->keep_alive = 0;
2394
0
        }
2395
2396
        /* If there are some lingering closes to defer (to a worker), schedule
2397
         * them now. We might wake up a worker spuriously if another one empties
2398
         * defer_linger_chain in the meantime, but there also may be no active
2399
         * or all busy workers for an undefined time.  In any case a deferred
2400
         * lingering close can't starve if we do that here since the chain is
2401
         * filled only above in the listener and it's emptied only in the
2402
         * worker(s); thus a NULL here means it will stay so while the listener
2403
         * waits (possibly indefinitely) in poll().
2404
         */
2405
0
        if (defer_linger_chain) {
2406
0
            get_worker(&have_idle_worker, 0, &workers_were_busy);
2407
0
            if (have_idle_worker
2408
0
                    && defer_linger_chain /* re-test */
2409
0
                    && push2worker(NULL, NULL, NULL) == APR_SUCCESS) {
2410
0
                have_idle_worker = 0;
2411
0
            }
2412
0
        }
2413
2414
0
        if (!workers_were_busy && should_enable_listensocks()) {
2415
0
            enable_listensocks();
2416
0
        }
2417
0
    } /* listener main loop */
2418
2419
0
    ap_queue_term(worker_queue);
2420
2421
0
    apr_thread_exit(thd, APR_SUCCESS);
2422
0
    return NULL;
2423
0
}
2424
2425
/*
2426
 * During graceful shutdown, if there are more running worker threads than
2427
 * open connections, exit one worker thread.
2428
 *
2429
 * return 1 if thread should exit, 0 if it should continue running.
2430
 */
2431
static int worker_thread_should_exit_early(void)
2432
0
{
2433
0
    for (;;) {
2434
0
        apr_uint32_t conns = apr_atomic_read32(&connection_count);
2435
0
        apr_uint32_t dead = apr_atomic_read32(&threads_shutdown);
2436
0
        apr_uint32_t newdead;
2437
2438
0
        AP_DEBUG_ASSERT(dead <= threads_per_child);
2439
0
        if (conns >= threads_per_child - dead)
2440
0
            return 0;
2441
2442
0
        newdead = dead + 1;
2443
0
        if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) {
2444
            /*
2445
             * No other thread has exited in the mean time, safe to exit
2446
             * this one.
2447
             */
2448
0
            return 1;
2449
0
        }
2450
0
    }
2451
0
}
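/* Worked example of the check above: with threads_per_child = 25,
 * threads_shutdown = 3 and connection_count = 20, the thread may exit since
 * 20 < 25 - 3; the winning CAS bumps threads_shutdown to 4, so each exit
 * raises the bar and at most one thread leaves per surplus slot even when
 * several workers race here at the same time.
 */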
2452
2453
/* XXX For ungraceful termination/restart, we definitely don't want to
2454
 *     wait for active connections to finish but we may want to wait
2455
 *     for idle workers to get out of the queue code and release mutexes,
2456
 *     since those mutexes are cleaned up pretty soon and some systems
2457
 *     may not react favorably (i.e., segfault) if operations are attempted
2458
 *     on cleaned-up mutexes.
2459
 */
2460
static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
2461
0
{
2462
0
    proc_info *ti = dummy;
2463
0
    int process_slot = ti->pslot;
2464
0
    int thread_slot = ti->tslot;
2465
0
    apr_status_t rv;
2466
0
    int is_idle = 0;
2467
2468
0
    free(ti);
2469
2470
0
    ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
2471
0
    ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
2472
0
    ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->mpm->my_generation;
2473
0
    ap_update_child_status_from_indexes(process_slot, thread_slot,
2474
0
                                        SERVER_STARTING, NULL);
2475
2476
0
    for (;;) {
2477
0
        apr_socket_t *csd = NULL;
2478
0
        event_conn_state_t *cs;
2479
0
        timer_event_t *te = NULL;
2480
0
        apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
2481
2482
0
        if (!is_idle) {
2483
0
            rv = ap_queue_info_set_idle(worker_queue_info, NULL);
2484
0
            if (rv != APR_SUCCESS) {
2485
0
                ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
2486
0
                             APLOGNO(03270)
2487
0
                             "ap_queue_info_set_idle failed. Attempting to "
2488
0
                             "shutdown process gracefully.");
2489
0
                signal_threads(ST_GRACEFUL);
2490
0
                break;
2491
0
            }
2492
            /* A new idler may have changed connections_above_limit(),
2493
             * let the listener know and decide.
2494
             */
2495
0
            if (listener_is_wakeable && should_enable_listensocks()) {
2496
0
                apr_pollset_wakeup(event_pollset);
2497
0
            }
2498
0
            is_idle = 1;
2499
0
        }
2500
2501
0
        ap_update_child_status_from_indexes(process_slot, thread_slot,
2502
0
                                            dying ? SERVER_GRACEFUL
2503
0
                                                  : SERVER_READY, NULL);
2504
0
      worker_pop:
2505
0
        if (workers_may_exit) {
2506
0
            break;
2507
0
        }
2508
0
        if (dying && worker_thread_should_exit_early()) {
2509
0
            break;
2510
0
        }
2511
2512
0
        rv = ap_queue_pop_something(worker_queue, &csd, (void **)&cs,
2513
0
                                    &ptrans, &te);
2514
2515
0
        if (rv != APR_SUCCESS) {
2516
            /* We get APR_EOF during a graceful shutdown once all the
2517
             * connections accepted by this server process have been handled.
2518
             */
2519
0
            if (APR_STATUS_IS_EOF(rv)) {
2520
0
                break;
2521
0
            }
2522
            /* We get APR_EINTR whenever ap_queue_pop_*() has been interrupted
2523
             * from an explicit call to ap_queue_interrupt_all(). This allows
2524
             * us to unblock threads stuck in ap_queue_pop_*() when a shutdown
2525
             * is pending.
2526
             *
2527
             * If workers_may_exit is set and this is ungraceful termination/
2528
             * restart, we are bound to get an error on some systems (e.g.,
2529
             * AIX, which sanity-checks mutex operations) since the queue
2530
             * may have already been cleaned up.  Don't log the "error" if
2531
             * workers_may_exit is set.
2532
             */
2533
0
            else if (APR_STATUS_IS_EINTR(rv)) {
2534
0
                goto worker_pop;
2535
0
            }
2536
            /* We got some other error. */
2537
0
            else if (!workers_may_exit) {
2538
0
                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2539
0
                             APLOGNO(03099) "ap_queue_pop_socket failed");
2540
0
            }
2541
0
            continue;
2542
0
        }
2543
0
        if (te != NULL) {
2544
0
            te->cbfunc(te->baton);
2545
0
            {
2546
0
                apr_thread_mutex_lock(g_timer_skiplist_mtx);
2547
0
                APR_RING_INSERT_TAIL(&timer_free_ring.link, te, timer_event_t, link);
2548
0
                apr_thread_mutex_unlock(g_timer_skiplist_mtx);
2549
0
            }
2550
0
        }
2551
0
        else {
2552
0
            is_idle = 0;
2553
0
            if (csd != NULL) {
2554
0
                worker_sockets[thread_slot] = csd;
2555
0
                process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
2556
0
                worker_sockets[thread_slot] = NULL;
2557
0
            }
2558
0
        }
2559
2560
        /* If there are deferred lingering closes, handle them now. */
2561
0
        while (!workers_may_exit) {
2562
0
            cs = defer_linger_chain;
2563
0
            if (!cs) {
2564
0
                break;
2565
0
            }
2566
0
            if (apr_atomic_casptr((void *)&defer_linger_chain, cs->chain,
2567
0
                                  cs) != cs) {
2568
                /* Race lost, try again */
2569
0
                continue;
2570
0
            }
2571
0
            cs->chain = NULL;
2572
0
            AP_DEBUG_ASSERT(cs->pub.state == CONN_STATE_LINGER);
2573
2574
0
            worker_sockets[thread_slot] = csd = cs->pfd.desc.s;
2575
0
            process_socket(thd, cs->p, csd, cs, process_slot, thread_slot);
2576
0
            worker_sockets[thread_slot] = NULL;
2577
0
        }
2578
0
    }
2579
2580
0
    ap_update_child_status_from_indexes(process_slot, thread_slot,
2581
0
                                        dying ? SERVER_DEAD
2582
0
                                              : SERVER_GRACEFUL, NULL);
2583
2584
0
    apr_thread_exit(thd, APR_SUCCESS);
2585
0
    return NULL;
2586
0
}
2587
2588
static int check_signal(int signum)
2589
0
{
2590
0
    switch (signum) {
2591
0
    case SIGTERM:
2592
0
    case SIGINT:
2593
0
        return 1;
2594
0
    }
2595
0
    return 0;
2596
0
}
2597
2598
static void create_listener_thread(thread_starter * ts)
2599
0
{
2600
0
    int my_child_num = ts->child_num_arg;
2601
0
    apr_threadattr_t *thread_attr = ts->threadattr;
2602
0
    proc_info *my_info;
2603
0
    apr_status_t rv;
2604
2605
0
    my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2606
0
    my_info->pslot = my_child_num;
2607
0
    my_info->tslot = -1;      /* listener thread doesn't have a thread slot */
2608
0
    rv = ap_thread_create(&ts->listener, thread_attr, listener_thread,
2609
0
                          my_info, pruntime);
2610
0
    if (rv != APR_SUCCESS) {
2611
0
        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00474)
2612
0
                     "ap_thread_create: unable to create listener thread");
2613
        /* let the parent decide how bad this really is */
2614
0
        clean_child_exit(APEXIT_CHILDSICK);
2615
0
    }
2616
0
    apr_os_thread_get(&listener_os_thread, ts->listener);
2617
0
}
2618
2619
static void setup_threads_runtime(void)
2620
0
{
2621
0
    apr_status_t rv;
2622
0
    ap_listen_rec *lr;
2623
0
    apr_pool_t *pskip = NULL;
2624
0
    int max_recycled_pools = -1, i;
2625
0
    const int good_methods[] = { APR_POLLSET_KQUEUE,
2626
0
                                 APR_POLLSET_PORT,
2627
0
                                 APR_POLLSET_EPOLL };
2628
    /* XXX: K-A or lingering close connection included in the async factor */
2629
0
    const apr_uint32_t async_factor = worker_factor / WORKER_FACTOR_SCALE;
2630
0
    const apr_uint32_t pollset_size = (apr_uint32_t)num_listensocks +
2631
0
                                      (apr_uint32_t)threads_per_child *
2632
0
                                      (async_factor > 2 ? async_factor : 2);
2633
0
    int pollset_flags;
2634
2635
    /* Event's skiplist operations will happen concurrently with other modules'
2636
     * runtime so they need their own pool for allocations, and its lifetime
2637
     * should be at least the one of the connections (ptrans). Thus pskip is
2638
     * created as a subpool of pconf like/before ptrans (before so that it's
2639
     * destroyed after). In forked mode pconf is never destroyed so we are good
2640
     * anyway, but in ONE_PROCESS mode this ensures that the skiplist works
2641
     * from connection/ptrans cleanups (even after pchild is destroyed).
2642
     */
2643
0
    apr_pool_create(&pskip, pconf);
2644
0
    apr_pool_tag(pskip, "mpm_skiplist");
2645
0
    apr_thread_mutex_create(&g_timer_skiplist_mtx, APR_THREAD_MUTEX_DEFAULT, pskip);
2646
0
    APR_RING_INIT(&timer_free_ring.link, timer_event_t, link);
2647
0
    apr_skiplist_init(&timer_skiplist, pskip);
2648
0
    apr_skiplist_set_compare(timer_skiplist, timer_comp, timer_comp);
2649
2650
    /* All threads (listener, workers) and synchronization objects (queues,
2651
     * pollset, mutexes...) created here should have at least the lifetime of
2652
     * the connections they handle (i.e. ptrans). We can't use this thread's
2653
     * self pool because all these objects survive it, nor use pchild or pconf
2654
     * directly because this starter thread races with other modules' runtime,
2655
     * nor finally pchild (or subpool thereof) because it is killed explicitly
2656
     * before pconf (thus connections/ptrans can live longer, which matters in
2657
     * ONE_PROCESS mode). So this leaves us with a subpool of pconf, created
2658
     * before any ptrans hence destroyed after.
2659
     */
2660
0
    apr_pool_create(&pruntime, pconf);
2661
0
    apr_pool_tag(pruntime, "mpm_runtime");
2662
2663
    /* We must create the fd queues before we start up the listener
2664
     * and worker threads. */
2665
0
    rv = ap_queue_create(&worker_queue, threads_per_child, pruntime);
2666
0
    if (rv != APR_SUCCESS) {
2667
0
        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03100)
2668
0
                     "ap_queue_create() failed");
2669
0
        clean_child_exit(APEXIT_CHILDFATAL);
2670
0
    }
2671
2672
0
    if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
2673
        /* If we want to conserve memory, let's not keep an unlimited number of
2674
         * pools & allocators.
2675
         * XXX: This should probably be a separate config directive
2676
         */
2677
0
        max_recycled_pools = threads_per_child * 3 / 4 ;
2678
0
    }
2679
0
    rv = ap_queue_info_create(&worker_queue_info, pruntime,
2680
0
                              threads_per_child, max_recycled_pools);
2681
0
    if (rv != APR_SUCCESS) {
2682
0
        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03101)
2683
0
                     "ap_queue_info_create() failed");
2684
0
        clean_child_exit(APEXIT_CHILDFATAL);
2685
0
    }
2686
2687
    /* Create the timeout mutex and main pollset before the listener
2688
     * thread starts.
2689
     */
2690
0
    rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
2691
0
                                 pruntime);
2692
0
    if (rv != APR_SUCCESS) {
2693
0
        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03102)
2694
0
                     "creation of the timeout mutex failed.");
2695
0
        clean_child_exit(APEXIT_CHILDFATAL);
2696
0
    }
2697
2698
    /* Create the main pollset. When APR_POLLSET_WAKEABLE is requested we account
2699
     * for the wakeup pipe explicitly with pollset_size+1 because some pollset
2700
     * implementations don't do it implicitly in APR.
2701
     */
2702
0
    pollset_flags = APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY |
2703
0
                    APR_POLLSET_WAKEABLE | APR_POLLSET_NODEFAULT;
2704
0
    for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
2705
0
        rv = apr_pollset_create_ex(&event_pollset, pollset_size + 1, pruntime,
2706
0
                                   pollset_flags, good_methods[i]);
2707
0
        if (rv == APR_SUCCESS) {
2708
0
            listener_is_wakeable = 1;
2709
0
            break;
2710
0
        }
2711
0
    }
2712
0
    if (rv != APR_SUCCESS) {
2713
0
        pollset_flags &= ~APR_POLLSET_NODEFAULT;
2714
0
        rv = apr_pollset_create(&event_pollset, pollset_size + 1, pruntime,
2715
0
                                pollset_flags);
2716
0
        if (rv == APR_SUCCESS) {
2717
0
            listener_is_wakeable = 1;
2718
0
        }
2719
0
        else {
2720
0
            pollset_flags &= ~APR_POLLSET_WAKEABLE;
2721
0
            rv = apr_pollset_create(&event_pollset, pollset_size, pruntime,
2722
0
                                    pollset_flags);
2723
0
        }
2724
0
    }
2725
0
    if (rv != APR_SUCCESS) {
2726
0
        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03103)
2727
0
                     "apr_pollset_create with Thread Safety failed.");
2728
0
        clean_child_exit(APEXIT_CHILDFATAL);
2729
0
    }
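    /* To recap the fallback chain above: each entry of good_methods[] is
     * first tried with APR_POLLSET_NODEFAULT | APR_POLLSET_WAKEABLE; if none
     * succeeds, the default method is tried while keeping the wakeup pipe;
     * as a last resort the pollset is created without APR_POLLSET_WAKEABLE
     * (and without the +1 slot), leaving listener_is_wakeable unset.  If all
     * three attempts fail the child exits fatally.
     */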
2730
2731
    /* Add listeners to the main pollset */
2732
0
    listener_pollfd = apr_pcalloc(pruntime, num_listensocks *
2733
0
                                            sizeof(apr_pollfd_t));
2734
0
    for (i = 0, lr = my_bucket->listeners; lr; lr = lr->next, i++) {
2735
0
        apr_pollfd_t *pfd;
2736
0
        listener_poll_type *pt;
2737
2738
0
        AP_DEBUG_ASSERT(i < num_listensocks);
2739
0
        pfd = &listener_pollfd[i];
2740
2741
0
        pfd->reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
2742
0
#ifdef APR_POLLEXCL
2743
        /* If APR_POLLEXCL is available, use it to prevent the thundering
2744
         * herd issue. The listening sockets are potentially polled by all
2745
         * the children at the same time; when new connections arrive, this
2746
         * avoids waking all of them up while most would get EAGAIN
2747
         * on accept().
2748
         */
2749
0
        pfd->reqevents |= APR_POLLEXCL;
2750
0
#endif
2751
0
        pfd->desc_type = APR_POLL_SOCKET;
2752
0
        pfd->desc.s = lr->sd;
2753
2754
0
        pt = apr_pcalloc(pruntime, sizeof(*pt));
2755
0
        pfd->client_data = pt;
2756
0
        pt->type = PT_ACCEPT;
2757
0
        pt->baton = lr;
2758
2759
0
        apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
2760
0
        apr_pollset_add(event_pollset, pfd);
2761
2762
0
        lr->accept_func = ap_unixd_accept;
2763
0
    }
2764
2765
0
    worker_sockets = apr_pcalloc(pruntime, threads_per_child *
2766
0
                                           sizeof(apr_socket_t *));
2767
0
}
2768
2769
/* XXX under some circumstances not understood, children can get stuck
2770
 *     in start_threads forever trying to take over slots which will
2771
 *     never be cleaned up; for now there is an APLOG_DEBUG message issued
2772
 *     every so often when this condition occurs
2773
 */
2774
static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
2775
0
{
2776
0
    thread_starter *ts = dummy;
2777
0
    apr_thread_t **threads = ts->threads;
2778
0
    apr_threadattr_t *thread_attr = ts->threadattr;
2779
0
    int my_child_num = ts->child_num_arg;
2780
0
    proc_info *my_info;
2781
0
    apr_status_t rv;
2782
0
    int threads_created = 0;
2783
0
    int listener_started = 0;
2784
0
    int prev_threads_created;
2785
0
    int loops, i;
2786
2787
0
    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
2788
0
                 "start_threads: Using %s (%swakeable)",
2789
0
                 apr_pollset_method_name(event_pollset),
2790
0
                 listener_is_wakeable ? "" : "not ");
2791
2792
0
    loops = prev_threads_created = 0;
2793
0
    while (1) {
2794
        /* threads_per_child does not include the listener thread */
2795
0
        for (i = 0; i < threads_per_child; i++) {
2796
0
            int status =
2797
0
                ap_scoreboard_image->servers[my_child_num][i].status;
2798
2799
0
            if (status != SERVER_DEAD) {
2800
0
                continue;
2801
0
            }
2802
2803
0
            my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2804
0
            my_info->pslot = my_child_num;
2805
0
            my_info->tslot = i;
2806
2807
            /* We are creating threads right now */
2808
0
            ap_update_child_status_from_indexes(my_child_num, i,
2809
0
                                                SERVER_STARTING, NULL);
2810
            /* We let each thread update its own scoreboard entry.  This is
2811
             * done because it lets us deal with tid better.
2812
             */
2813
0
            rv = ap_thread_create(&threads[i], thread_attr,
2814
0
                                  worker_thread, my_info, pruntime);
2815
0
            if (rv != APR_SUCCESS) {
2816
0
                ap_update_child_status_from_indexes(my_child_num, i, SERVER_DEAD, NULL);
2817
0
                ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2818
0
                             APLOGNO(03104)
2819
0
                             "ap_thread_create: unable to create worker thread");
2820
                /* Let the parent decide how bad this really is by returning
2821
                 * APEXIT_CHILDSICK. If threads were created already, let them
2822
                 * stop cleanly first to avoid deadlocks in clean_child_exit(),
2823
                 * just stop creating new ones here (but set resource_shortage
2824
                 * to return APEXIT_CHILDSICK still when the child exits).
2825
                 */
2826
0
                if (threads_created) {
2827
0
                    resource_shortage = 1;
2828
0
                    signal_threads(ST_GRACEFUL);
2829
0
                    if (!listener_started) {
2830
0
                        workers_may_exit = 1;
2831
0
                        ap_queue_term(worker_queue);
2832
                        /* wake up main POD thread too */
2833
0
                        kill(ap_my_pid, SIGTERM);
2834
0
                    }
2835
0
                    apr_thread_exit(thd, APR_SUCCESS);
2836
0
                    return NULL;
2837
0
                }
2838
0
                clean_child_exit(APEXIT_CHILDSICK);
2839
0
            }
2840
0
            threads_created++;
2841
0
        }
2842
2843
        /* Start the listener only when there are workers available */
2844
0
        if (!listener_started && threads_created) {
2845
0
            create_listener_thread(ts);
2846
0
            listener_started = 1;
2847
0
        }
2848
2849
2850
0
        if (start_thread_may_exit || threads_created == threads_per_child) {
2851
0
            break;
2852
0
        }
2853
        /* wait for previous generation to clean up an entry */
2854
0
        apr_sleep(apr_time_from_sec(1));
2855
0
        ++loops;
2856
0
        if (loops % 120 == 0) { /* every couple of minutes */
2857
0
            if (prev_threads_created == threads_created) {
2858
0
                ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2859
0
                             APLOGNO(03271)
2860
0
                             "child %" APR_PID_T_FMT " isn't taking over "
2861
0
                             "slots very quickly (%d of %d)",
2862
0
                             ap_my_pid, threads_created,
2863
0
                             threads_per_child);
2864
0
            }
2865
0
            prev_threads_created = threads_created;
2866
0
        }
2867
0
    }
2868
2869
    /* What state should this child_main process be listed as in the
2870
     * scoreboard...?
2871
     *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2872
     *                                      (request_rec *) NULL);
2873
     *
2874
     *  This state should be listed separately in the scoreboard, in some kind
2875
     *  of process_status, not mixed in with the worker threads' status.
2876
     *  "life_status" is almost right, but it's in the worker's structure, and
2877
     *  the name could be clearer.   gla
2878
     */
2879
0
    apr_thread_exit(thd, APR_SUCCESS);
2880
0
    return NULL;
2881
0
}
2882
2883
static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2884
0
{
2885
0
    int i;
2886
0
    apr_status_t rv, thread_rv;
2887
2888
0
    if (listener) {
2889
0
        int iter;
2890
2891
        /* deal with a rare timing window which affects waking up the
2892
         * listener thread...  if the signal sent to the listener thread
2893
         * is delivered between the time it verifies that the
2894
         * listener_may_exit flag is clear and the time it enters a
2895
         * blocking syscall, the signal didn't do any good...  work around
2896
         * that by sleeping briefly and sending it again
2897
         */
2898
2899
0
        iter = 0;
2900
0
        while (!dying) {
2901
0
            apr_sleep(apr_time_from_msec(500));
2902
0
            if (dying || ++iter > 10) {
2903
0
                break;
2904
0
            }
2905
            /* listener has not stopped accepting yet */
2906
0
            ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
2907
0
                         "listener has not stopped accepting yet (%d iter)", iter);
2908
0
            wakeup_listener();
2909
0
        }
2910
0
        if (iter > 10) {
2911
0
            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00475)
2912
0
                         "the listener thread didn't stop accepting");
2913
0
        }
2914
0
        else {
2915
0
            rv = apr_thread_join(&thread_rv, listener);
2916
0
            if (rv != APR_SUCCESS) {
2917
0
                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00476)
2918
0
                             "apr_thread_join: unable to join listener thread");
2919
0
            }
2920
0
        }
2921
0
    }
2922
2923
0
    for (i = 0; i < threads_per_child; i++) {
2924
0
        if (threads[i]) {       /* if we ever created this thread */
2925
0
            rv = apr_thread_join(&thread_rv, threads[i]);
2926
0
            if (rv != APR_SUCCESS) {
2927
0
                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00477)
2928
0
                             "apr_thread_join: unable to join worker "
2929
0
                             "thread %d", i);
2930
0
            }
2931
0
        }
2932
0
    }
2933
0
}
2934
2935
static void join_start_thread(apr_thread_t * start_thread_id)
2936
0
{
2937
0
    apr_status_t rv, thread_rv;
2938
2939
0
    start_thread_may_exit = 1;  /* tell it to give up in case it is still
2940
                                 * trying to take over slots from a
2941
                                 * previous generation
2942
                                 */
2943
0
    rv = apr_thread_join(&thread_rv, start_thread_id);
2944
0
    if (rv != APR_SUCCESS) {
2945
0
        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00478)
2946
0
                     "apr_thread_join: unable to join the start " "thread");
2947
0
    }
2948
0
}
2949
2950
static void child_main(int child_num_arg, int child_bucket)
2951
0
{
2952
0
    apr_thread_t **threads;
2953
0
    apr_status_t rv;
2954
0
    thread_starter *ts;
2955
0
    apr_threadattr_t *thread_attr;
2956
0
    apr_thread_t *start_thread_id;
2957
0
    int i;
2958
2959
    /* for benefit of any hooks that run as this child initializes */
2960
0
    retained->mpm->mpm_state = AP_MPMQ_STARTING;
2961
2962
0
    ap_my_pid = getpid();
2963
0
    ap_child_slot = child_num_arg;
2964
0
    ap_fatal_signal_child_setup(ap_server_conf);
2965
2966
    /* Get a sub context for global allocations in this child, so that
2967
     * we can have cleanups occur when the child exits.
2968
     */
2969
0
    apr_pool_create(&pchild, pconf);
2970
0
    apr_pool_tag(pchild, "pchild");
2971
2972
0
#if AP_HAS_THREAD_LOCAL
2973
0
    if (!one_process) {
2974
0
        apr_thread_t *thd = NULL;
2975
0
        if ((rv = ap_thread_main_create(&thd, pchild))) {
2976
0
            ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(10377)
2977
0
                         "Couldn't initialize child main thread");
2978
0
            clean_child_exit(APEXIT_CHILDFATAL);
2979
0
        }
2980
0
    }
2981
0
#endif
2982
2983
    /* close unused listeners and pods */
2984
0
    for (i = 0; i < retained->mpm->num_buckets; i++) {
2985
0
        if (i != child_bucket) {
2986
0
            ap_close_listeners_ex(retained->buckets[i].listeners);
2987
0
            ap_mpm_podx_close(retained->buckets[i].pod);
2988
0
        }
2989
0
    }
2990
2991
    /* Stuff to do before we switch ids, so we have permissions. */
2992
0
    ap_reopen_scoreboard(pchild, NULL, 0);
2993
2994
    /* done with init critical section */
2995
0
    if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2996
0
        clean_child_exit(APEXIT_CHILDFATAL);
2997
0
    }
2998
2999
    /* Just use the standard apr_setup_signal_thread to block all signals
3000
     * from being received.  The child processes no longer use signals for
3001
     * any communication with the parent process. Let's also do this before
3002
     * child_init() hooks are called and possibly create threads that
3003
     * otherwise could "steal" (implicitly) MPM's signals.
3004
     */
3005
0
    rv = apr_setup_signal_thread();
3006
0
    if (rv != APR_SUCCESS) {
3007
0
        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00479)
3008
0
                     "Couldn't initialize signal thread");
3009
0
        clean_child_exit(APEXIT_CHILDFATAL);
3010
0
    }
3011
3012
    /* For rand() users (e.g. skiplist). */
3013
0
    srand((unsigned int)apr_time_now());
3014
3015
0
    ap_run_child_init(pchild, ap_server_conf);
3016
3017
0
    if (ap_max_requests_per_child) {
3018
0
        conns_this_child = ap_max_requests_per_child;
3019
0
    }
3020
0
    else {
3021
        /* coding a value of zero means infinity */
3022
0
        conns_this_child = APR_INT32_MAX;
3023
0
    }
3024
3025
    /* Setup threads */
3026
3027
    /* Globals used by signal_threads(), so they must be initialized beforehand */
3028
0
    setup_threads_runtime();
3029
3030
    /* clear the storage; we may not create all our threads immediately,
3031
     * and we want a 0 entry to indicate a thread which was not created
3032
     */
3033
0
    threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
3034
0
    ts = apr_palloc(pchild, sizeof(*ts));
3035
3036
0
    apr_threadattr_create(&thread_attr, pchild);
3037
    /* 0 means PTHREAD_CREATE_JOINABLE */
3038
0
    apr_threadattr_detach_set(thread_attr, 0);
3039
3040
0
    if (ap_thread_stacksize != 0) {
3041
0
        rv = apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
3042
0
        if (rv != APR_SUCCESS && rv != APR_ENOTIMPL) {
3043
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(02436)
3044
0
                         "WARNING: ThreadStackSize of %" APR_SIZE_T_FMT " is "
3045
0
                         "inappropriate, using default", 
3046
0
                         ap_thread_stacksize);
3047
0
        }
3048
0
    }
3049
3050
0
    ts->threads = threads;
3051
0
    ts->listener = NULL;
3052
0
    ts->child_num_arg = child_num_arg;
3053
0
    ts->threadattr = thread_attr;
3054
3055
0
    rv = ap_thread_create(&start_thread_id, thread_attr, start_threads,
3056
0
                          ts, pchild);
3057
0
    if (rv != APR_SUCCESS) {
3058
0
        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00480)
3059
0
                     "ap_thread_create: unable to create worker thread");
3060
        /* let the parent decide how bad this really is */
3061
0
        clean_child_exit(APEXIT_CHILDSICK);
3062
0
    }
3063
3064
0
    retained->mpm->mpm_state = AP_MPMQ_RUNNING;
3065
3066
    /* If we are only running in one_process mode, we will want to
3067
     * still handle signals. */
3068
0
    if (one_process) {
3069
        /* Block until we get a terminating signal. */
3070
0
        apr_signal_thread(check_signal);
3071
        /* make sure the start thread has finished; signal_threads()
3072
         * and join_workers() depend on that
3073
         */
3074
        /* XXX join_start_thread() won't be awakened if one of our
3075
         *     threads encounters a critical error and attempts to
3076
         *     shutdown this child
3077
         */
3078
0
        join_start_thread(start_thread_id);
3079
3080
        /* helps us terminate a little more quickly than the dispatch of the
3081
         * signal thread; beats the Pipe of Death and the browsers
3082
         */
3083
0
        signal_threads(ST_UNGRACEFUL);
3084
3085
        /* A terminating signal was received. Now join each of the
3086
         * workers to clean them up.
3087
         *   If the worker already exited, then the join frees
3088
         *   their resources and returns.
3089
         *   If the worker hasn't exited, then this blocks until
3090
         *   they have (then cleans up).
3091
         */
3092
0
        join_workers(ts->listener, threads);
3093
0
    }
3094
0
    else {                      /* !one_process */
3095
        /* remove SIGTERM from the set of blocked signals...  if one of
3096
         * the other threads in the process needs to take us down
3097
         * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
3098
         */
3099
0
        apr_signal(SIGTERM, dummy_signal_handler);
3100
0
        unblock_signal(SIGTERM);
3101
        /* Watch for any messages from the parent over the POD */
3102
0
        while (1) {
3103
0
            rv = ap_mpm_podx_check(my_bucket->pod);
3104
0
            if (rv == AP_MPM_PODX_NORESTART) {
3105
                /* see if termination was triggered while we slept */
3106
0
                switch (terminate_mode) {
3107
0
                case ST_GRACEFUL:
3108
0
                    rv = AP_MPM_PODX_GRACEFUL;
3109
0
                    break;
3110
0
                case ST_UNGRACEFUL:
3111
0
                    rv = AP_MPM_PODX_RESTART;
3112
0
                    break;
3113
0
                }
3114
0
            }
3115
0
            if (rv == AP_MPM_PODX_GRACEFUL || rv == AP_MPM_PODX_RESTART) {
3116
                /* make sure the start thread has finished;
3117
                 * signal_threads() and join_workers depend on that
3118
                 */
3119
0
                join_start_thread(start_thread_id);
3120
0
                signal_threads(rv ==
3121
0
                               AP_MPM_PODX_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
3122
0
                break;
3123
0
            }
3124
0
        }
3125
3126
        /* A terminating signal was received. Now join each of the
3127
         * workers to clean them up.
3128
         *   If the worker already exited, then the join frees
3129
         *   their resources and returns.
3130
         *   If the worker hasn't exited, then this blocks until
3131
         *   they have (then cleans up).
3132
         */
3133
0
        ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
3134
0
                     "%s termination received, joining workers",
3135
0
                     rv == AP_MPM_PODX_GRACEFUL ? "graceful" : "ungraceful");
3136
0
        join_workers(ts->listener, threads);
3137
0
        ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
3138
0
                     "%s termination, workers joined, exiting",
3139
0
                     rv == AP_MPM_PODX_GRACEFUL ? "graceful" : "ungraceful");
3140
0
    }
3141
3142
0
    free(threads);
3143
3144
0
    clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
3145
0
}
3146
3147
static int make_child(server_rec * s, int slot, int bucket)
3148
0
{
3149
0
    int pid;
3150
3151
0
    if (slot + 1 > retained->max_daemon_used) {
3152
0
        retained->max_daemon_used = slot + 1;
3153
0
    }
3154
3155
0
    if (ap_scoreboard_image->parent[slot].pid != 0) {
3156
        /* XXX replace with assert or remove ? */
3157
0
        ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455)
3158
0
                 "BUG: Scoreboard slot %d should be empty but is "
3159
0
                 "in use by pid %" APR_PID_T_FMT,
3160
0
                 slot, ap_scoreboard_image->parent[slot].pid);
3161
0
        return -1;
3162
0
    }
3163
3164
0
    if (one_process) {
3165
0
        my_bucket = &retained->buckets[0];
3166
3167
0
        event_note_child_started(slot, getpid());
3168
0
        child_main(slot, 0);
3169
        /* NOTREACHED */
3170
0
        ap_assert(0);
3171
0
        return -1;
3172
0
    }
3173
3174
0
    if ((pid = fork()) == -1) {
3175
0
        ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
3176
0
                     "fork: Unable to fork new process");
3177
3178
        /* fork didn't succeed.  There's no need to touch the scoreboard;
3179
         * if we were trying to replace a failed child process, then
3180
         * server_main_loop() marked its workers SERVER_DEAD, and if
3181
         * we were trying to replace a child process that exited normally,
3182
         * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
3183
         */
3184
3185
        /* In case system resources are maxed out, we don't want
3186
           Apache running away with the CPU trying to fork over and
3187
           over and over again. */
3188
0
        apr_sleep(apr_time_from_sec(10));
3189
3190
0
        return -1;
3191
0
    }
3192
3193
0
    if (!pid) {
3194
0
#if AP_HAS_THREAD_LOCAL
3195
0
        ap_thread_current_after_fork();
3196
0
#endif
3197
3198
0
        my_bucket = &retained->buckets[bucket];
3199
3200
#ifdef HAVE_BINDPROCESSOR
3201
        /* By default, AIX binds to a single processor.  This bit unbinds
3202
         * children which will then bind to another CPU.
3203
         */
3204
        int status = bindprocessor(BINDPROCESS, (int) getpid(),
3205
                                   PROCESSOR_CLASS_ANY);
3206
        if (status != OK)
3207
            ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
3208
                         ap_server_conf, APLOGNO(00482)
3209
                         "processor unbind failed");
3210
#endif
3211
0
        RAISE_SIGSTOP(MAKE_CHILD);
3212
3213
0
        apr_signal(SIGTERM, just_die);
3214
0
        child_main(slot, bucket);
3215
        /* NOTREACHED */
3216
0
        ap_assert(0);
3217
0
        return -1;
3218
0
    }
3219
3220
0
    event_note_child_started(slot, pid);
3221
0
    return 0;
3222
0
}
3223
3224
/* start up a bunch of children */
3225
static void startup_children(int number_to_start)
3226
0
{
3227
0
    int i;
3228
3229
0
    for (i = 0; number_to_start && i < server_limit; ++i) {
3230
0
        if (ap_scoreboard_image->parent[i].pid != 0) {
3231
0
            continue;
3232
0
        }
3233
0
        if (make_child(ap_server_conf, i, i % retained->mpm->num_buckets) < 0) {
3234
0
            break;
3235
0
        }
3236
0
        --number_to_start;
3237
0
    }
3238
0
}
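/* Note that startup_children() maps scoreboard slot i to listener bucket
 * i % num_buckets; perform_idle_server_maintenance() below relies on the
 * same round-robin mapping when it scans only the slots that belong to the
 * bucket it was called for.
 */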
3239
3240
static void perform_idle_server_maintenance(int child_bucket,
3241
                                            int *max_daemon_used)
3242
0
{
3243
0
    int num_buckets = retained->mpm->num_buckets;
3244
0
    int idle_thread_count = 0;
3245
0
    process_score *ps;
3246
0
    int free_length = 0;
3247
0
    int free_slots[MAX_SPAWN_RATE];
3248
0
    int last_non_dead = -1;
3249
0
    int active_thread_count = 0;
3250
0
    int i, j;
3251
3252
0
    for (i = 0; i < server_limit; ++i) {
3253
0
        if (num_buckets > 1 && (i % num_buckets) != child_bucket) {
3254
            /* We only care about child_bucket in this call */
3255
0
            continue;
3256
0
        }
3257
0
        if (i >= retained->max_daemon_used &&
3258
0
            free_length == retained->idle_spawn_rate[child_bucket]) {
3259
            /* short cut if all active processes have been examined and
3260
             * enough empty scoreboard slots have been found
3261
             */
3262
0
            break;
3263
0
        }
3264
3265
0
        ps = &ap_scoreboard_image->parent[i];
3266
0
        if (ps->pid != 0) {
3267
0
            int child_threads_active = 0;
3268
0
            if (ps->quiescing == 1) {
3269
0
                ps->quiescing = 2;
3270
0
                retained->active_daemons--;
3271
0
                ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
3272
0
                             "Child %d quiescing: pid %d, gen %d, "
3273
0
                             "active %d/%d, total %d/%d/%d",
3274
0
                             i, (int)ps->pid, (int)ps->generation,
3275
0
                             retained->active_daemons, active_daemons_limit,
3276
0
                             retained->total_daemons, retained->max_daemon_used,
3277
0
                             server_limit);
3278
0
            }
3279
0
            for (j = 0; j < threads_per_child; j++) {
3280
0
                int status = ap_scoreboard_image->servers[i][j].status;
3281
3282
                /* We consider a starting server as idle because we started it
3283
                 * at least a cycle ago, and if it still hasn't finished starting
3284
                 * then we're just going to swamp things worse by forking more.
3285
                 * So we hopefully won't need to fork more if we count it.
3286
                 * This depends on the ordering of SERVER_READY and SERVER_STARTING.
3287
                 */
3288
0
                if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
3289
0
                    && ps->generation == retained->mpm->my_generation) {
3290
0
                    ++idle_thread_count;
3291
0
                }
3292
0
                if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
3293
0
                    ++child_threads_active;
3294
0
                }
3295
0
            }
3296
0
            active_thread_count += child_threads_active;
3297
0
            if (child_threads_active == threads_per_child) {
3298
0
                had_healthy_child = 1;
3299
0
            }
3300
0
            last_non_dead = i;
3301
0
        }
3302
0
        else if (free_length < retained->idle_spawn_rate[child_bucket]) {
3303
0
            free_slots[free_length++] = i;
3304
0
        }
3305
0
    }
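    /* After the scan above: idle_thread_count counts the threads at or below
     * SERVER_READY in accepting, non-quiescing children of the current
     * generation (for this bucket); active_thread_count counts the threads
     * between SERVER_READY and SERVER_GRACEFUL; free_slots[] holds up to
     * idle_spawn_rate[child_bucket] empty scoreboard slots available for
     * forking; and had_healthy_child records that at least one child has had
     * all of its threads_per_child threads active.
     */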
3306
0
    if (*max_daemon_used < last_non_dead + 1) {
3307
0
        *max_daemon_used = last_non_dead + 1;
3308
0
    }
3309
3310
0
    if (retained->sick_child_detected) {
3311
0
        if (had_healthy_child) {
3312
            /* Assume this is a transient error, even though it may not be.  Leave
3313
             * the server up in case it is able to serve some requests or the
3314
             * problem will be resolved.
3315
             */
3316
0
            retained->sick_child_detected = 0;
3317
0
        }
3318
0
        else if (child_bucket < num_buckets - 1) {
3319
            /* check for had_healthy_child up to the last child bucket */
3320
0
            return;
3321
0
        }
3322
0
        else {
3323
            /* looks like a basket case, as no child ever fully initialized; give up.
3324
             */
3325
0
            retained->mpm->shutdown_pending = 1;
3326
0
            child_fatal = 1;
3327
0
            ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
3328
0
                         ap_server_conf, APLOGNO(02324)
3329
0
                         "A resource shortage or other unrecoverable failure "
3330
0
                         "was encountered before any child process initialized "
3331
0
                         "successfully... httpd is exiting!");
3332
            /* the child already logged the failure details */
3333
0
            return;
3334
0
        }
3335
0
    }
3336
3337
0
    AP_DEBUG_ASSERT(retained->active_daemons <= retained->total_daemons
3338
0
                    && retained->total_daemons <= retained->max_daemon_used
3339
0
                    && retained->max_daemon_used <= server_limit);
3340
3341
0
    if (idle_thread_count > max_spare_threads / num_buckets) {
3342
        /*
3343
         * Child processes that we ask to shut down won't die immediately
3344
         * but may stay around for a long time when they finish their
3345
         * requests. If the server load changes many times, many such
3346
         * gracefully finishing processes may accumulate, filling up the
3347
         * scoreboard. To avoid running out of scoreboard entries, we
3348
         * don't shut down more processes if there are stopping ones
3349
         * already (i.e. active_daemons != total_daemons) and not enough
3350
         * slack space in the scoreboard for a graceful restart.
3351
         *
3352
         * XXX It would be nice if we could
3353
         * XXX - kill processes without keepalive connections first
3354
         * XXX - tell children to stop accepting new connections, and
3355
         * XXX   depending on server load, later be able to resurrect them
3356
         *       or kill them
3357
         */
3358
0
        int do_kill = (retained->active_daemons == retained->total_daemons
3359
0
                       || (server_limit - retained->total_daemons >
3360
0
                           active_daemons_limit));
3361
0
        ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
3362
0
                     "%shutting down one child: "
3363
0
                     "active %d/%d, total %d/%d/%d, "
3364
0
                     "idle threads %d, max workers %d",
3365
0
                     (do_kill) ? "S" : "Not s",
3366
0
                     retained->active_daemons, active_daemons_limit,
3367
0
                     retained->total_daemons, retained->max_daemon_used,
3368
0
                     server_limit, idle_thread_count, max_workers);
3369
0
        if (do_kill) {
3370
0
            ap_mpm_podx_signal(retained->buckets[child_bucket].pod,
3371
0
                               AP_MPM_PODX_GRACEFUL);
3372
0
        }
3373
0
        else {
3374
            /* Wait for dying daemon(s) to exit */
3375
0
        }
3376
0
        retained->idle_spawn_rate[child_bucket] = 1;
3377
0
    }
3378
0
    else if (idle_thread_count < min_spare_threads / num_buckets) {
3379
0
        if (active_thread_count >= max_workers / num_buckets) {
3380
0
            if (0 == idle_thread_count) { 
3381
0
                if (!retained->maxclients_reported) {
3382
0
                    ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
3383
0
                                 "server reached MaxRequestWorkers setting, "
3384
0
                                 "consider raising the MaxRequestWorkers "
3385
0
                                 "setting");
3386
0
                    retained->maxclients_reported = 1;
3387
0
                }
3388
0
             }
3389
0
             else { 
3390
0
                if (!retained->near_maxclients_reported) {
3391
0
                    ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(10159)
3392
0
                            "server is within MinSpareThreads of "
3393
0
                            "MaxRequestWorkers, consider raising the "
3394
0
                            "MaxRequestWorkers setting");
3395
0
                    retained->near_maxclients_reported = 1;
3396
0
                }
3397
0
            }
3398
0
            retained->idle_spawn_rate[child_bucket] = 1;
3399
0
        }
3400
0
        else if (free_length == 0) { /* scoreboard is full, can't fork */
3401
0
            ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03490)
3402
0
                         "scoreboard is full, not at MaxRequestWorkers."
3403
0
                         "Increase ServerLimit.");
3404
0
            retained->idle_spawn_rate[child_bucket] = 1;
3405
0
        }
3406
0
        else {
3407
0
            if (free_length > retained->idle_spawn_rate[child_bucket]) {
3408
0
                free_length = retained->idle_spawn_rate[child_bucket];
3409
0
            }
3410
0
            if (free_length + retained->active_daemons > active_daemons_limit) {
3411
0
                if (retained->active_daemons < active_daemons_limit) {
3412
0
                    free_length = active_daemons_limit - retained->active_daemons;
3413
0
                }
3414
0
                else {
3415
0
                    ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
3416
0
                                 "server is at active daemons limit, spawning "
3417
0
                                 "of %d children cancelled: active %d/%d, "
3418
0
                                 "total %d/%d/%d, rate %d", free_length,
3419
0
                                 retained->active_daemons, active_daemons_limit,
3420
0
                                 retained->total_daemons, retained->max_daemon_used,
3421
0
                                 server_limit, retained->idle_spawn_rate[child_bucket]);
3422
                    /* reset the spawning rate and prevent its growth below */
3423
0
                    retained->idle_spawn_rate[child_bucket] = 1;
3424
0
                    ++retained->hold_off_on_exponential_spawning;
3425
0
                    free_length = 0;
3426
0
                }
3427
0
            }
3428
0
            if (retained->idle_spawn_rate[child_bucket] >= 8) {
3429
0
                ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
3430
0
                             "server seems busy, (you may need "
3431
0
                             "to increase StartServers, ThreadsPerChild "
3432
0
                             "or Min/MaxSpareThreads), "
3433
0
                             "spawning %d children, there are around %d idle "
3434
0
                             "threads, %d active children, and %d children "
3435
0
                             "that are shutting down", free_length,
3436
0
                             idle_thread_count, retained->active_daemons,
3437
0
                             retained->total_daemons);
3438
0
            }
3439
0
            for (i = 0; i < free_length; ++i) {
3440
0
                int slot = free_slots[i];
3441
0
                if (make_child(ap_server_conf, slot, child_bucket) < 0) {
3442
0
                    continue;
3443
0
                }
3444
0
                if (*max_daemon_used < slot + 1) {
3445
0
                    *max_daemon_used = slot + 1;
3446
0
                }
3447
0
            }
3448
            /* the next time around we want to spawn twice as many if this
3449
             * wasn't good enough, but not if we've just done a graceful restart
3450
             */
3451
0
            if (retained->hold_off_on_exponential_spawning) {
3452
0
                --retained->hold_off_on_exponential_spawning;
3453
0
            }
3454
0
            else if (retained->idle_spawn_rate[child_bucket]
3455
0
                     < max_spawn_rate_per_bucket) {
3456
0
                int new_rate = retained->idle_spawn_rate[child_bucket] * 2;
3457
0
                if (new_rate > max_spawn_rate_per_bucket) {
3458
0
                    new_rate = max_spawn_rate_per_bucket;
3459
0
                }
3460
0
                retained->idle_spawn_rate[child_bucket] = new_rate;
3461
0
            }
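            /* Net effect: a bucket's spawn rate grows 1, 2, 4, ... per
             * maintenance cycle while it keeps forking, is capped at
             * max_spawn_rate_per_bucket, is reset to 1 whenever spawning is
             * throttled above, and does not grow while
             * hold_off_on_exponential_spawning counts down (e.g. right after
             * a graceful restart).
             */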
3462
0
        }
3463
0
    }
3464
0
    else {
3465
0
        retained->idle_spawn_rate[child_bucket] = 1;
3466
0
    }
3467
0
}
3468
3469
static void server_main_loop(int remaining_children_to_start)
3470
0
{
3471
0
    int num_buckets = retained->mpm->num_buckets;
3472
0
    int max_daemon_used = 0;
3473
0
    int successive_kills = 0;
3474
0
    int child_slot;
3475
0
    apr_exit_why_e exitwhy;
3476
0
    int status, processed_status;
3477
0
    apr_proc_t pid;
3478
0
    int i;
3479
3480
0
    while (!retained->mpm->restart_pending && !retained->mpm->shutdown_pending) {
3481
0
        ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
3482
3483
0
        if (pid.pid != -1) {
3484
0
            processed_status = ap_process_child_status(&pid, exitwhy, status);
3485
0
            child_slot = ap_find_child_by_pid(&pid);
3486
0
            if (processed_status == APEXIT_CHILDFATAL) {
3487
                /* fix race condition found in PR 39311
3488
                 * A child created at the same time as a graceful restart happens
3489
                 * can find the lock missing and hit a fatal error.
3490
                 * It is not fatal for the last generation to be in this state.
3491
                 */
3492
0
                if (child_slot < 0
3493
0
                    || ap_get_scoreboard_process(child_slot)->generation
3494
0
                       == retained->mpm->my_generation) {
3495
0
                    retained->mpm->shutdown_pending = 1;
3496
0
                    child_fatal = 1;
3497
                    /*
3498
                     * total_daemons counting will be off now, but as we
3499
                     * are shutting down, that is not an issue anymore.
3500
                     */
3501
0
                    return;
3502
0
                }
3503
0
                else {
3504
0
                    ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, APLOGNO(00487)
3505
0
                                 "Ignoring fatal error in child of previous "
3506
0
                                 "generation (pid %ld).",
3507
0
                                 (long)pid.pid);
3508
0
                    retained->sick_child_detected = 1;
3509
0
                }
3510
0
            }
3511
0
            else if (processed_status == APEXIT_CHILDSICK) {
3512
                /* tell perform_idle_server_maintenance to check into this
3513
                 * on the next timer pop
3514
                 */
3515
0
                retained->sick_child_detected = 1;
3516
0
            }
3517
            /* non-fatal death... note that it's gone in the scoreboard. */
3518
0
            if (child_slot >= 0) {
3519
0
                event_note_child_stopped(child_slot, 0, 0);
3520
3521
0
                if (processed_status == APEXIT_CHILDSICK) {
3522
                    /* resource shortage, minimize the fork rate */
3523
0
                    retained->idle_spawn_rate[child_slot % num_buckets] = 1;
3524
0
                }
3525
0
                else if (remaining_children_to_start) {
3526
                    /* we're still doing a 1-for-1 replacement of dead
3527
                     * children with new children
3528
                     */
3529
0
                    make_child(ap_server_conf, child_slot,
3530
0
                               child_slot % num_buckets);
3531
0
                    --remaining_children_to_start;
3532
0
                }
3533
0
            }
3534
0
#if APR_HAS_OTHER_CHILD
3535
0
            else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
3536
0
                                                status) == 0) {
3537
                /* handled */
3538
0
            }
3539
0
#endif
3540
0
            else if (retained->mpm->was_graceful) {
3541
                /* Great, we've probably just lost a slot in the
3542
                 * scoreboard.  Somehow we don't know about this child.
3543
                 */
3544
0
                ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
3545
0
                             ap_server_conf, APLOGNO(00488)
3546
0
                             "long lost child came home! (pid %ld)",
3547
0
                             (long) pid.pid);
3548
0
            }
3549
            /* Don't perform idle maintenance when a child dies,
3550
             * only do it when there's a timeout.  Remember only a
3551
             * finite number of children can die, and it's pretty
3552
             * pathological for a lot to die suddenly.  If a child is
3553
             * killed by a signal (faulting) we want to restart it ASAP
3554
             * though, for up to 3 successive faults, after which we stop until
3555
             * a timeout happens again (to avoid the flood of fork()ed
3556
             * processes that keep being killed early).
3557
             */
3558
0
            if (child_slot < 0 || !APR_PROC_CHECK_SIGNALED(exitwhy)) {
3559
0
                continue;
3560
0
            }
3561
0
            if (++successive_kills >= 3) {
3562
0
                if (successive_kills % 10 == 3) {
3563
0
                    ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
3564
0
                                 ap_server_conf, APLOGNO(10392)
3565
0
                                 "children are killed successively!");
3566
0
                }
3567
0
                continue;
3568
0
            }
3569
0
            ++remaining_children_to_start;
3570
0
        }
3571
0
        else {
3572
0
            successive_kills = 0;
3573
0
        }
3574
3575
0
        if (remaining_children_to_start) {
3576
            /* we hit a 1 second timeout in which none of the previous
3577
             * generation of children needed to be reaped... so assume
3578
             * they're all done, and pick up the slack if any is left.
3579
             */
3580
0
            startup_children(remaining_children_to_start);
3581
0
            remaining_children_to_start = 0;
3582
            /* In any event we really shouldn't do the code below because
3583
             * few of the servers we just started are in the IDLE state
3584
             * yet, so we'd mistakenly create an extra server.
3585
             */
3586
0
            continue;
3587
0
        }
3588
3589
0
        max_daemon_used = 0;
3590
0
        for (i = 0; i < num_buckets; i++) {
3591
0
            perform_idle_server_maintenance(i, &max_daemon_used);
3592
0
        }
3593
0
        retained->max_daemon_used = max_daemon_used;
3594
0
    }
3595
0
}
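/* Each pass of the loop above waits (for up to a second) in
 * ap_wait_or_timeout().  A dead child is replaced one-for-one while
 * remaining_children_to_start is non-zero; a child killed by a signal is
 * scheduled for an immediate restart, but after three successive such
 * faults replacement pauses until a timeout resets the counter; idle
 * maintenance itself only runs on the timeout path, once per bucket.
 */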
3596
3597
static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
3598
0
{
3599
0
    ap_listen_rec **listen_buckets = NULL;
3600
0
    int num_buckets = retained->mpm->num_buckets;
3601
0
    int remaining_children_to_start;
3602
0
    apr_status_t rv;
3603
0
    int i;
3604
3605
0
    ap_log_pid(pconf, ap_pid_fname);
3606
3607
    /* On first startup create gen_pool to satisfy the lifetime of the
3608
     * parent's PODs and listeners; on restart stop the children from the
3609
     * previous generation and clear gen_pool for the next one.
3610
     */
3611
0
    if (!retained->gen_pool) {
3612
0
        apr_pool_create(&retained->gen_pool, ap_pglobal);
3613
0
    }
3614
0
    else {
3615
0
        if (retained->mpm->was_graceful) {
3616
            /* wake up the children...time to die.  But we'll have more soon */
3617
0
            for (i = 0; i < num_buckets; i++) {
3618
0
                ap_mpm_podx_killpg(retained->buckets[i].pod,
3619
0
                                   active_daemons_limit, AP_MPM_PODX_GRACEFUL);
3620
0
            }
3621
0
        }
3622
0
        else {
3623
            /* Kill 'em all.  Since the child acts the same on the parent's SIGTERM
3624
             * and a SIGHUP, we may as well use the same signal, because some user
3625
             * pthreads are stealing signals from us left and right.
3626
             */
3627
0
            for (i = 0; i < num_buckets; i++) {
3628
0
                ap_mpm_podx_killpg(retained->buckets[i].pod,
3629
0
                                   active_daemons_limit, AP_MPM_PODX_RESTART);
3630
0
            }
3631
0
            ap_reclaim_child_processes(1,  /* Start with SIGTERM */
3632
0
                                       event_note_child_stopped);
3633
0
        }
3634
0
        apr_pool_clear(retained->gen_pool);
3635
0
        retained->buckets = NULL;
3636
3637
        /* advance to the next generation */
3638
        /* XXX: we really need to make sure this new generation number isn't in
3639
         * use by any of the previous children.
3640
         */
3641
0
        ++retained->mpm->my_generation;
3642
0
    }
3643
3644
    /* On graceful restart, preserve the scoreboard and the listeners buckets.
3645
     * When ungraceful, clear the scoreboard and set num_buckets to zero to let
3646
     * ap_duplicate_listeners() below determine how many are needed/configured.
3647
     */
3648
0
    if (!retained->mpm->was_graceful) {
3649
0
        if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
3650
0
            retained->mpm->mpm_state = AP_MPMQ_STOPPING;
3651
0
            return !OK;
3652
0
        }
3653
0
        num_buckets = (one_process) ? 1 : 0; /* one_process => one bucket */
3654
0
        retained->mpm->num_buckets = 0; /* reset idle_spawn_rate below */
3655
0
    }
3656
3657
    /* Now on for the new generation. */
3658
0
    ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
3659
0
    ap_unixd_mpm_set_signals(pconf, one_process);
3660
3661
0
    if ((rv = ap_duplicate_listeners(retained->gen_pool, ap_server_conf,
3662
0
                                     &listen_buckets, &num_buckets))) {
3663
0
        ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
3664
0
                     ap_server_conf, APLOGNO(03273)
3665
0
                     "could not duplicate listeners");
3666
0
        return !OK;
3667
0
    }
3668
3669
0
    retained->buckets = apr_pcalloc(retained->gen_pool,
3670
0
                                    num_buckets * sizeof(event_child_bucket));
3671
0
    for (i = 0; i < num_buckets; i++) {
3672
0
        if (!one_process /* no POD in one_process mode */
3673
0
                && (rv = ap_mpm_podx_open(retained->gen_pool,
3674
0
                                          &retained->buckets[i].pod))) {
3675
0
            ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
3676
0
                         ap_server_conf, APLOGNO(03274)
3677
0
                         "could not open pipe-of-death");
3678
0
            return !OK;
3679
0
        }
3680
0
        retained->buckets[i].listeners = listen_buckets[i];
3681
0
    }
3682
3683
0
    if (retained->mpm->max_buckets < num_buckets) {
3684
0
        int new_max, *new_ptr;
3685
0
        new_max = retained->mpm->max_buckets * 2;
3686
0
        if (new_max < num_buckets) {
3687
0
            new_max = num_buckets;
3688
0
        }
3689
0
        new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
3690
0
        if (retained->mpm->num_buckets) /* idle_spawn_rate NULL at startup */
3691
0
            memcpy(new_ptr, retained->idle_spawn_rate,
3692
0
                   retained->mpm->num_buckets * sizeof(int));
3693
0
        retained->idle_spawn_rate = new_ptr;
3694
0
        retained->mpm->max_buckets = new_max;
3695
0
    }
3696
0
    if (retained->mpm->num_buckets < num_buckets) {
3697
0
        int rate_max = 1;
3698
        /* If new buckets are added, set their idle spawn rate to
3699
         * the highest so far, so that they get filled as quickly
3700
         * as the existing ones.
3701
         */
3702
0
        for (i = 0; i < retained->mpm->num_buckets; i++) {
3703
0
            if (rate_max < retained->idle_spawn_rate[i]) {
3704
0
                rate_max = retained->idle_spawn_rate[i];
3705
0
            }
3706
0
        }
3707
0
        for (/* up to date i */; i < num_buckets; i++) {
3708
0
            retained->idle_spawn_rate[i] = rate_max;
3709
0
        }
3710
0
    }
3711
0
    retained->mpm->num_buckets = num_buckets;
3712
3713
    /* Don't thrash since num_buckets depends on the
3714
     * system and the number of online CPU cores...
3715
     */
3716
0
    if (active_daemons_limit < num_buckets)
3717
0
        active_daemons_limit = num_buckets;
3718
0
    if (ap_daemons_to_start < num_buckets)
3719
0
        ap_daemons_to_start = num_buckets;
3720
    /* We want to create as many children at a time as the number of buckets,
3721
     * so as to accept connections optimally (evenly distributed across buckets).
3722
     * Thus min_spare_threads should at least maintain num_buckets children,
3723
     * and max_spare_threads allow num_buckets more children w/o triggering
3724
     * immediately (e.g. num_buckets idle threads margin, one per bucket).
3725
     */
3726
0
    if (min_spare_threads < threads_per_child * (num_buckets - 1) + num_buckets)
3727
0
        min_spare_threads = threads_per_child * (num_buckets - 1) + num_buckets;
3728
0
    if (max_spare_threads < min_spare_threads + (threads_per_child + 1) * num_buckets)
3729
0
        max_spare_threads = min_spare_threads + (threads_per_child + 1) * num_buckets;
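    /* Worked example with hypothetical values (ThreadsPerChild 25, two
     * buckets): min_spare_threads is raised to at least 25 * (2 - 1) + 2 = 27
     * and max_spare_threads to at least 27 + (25 + 1) * 2 = 79.
     */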
3730
3731
0
    max_spawn_rate_per_bucket = (MAX_SPAWN_RATE + num_buckets - 1) / num_buckets;
3732
0
    if (max_spawn_rate_per_bucket < 1) {
3733
0
        max_spawn_rate_per_bucket = 1;
3734
0
    }
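    /* The division above rounds up, splitting the global MAX_SPAWN_RATE
     * budget across buckets; e.g. if MAX_SPAWN_RATE were 32 and there were
     * three buckets, each bucket could spawn up to (32 + 3 - 1) / 3 = 11
     * children per maintenance cycle.
     */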
3735
3736
    /* If we're doing a graceful_restart then we're going to see a lot
3737
     * of children exiting immediately when we get into the main loop
3738
     * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
3739
     * rapidly... and for each one that exits we may start a new one, until
3740
     * there are at least min_spare_threads idle threads, counting across
3741
     * all children.  But we may be permitted to start more children than
3742
     * that, so we'll just keep track of how many we're
3743
     * supposed to start up without the 1 second penalty between each fork.
3744
     */
3745
0
    remaining_children_to_start = ap_daemons_to_start;
3746
0
    if (remaining_children_to_start > active_daemons_limit) {
3747
0
        remaining_children_to_start = active_daemons_limit;
3748
0
    }
3749
0
    if (!retained->mpm->was_graceful) {
3750
0
        startup_children(remaining_children_to_start);
3751
0
        remaining_children_to_start = 0;
3752
0
    }
3753
0
    else {
3754
        /* give the system some time to recover before kicking into
3755
         * exponential mode */
3756
0
        retained->hold_off_on_exponential_spawning = 10;
3757
0
    }
3758
3759
0
    ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00489)
3760
0
                 "%s configured -- resuming normal operations",
3761
0
                 ap_get_server_description());
3762
0
    ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490)
3763
0
                 "Server built: %s", ap_get_server_built());
3764
0
    ap_log_command_line(plog, s);
3765
0
    ap_log_mpm_common(s);
3766
3767
0
    retained->mpm->mpm_state = AP_MPMQ_RUNNING;
3768
3769
0
    server_main_loop(remaining_children_to_start);
3770
0
    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
3771
3772
0
    if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) {
3773
        /* Time to shut down:
3774
         * Kill child processes, tell them to call child_exit, etc...
3775
         */
3776
0
        for (i = 0; i < num_buckets; i++) {
3777
0
            ap_mpm_podx_killpg(retained->buckets[i].pod,
3778
0
                               active_daemons_limit, AP_MPM_PODX_RESTART);
3779
0
        }
3780
0
        ap_reclaim_child_processes(1, /* Start with SIGTERM */
3781
0
                                   event_note_child_stopped);
3782
3783
0
        if (!child_fatal) {
3784
            /* cleanup pid file on normal shutdown */
3785
0
            ap_remove_pid(pconf, ap_pid_fname);
3786
0
            ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
3787
0
                         ap_server_conf, APLOGNO(00491) "caught SIGTERM, shutting down");
3788
0
        }
3789
3790
0
        return DONE;
3791
0
    }
3792
3793
0
    if (retained->mpm->shutdown_pending) {
3794
        /* Time to gracefully shut down:
3795
         * Kill child processes, tell them to call child_exit, etc...
3796
         */
3797
0
        int active_children;
3798
0
        int index;
3799
0
        apr_time_t cutoff = 0;
3800
3801
        /* Close our listeners, and then ask our children to do same */
3802
0
        ap_close_listeners();
3803
0
        for (i = 0; i < num_buckets; i++) {
3804
0
            ap_mpm_podx_killpg(retained->buckets[i].pod,
3805
0
                               active_daemons_limit, AP_MPM_PODX_GRACEFUL);
3806
0
        }
3807
0
        ap_relieve_child_processes(event_note_child_stopped);
3808
3809
0
        if (!child_fatal) {
3810
            /* cleanup pid file on normal shutdown */
3811
0
            ap_remove_pid(pconf, ap_pid_fname);
3812
0
            ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00492)
3813
0
                         "caught " AP_SIG_GRACEFUL_STOP_STRING
3814
0
                         ", shutting down gracefully");
3815
0
        }
3816
3817
0
        if (ap_graceful_shutdown_timeout) {
3818
0
            cutoff = apr_time_now() +
3819
0
                     apr_time_from_sec(ap_graceful_shutdown_timeout);
3820
0
        }
3821
3822
        /* Don't really exit until each child has finished */
3823
0
        retained->mpm->shutdown_pending = 0;
3824
0
        do {
3825
            /* Pause for a second */
3826
0
            apr_sleep(apr_time_from_sec(1));
3827
3828
            /* Relieve any children which have now exited */
3829
0
            ap_relieve_child_processes(event_note_child_stopped);
3830
3831
0
            active_children = 0;
3832
0
            for (index = 0; index < retained->max_daemon_used; ++index) {
3833
0
                if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
3834
0
                    active_children = 1;
3835
                    /* Having just one child is enough to stay around */
3836
0
                    break;
3837
0
                }
3838
0
            }
3839
0
        } while (!retained->mpm->shutdown_pending && active_children &&
3840
0
                 (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
3841
3842
        /* We might be here because we received SIGTERM, either
3843
         * way, try and make sure that all of our processes are
3844
         * really dead.
3845
         */
3846
0
        for (i = 0; i < num_buckets; i++) {
3847
0
            ap_mpm_podx_killpg(retained->buckets[i].pod,
3848
0
                               active_daemons_limit, AP_MPM_PODX_RESTART);
3849
0
        }
3850
0
        ap_reclaim_child_processes(1, event_note_child_stopped);
3851
3852
0
        return DONE;
3853
0
    }
3854
3855
    /* we've been told to restart */
3856
0
    if (one_process) {
3857
        /* not worth thinking about */
3858
0
        return DONE;
3859
0
    }
3860
3861
0
    if (!retained->mpm->is_ungraceful) {
3862
0
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
3863
0
                     "%s received.  Doing graceful restart",
3864
0
                     AP_SIG_GRACEFUL_STRING);
3865
0
    }
3866
0
    else {
3867
0
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
3868
0
                     "SIGHUP received.  Attempting to restart");
3869
0
    }
3870
0
    return OK;
3871
0
}
3872
3873
static void setup_slave_conn(conn_rec *c, void *csd) 
3874
0
{
3875
0
    event_conn_state_t *mcs;
3876
0
    event_conn_state_t *cs;
3877
    
3878
0
    mcs = ap_get_module_config(c->master->conn_config, &mpm_event_module);
3879
    
3880
0
    cs = apr_pcalloc(c->pool, sizeof(*cs));
3881
0
    cs->c = c;
3882
0
    cs->r = NULL;
3883
0
    cs->sc = mcs->sc;
3884
0
    cs->suspended = 0;
3885
0
    cs->p = c->pool;
3886
0
    cs->bucket_alloc = c->bucket_alloc;
3887
0
    cs->pfd = mcs->pfd;
3888
0
    cs->pub = mcs->pub;
3889
0
    cs->pub.state = CONN_STATE_PROCESSING;
3890
0
    cs->pub.sense = CONN_SENSE_DEFAULT;
3891
    
3892
0
    c->cs = &(cs->pub);
3893
0
    ap_set_module_config(c->conn_config, &mpm_event_module, cs);
3894
0
}
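/* setup_slave_conn() gives a connection that has a master (e.g. a secondary
 * connection created by a multiplexed protocol such as HTTP/2) its own
 * event_conn_state_t, allocated from the slave's pool but inheriting the
 * master's server config (sc), pollfd and published state, and starts it in
 * CONN_STATE_PROCESSING; event_pre_connection() below only does this when
 * the slave does not already have its own conn state.
 */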
3895
3896
static int event_pre_connection(conn_rec *c, void *csd)
3897
0
{
3898
0
    if (c->master && (!c->cs || c->cs == c->master->cs)) {
3899
0
        setup_slave_conn(c, csd);
3900
0
    }
3901
0
    return OK;
3902
0
}
3903
3904
static int event_protocol_switch(conn_rec *c, request_rec *r, server_rec *s,
3905
                                 const char *protocol)
3906
0
{
3907
0
    if (!r && s) {
3908
        /* connection-based switching of protocol: set the correct server
3909
         * configuration, so that timeouts, keepalives and such are used
3910
         * for the server that the connection was switched on.
3911
         * Normally, we set this on post_read_request, but on a protocol
3912
         * other than http/1.1, this might never happen.
3913
         */
3914
0
        event_conn_state_t *cs;
3915
        
3916
0
        cs = ap_get_module_config(c->conn_config, &mpm_event_module);
3917
0
        cs->sc = ap_get_module_config(s->module_config, &mpm_event_module);
3918
0
    }
3919
0
    return DECLINED;
3920
0
}
3921
3922
/* This really should be a post_config hook, but the error log is already
3923
 * redirected by that point, so we need to do this in the open_logs phase.
3924
 */
3925
static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
3926
                           apr_pool_t * ptemp, server_rec * s)
3927
0
{
3928
0
    int startup = 0;
3929
0
    int level_flags = 0;
3930
3931
0
    pconf = p;
3932
3933
    /* the reverse of pre_config: we want this only the first time around */
3934
0
    if (retained->mpm->module_loads == 1) {
3935
0
        startup = 1;
3936
0
        level_flags |= APLOG_STARTUP;
3937
0
    }
3938
3939
0
    if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
3940
0
        ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
3941
0
                     (startup ? NULL : s), APLOGNO(03272)
3942
0
                     "no listening sockets available, shutting down");
3943
0
        return !OK;
3944
0
    }
3945
3946
0
    return OK;
3947
0
}
3948
3949
static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
3950
                            apr_pool_t * ptemp)
3951
0
{
3952
0
    int no_detach, debug, foreground;
3953
0
    apr_status_t rv;
3954
0
    const char *userdata_key = "mpm_event_module";
3955
0
    int test_atomics = 0;
3956
3957
0
    debug = ap_exists_config_define("DEBUG");
3958
3959
0
    if (debug) {
3960
0
        foreground = one_process = 1;
3961
0
        no_detach = 0;
3962
0
    }
3963
0
    else {
3964
0
        one_process = ap_exists_config_define("ONE_PROCESS");
3965
0
        no_detach = ap_exists_config_define("NO_DETACH");
3966
0
        foreground = ap_exists_config_define("FOREGROUND");
3967
0
    }
3968
3969
0
    retained = ap_retained_data_get(userdata_key);
3970
0
    if (!retained) {
3971
0
        retained = ap_retained_data_create(userdata_key, sizeof(*retained));
3972
0
        retained->mpm = ap_unixd_mpm_get_retained_data();
3973
0
        retained->mpm->baton = retained;
3974
0
        if (retained->mpm->module_loads) {
3975
0
            test_atomics = 1;
3976
0
        }
3977
0
    }
3978
0
    else if (retained->mpm->baton != retained) {
3979
        /* If the MPM changes on restart, be ungraceful */
3980
0
        retained->mpm->baton = retained;
3981
0
        retained->mpm->was_graceful = 0;
3982
0
    }
3983
0
    retained->mpm->mpm_state = AP_MPMQ_STARTING;
3984
0
    ++retained->mpm->module_loads;
3985
3986
    /* test once for correct operation of fdqueue */
3987
0
    if (test_atomics || retained->mpm->module_loads == 2) {
3988
0
        static apr_uint32_t foo1, foo2;
3989
3990
0
        apr_atomic_set32(&foo1, 100);
3991
0
        foo2 = apr_atomic_add32(&foo1, -10);
3992
0
        if (foo2 != 100 || foo1 != 90) {
3993
0
            ap_log_error(APLOG_MARK, APLOG_CRIT, 0, NULL, APLOGNO(02405)
3994
0
                         "atomics not working as expected - add32 of negative number");
3995
0
            return HTTP_INTERNAL_SERVER_ERROR;
3996
0
        }
3997
0
    }
3998
3999
    /* sigh, want this only the second time around */
4000
0
    if (retained->mpm->module_loads == 2) {
4001
0
        rv = apr_pollset_create(&event_pollset, 1, plog,
4002
0
                                APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
4003
0
        if (rv != APR_SUCCESS) {
4004
0
            ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00495)
4005
0
                         "Couldn't create a Thread Safe Pollset. "
4006
0
                         "Is it supported on your platform?"
4007
0
                         "Also check system or user limits!");
4008
0
            return HTTP_INTERNAL_SERVER_ERROR;
4009
0
        }
4010
0
        apr_pollset_destroy(event_pollset);
4011
4012
0
        if (!one_process && !foreground) {
4013
            /* before we detach, setup crash handlers to log to errorlog */
4014
0
            ap_fatal_signal_setup(ap_server_conf, pconf);
4015
0
            rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
4016
0
                                 : APR_PROC_DETACH_DAEMONIZE);
4017
0
            if (rv != APR_SUCCESS) {
4018
0
                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00496)
4019
0
                             "apr_proc_detach failed");
4020
0
                return HTTP_INTERNAL_SERVER_ERROR;
4021
0
            }
4022
0
        }
4023
0
    }
4024
4025
0
    parent_pid = ap_my_pid = getpid();
4026
4027
0
    ap_listen_pre_config();
4028
0
    ap_daemons_to_start = DEFAULT_START_DAEMON;
4029
0
    min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
4030
0
    max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
4031
0
    server_limit = DEFAULT_SERVER_LIMIT;
4032
0
    thread_limit = DEFAULT_THREAD_LIMIT;
4033
0
    active_daemons_limit = server_limit;
4034
0
    threads_per_child = DEFAULT_THREADS_PER_CHILD;
4035
0
    max_workers = active_daemons_limit * threads_per_child;
4036
0
    defer_linger_chain = NULL;
4037
0
    had_healthy_child = 0;
4038
0
    ap_extended_status = 0;
4039
4040
0
    event_pollset = NULL;
4041
0
    worker_queue_info = NULL;
4042
0
    listener_os_thread = NULL;
4043
0
    listensocks_disabled = 0;
4044
0
    listener_is_wakeable = 0;
4045
4046
0
    return OK;
4047
0
}
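
The atomics check above relies on apr_atomic_add32() returning the value held before the addition, and on unsigned wraparound turning an increment of -10 into a decrement, which is why it expects 100 and 90. A small standalone program built against APR (assuming the APR headers and library are available) shows the same behaviour:

#include <stdio.h>
#include "apr_general.h"
#include "apr_atomic.h"

int main(void)
{
    apr_uint32_t val, old;

    apr_initialize();

    apr_atomic_set32(&val, 100);
    old = apr_atomic_add32(&val, -10);   /* -10 wraps to 0xFFFFFFF6 */

    /* prints "old=100 new=90", the values the MPM's check expects */
    printf("old=%u new=%u\n", (unsigned)old, (unsigned)apr_atomic_read32(&val));

    apr_terminate();
    return 0;
}
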
4048
4049
static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
4050
                             apr_pool_t *ptemp, server_rec *s)
4051
0
{
4052
0
    struct {
4053
0
        struct timeout_queue *tail, *q;
4054
0
        apr_hash_t *hash;
4055
0
    } io, wc, ka;
4056
4057
    /* Not needed in pre_config stage */
4058
0
    if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
4059
0
        return OK;
4060
0
    }
4061
4062
0
    io.hash = apr_hash_make(ptemp);
4063
0
    wc.hash = apr_hash_make(ptemp);
4064
0
    ka.hash = apr_hash_make(ptemp);
4065
0
    io.tail = wc.tail = ka.tail = NULL;
4066
4067
0
    linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(MAX_SECS_TO_LINGER),
4068
0
                             NULL);
4069
0
    short_linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(SECONDS_TO_LINGER),
4070
0
                                   NULL);
4071
4072
0
    for (; s; s = s->next) {
4073
0
        event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
4074
4075
0
        ap_set_module_config(s->module_config, &mpm_event_module, sc);
4076
0
        if (!io.tail) {
4077
            /* The main server uses the global queues */
4078
0
            io.q = TO_QUEUE_MAKE(pconf, s->timeout, NULL);
4079
0
            apr_hash_set(io.hash, &s->timeout, sizeof s->timeout, io.q);
4080
0
            io.tail = waitio_q = io.q;
4081
4082
0
            wc.q = TO_QUEUE_MAKE(pconf, s->timeout, NULL);
4083
0
            apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
4084
0
            wc.tail = write_completion_q = wc.q;
4085
4086
0
            ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, NULL);
4087
0
            apr_hash_set(ka.hash, &s->keep_alive_timeout,
4088
0
                         sizeof s->keep_alive_timeout, ka.q);
4089
0
            ka.tail = keepalive_q = ka.q;
4090
0
        }
4091
0
        else {
4092
            /* The vhosts use any existing queue with the same timeout,
4093
             * or their own queue(s) if there isn't one */
4094
0
            io.q = apr_hash_get(io.hash, &s->timeout, sizeof s->timeout);
4095
0
            if (!io.q) {
4096
0
                io.q = TO_QUEUE_MAKE(pconf, s->timeout, io.tail);
4097
0
                apr_hash_set(io.hash, &s->timeout, sizeof s->timeout, io.q);
4098
0
                io.tail = io.tail->next = io.q;
4099
0
            }
4100
4101
0
            wc.q = apr_hash_get(wc.hash, &s->timeout, sizeof s->timeout);
4102
0
            if (!wc.q) {
4103
0
                wc.q = TO_QUEUE_MAKE(pconf, s->timeout, wc.tail);
4104
0
                apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
4105
0
                wc.tail = wc.tail->next = wc.q;
4106
0
            }
4107
4108
0
            ka.q = apr_hash_get(ka.hash, &s->keep_alive_timeout,
4109
0
                                sizeof s->keep_alive_timeout);
4110
0
            if (!ka.q) {
4111
0
                ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, ka.tail);
4112
0
                apr_hash_set(ka.hash, &s->keep_alive_timeout,
4113
0
                             sizeof s->keep_alive_timeout, ka.q);
4114
0
                ka.tail = ka.tail->next = ka.q;
4115
0
            }
4116
0
        }
4117
0
        sc->io_q = io.q;
4118
0
        sc->wc_q = wc.q;
4119
0
        sc->ka_q = ka.q;
4120
0
    }
4121
4122
0
    return OK;
4123
0
}
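
The loop above avoids duplicate timeout queues by keying an apr_hash_t on the raw timeout value, so vhosts whose timeouts are byte-for-byte equal share a single queue. The standalone sketch below illustrates just that lookup pattern; the string value is a stand-in for the timeout_queue created by TO_QUEUE_MAKE():

#include <stdio.h>
#include "apr_general.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_time.h"

int main(void)
{
    apr_pool_t *p;
    apr_hash_t *by_timeout;
    apr_interval_time_t t1 = apr_time_from_sec(60);
    apr_interval_time_t t2 = apr_time_from_sec(60);   /* same value, different variable */

    apr_initialize();
    apr_pool_create(&p, NULL);

    by_timeout = apr_hash_make(p);
    apr_hash_set(by_timeout, &t1, sizeof t1, "the one 60s queue");

    /* a lookup keyed on an equal value finds the existing entry,
     * so no second queue would be created for this timeout */
    printf("lookup: %s\n", (char *)apr_hash_get(by_timeout, &t2, sizeof t2));

    apr_pool_destroy(p);
    apr_terminate();
    return 0;
}
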
4124
4125
static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
4126
                              apr_pool_t *ptemp, server_rec *s)
4127
0
{
4128
0
    int startup = 0;
4129
4130
    /* the reverse of pre_config: we want this only the first time around */
4131
0
    if (retained->mpm->module_loads == 1) {
4132
0
        startup = 1;
4133
0
    }
4134
4135
0
    if (server_limit > MAX_SERVER_LIMIT) {
4136
0
        if (startup) {
4137
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00497)
4138
0
                         "WARNING: ServerLimit of %d exceeds compile-time "
4139
0
                         "limit of %d servers, decreasing to %d.",
4140
0
                         server_limit, MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
4141
0
        } else {
4142
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00498)
4143
0
                         "ServerLimit of %d exceeds compile-time limit "
4144
0
                         "of %d, decreasing to match",
4145
0
                         server_limit, MAX_SERVER_LIMIT);
4146
0
        }
4147
0
        server_limit = MAX_SERVER_LIMIT;
4148
0
    }
4149
0
    else if (server_limit < 1) {
4150
0
        if (startup) {
4151
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00499)
4152
0
                         "WARNING: ServerLimit of %d not allowed, "
4153
0
                         "increasing to 1.", server_limit);
4154
0
        } else {
4155
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00500)
4156
0
                         "ServerLimit of %d not allowed, increasing to 1",
4157
0
                         server_limit);
4158
0
        }
4159
0
        server_limit = 1;
4160
0
    }
4161
4162
    /* you cannot change ServerLimit across a restart; ignore
4163
     * any such attempts
4164
     */
4165
0
    if (!retained->first_server_limit) {
4166
0
        retained->first_server_limit = server_limit;
4167
0
    }
4168
0
    else if (server_limit != retained->first_server_limit) {
4169
        /* don't need a startup console version here */
4170
0
        ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00501)
4171
0
                     "changing ServerLimit to %d from original value of %d "
4172
0
                     "not allowed during restart",
4173
0
                     server_limit, retained->first_server_limit);
4174
0
        server_limit = retained->first_server_limit;
4175
0
    }
4176
4177
0
    if (thread_limit > MAX_THREAD_LIMIT) {
4178
0
        if (startup) {
4179
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00502)
4180
0
                         "WARNING: ThreadLimit of %d exceeds compile-time "
4181
0
                         "limit of %d threads, decreasing to %d.",
4182
0
                         thread_limit, MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
4183
0
        } else {
4184
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00503)
4185
0
                         "ThreadLimit of %d exceeds compile-time limit "
4186
0
                         "of %d, decreasing to match",
4187
0
                         thread_limit, MAX_THREAD_LIMIT);
4188
0
        }
4189
0
        thread_limit = MAX_THREAD_LIMIT;
4190
0
    }
4191
0
    else if (thread_limit < 1) {
4192
0
        if (startup) {
4193
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00504)
4194
0
                         "WARNING: ThreadLimit of %d not allowed, "
4195
0
                         "increasing to 1.", thread_limit);
4196
0
        } else {
4197
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00505)
4198
0
                         "ThreadLimit of %d not allowed, increasing to 1",
4199
0
                         thread_limit);
4200
0
        }
4201
0
        thread_limit = 1;
4202
0
    }
4203
4204
    /* you cannot change ThreadLimit across a restart; ignore
4205
     * any such attempts
4206
     */
4207
0
    if (!retained->first_thread_limit) {
4208
0
        retained->first_thread_limit = thread_limit;
4209
0
    }
4210
0
    else if (thread_limit != retained->first_thread_limit) {
4211
        /* don't need a startup console version here */
4212
0
        ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00506)
4213
0
                     "changing ThreadLimit to %d from original value of %d "
4214
0
                     "not allowed during restart",
4215
0
                     thread_limit, retained->first_thread_limit);
4216
0
        thread_limit = retained->first_thread_limit;
4217
0
    }
4218
4219
0
    if (threads_per_child > thread_limit) {
4220
0
        if (startup) {
4221
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00507)
4222
0
                         "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
4223
0
                         "of %d threads, decreasing to %d. "
4224
0
                         "To increase, please see the ThreadLimit directive.",
4225
0
                         threads_per_child, thread_limit, thread_limit);
4226
0
        } else {
4227
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00508)
4228
0
                         "ThreadsPerChild of %d exceeds ThreadLimit "
4229
0
                         "of %d, decreasing to match",
4230
0
                         threads_per_child, thread_limit);
4231
0
        }
4232
0
        threads_per_child = thread_limit;
4233
0
    }
4234
0
    else if (threads_per_child < 1) {
4235
0
        if (startup) {
4236
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00509)
4237
0
                         "WARNING: ThreadsPerChild of %d not allowed, "
4238
0
                         "increasing to 1.", threads_per_child);
4239
0
        } else {
4240
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00510)
4241
0
                         "ThreadsPerChild of %d not allowed, increasing to 1",
4242
0
                         threads_per_child);
4243
0
        }
4244
0
        threads_per_child = 1;
4245
0
    }
4246
4247
0
    if (max_workers < threads_per_child) {
4248
0
        if (startup) {
4249
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00511)
4250
0
                         "WARNING: MaxRequestWorkers of %d is less than "
4251
0
                         "ThreadsPerChild of %d, increasing to %d. "
4252
0
                         "MaxRequestWorkers must be at least as large "
4253
0
                         "as the number of threads in a single server.",
4254
0
                         max_workers, threads_per_child, threads_per_child);
4255
0
        } else {
4256
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00512)
4257
0
                         "MaxRequestWorkers of %d is less than ThreadsPerChild "
4258
0
                         "of %d, increasing to match",
4259
0
                         max_workers, threads_per_child);
4260
0
        }
4261
0
        max_workers = threads_per_child;
4262
0
    }
4263
4264
0
    active_daemons_limit = max_workers / threads_per_child;
4265
4266
0
    if (max_workers % threads_per_child) {
4267
0
        int tmp_max_workers = active_daemons_limit * threads_per_child;
4268
4269
0
        if (startup) {
4270
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
4271
0
                         "WARNING: MaxRequestWorkers of %d is not an integer "
4272
0
                         "multiple of ThreadsPerChild of %d, decreasing to nearest "
4273
0
                         "multiple %d, for a maximum of %d servers.",
4274
0
                         max_workers, threads_per_child, tmp_max_workers,
4275
0
                         active_daemons_limit);
4276
0
        } else {
4277
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
4278
0
                         "MaxRequestWorkers of %d is not an integer multiple "
4279
0
                         "of ThreadsPerChild of %d, decreasing to nearest "
4280
0
                         "multiple %d", max_workers, threads_per_child,
4281
0
                         tmp_max_workers);
4282
0
        }
4283
0
        max_workers = tmp_max_workers;
4284
0
    }
4285
4286
0
    if (active_daemons_limit > server_limit) {
4287
0
        if (startup) {
4288
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
4289
0
                         "WARNING: MaxRequestWorkers of %d would require %d servers "
4290
0
                         "and would exceed ServerLimit of %d, decreasing to %d. "
4291
0
                         "To increase, please see the ServerLimit directive.",
4292
0
                         max_workers, active_daemons_limit, server_limit,
4293
0
                         server_limit * threads_per_child);
4294
0
        } else {
4295
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
4296
0
                         "MaxRequestWorkers of %d would require %d servers and "
4297
0
                         "exceed ServerLimit of %d, decreasing to %d",
4298
0
                         max_workers, active_daemons_limit, server_limit,
4299
0
                         server_limit * threads_per_child);
4300
0
        }
4301
0
        active_daemons_limit = server_limit;
4302
0
    }
4303
4304
    /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */
4305
0
    if (ap_daemons_to_start < 1) {
4306
0
        if (startup) {
4307
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
4308
0
                         "WARNING: StartServers of %d not allowed, "
4309
0
                         "increasing to 1.", ap_daemons_to_start);
4310
0
        } else {
4311
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00518)
4312
0
                         "StartServers of %d not allowed, increasing to 1",
4313
0
                         ap_daemons_to_start);
4314
0
        }
4315
0
        ap_daemons_to_start = 1;
4316
0
    }
4317
4318
0
    if (min_spare_threads < 1) {
4319
0
        if (startup) {
4320
0
            ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00519)
4321
0
                         "WARNING: MinSpareThreads of %d not allowed, "
4322
0
                         "increasing to 1 to avoid almost certain server "
4323
0
                         "failure. Please read the documentation.",
4324
0
                         min_spare_threads);
4325
0
        } else {
4326
0
            ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00520)
4327
0
                         "MinSpareThreads of %d not allowed, increasing to 1",
4328
0
                         min_spare_threads);
4329
0
        }
4330
0
        min_spare_threads = 1;
4331
0
    }
4332
4333
    /* max_spare_threads < min_spare_threads + threads_per_child
4334
     * checked in ap_mpm_run()
4335
     */
4336
4337
0
    return OK;
4338
0
}
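
A worked example of the arithmetic above (illustrative values, not defaults): with ThreadsPerChild 25 and MaxRequestWorkers 410, active_daemons_limit becomes 410 / 25 = 16 and, since 410 is not a multiple of 25, MaxRequestWorkers is lowered to 16 * 25 = 400; if ServerLimit were only 10, the 16 required children would exceed it and the effective worker count would be capped at 10 * 25 = 250.
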
4339
4340
static void event_hooks(apr_pool_t * p)
4341
0
{
4342
    /* Our open_logs hook function must run before the core's, or stderr
4343
     * will be redirected to a file, and the messages won't print to the
4344
     * console.
4345
     */
4346
0
    static const char *const aszSucc[] = { "core.c", NULL };
4347
0
    one_process = 0;
4348
0
    ap_force_set_tz(p);
4349
4350
0
    ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
4351
    /* we need to set the MPM state before other pre-config hooks use MPM query
4352
     * to retrieve it, so register as REALLY_FIRST
4353
     */
4354
0
    ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
4355
0
    ap_hook_post_config(event_post_config, NULL, NULL, APR_HOOK_MIDDLE);
4356
0
    ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
4357
0
    ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
4358
0
    ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
4359
0
    ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
4360
0
                                        APR_HOOK_MIDDLE);
4361
0
    ap_hook_mpm_register_poll_callback(event_register_poll_callback,
4362
0
                                       NULL, NULL, APR_HOOK_MIDDLE);
4363
0
    ap_hook_mpm_register_poll_callback_timeout(event_register_poll_callback_ex,
4364
0
                                               NULL, NULL, APR_HOOK_MIDDLE);
4365
0
    ap_hook_pre_read_request(event_pre_read_request, NULL, NULL, APR_HOOK_MIDDLE);
4366
0
    ap_hook_post_read_request(event_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
4367
0
    ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
4368
0
    ap_hook_mpm_resume_suspended(event_resume_suspended, NULL, NULL, APR_HOOK_MIDDLE);
4369
4370
0
    ap_hook_pre_connection(event_pre_connection, NULL, NULL, APR_HOOK_REALLY_FIRST);
4371
0
    ap_hook_protocol_switch(event_protocol_switch, NULL, NULL, APR_HOOK_REALLY_FIRST);
4372
0
}
4373
4374
static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
4375
                                        const char *arg)
4376
0
{
4377
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4378
0
    if (err != NULL) {
4379
0
        return err;
4380
0
    }
4381
4382
0
    ap_daemons_to_start = atoi(arg);
4383
0
    return NULL;
4384
0
}
4385
4386
static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
4387
                                         const char *arg)
4388
0
{
4389
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4390
0
    if (err != NULL) {
4391
0
        return err;
4392
0
    }
4393
4394
0
    min_spare_threads = atoi(arg);
4395
0
    return NULL;
4396
0
}
4397
4398
static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
4399
                                         const char *arg)
4400
0
{
4401
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4402
0
    if (err != NULL) {
4403
0
        return err;
4404
0
    }
4405
4406
0
    max_spare_threads = atoi(arg);
4407
0
    return NULL;
4408
0
}
4409
4410
static const char *set_max_workers(cmd_parms * cmd, void *dummy,
4411
                                   const char *arg)
4412
0
{
4413
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4414
0
    if (err != NULL) {
4415
0
        return err;
4416
0
    }
4417
0
    if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
4418
0
        ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL, APLOGNO(00521)
4419
0
                     "MaxClients is deprecated, use MaxRequestWorkers "
4420
0
                     "instead.");
4421
0
    }
4422
0
    max_workers = atoi(arg);
4423
0
    return NULL;
4424
0
}
4425
4426
static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
4427
                                         const char *arg)
4428
0
{
4429
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4430
0
    if (err != NULL) {
4431
0
        return err;
4432
0
    }
4433
4434
0
    threads_per_child = atoi(arg);
4435
0
    return NULL;
4436
0
}
4437
static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
4438
0
{
4439
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4440
0
    if (err != NULL) {
4441
0
        return err;
4442
0
    }
4443
4444
0
    server_limit = atoi(arg);
4445
0
    return NULL;
4446
0
}
4447
4448
static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
4449
                                    const char *arg)
4450
0
{
4451
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4452
0
    if (err != NULL) {
4453
0
        return err;
4454
0
    }
4455
4456
0
    thread_limit = atoi(arg);
4457
0
    return NULL;
4458
0
}
4459
4460
static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
4461
                                     const char *arg)
4462
0
{
4463
0
    double val;
4464
0
    char *endptr;
4465
0
    const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
4466
0
    if (err != NULL) {
4467
0
        return err;
4468
0
    }
4469
4470
0
    val = strtod(arg, &endptr);
4471
0
    if (*endptr)
4472
0
        return "error parsing value";
4473
4474
0
    if (val <= 0)
4475
0
        return "AsyncRequestWorkerFactor argument must be a positive number";
4476
4477
0
    worker_factor = val * WORKER_FACTOR_SCALE;
4478
0
    if (worker_factor < WORKER_FACTOR_SCALE) {
4479
0
        worker_factor = WORKER_FACTOR_SCALE;
4480
0
    }
4481
0
    return NULL;
4482
0
}
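
As a concrete illustration of the scaling above: AsyncRequestWorkerFactor 1.5 stores worker_factor = 1.5 * WORKER_FACTOR_SCALE in the module's fixed-point representation; a value strictly between 0 and 1 is accepted but silently raised to the equivalent of 1, while 0 or a negative value is rejected with the error message shown.
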
4483
4484
4485
static const command_rec event_cmds[] = {
4486
    LISTEN_COMMANDS,
4487
    AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
4488
                  "Number of child processes launched at server startup"),
4489
    AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
4490
                  "Maximum number of child processes for this run of Apache"),
4491
    AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
4492
                  "Minimum number of idle threads, to handle request spikes"),
4493
    AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
4494
                  "Maximum number of idle threads"),
4495
    AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
4496
                  "Deprecated name of MaxRequestWorkers"),
4497
    AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
4498
                  "Maximum number of threads alive at the same time"),
4499
    AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
4500
                  "Number of threads each child creates"),
4501
    AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
4502
                  "Maximum number of worker threads per child process for this "
4503
                  "run of Apache - Upper limit for ThreadsPerChild"),
4504
    AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
4505
                  "How many additional connects will be accepted per idle "
4506
                  "worker thread"),
4507
    AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
4508
    {NULL}
4509
};
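
As a purely illustrative combination of the directives registered above: StartServers 3, ServerLimit 16, ThreadLimit 64, ThreadsPerChild 25, MaxRequestWorkers 400, MinSpareThreads 75, MaxSpareThreads 250 and AsyncRequestWorkerFactor 2 pass every check in event_check_config(), since 400 is an exact multiple of 25 and the resulting 16 children fit within ServerLimit.
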
4510
4511
AP_DECLARE_MODULE(mpm_event) = {
4512
    MPM20_MODULE_STUFF,
4513
    NULL,                       /* hook to run before apache parses args */
4514
    NULL,                       /* create per-directory config structure */
4515
    NULL,                       /* merge per-directory config structures */
4516
    NULL,                       /* create per-server config structure */
4517
    NULL,                       /* merge per-server config structures */
4518
    event_cmds,                 /* command apr_table_t */
4519
    event_hooks                 /* register_hooks */
4520
};