summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJim Jagielski <jim@apache.org>2014-06-03 15:07:29 +0200
committerJim Jagielski <jim@apache.org>2014-06-03 15:07:29 +0200
commitc539206da2d9e8d5d964e84c7497886c74f1a7f9 (patch)
treebf2d70a0adc86d730f516e93485c6ad913c03783
parentmod_proxy_http: avoid (unlikely) access to freed memory. (diff)
downloadapache2-c539206da2d9e8d5d964e84c7497886c74f1a7f9.tar.xz
apache2-c539206da2d9e8d5d964e84c7497886c74f1a7f9.zip
Optimize w/ duplicated listeners and use of SO_REUSEPORT
where available. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1599531 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--CHANGES4
-rw-r--r--include/ap_listen.h16
-rw-r--r--server/listen.c95
-rw-r--r--server/mpm/event/event.c149
-rw-r--r--server/mpm/prefork/prefork.c127
-rw-r--r--server/mpm/worker/worker.c184
-rw-r--r--server/mpm_unix.c151
7 files changed, 528 insertions, 198 deletions
diff --git a/CHANGES b/CHANGES
index dca11889d1..2e04c4808c 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,10 @@
-*- coding: utf-8 -*-
Changes with Apache 2.5.0
+ *) MPMs: Support SO_REUSEPORT to create multiple duplicated listener
+ records for scalability. [Yingqi Lu <yingqi.lu@intel.com>,
+ Jeff Trawick, Jim Jagielski]
+
*) mod_proxy_html: support automatic detection of doctype and processing
of FPIs. PR56285 [Micha Lenk <micha lenk info>, Nick Kew]
diff --git a/include/ap_listen.h b/include/ap_listen.h
index 21101cd8d3..4538ef9648 100644
--- a/include/ap_listen.h
+++ b/include/ap_listen.h
@@ -78,6 +78,14 @@ struct ap_listen_rec {
*/
AP_DECLARE_DATA extern ap_listen_rec *ap_listeners;
+AP_DECLARE_DATA extern ap_listen_rec **mpm_listen;
+
+AP_DECLARE_DATA extern int enable_default_listener;
+
+AP_DECLARE_DATA extern int num_buckets;
+
+AP_DECLARE_DATA extern int have_so_reuseport;
+
/**
* Setup all of the defaults for the listener list
*/
@@ -91,6 +99,14 @@ AP_DECLARE(void) ap_listen_pre_config(void);
*/
AP_DECLARE(int) ap_setup_listeners(server_rec *s);
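+/**
+ * Duplicate the ap_listeners list once per listener bucket. The heads of
+ * the duplicated lists are stored in the global mpm_listen array.
+ * @param s The global server_rec
+ * @param p The config pool
+ * @param num_buckets The total number of listener buckets.
+ * @return APR_SUCCESS on success, otherwise an APR error status
+ */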
+AP_DECLARE(apr_status_t) ap_duplicate_listeners(server_rec *s, apr_pool_t *p, int num_buckets);
+
+
/**
* Loop through the global ap_listen_rec list and close each of the sockets.
*/
diff --git a/server/listen.c b/server/listen.c
index f9c4266d3a..fb404d9fd6 100644
--- a/server/listen.c
+++ b/server/listen.c
@@ -38,6 +38,11 @@
AP_DECLARE_DATA ap_listen_rec *ap_listeners = NULL;
+AP_DECLARE_DATA ap_listen_rec **mpm_listen = NULL;
+AP_DECLARE_DATA int enable_default_listener = 1;
+AP_DECLARE_DATA int num_buckets = 1;
+AP_DECLARE_DATA int have_so_reuseport = 1;
+
static ap_listen_rec *old_listeners;
static int ap_listenbacklog;
static int send_buffer_size;
@@ -124,6 +129,24 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server, int do_bind_
ap_sock_disable_nagle(s);
#endif
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+ int thesock;
+ apr_os_sock_get(&thesock, s);
+ if (setsockopt(thesock, SOL_SOCKET, SO_REUSEPORT, (void *)&one, sizeof(int)) < 0) {
+ if (errno == ENOPROTOOPT) {
+ have_so_reuseport = 0;
+ } /* Check if SO_REUSEPORT is supported by the running Linux Kernel.*/
+ else {
+ ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+ "make_sock: for address %pI, apr_socket_opt_set: (SO_REUSEPORT)",
+ server->bind_addr);
+ apr_socket_close(s);
+ return errno;
+ }
+ }
+
if (do_bind_listen) {
#if APR_HAVE_IPV6
if (server->bind_addr->family == APR_INET6) {
@@ -179,7 +202,7 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server, int do_bind_
#endif
server->sd = s;
- server->active = 1;
+ server->active = enable_default_listener;
server->accept_func = NULL;
@@ -575,7 +598,7 @@ static int open_listeners(apr_pool_t *pool)
}
}
#endif
- if (make_sock(pool, lr, 1) == APR_SUCCESS) {
+ if (make_sock(pool, lr, enable_default_listener) == APR_SUCCESS) {
++num_open;
}
else {
@@ -727,13 +750,73 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
return num_listeners;
}
+/*
+ * Build num_buckets independent copies of the ap_listeners list and publish
+ * their heads through the global mpm_listen array (one list per bucket).
+ * Every copy gets its own freshly created socket, which is then configured
+ * and bound through make_sock().
+ *
+ * s            server_rec handed to ap_apply_accept_filter()
+ * p            pool used for the array, the records and the sockets
+ * num_buckets  number of lists to create (NOTE: this parameter shadows the
+ *              global of the same name inside this function)
+ *
+ * Returns APR_SUCCESS, or the status of the first call that failed. On
+ * failure the buckets that were not reached are left as NULL list heads.
+ */
+AP_DECLARE(apr_status_t) ap_duplicate_listeners(server_rec *s, apr_pool_t *p,
+                                                  int num_buckets)
+{
+    int i;
+    apr_status_t stat;
+    int use_nonblock = 0;
+    ap_listen_rec *lr;
+
+    /* Zero-filled so that every bucket head is a valid (empty) list even if
+     * ap_listeners is empty or we bail out early; ap_close_listeners() walks
+     * all of these slots.
+     */
+    mpm_listen = apr_pcalloc(p, sizeof(ap_listen_rec *) * num_buckets);
+
+    for (i = 0; i < num_buckets; i++) {
+        ap_listen_rec *last = NULL;
+
+        for (lr = ap_listeners; lr; lr = lr->next) {
+            ap_listen_rec *duplr;
+            char *hostname;
+            apr_port_t port;
+            apr_sockaddr_t *sa;
+
+            /* Zero-filled: no field of the copy is ever read uninitialized. */
+            duplr = apr_pcalloc(p, sizeof(ap_listen_rec));
+            duplr->slave = NULL;
+            duplr->next = NULL;
+            duplr->protocol = apr_pstrdup(p, lr->protocol);
+
+            /* Give the copy its own sockaddr for the same host and port. */
+            hostname = apr_pstrdup(p, lr->bind_addr->hostname);
+            port = lr->bind_addr->port;
+            stat = apr_sockaddr_info_get(&sa, hostname, APR_UNSPEC, port, 0, p);
+            if (stat != APR_SUCCESS) {
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "ap_duplicate_socket: for address %pI, "
+                              "cannot set up the socket address",
+                              lr->bind_addr);
+                return stat;
+            }
+            duplr->bind_addr = sa;
+
+            if ((stat = apr_socket_create(&duplr->sd, duplr->bind_addr->family,
+                                          SOCK_STREAM, 0, p)) != APR_SUCCESS) {
+                /* Log the real failure status rather than 0. */
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "ap_duplicate_socket: for address %pI, "
+                              "cannot duplicate a new socket!",
+                              duplr->bind_addr);
+                return stat;
+            }
+
+            /* Do not link a listener whose socket could not be set up.
+             * NOTE(review): make_sock() appears to log and close the socket
+             * on its own error paths - confirm against its full body.
+             */
+            if ((stat = make_sock(p, duplr, 1)) != APR_SUCCESS) {
+                return stat;
+            }
+
+#if AP_NONBLOCK_WHEN_MULTI_LISTEN
+            use_nonblock = (ap_listeners && ap_listeners->next);
+            if ((stat = apr_socket_opt_set(duplr->sd, APR_SO_NONBLOCK, use_nonblock))
+                != APR_SUCCESS) {
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "unable to control socket non-blocking status");
+                /* The record is not linked anywhere yet; do not leak its fd. */
+                apr_socket_close(duplr->sd);
+                return stat;
+            }
+#endif
+            ap_apply_accept_filter(p, duplr, s);
+
+            /* Append to this bucket's list, preserving ap_listeners order. */
+            if (last == NULL) {
+                mpm_listen[i] = duplr;
+            }
+            else {
+                last->next = duplr;
+            }
+            last = duplr;
+        }
+    }
+    return APR_SUCCESS;
+}
+
+
AP_DECLARE_NONSTD(void) ap_close_listeners(void)
{
ap_listen_rec *lr;
-
- for (lr = ap_listeners; lr; lr = lr->next) {
- apr_socket_close(lr->sd);
- lr->active = 0;
+    int i;
+
+    /* mpm_listen is NULL until ap_duplicate_listeners() has populated it.
+     * In that case there are no buckets to walk, so close the original
+     * ap_listeners list instead of dereferencing a NULL array.
+     */
+    if (mpm_listen == NULL) {
+        for (lr = ap_listeners; lr; lr = lr->next) {
+            apr_socket_close(lr->sd);
+            lr->active = 0;
+        }
+        return;
+    }
+
+    /* Close every duplicated listener of every bucket and mark it inactive. */
+    for (i = 0; i < num_buckets; i++) {
+        for (lr = mpm_listen[i]; lr; lr = lr->next) {
+            apr_socket_close(lr->sd);
+            lr->active = 0;
+        }
}
}
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index ae247cd13c..779499a40e 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -59,6 +59,8 @@
#include "apr_want.h"
#include "apr_version.h"
+#include <stdlib.h>
+
#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif
@@ -349,7 +351,7 @@ typedef struct event_retained_data {
* doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
* without the need to spawn.
*/
- int idle_spawn_rate;
+ int *idle_spawn_rate;
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE (32)
#endif
@@ -359,7 +361,10 @@ static event_retained_data *retained;
#define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t)
-static ap_pod_t *pod;
+static ap_pod_t **pod;
+static ap_pod_t *child_pod;
+static ap_listen_rec *child_listen;
+static int *bucket; /* bucket array for the httpd child processes */
/* The event MPM respects a couple of runtime flags that can aid
* in debugging. Setting the -DNO_DETACH flag will prevent the root process
@@ -1292,7 +1297,7 @@ static apr_status_t init_pollset(apr_pool_t *p)
TO_QUEUE_INIT(short_linger_q);
listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
- for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) {
+ for (lr = child_listen; lr != NULL; lr = lr->next, i++) {
apr_pollfd_t *pfd;
AP_DEBUG_ASSERT(i < num_listensocks);
pfd = &listener_pollfd[i];
@@ -2421,6 +2426,8 @@ static void child_main(int child_num_arg)
apr_threadattr_t *thread_attr;
apr_thread_t *start_thread_id;
apr_pool_t *pskip;
+ int i;
+ ap_listen_rec *lr;
mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this
* child initializes
@@ -2429,6 +2436,19 @@ static void child_main(int child_num_arg)
ap_fatal_signal_child_setup(ap_server_conf);
apr_pool_create(&pchild, pconf);
+ /* close unused listeners and pods */
+ for (i = 0; i < num_buckets; i++) {
+ if (i != bucket[child_num_arg]) {
+ lr = mpm_listen[i];
+ while(lr) {
+ apr_socket_close(lr->sd);
+ lr = lr->next;
+ }
+ mpm_listen[i]->active = 0;
+ ap_mpm_podx_close(pod[i]);
+ }
+ }
+
/*stuff to do before we switch id's, so we have permissions. */
ap_reopen_scoreboard(pchild, NULL, 0);
@@ -2539,7 +2559,7 @@ static void child_main(int child_num_arg)
apr_signal(SIGTERM, dummy_signal_handler);
/* Watch for any messages from the parent over the POD */
while (1) {
- rv = ap_mpm_podx_check(pod);
+ rv = ap_mpm_podx_check(child_pod);
if (rv == AP_MPM_PODX_NORESTART) {
/* see if termination was triggered while we slept */
switch (terminate_mode) {
@@ -2592,6 +2612,9 @@ static int make_child(server_rec * s, int slot)
/* NOTREACHED */
}
+ child_listen = mpm_listen[bucket[slot]];
+ child_pod = pod[bucket[slot]];
+
if ((pid = fork()) == -1) {
ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
"fork: Unable to fork new process");
@@ -2652,6 +2675,7 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->parent[i].pid != 0) {
continue;
}
+ bucket[i] = i % num_buckets;
if (make_child(ap_server_conf, i) < 0) {
break;
}
@@ -2659,7 +2683,7 @@ static void startup_children(int number_to_start)
}
}
-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(int child_bucket)
{
int i, j;
int idle_thread_count;
@@ -2689,7 +2713,7 @@ static void perform_idle_server_maintenance(void)
int child_threads_active = 0;
if (i >= retained->max_daemons_limit
- && totally_free_length == retained->idle_spawn_rate)
+ && totally_free_length == retained->idle_spawn_rate[child_bucket])
/* short cut if all active processes have been examined and
* enough empty scoreboard slots have been found
*/
@@ -2716,7 +2740,8 @@ static void perform_idle_server_maintenance(void)
if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
for loop if no pid? not much else matters */
if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
- && ps->generation == retained->my_generation)
+ && ps->generation == retained->my_generation &&
+ bucket[i] == child_bucket)
{
++idle_thread_count;
}
@@ -2727,8 +2752,8 @@ static void perform_idle_server_maintenance(void)
}
active_thread_count += child_threads_active;
if (any_dead_threads
- && totally_free_length < retained->idle_spawn_rate
- && free_length < MAX_SPAWN_RATE
+ && totally_free_length < retained->idle_spawn_rate[child_bucket]
+ && free_length < MAX_SPAWN_RATE/num_buckets
&& (!ps->pid /* no process in the slot */
|| ps->quiescing)) { /* or at least one is going away */
if (all_dead_threads) {
@@ -2784,12 +2809,12 @@ static void perform_idle_server_maintenance(void)
retained->max_daemons_limit = last_non_dead + 1;
- if (idle_thread_count > max_spare_threads) {
+ if (idle_thread_count > max_spare_threads/num_buckets) {
/* Kill off one child */
- ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL);
- retained->idle_spawn_rate = 1;
+ ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL);
+ retained->idle_spawn_rate[child_bucket] = 1;
}
- else if (idle_thread_count < min_spare_threads) {
+ else if (idle_thread_count < min_spare_threads/num_buckets) {
/* terminate the free list */
if (free_length == 0) { /* scoreboard is full, can't fork */
@@ -2807,13 +2832,13 @@ static void perform_idle_server_maintenance(void)
ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
"scoreboard is full, not at MaxRequestWorkers");
}
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
else {
- if (free_length > retained->idle_spawn_rate) {
- free_length = retained->idle_spawn_rate;
+ if (free_length > retained->idle_spawn_rate[child_bucket]) {
+ free_length = retained->idle_spawn_rate[child_bucket];
}
- if (retained->idle_spawn_rate >= 8) {
+ if (retained->idle_spawn_rate[child_bucket] >= 8) {
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
"server seems busy, (you may need "
"to increase StartServers, ThreadsPerChild "
@@ -2823,6 +2848,7 @@ static void perform_idle_server_maintenance(void)
idle_thread_count, total_non_dead);
}
for (i = 0; i < free_length; ++i) {
+ bucket[free_slots[i]] = child_bucket;
make_child(ap_server_conf, free_slots[i]);
}
/* the next time around we want to spawn twice as many if this
@@ -2831,13 +2857,13 @@ static void perform_idle_server_maintenance(void)
if (retained->hold_off_on_exponential_spawning) {
--retained->hold_off_on_exponential_spawning;
}
- else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
- retained->idle_spawn_rate *= 2;
+ else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) {
+ retained->idle_spawn_rate[child_bucket] *= 2;
}
}
}
else {
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
}
@@ -2894,7 +2920,7 @@ static void server_main_loop(int remaining_children_to_start)
ap_scoreboard_image->parent[child_slot].quiescing = 0;
if (processed_status == APEXIT_CHILDSICK) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[bucket[child_slot]] = 1;
}
else if (remaining_children_to_start
&& child_slot < ap_daemons_limit) {
@@ -2912,7 +2938,9 @@ static void server_main_loop(int remaining_children_to_start)
if (processed_status == APEXIT_CHILDSICK
&& old_gen == retained->my_generation) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ for (i = 0; i < num_buckets; i++) {
+ retained->idle_spawn_rate[i] = 1;
+ }
}
#if APR_HAS_OTHER_CHILD
}
@@ -2951,7 +2979,9 @@ static void server_main_loop(int remaining_children_to_start)
continue;
}
- perform_idle_server_maintenance();
+ for (i = 0; i < num_buckets; i++) {
+ perform_idle_server_maintenance(i);
+ }
}
}
@@ -2959,6 +2989,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
{
int remaining_children_to_start;
+ int i;
+
ap_log_pid(pconf, ap_pid_fname);
if (!retained->is_graceful) {
@@ -2972,11 +3004,13 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
ap_scoreboard_image->global->running_generation = retained->my_generation;
}
+ bucket = apr_palloc(_pconf, sizeof(int) * ap_daemons_limit);
+
restart_pending = shutdown_pending = 0;
set_signals();
/* Don't thrash... */
- if (max_spare_threads < min_spare_threads + threads_per_child)
- max_spare_threads = min_spare_threads + threads_per_child;
+ if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
+ max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -3017,7 +3051,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
/* Time to shut down:
* Kill child processes, tell them to call child_exit, etc...
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
event_note_child_killed);
@@ -3038,7 +3074,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
/* Close our listeners, and then ask our children to do same */
ap_close_listeners();
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ }
ap_relieve_child_processes(event_note_child_killed);
if (!child_fatal) {
@@ -3078,7 +3116,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
* way, try and make sure that all of our processes are
* really dead.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, event_note_child_killed);
return DONE;
@@ -3104,8 +3144,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
AP_SIG_GRACEFUL_STRING
" received. Doing graceful restart");
/* wake up the children...time to die. But we'll have more soon */
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
-
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ }
/* This is mostly for debugging... so that we know what is still
* gracefully dealing with existing request.
@@ -3117,7 +3158,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
* and a SIGHUP, we may as well use the same signal, because some user
* pthreads are stealing signals from us left and right.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
event_note_child_killed);
@@ -3137,6 +3180,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
int startup = 0;
int level_flags = 0;
apr_status_t rv;
+ int i;
+ int num_of_cores = 0;
pconf = p;
@@ -3146,6 +3191,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
level_flags |= APLOG_STARTUP;
}
+ enable_default_listener = 0;
+
if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
(startup ? NULL : s),
@@ -3153,12 +3200,36 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
return DONE;
}
+ enable_default_listener = 1;
+ if (have_so_reuseport) {
+#ifdef _SC_NPROCESSORS_ONLN
+ num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+ num_of_cores = 1;
+#endif
+ if (num_of_cores > 8) {
+ num_buckets = num_of_cores/8;
+ }
+ else {
+ num_buckets = 1;
+ }
+ }
+ else {
+ num_buckets = 1;
+ }
+
+ ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
+
+ pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
+
if (!one_process) {
- if ((rv = ap_mpm_podx_open(pconf, &pod))) {
- ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
- (startup ? NULL : s),
- "could not open pipe-of-death");
- return DONE;
+ for (i = 0; i < num_buckets; i++) {
+ if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not open pipe-of-death");
+ return DONE;
+ }
}
}
/* for skiplist */
@@ -3172,6 +3243,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
int no_detach, debug, foreground;
apr_status_t rv;
const char *userdata_key = "mpm_event_module";
+ int i;
mpm_state = AP_MPMQ_STARTING;
@@ -3192,7 +3264,6 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
if (!retained) {
retained = ap_retained_data_create(userdata_key, sizeof(*retained));
retained->max_daemons_limit = -1;
- retained->idle_spawn_rate = 1;
}
++retained->module_loads;
if (retained->module_loads == 2) {
@@ -3206,6 +3277,10 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
"atomics not working as expected - add32 of negative number");
return HTTP_INTERNAL_SERVER_ERROR;
}
+ retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets);
+ for (i = 0; i< num_buckets; i++) {
+ retained->idle_spawn_rate[i] = 1;
+ }
rv = apr_pollset_create(&event_pollset, 1, plog,
APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
if (rv != APR_SUCCESS) {
diff --git a/server/mpm/prefork/prefork.c b/server/mpm/prefork/prefork.c
index ae0fd37461..05a782e69a 100644
--- a/server/mpm/prefork/prefork.c
+++ b/server/mpm/prefork/prefork.c
@@ -48,6 +48,8 @@
#include "ap_mmn.h"
#include "apr_poll.h"
+#include <stdlib.h>
+
#ifdef HAVE_TIME_H
#include <time.h>
#endif
@@ -86,14 +88,19 @@
/* config globals */
-static apr_proc_mutex_t *accept_mutex;
+static apr_proc_mutex_t **accept_mutex;
static int ap_daemons_to_start=0;
static int ap_daemons_min_free=0;
static int ap_daemons_max_free=0;
static int ap_daemons_limit=0; /* MaxRequestWorkers */
static int server_limit = 0;
static int mpm_state = AP_MPMQ_STARTING;
-static ap_pod_t *pod;
+static ap_pod_t **pod;
+static ap_pod_t *child_pod;
+static apr_proc_mutex_t *child_mutex;
+static ap_listen_rec *child_listen;
+static int *bucket; /* bucket array for the httpd child processes */
+
/* data retained by prefork across load/unload of the module
* allocated on first call to pre-config hook; located on
@@ -222,14 +229,14 @@ static void clean_child_exit(int code)
prefork_note_child_killed(/* slot */ 0, 0, 0);
}
- ap_mpm_pod_close(pod);
+ ap_mpm_pod_close(child_pod);
chdir_for_gprof();
exit(code);
}
static void accept_mutex_on(void)
{
- apr_status_t rv = apr_proc_mutex_lock(accept_mutex);
+ apr_status_t rv = apr_proc_mutex_lock(child_mutex);
if (rv != APR_SUCCESS) {
const char *msg = "couldn't grab the accept mutex";
@@ -247,7 +254,7 @@ static void accept_mutex_on(void)
static void accept_mutex_off(void)
{
- apr_status_t rv = apr_proc_mutex_unlock(accept_mutex);
+ apr_status_t rv = apr_proc_mutex_unlock(child_mutex);
if (rv != APR_SUCCESS) {
const char *msg = "couldn't release the accept mutex";
@@ -272,7 +279,7 @@ static void accept_mutex_off(void)
* when it's safe in the single Listen case.
*/
#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
-#define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0)
+#define SAFE_ACCEPT(stmt) do {if (child_listen->next) {stmt;}} while(0)
#else
#define SAFE_ACCEPT(stmt) do {stmt;} while(0)
#endif
@@ -521,10 +528,23 @@ static void child_main(int child_num_arg)
apr_pool_create(&ptrans, pchild);
apr_pool_tag(ptrans, "transaction");
+/* close unused listeners and pods */
+ for (i = 0; i < num_buckets; i++) {
+ if (i != bucket[my_child_num]) {
+ lr = mpm_listen[i];
+ while(lr) {
+ apr_socket_close(lr->sd);
+ lr = lr->next;
+ }
+ mpm_listen[i]->active = 0;
+ ap_mpm_pod_close(pod[i]);
+ }
+ }
+
/* needs to be done before we switch UIDs so we have permissions */
ap_reopen_scoreboard(pchild, NULL, 0);
- lockfile = apr_proc_mutex_lockfile(accept_mutex);
- status = apr_proc_mutex_child_init(&accept_mutex,
+ lockfile = apr_proc_mutex_lockfile(child_mutex);
+ status = apr_proc_mutex_child_init(&child_mutex,
lockfile,
pchild);
if (status != APR_SUCCESS) {
@@ -532,7 +552,7 @@ static void child_main(int child_num_arg)
"Couldn't initialize cross-process lock in child "
"(%s) (%s)",
lockfile ? lockfile : "none",
- apr_proc_mutex_name(accept_mutex));
+ apr_proc_mutex_name(child_mutex));
clean_child_exit(APEXIT_CHILDFATAL);
}
@@ -554,7 +574,7 @@ static void child_main(int child_num_arg)
clean_child_exit(APEXIT_CHILDSICK); /* assume temporary resource issue */
}
- for (lr = ap_listeners, i = num_listensocks; i--; lr = lr->next) {
+ for (lr = child_listen, i = num_listensocks; i--; lr = lr->next) {
apr_pollfd_t pfd = { 0 };
pfd.desc_type = APR_POLL_SOCKET;
@@ -612,7 +632,7 @@ static void child_main(int child_num_arg)
if (num_listensocks == 1) {
/* There is only one listener record, so refer to that one. */
- lr = ap_listeners;
+ lr = child_listen;
}
else {
/* multiple listening sockets - need to poll */
@@ -710,7 +730,7 @@ static void child_main(int child_num_arg)
* while we were processing the connection or we are the lucky
* idle server process that gets to die.
*/
- if (ap_mpm_pod_check(pod) == APR_SUCCESS) { /* selected as idle? */
+ if (ap_mpm_pod_check(child_pod) == APR_SUCCESS) { /* selected as idle? */
die_now = 1;
}
else if (retained->my_generation !=
@@ -750,6 +770,9 @@ static int make_child(server_rec *s, int slot)
(void) ap_update_child_status_from_indexes(slot, 0, SERVER_STARTING,
(request_rec *) NULL);
+ child_listen = mpm_listen[bucket[slot]];
+ child_mutex = accept_mutex[bucket[slot]];
+ child_pod = pod[bucket[slot]];
#ifdef _OSD_POSIX
/* BS2000 requires a "special" version of fork() before a setuid() call */
@@ -815,6 +838,7 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) {
continue;
}
+ bucket[i] = i % num_buckets;
if (make_child(ap_server_conf, i) < 0) {
break;
}
@@ -822,6 +846,8 @@ static void startup_children(int number_to_start)
}
}
+static int bucket_make_child_record = -1;
+static int bucket_kill_child_record = -1;
static void perform_idle_server_maintenance(apr_pool_t *p)
{
int i;
@@ -874,7 +900,8 @@ static void perform_idle_server_maintenance(apr_pool_t *p)
* shut down gracefully, in case it happened to pick up a request
* while we were counting
*/
- ap_mpm_pod_signal(pod);
+ bucket_kill_child_record = (bucket_kill_child_record + 1) % num_buckets;
+ ap_mpm_pod_signal(pod[bucket_kill_child_record]);
retained->idle_spawn_rate = 1;
}
else if (idle_count < ap_daemons_min_free) {
@@ -899,6 +926,7 @@ static void perform_idle_server_maintenance(apr_pool_t *p)
idle_count, total_non_dead);
}
for (i = 0; i < free_length; ++i) {
+ bucket[free_slots[i]]= (++bucket_make_child_record) % num_buckets;
make_child(ap_server_conf, free_slots[i]);
}
/* the next time around we want to spawn twice as many if this
@@ -926,15 +954,24 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
int index;
int remaining_children_to_start;
apr_status_t rv;
+ int i;
+ ap_listen_rec *lr;
ap_log_pid(pconf, ap_pid_fname);
- /* Initialize cross-process accept lock */
- rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
- s, _pconf, 0);
- if (rv != APR_SUCCESS) {
- mpm_state = AP_MPMQ_STOPPING;
- return DONE;
+ bucket = apr_palloc(_pconf, sizeof(int) * ap_daemons_limit);
+ /* Initialize cross-process accept lock for each bucket*/
+ accept_mutex = apr_palloc(_pconf, sizeof(apr_proc_mutex_t *) * num_buckets);
+ for (i = 0; i < num_buckets; i++) {
+ rv = ap_proc_mutex_create(&accept_mutex[i], NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
+ s, _pconf, 0);
+ if (rv != APR_SUCCESS) {
+ mpm_state = AP_MPMQ_STOPPING;
+ return DONE;
+ }
+ }
+ for (lr = ap_listeners; lr; lr = lr->next) {
+ apr_socket_close(lr->sd);
}
if (!retained->is_graceful) {
@@ -953,12 +990,13 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
if (one_process) {
AP_MONCONTROL(1);
+ bucket[0] = 0;
make_child(ap_server_conf, 0);
/* NOTREACHED */
}
else {
- if (ap_daemons_max_free < ap_daemons_min_free + 1) /* Don't thrash... */
- ap_daemons_max_free = ap_daemons_min_free + 1;
+ if (ap_daemons_max_free < ap_daemons_min_free + num_buckets) /* Don't thrash... */
+ ap_daemons_max_free = ap_daemons_min_free + num_buckets;
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -991,7 +1029,7 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_command_line(plog, s);
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00165)
"Accept mutex: %s (default: %s)",
- apr_proc_mutex_name(accept_mutex),
+ apr_proc_mutex_name(accept_mutex[0]),
apr_proc_mutex_defname());
mpm_state = AP_MPMQ_RUNNING;
@@ -1122,7 +1160,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_close_listeners();
/* kill off the idle ones */
- ap_mpm_pod_killpg(pod, retained->max_daemons_limit);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_pod_killpg(pod[i], retained->max_daemons_limit);
+ }
/* Send SIGUSR1 to the active children */
active_children = 0;
@@ -1196,7 +1236,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
"Graceful restart requested, doing restart");
/* kill off the idle ones */
- ap_mpm_pod_killpg(pod, retained->max_daemons_limit);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_pod_killpg(pod[i], retained->max_daemons_limit);
+ }
/* This is mostly for debugging... so that we know what is still
* gracefully dealing with existing request. This will break
@@ -1239,6 +1281,8 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
int startup = 0;
int level_flags = 0;
apr_status_t rv;
+ int i;
+ int num_of_cores = 0;
pconf = p;
@@ -1248,6 +1292,7 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
level_flags |= APLOG_STARTUP;
}
+ enable_default_listener = 0;
if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
(startup ? NULL : s),
@@ -1255,12 +1300,36 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
return DONE;
}
- if ((rv = ap_mpm_pod_open(pconf, &pod))) {
- ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
- (startup ? NULL : s),
- "could not open pipe-of-death");
- return DONE;
+ enable_default_listener = 1;
+ if (have_so_reuseport) {
+#ifdef _SC_NPROCESSORS_ONLN
+ num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+ num_of_cores = 1;
+#endif
+ if (num_of_cores > 8) {
+ num_buckets = num_of_cores/8;
+ }
+ else {
+ num_buckets = 1;
+ }
}
+ else {
+ num_buckets = 1;
+ }
+
+ ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
+
+ pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
+ for (i = 0; i < num_buckets; i++) {
+ if ((rv = ap_mpm_pod_open(pconf, &pod[i]))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not open pipe-of-death");
+ return DONE;
+ }
+ }
+
return OK;
}
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index 408d317650..b90161970d 100644
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -30,6 +30,9 @@
#include "apr_thread_mutex.h"
#include "apr_proc_mutex.h"
#include "apr_poll.h"
+
+#include <stdlib.h>
+
#define APR_WANT_STRFUNC
#include "apr_want.h"
@@ -159,7 +162,7 @@ typedef struct worker_retained_data {
* doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
* without the need to spawn.
*/
- int idle_spawn_rate;
+ int *idle_spawn_rate;
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE (32)
#endif
@@ -188,7 +191,8 @@ typedef struct {
#define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t)
-static ap_pod_t *pod;
+static ap_pod_t **pod;
+static ap_pod_t *child_pod;
/* The worker MPM respects a couple of runtime flags that can aid
* in debugging. Setting the -DNO_DETACH flag will prevent the root process
@@ -218,10 +222,13 @@ static pid_t parent_pid;
static apr_os_thread_t *listener_os_thread;
/* Locks for accept serialization */
-static apr_proc_mutex_t *accept_mutex;
+static apr_proc_mutex_t **accept_mutex;
+static apr_proc_mutex_t *child_mutex;
+static ap_listen_rec *child_listen;
+static int *bucket; /* bucket array for the httpd child processes */
#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
-#define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
+#define SAFE_ACCEPT(stmt) (child_listen->next ? (stmt) : APR_SUCCESS)
#else
#define SAFE_ACCEPT(stmt) (stmt)
#endif
@@ -701,7 +708,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
clean_child_exit(APEXIT_CHILDSICK);
}
- for (lr = ap_listeners; lr != NULL; lr = lr->next) {
+ for (lr = child_listen; lr != NULL; lr = lr->next) {
apr_pollfd_t pfd = { 0 };
pfd.desc_type = APR_POLL_SOCKET;
@@ -758,7 +765,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
/* We've already decremented the idle worker count inside
* ap_queue_info_wait_for_idler. */
- if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(accept_mutex)))
+ if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(child_mutex)))
!= APR_SUCCESS) {
if (!listener_may_exit) {
@@ -767,9 +774,9 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
break; /* skip the lock release */
}
- if (!ap_listeners->next) {
+ if (!child_listen->next) {
/* Only one listener, so skip the poll */
- lr = ap_listeners;
+ lr = child_listen;
}
else {
while (!listener_may_exit) {
@@ -839,7 +846,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
resource_shortage = 1;
signal_threads(ST_GRACEFUL);
}
- if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
+ if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(child_mutex)))
!= APR_SUCCESS) {
if (listener_may_exit) {
@@ -863,7 +870,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
}
}
else {
- if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
+ if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(child_mutex)))
!= APR_SUCCESS) {
int level = APLOG_EMERG;
@@ -1217,6 +1224,8 @@ static void child_main(int child_num_arg)
thread_starter *ts;
apr_threadattr_t *thread_attr;
apr_thread_t *start_thread_id;
+ int i;
+ ap_listen_rec *lr;
mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this
* child initializes
@@ -1225,11 +1234,24 @@ static void child_main(int child_num_arg)
ap_fatal_signal_child_setup(ap_server_conf);
apr_pool_create(&pchild, pconf);
+ /* close unused listeners and pods */
+ for (i = 0; i < num_buckets; i++) {
+ if (i != bucket[child_num_arg]) {
+ lr = mpm_listen[i];
+ while(lr) {
+ apr_socket_close(lr->sd);
+ lr = lr->next;
+ }
+ mpm_listen[i]->active = 0;
+ ap_mpm_podx_close(pod[i]);
+ }
+ }
+
/*stuff to do before we switch id's, so we have permissions.*/
ap_reopen_scoreboard(pchild, NULL, 0);
- rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex,
- apr_proc_mutex_lockfile(accept_mutex),
+ rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&child_mutex,
+ apr_proc_mutex_lockfile(child_mutex),
pchild));
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00280)
@@ -1338,7 +1360,7 @@ static void child_main(int child_num_arg)
apr_signal(SIGTERM, dummy_signal_handler);
/* Watch for any messages from the parent over the POD */
while (1) {
- rv = ap_mpm_podx_check(pod);
+ rv = ap_mpm_podx_check(child_pod);
if (rv == AP_MPM_PODX_NORESTART) {
/* see if termination was triggered while we slept */
switch(terminate_mode) {
@@ -1391,6 +1413,10 @@ static int make_child(server_rec *s, int slot)
/* NOTREACHED */
}
+ child_listen = mpm_listen[bucket[slot]];
+ child_mutex = accept_mutex[bucket[slot]];
+ child_pod = pod[bucket[slot]];
+
if ((pid = fork()) == -1) {
ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00283)
"fork: Unable to fork new process");
@@ -1449,6 +1475,7 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->parent[i].pid != 0) {
continue;
}
+ bucket[i] = i % num_buckets;
if (make_child(ap_server_conf, i) < 0) {
break;
}
@@ -1456,7 +1483,7 @@ static void startup_children(int number_to_start)
}
}
-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(int child_bucket)
{
int i, j;
int idle_thread_count;
@@ -1485,7 +1512,7 @@ static void perform_idle_server_maintenance(void)
int all_dead_threads = 1;
int child_threads_active = 0;
- if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate)
+ if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate[child_bucket])
/* short cut if all active processes have been examined and
* enough empty scoreboard slots have been found
*/
@@ -1513,7 +1540,8 @@ static void perform_idle_server_maintenance(void)
loop if no pid? not much else matters */
if (status <= SERVER_READY &&
!ps->quiescing &&
- ps->generation == retained->my_generation) {
+ ps->generation == retained->my_generation &&
+ bucket[i] == child_bucket) {
++idle_thread_count;
}
if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
@@ -1522,8 +1550,8 @@ static void perform_idle_server_maintenance(void)
}
}
active_thread_count += child_threads_active;
- if (any_dead_threads && totally_free_length < retained->idle_spawn_rate
- && free_length < MAX_SPAWN_RATE
+ if (any_dead_threads && totally_free_length < retained->idle_spawn_rate[child_bucket]
+ && free_length < MAX_SPAWN_RATE/num_buckets
&& (!ps->pid /* no process in the slot */
|| ps->quiescing)) { /* or at least one is going away */
if (all_dead_threads) {
@@ -1579,12 +1607,12 @@ static void perform_idle_server_maintenance(void)
retained->max_daemons_limit = last_non_dead + 1;
- if (idle_thread_count > max_spare_threads) {
+ if (idle_thread_count > max_spare_threads/num_buckets) {
/* Kill off one child */
- ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL);
- retained->idle_spawn_rate = 1;
+ ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL);
+ retained->idle_spawn_rate[child_bucket] = 1;
}
- else if (idle_thread_count < min_spare_threads) {
+ else if (idle_thread_count < min_spare_threads/num_buckets) {
/* terminate the free list */
if (free_length == 0) { /* scoreboard is full, can't fork */
@@ -1615,13 +1643,13 @@ static void perform_idle_server_maintenance(void)
ap_server_conf, APLOGNO(00288)
"scoreboard is full, not at MaxRequestWorkers");
}
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
else {
- if (free_length > retained->idle_spawn_rate) {
- free_length = retained->idle_spawn_rate;
+ if (free_length > retained->idle_spawn_rate[child_bucket]) {
+ free_length = retained->idle_spawn_rate[child_bucket];
}
- if (retained->idle_spawn_rate >= 8) {
+ if (retained->idle_spawn_rate[child_bucket] >= 8) {
ap_log_error(APLOG_MARK, APLOG_INFO, 0,
ap_server_conf, APLOGNO(00289)
"server seems busy, (you may need "
@@ -1632,6 +1660,7 @@ static void perform_idle_server_maintenance(void)
idle_thread_count, total_non_dead);
}
for (i = 0; i < free_length; ++i) {
+ bucket[free_slots[i]] = child_bucket;
make_child(ap_server_conf, free_slots[i]);
}
/* the next time around we want to spawn twice as many if this
@@ -1640,13 +1669,13 @@ static void perform_idle_server_maintenance(void)
if (retained->hold_off_on_exponential_spawning) {
--retained->hold_off_on_exponential_spawning;
}
- else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
- retained->idle_spawn_rate *= 2;
+ else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) {
+ retained->idle_spawn_rate[child_bucket] *= 2;
}
}
}
else {
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
}
@@ -1702,7 +1731,7 @@ static void server_main_loop(int remaining_children_to_start)
ap_scoreboard_image->parent[child_slot].quiescing = 0;
if (processed_status == APEXIT_CHILDSICK) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[bucket[child_slot]] = 1;
}
else if (remaining_children_to_start
&& child_slot < ap_daemons_limit) {
@@ -1719,7 +1748,9 @@ static void server_main_loop(int remaining_children_to_start)
if (processed_status == APEXIT_CHILDSICK
&& old_gen == retained->my_generation) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ for (i = 0; i < num_buckets; i++) {
+ retained->idle_spawn_rate[i] = 1;
+ }
}
#if APR_HAS_OTHER_CHILD
}
@@ -1758,7 +1789,9 @@ static void server_main_loop(int remaining_children_to_start)
continue;
}
- perform_idle_server_maintenance();
+ for (i = 0; i < num_buckets; i++) {
+ perform_idle_server_maintenance(i);
+ }
}
}
@@ -1766,16 +1799,25 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
{
int remaining_children_to_start;
apr_status_t rv;
+ int i;
+ ap_listen_rec *lr;
ap_log_pid(pconf, ap_pid_fname);
+ bucket = apr_palloc(_pconf, sizeof(int) * ap_daemons_limit);
/* Initialize cross-process accept lock */
- rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
- s, _pconf, 0);
- if (rv != APR_SUCCESS) {
- mpm_state = AP_MPMQ_STOPPING;
- return DONE;
+ accept_mutex = apr_palloc(_pconf, sizeof(apr_proc_mutex_t *) * num_buckets);
+ for (i = 0; i < num_buckets; i++) {
+ rv = ap_proc_mutex_create(&accept_mutex[i], NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
+ s, _pconf, 0);
+ if (rv != APR_SUCCESS) {
+ mpm_state = AP_MPMQ_STOPPING;
+ return DONE;
+ }
}
+ for (lr = ap_listeners; lr; lr = lr->next) {
+ apr_socket_close(lr->sd);
+ }
if (!retained->is_graceful) {
if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
@@ -1791,8 +1833,8 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
restart_pending = shutdown_pending = 0;
set_signals();
/* Don't thrash... */
- if (max_spare_threads < min_spare_threads + threads_per_child)
- max_spare_threads = min_spare_threads + threads_per_child;
+ if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
+ max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -1825,7 +1867,7 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_command_line(plog, s);
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00294)
"Accept mutex: %s (default: %s)",
- apr_proc_mutex_name(accept_mutex),
+ apr_proc_mutex_name(accept_mutex[0]),
apr_proc_mutex_defname());
mpm_state = AP_MPMQ_RUNNING;
@@ -1836,7 +1878,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
/* Time to shut down:
* Kill child processes, tell them to call child_exit, etc...
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
worker_note_child_killed);
@@ -1857,7 +1901,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
/* Close our listeners, and then ask our children to do same */
ap_close_listeners();
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ }
ap_relieve_child_processes(worker_note_child_killed);
if (!child_fatal) {
@@ -1897,7 +1943,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
* way, try and make sure that all of our processes are
* really dead.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, worker_note_child_killed);
return DONE;
@@ -1922,8 +1970,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00297)
AP_SIG_GRACEFUL_STRING " received. Doing graceful restart");
/* wake up the children...time to die. But we'll have more soon */
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
-
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ }
/* This is mostly for debugging... so that we know what is still
* gracefully dealing with existing request.
@@ -1935,7 +1984,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
* and a SIGHUP, we may as well use the same signal, because some user
* pthreads are stealing signals from us left and right.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
worker_note_child_killed);
@@ -1954,6 +2005,8 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
int startup = 0;
int level_flags = 0;
apr_status_t rv;
+ int i;
+ int num_of_cores = 0;
pconf = p;
@@ -1963,19 +2016,42 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
level_flags |= APLOG_STARTUP;
}
+ enable_default_listener = 0;
if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
(startup ? NULL : s),
"no listening sockets available, shutting down");
return DONE;
}
+ enable_default_listener = 1;
+ if (have_so_reuseport) {
+#ifdef _SC_NPROCESSORS_ONLN
+ num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+ num_of_cores = 1;
+#endif
+ if (num_of_cores > 8) {
+ num_buckets = num_of_cores/8;
+ }
+ else {
+ num_buckets = 1;
+ }
+ }
+ else {
+ num_buckets = 1;
+ }
+
+ ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
+ pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
if (!one_process) {
- if ((rv = ap_mpm_podx_open(pconf, &pod))) {
- ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
- (startup ? NULL : s),
- "could not open pipe-of-death");
- return DONE;
+ for (i = 0; i < num_buckets; i++) {
+ if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not open pipe-of-death");
+ return DONE;
+ }
}
}
return OK;
@@ -1987,6 +2063,7 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
int no_detach, debug, foreground;
apr_status_t rv;
const char *userdata_key = "mpm_worker_module";
+ int i;
mpm_state = AP_MPMQ_STARTING;
@@ -2009,7 +2086,6 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
if (!retained) {
retained = ap_retained_data_create(userdata_key, sizeof(*retained));
retained->max_daemons_limit = -1;
- retained->idle_spawn_rate = 1;
}
++retained->module_loads;
if (retained->module_loads == 2) {
@@ -2023,6 +2099,10 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
"apr_proc_detach failed");
return HTTP_INTERNAL_SERVER_ERROR;
}
+ retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets);
+ for (i = 0; i< num_buckets; i++) {
+ retained->idle_spawn_rate[i] = 1;
+ }
}
}
diff --git a/server/mpm_unix.c b/server/mpm_unix.c
index 0000cb6672..97e3e65dff 100644
--- a/server/mpm_unix.c
+++ b/server/mpm_unix.c
@@ -615,6 +615,7 @@ static apr_status_t dummy_connection(ap_pod_t *pod)
apr_pool_t *p;
apr_size_t len;
ap_listen_rec *lp;
+ int i;
/* create a temporary pool for the socket. pconf stays around too long */
rv = apr_pool_create(&p, pod->p);
@@ -626,87 +627,89 @@ static apr_status_t dummy_connection(ap_pod_t *pod)
* plain-HTTP, not SSL; using an SSL port would either be
* expensive to do correctly (performing a complete SSL handshake)
* or cause log spam by doing incorrectly (simply sending EOF). */
- lp = ap_listeners;
- while (lp && lp->protocol && strcasecmp(lp->protocol, "http") != 0) {
- lp = lp->next;
- }
- if (!lp) {
- lp = ap_listeners;
- }
+ for (i = 0; i < num_buckets; i++) {
+ lp = mpm_listen[i];
+ while (lp && lp->protocol && strcasecmp(lp->protocol, "http") != 0) {
+ lp = lp->next;
+ }
+ if (!lp) {
+ lp = mpm_listen[i];
+ }
- rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054)
- "get socket to connect to listener");
- apr_pool_destroy(p);
- return rv;
- }
+ rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054)
+ "get socket to connect to listener");
+ apr_pool_destroy(p);
+ return rv;
+ }
- /* on some platforms (e.g., FreeBSD), the kernel won't accept many
- * queued connections before it starts blocking local connects...
- * we need to keep from blocking too long and instead return an error,
- * because the MPM won't want to hold up a graceful restart for a
- * long time
- */
- rv = apr_socket_timeout_set(sock, apr_time_from_sec(3));
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055)
- "set timeout on socket to connect to listener");
- apr_socket_close(sock);
- apr_pool_destroy(p);
- return rv;
- }
+ /* on some platforms (e.g., FreeBSD), the kernel won't accept many
+ * queued connections before it starts blocking local connects...
+ * we need to keep from blocking too long and instead return an error,
+ * because the MPM won't want to hold up a graceful restart for a
+ * long time
+ */
+ rv = apr_socket_timeout_set(sock, apr_time_from_sec(3));
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055)
+ "set timeout on socket to connect to listener");
+ apr_socket_close(sock);
+ apr_pool_destroy(p);
+ return rv;
+ }
- rv = apr_socket_connect(sock, lp->bind_addr);
- if (rv != APR_SUCCESS) {
- int log_level = APLOG_WARNING;
-
- if (APR_STATUS_IS_TIMEUP(rv)) {
- /* probably some server processes bailed out already and there
- * is nobody around to call accept and clear out the kernel
- * connection queue; usually this is not worth logging
- */
- log_level = APLOG_DEBUG;
+ rv = apr_socket_connect(sock, lp->bind_addr);
+ if (rv != APR_SUCCESS) {
+ int log_level = APLOG_WARNING;
+
+ if (APR_STATUS_IS_TIMEUP(rv)) {
+ /* probably some server processes bailed out already and there
+ * is nobody around to call accept and clear out the kernel
+ * connection queue; usually this is not worth logging
+ */
+ log_level = APLOG_DEBUG;
+ }
+
+ ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056)
+ "connect to listener on %pI", lp->bind_addr);
+ apr_pool_destroy(p);
+ return rv;
}
- ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056)
- "connect to listener on %pI", lp->bind_addr);
- apr_pool_destroy(p);
- return rv;
- }
+ if (lp->protocol && strcasecmp(lp->protocol, "https") == 0) {
+ /* Send a TLS 1.0 close_notify alert. This is perhaps the
+ * "least wrong" way to open and cleanly terminate an SSL
+ * connection. It should "work" without noisy error logs if
+ * the server actually expects SSLv3/TLSv1. With
+ * SSLv23_server_method() OpenSSL's SSL_accept() fails
+ * ungracefully on receipt of this message, since it requires
+ * an 11-byte ClientHello message and this is too short. */
+ static const unsigned char tls10_close_notify[7] = {
+ '\x15', /* TLSPlainText.type = Alert (21) */
+ '\x03', '\x01', /* TLSPlainText.version = {3, 1} */
+ '\x00', '\x02', /* TLSPlainText.length = 2 */
+ '\x01', /* Alert.level = warning (1) */
+ '\x00' /* Alert.description = close_notify (0) */
+ };
+ data = (const char *)tls10_close_notify;
+ len = sizeof(tls10_close_notify);
+ }
+ else /* ... XXX other request types here? */ {
+ /* Create an HTTP request string. We include a User-Agent so
+ * that administrators can track down the cause of the
+ * odd-looking requests in their logs. A complete request is
+ * used since kernel-level filtering may require that much
+ * data before returning from accept(). */
+ data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ",
+ ap_get_server_description(),
+ " (internal dummy connection)\r\n\r\n", NULL);
+ len = strlen(data);
+ }
- if (lp->protocol && strcasecmp(lp->protocol, "https") == 0) {
- /* Send a TLS 1.0 close_notify alert. This is perhaps the
- * "least wrong" way to open and cleanly terminate an SSL
- * connection. It should "work" without noisy error logs if
- * the server actually expects SSLv3/TLSv1. With
- * SSLv23_server_method() OpenSSL's SSL_accept() fails
- * ungracefully on receipt of this message, since it requires
- * an 11-byte ClientHello message and this is too short. */
- static const unsigned char tls10_close_notify[7] = {
- '\x15', /* TLSPlainText.type = Alert (21) */
- '\x03', '\x01', /* TLSPlainText.version = {3, 1} */
- '\x00', '\x02', /* TLSPlainText.length = 2 */
- '\x01', /* Alert.level = warning (1) */
- '\x00' /* Alert.description = close_notify (0) */
- };
- data = (const char *)tls10_close_notify;
- len = sizeof(tls10_close_notify);
- }
- else /* ... XXX other request types here? */ {
- /* Create an HTTP request string. We include a User-Agent so
- * that adminstrators can track down the cause of the
- * odd-looking requests in their logs. A complete request is
- * used since kernel-level filtering may require that much
- * data before returning from accept(). */
- data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ",
- ap_get_server_description(),
- " (internal dummy connection)\r\n\r\n", NULL);
- len = strlen(data);
+ apr_socket_send(sock, data, &len);
+ apr_socket_close(sock);
}
-
- apr_socket_send(sock, data, &len);
- apr_socket_close(sock);
apr_pool_destroy(p);
return rv;