diff options
author | Jim Jagielski <jim@apache.org> | 2014-06-03 15:07:29 +0200 |
---|---|---|
committer | Jim Jagielski <jim@apache.org> | 2014-06-03 15:07:29 +0200 |
commit | c539206da2d9e8d5d964e84c7497886c74f1a7f9 (patch) | |
tree | bf2d70a0adc86d730f516e93485c6ad913c03783 | |
parent | mod_proxy_http: avoid (unlikely) access to freed memory. (diff) | |
download | apache2-c539206da2d9e8d5d964e84c7497886c74f1a7f9.tar.xz apache2-c539206da2d9e8d5d964e84c7497886c74f1a7f9.zip |
Optimize w/ duplicated listeners and use of SO_REUSEPORT
where available.
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1599531 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | CHANGES | 4 | ||||
-rw-r--r-- | include/ap_listen.h | 16 | ||||
-rw-r--r-- | server/listen.c | 95 | ||||
-rw-r--r-- | server/mpm/event/event.c | 149 | ||||
-rw-r--r-- | server/mpm/prefork/prefork.c | 127 | ||||
-rw-r--r-- | server/mpm/worker/worker.c | 184 | ||||
-rw-r--r-- | server/mpm_unix.c | 151 |
7 files changed, 528 insertions, 198 deletions
@@ -1,6 +1,10 @@ -*- coding: utf-8 -*- Changes with Apache 2.5.0 + *) MPMs: Support SO_REUSEPORT to create multiple duplicated listener + records for scalability. [Yingqi Lu <yingqi.lu@intel.com>, + Jeff Trawick, Jim Jagielski] + *) mod_proxy_html: support automatic detection of doctype and processing of FPIs. PR56285 [Micha Lenk <micha lenk info>, Nick Kew] diff --git a/include/ap_listen.h b/include/ap_listen.h index 21101cd8d3..4538ef9648 100644 --- a/include/ap_listen.h +++ b/include/ap_listen.h @@ -78,6 +78,14 @@ struct ap_listen_rec { */ AP_DECLARE_DATA extern ap_listen_rec *ap_listeners; +AP_DECLARE_DATA extern ap_listen_rec **mpm_listen; + +AP_DECLARE_DATA extern int enable_default_listener; + +AP_DECLARE_DATA extern int num_buckets; + +AP_DECLARE_DATA extern int have_so_reuseport; + /** * Setup all of the defaults for the listener list */ @@ -91,6 +99,14 @@ AP_DECLARE(void) ap_listen_pre_config(void); */ AP_DECLARE(int) ap_setup_listeners(server_rec *s); +/**This function duplicates ap_listeners. + * @param s The global server_rec + * @param p The config pool + * @param num_buckets The total number of listener buckets. +**/ +AP_DECLARE(apr_status_t) ap_duplicate_listeners(server_rec *s, apr_pool_t *p, int num_buckets); + + /** * Loop through the global ap_listen_rec list and close each of the sockets. */ diff --git a/server/listen.c b/server/listen.c index f9c4266d3a..fb404d9fd6 100644 --- a/server/listen.c +++ b/server/listen.c @@ -38,6 +38,11 @@ AP_DECLARE_DATA ap_listen_rec *ap_listeners = NULL; +AP_DECLARE_DATA ap_listen_rec **mpm_listen = NULL; +AP_DECLARE_DATA int enable_default_listener = 1; +AP_DECLARE_DATA int num_buckets = 1; +AP_DECLARE_DATA int have_so_reuseport = 1; + static ap_listen_rec *old_listeners; static int ap_listenbacklog; static int send_buffer_size; @@ -124,6 +129,24 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server, int do_bind_ ap_sock_disable_nagle(s); #endif +#ifndef SO_REUSEPORT +#define SO_REUSEPORT 15 +#endif + int thesock; + apr_os_sock_get(&thesock, s); + if (setsockopt(thesock, SOL_SOCKET, SO_REUSEPORT, (void *)&one, sizeof(int)) < 0) { + if (errno == ENOPROTOOPT) { + have_so_reuseport = 0; + } /* Check if SO_REUSEPORT is supported by the running Linux Kernel.*/ + else { + ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO() + "make_sock: for address %pI, apr_socket_opt_set: (SO_REUSEPORT)", + server->bind_addr); + apr_socket_close(s); + return errno; + } + } + if (do_bind_listen) { #if APR_HAVE_IPV6 if (server->bind_addr->family == APR_INET6) { @@ -179,7 +202,7 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server, int do_bind_ #endif server->sd = s; - server->active = 1; + server->active = enable_default_listener; server->accept_func = NULL; @@ -575,7 +598,7 @@ static int open_listeners(apr_pool_t *pool) } } #endif - if (make_sock(pool, lr, 1) == APR_SUCCESS) { + if (make_sock(pool, lr, enable_default_listener) == APR_SUCCESS) { ++num_open; } else { @@ -727,13 +750,73 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s) return num_listeners; } +AP_DECLARE(apr_status_t) ap_duplicate_listeners(server_rec *s, apr_pool_t *p, + int num_buckets) { + int i; + apr_status_t stat; + int use_nonblock = 0; + ap_listen_rec *lr; + + mpm_listen = apr_palloc(p, sizeof(ap_listen_rec*) * num_buckets); + for (i = 0; i < num_buckets; i++) { + lr = ap_listeners; + ap_listen_rec *last = NULL; + while (lr) { + ap_listen_rec *duplr; + char *hostname; + apr_port_t port; + apr_sockaddr_t *sa; + duplr = apr_palloc(p, sizeof(ap_listen_rec)); + duplr->slave = NULL; + duplr->protocol = apr_pstrdup(p, lr->protocol); + hostname = apr_pstrdup(p, lr->bind_addr->hostname); + port = lr->bind_addr->port; + apr_sockaddr_info_get(&sa, hostname, APR_UNSPEC, port, 0, p); + duplr->bind_addr = sa; + duplr->next = NULL; + apr_socket_t *temps = duplr->sd; + if ((stat = apr_socket_create(&duplr->sd, duplr->bind_addr->family, + SOCK_STREAM, 0, p)) != APR_SUCCESS) { + ap_log_perror(APLOG_MARK, APLOG_CRIT, 0, p, APLOGNO() + "ap_duplicate_socket: for address %pI, " + "cannot duplicate a new socket!", + duplr->bind_addr); + return stat; + } + make_sock(p, duplr, 1); +#if AP_NONBLOCK_WHEN_MULTI_LISTEN + use_nonblock = (ap_listeners && ap_listeners->next); + if ((stat = apr_socket_opt_set(duplr->sd, APR_SO_NONBLOCK, use_nonblock)) + != APR_SUCCESS) { + ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO() + "unable to control socket non-blocking status"); + return stat; + } +#endif + ap_apply_accept_filter(p, duplr, s); + + if (last == NULL) { + mpm_listen[i] = last = duplr; + } + else { + last->next = duplr; + last = duplr; + } + lr = lr->next; + } + } + return APR_SUCCESS; +} + AP_DECLARE_NONSTD(void) ap_close_listeners(void) { ap_listen_rec *lr; - - for (lr = ap_listeners; lr; lr = lr->next) { - apr_socket_close(lr->sd); - lr->active = 0; + int i; + for (i = 0; i < num_buckets; i++) { + for (lr = mpm_listen[i]; lr; lr = lr->next) { + apr_socket_close(lr->sd); + lr->active = 0; + } } } diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c index ae247cd13c..779499a40e 100644 --- a/server/mpm/event/event.c +++ b/server/mpm/event/event.c @@ -59,6 +59,8 @@ #include "apr_want.h" #include "apr_version.h" +#include <stdlib.h> + #if APR_HAVE_UNISTD_H #include <unistd.h> #endif @@ -349,7 +351,7 @@ typedef struct event_retained_data { * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by * without the need to spawn. */ - int idle_spawn_rate; + int *idle_spawn_rate; #ifndef MAX_SPAWN_RATE #define MAX_SPAWN_RATE (32) #endif @@ -359,7 +361,10 @@ static event_retained_data *retained; #define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t) -static ap_pod_t *pod; +static ap_pod_t **pod; +static ap_pod_t *child_pod; +static ap_listen_rec *child_listen; +static int *bucket; /* bucket array for the httpd child processes */ /* The event MPM respects a couple of runtime flags that can aid * in debugging. Setting the -DNO_DETACH flag will prevent the root process @@ -1292,7 +1297,7 @@ static apr_status_t init_pollset(apr_pool_t *p) TO_QUEUE_INIT(short_linger_q); listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks); - for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) { + for (lr = child_listen; lr != NULL; lr = lr->next, i++) { apr_pollfd_t *pfd; AP_DEBUG_ASSERT(i < num_listensocks); pfd = &listener_pollfd[i]; @@ -2421,6 +2426,8 @@ static void child_main(int child_num_arg) apr_threadattr_t *thread_attr; apr_thread_t *start_thread_id; apr_pool_t *pskip; + int i; + ap_listen_rec *lr; mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this * child initializes @@ -2429,6 +2436,19 @@ static void child_main(int child_num_arg) ap_fatal_signal_child_setup(ap_server_conf); apr_pool_create(&pchild, pconf); + /* close unused listeners and pods */ + for (i = 0; i < num_buckets; i++) { + if (i != bucket[child_num_arg]) { + lr = mpm_listen[i]; + while(lr) { + apr_socket_close(lr->sd); + lr = lr->next; + } + mpm_listen[i]->active = 0; + ap_mpm_podx_close(pod[i]); + } + } + /*stuff to do before we switch id's, so we have permissions. */ ap_reopen_scoreboard(pchild, NULL, 0); @@ -2539,7 +2559,7 @@ static void child_main(int child_num_arg) apr_signal(SIGTERM, dummy_signal_handler); /* Watch for any messages from the parent over the POD */ while (1) { - rv = ap_mpm_podx_check(pod); + rv = ap_mpm_podx_check(child_pod); if (rv == AP_MPM_PODX_NORESTART) { /* see if termination was triggered while we slept */ switch (terminate_mode) { @@ -2592,6 +2612,9 @@ static int make_child(server_rec * s, int slot) /* NOTREACHED */ } + child_listen = mpm_listen[bucket[slot]]; + child_pod = pod[bucket[slot]]; + if ((pid = fork()) == -1) { ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481) "fork: Unable to fork new process"); @@ -2652,6 +2675,7 @@ static void startup_children(int number_to_start) if (ap_scoreboard_image->parent[i].pid != 0) { continue; } + bucket[i] = i % num_buckets; if (make_child(ap_server_conf, i) < 0) { break; } @@ -2659,7 +2683,7 @@ static void startup_children(int number_to_start) } } -static void perform_idle_server_maintenance(void) +static void perform_idle_server_maintenance(int child_bucket) { int i, j; int idle_thread_count; @@ -2689,7 +2713,7 @@ static void perform_idle_server_maintenance(void) int child_threads_active = 0; if (i >= retained->max_daemons_limit - && totally_free_length == retained->idle_spawn_rate) + && totally_free_length == retained->idle_spawn_rate[child_bucket]) /* short cut if all active processes have been examined and * enough empty scoreboard slots have been found */ @@ -2716,7 +2740,8 @@ static void perform_idle_server_maintenance(void) if (ps->pid != 0) { /* XXX just set all_dead_threads in outer for loop if no pid? not much else matters */ if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting - && ps->generation == retained->my_generation) + && ps->generation == retained->my_generation && + bucket[i] == child_bucket) { ++idle_thread_count; } @@ -2727,8 +2752,8 @@ static void perform_idle_server_maintenance(void) } active_thread_count += child_threads_active; if (any_dead_threads - && totally_free_length < retained->idle_spawn_rate - && free_length < MAX_SPAWN_RATE + && totally_free_length < retained->idle_spawn_rate[child_bucket] + && free_length < MAX_SPAWN_RATE/num_buckets && (!ps->pid /* no process in the slot */ || ps->quiescing)) { /* or at least one is going away */ if (all_dead_threads) { @@ -2784,12 +2809,12 @@ static void perform_idle_server_maintenance(void) retained->max_daemons_limit = last_non_dead + 1; - if (idle_thread_count > max_spare_threads) { + if (idle_thread_count > max_spare_threads/num_buckets) { /* Kill off one child */ - ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL); - retained->idle_spawn_rate = 1; + ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL); + retained->idle_spawn_rate[child_bucket] = 1; } - else if (idle_thread_count < min_spare_threads) { + else if (idle_thread_count < min_spare_threads/num_buckets) { /* terminate the free list */ if (free_length == 0) { /* scoreboard is full, can't fork */ @@ -2807,13 +2832,13 @@ static void perform_idle_server_maintenance(void) ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485) "scoreboard is full, not at MaxRequestWorkers"); } - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } else { - if (free_length > retained->idle_spawn_rate) { - free_length = retained->idle_spawn_rate; + if (free_length > retained->idle_spawn_rate[child_bucket]) { + free_length = retained->idle_spawn_rate[child_bucket]; } - if (retained->idle_spawn_rate >= 8) { + if (retained->idle_spawn_rate[child_bucket] >= 8) { ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486) "server seems busy, (you may need " "to increase StartServers, ThreadsPerChild " @@ -2823,6 +2848,7 @@ static void perform_idle_server_maintenance(void) idle_thread_count, total_non_dead); } for (i = 0; i < free_length; ++i) { + bucket[free_slots[i]] = child_bucket; make_child(ap_server_conf, free_slots[i]); } /* the next time around we want to spawn twice as many if this @@ -2831,13 +2857,13 @@ static void perform_idle_server_maintenance(void) if (retained->hold_off_on_exponential_spawning) { --retained->hold_off_on_exponential_spawning; } - else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) { - retained->idle_spawn_rate *= 2; + else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) { + retained->idle_spawn_rate[child_bucket] *= 2; } } } else { - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } } @@ -2894,7 +2920,7 @@ static void server_main_loop(int remaining_children_to_start) ap_scoreboard_image->parent[child_slot].quiescing = 0; if (processed_status == APEXIT_CHILDSICK) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[bucket[child_slot]] = 1; } else if (remaining_children_to_start && child_slot < ap_daemons_limit) { @@ -2912,7 +2938,9 @@ static void server_main_loop(int remaining_children_to_start) if (processed_status == APEXIT_CHILDSICK && old_gen == retained->my_generation) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + for (i = 0; i < num_buckets; i++) { + retained->idle_spawn_rate[i] = 1; + } } #if APR_HAS_OTHER_CHILD } @@ -2951,7 +2979,9 @@ static void server_main_loop(int remaining_children_to_start) continue; } - perform_idle_server_maintenance(); + for (i = 0; i < num_buckets; i++) { + perform_idle_server_maintenance(i); + } } } @@ -2959,6 +2989,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) { int remaining_children_to_start; + int i; + ap_log_pid(pconf, ap_pid_fname); if (!retained->is_graceful) { @@ -2972,11 +3004,13 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) ap_scoreboard_image->global->running_generation = retained->my_generation; } + bucket = apr_palloc(_pconf, sizeof(int) * ap_daemons_limit); + restart_pending = shutdown_pending = 0; set_signals(); /* Don't thrash... */ - if (max_spare_threads < min_spare_threads + threads_per_child) - max_spare_threads = min_spare_threads + threads_per_child; + if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets) + max_spare_threads = min_spare_threads + threads_per_child * num_buckets; /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop @@ -3017,7 +3051,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) /* Time to shut down: * Kill child processes, tell them to call child_exit, etc... */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ event_note_child_killed); @@ -3038,7 +3074,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) /* Close our listeners, and then ask our children to do same */ ap_close_listeners(); - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + } ap_relieve_child_processes(event_note_child_killed); if (!child_fatal) { @@ -3078,7 +3116,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * way, try and make sure that all of our processes are * really dead. */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, event_note_child_killed); return DONE; @@ -3104,8 +3144,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) AP_SIG_GRACEFUL_STRING " received. Doing graceful restart"); /* wake up the children...time to die. But we'll have more soon */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); - + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + } /* This is mostly for debugging... so that we know what is still * gracefully dealing with existing request. @@ -3117,7 +3158,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * and a SIGHUP, we may as well use the same signal, because some user * pthreads are stealing signals from us left and right. */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ event_note_child_killed); @@ -3137,6 +3180,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog, int startup = 0; int level_flags = 0; apr_status_t rv; + int i; + int num_of_cores = 0; pconf = p; @@ -3146,6 +3191,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog, level_flags |= APLOG_STARTUP; } + enable_default_listener = 0; + if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) { ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0, (startup ? NULL : s), @@ -3153,12 +3200,36 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog, return DONE; } + enable_default_listener = 1; + if (have_so_reuseport) { +#ifdef _SC_NPROCESSORS_ONLN + num_of_cores = sysconf(_SC_NPROCESSORS_ONLN); +#else + num_of_cores = 1; +#endif + if (num_of_cores > 8) { + num_buckets = num_of_cores/8; + } + else { + num_buckets = 1; + } + } + else { + num_buckets = 1; + } + + ap_duplicate_listeners(ap_server_conf, pconf, num_buckets); + + pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets); + if (!one_process) { - if ((rv = ap_mpm_podx_open(pconf, &pod))) { - ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, - (startup ? NULL : s), - "could not open pipe-of-death"); - return DONE; + for (i = 0; i < num_buckets; i++) { + if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? NULL : s), + "could not open pipe-of-death"); + return DONE; + } } } /* for skiplist */ @@ -3172,6 +3243,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, int no_detach, debug, foreground; apr_status_t rv; const char *userdata_key = "mpm_event_module"; + int i; mpm_state = AP_MPMQ_STARTING; @@ -3192,7 +3264,6 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->max_daemons_limit = -1; - retained->idle_spawn_rate = 1; } ++retained->module_loads; if (retained->module_loads == 2) { @@ -3206,6 +3277,10 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, "atomics not working as expected - add32 of negative number"); return HTTP_INTERNAL_SERVER_ERROR; } + retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets); + for (i = 0; i< num_buckets; i++) { + retained->idle_spawn_rate[i] = 1; + } rv = apr_pollset_create(&event_pollset, 1, plog, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY); if (rv != APR_SUCCESS) { diff --git a/server/mpm/prefork/prefork.c b/server/mpm/prefork/prefork.c index ae0fd37461..05a782e69a 100644 --- a/server/mpm/prefork/prefork.c +++ b/server/mpm/prefork/prefork.c @@ -48,6 +48,8 @@ #include "ap_mmn.h" #include "apr_poll.h" +#include <stdlib.h> + #ifdef HAVE_TIME_H #include <time.h> #endif @@ -86,14 +88,19 @@ /* config globals */ -static apr_proc_mutex_t *accept_mutex; +static apr_proc_mutex_t **accept_mutex; static int ap_daemons_to_start=0; static int ap_daemons_min_free=0; static int ap_daemons_max_free=0; static int ap_daemons_limit=0; /* MaxRequestWorkers */ static int server_limit = 0; static int mpm_state = AP_MPMQ_STARTING; -static ap_pod_t *pod; +static ap_pod_t **pod; +static ap_pod_t *child_pod; +static apr_proc_mutex_t *child_mutex; +static ap_listen_rec *child_listen; +static int *bucket; /* bucket array for the httpd child processes */ + /* data retained by prefork across load/unload of the module * allocated on first call to pre-config hook; located on @@ -222,14 +229,14 @@ static void clean_child_exit(int code) prefork_note_child_killed(/* slot */ 0, 0, 0); } - ap_mpm_pod_close(pod); + ap_mpm_pod_close(child_pod); chdir_for_gprof(); exit(code); } static void accept_mutex_on(void) { - apr_status_t rv = apr_proc_mutex_lock(accept_mutex); + apr_status_t rv = apr_proc_mutex_lock(child_mutex); if (rv != APR_SUCCESS) { const char *msg = "couldn't grab the accept mutex"; @@ -247,7 +254,7 @@ static void accept_mutex_on(void) static void accept_mutex_off(void) { - apr_status_t rv = apr_proc_mutex_unlock(accept_mutex); + apr_status_t rv = apr_proc_mutex_unlock(child_mutex); if (rv != APR_SUCCESS) { const char *msg = "couldn't release the accept mutex"; @@ -272,7 +279,7 @@ static void accept_mutex_off(void) * when it's safe in the single Listen case. */ #ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT -#define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0) +#define SAFE_ACCEPT(stmt) do {if (child_listen->next) {stmt;}} while(0) #else #define SAFE_ACCEPT(stmt) do {stmt;} while(0) #endif @@ -521,10 +528,23 @@ static void child_main(int child_num_arg) apr_pool_create(&ptrans, pchild); apr_pool_tag(ptrans, "transaction"); +/* close unused listeners and pods */ + for (i = 0; i < num_buckets; i++) { + if (i != bucket[my_child_num]) { + lr = mpm_listen[i]; + while(lr) { + apr_socket_close(lr->sd); + lr = lr->next; + } + mpm_listen[i]->active = 0; + ap_mpm_pod_close(pod[i]); + } + } + /* needs to be done before we switch UIDs so we have permissions */ ap_reopen_scoreboard(pchild, NULL, 0); - lockfile = apr_proc_mutex_lockfile(accept_mutex); - status = apr_proc_mutex_child_init(&accept_mutex, + lockfile = apr_proc_mutex_lockfile(child_mutex); + status = apr_proc_mutex_child_init(&child_mutex, lockfile, pchild); if (status != APR_SUCCESS) { @@ -532,7 +552,7 @@ static void child_main(int child_num_arg) "Couldn't initialize cross-process lock in child " "(%s) (%s)", lockfile ? lockfile : "none", - apr_proc_mutex_name(accept_mutex)); + apr_proc_mutex_name(child_mutex)); clean_child_exit(APEXIT_CHILDFATAL); } @@ -554,7 +574,7 @@ static void child_main(int child_num_arg) clean_child_exit(APEXIT_CHILDSICK); /* assume temporary resource issue */ } - for (lr = ap_listeners, i = num_listensocks; i--; lr = lr->next) { + for (lr = child_listen, i = num_listensocks; i--; lr = lr->next) { apr_pollfd_t pfd = { 0 }; pfd.desc_type = APR_POLL_SOCKET; @@ -612,7 +632,7 @@ static void child_main(int child_num_arg) if (num_listensocks == 1) { /* There is only one listener record, so refer to that one. */ - lr = ap_listeners; + lr = child_listen; } else { /* multiple listening sockets - need to poll */ @@ -710,7 +730,7 @@ static void child_main(int child_num_arg) * while we were processing the connection or we are the lucky * idle server process that gets to die. */ - if (ap_mpm_pod_check(pod) == APR_SUCCESS) { /* selected as idle? */ + if (ap_mpm_pod_check(child_pod) == APR_SUCCESS) { /* selected as idle? */ die_now = 1; } else if (retained->my_generation != @@ -750,6 +770,9 @@ static int make_child(server_rec *s, int slot) (void) ap_update_child_status_from_indexes(slot, 0, SERVER_STARTING, (request_rec *) NULL); + child_listen = mpm_listen[bucket[slot]]; + child_mutex = accept_mutex[bucket[slot]]; + child_pod = pod[bucket[slot]]; #ifdef _OSD_POSIX /* BS2000 requires a "special" version of fork() before a setuid() call */ @@ -815,6 +838,7 @@ static void startup_children(int number_to_start) if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) { continue; } + bucket[i] = i % num_buckets; if (make_child(ap_server_conf, i) < 0) { break; } @@ -822,6 +846,8 @@ static void startup_children(int number_to_start) } } +static int bucket_make_child_record = -1; +static int bucket_kill_child_record = -1; static void perform_idle_server_maintenance(apr_pool_t *p) { int i; @@ -874,7 +900,8 @@ static void perform_idle_server_maintenance(apr_pool_t *p) * shut down gracefully, in case it happened to pick up a request * while we were counting */ - ap_mpm_pod_signal(pod); + bucket_kill_child_record = (bucket_kill_child_record + 1) % num_buckets; + ap_mpm_pod_signal(pod[bucket_kill_child_record]); retained->idle_spawn_rate = 1; } else if (idle_count < ap_daemons_min_free) { @@ -899,6 +926,7 @@ static void perform_idle_server_maintenance(apr_pool_t *p) idle_count, total_non_dead); } for (i = 0; i < free_length; ++i) { + bucket[free_slots[i]]= (++bucket_make_child_record) % num_buckets; make_child(ap_server_conf, free_slots[i]); } /* the next time around we want to spawn twice as many if this @@ -926,15 +954,24 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) int index; int remaining_children_to_start; apr_status_t rv; + int i; + ap_listen_rec *lr; ap_log_pid(pconf, ap_pid_fname); - /* Initialize cross-process accept lock */ - rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL, - s, _pconf, 0); - if (rv != APR_SUCCESS) { - mpm_state = AP_MPMQ_STOPPING; - return DONE; + bucket = apr_palloc(_pconf, sizeof(int) * ap_daemons_limit); + /* Initialize cross-process accept lock for each bucket*/ + accept_mutex = apr_palloc(_pconf, sizeof(apr_proc_mutex_t *) * num_buckets); + for (i = 0; i < num_buckets; i++) { + rv = ap_proc_mutex_create(&accept_mutex[i], NULL, AP_ACCEPT_MUTEX_TYPE, NULL, + s, _pconf, 0); + if (rv != APR_SUCCESS) { + mpm_state = AP_MPMQ_STOPPING; + return DONE; + } + } + for (lr = ap_listeners; lr; lr = lr->next) { + apr_socket_close(lr->sd); } if (!retained->is_graceful) { @@ -953,12 +990,13 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) if (one_process) { AP_MONCONTROL(1); + bucket[0] = 0; make_child(ap_server_conf, 0); /* NOTREACHED */ } else { - if (ap_daemons_max_free < ap_daemons_min_free + 1) /* Don't thrash... */ - ap_daemons_max_free = ap_daemons_min_free + 1; + if (ap_daemons_max_free < ap_daemons_min_free + num_buckets) /* Don't thrash... */ + ap_daemons_max_free = ap_daemons_min_free + num_buckets; /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop @@ -991,7 +1029,7 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_command_line(plog, s); ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00165) "Accept mutex: %s (default: %s)", - apr_proc_mutex_name(accept_mutex), + apr_proc_mutex_name(accept_mutex[0]), apr_proc_mutex_defname()); mpm_state = AP_MPMQ_RUNNING; @@ -1122,7 +1160,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_close_listeners(); /* kill off the idle ones */ - ap_mpm_pod_killpg(pod, retained->max_daemons_limit); + for (i = 0; i < num_buckets; i++) { + ap_mpm_pod_killpg(pod[i], retained->max_daemons_limit); + } /* Send SIGUSR1 to the active children */ active_children = 0; @@ -1196,7 +1236,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) "Graceful restart requested, doing restart"); /* kill off the idle ones */ - ap_mpm_pod_killpg(pod, retained->max_daemons_limit); + for (i = 0; i < num_buckets; i++) { + ap_mpm_pod_killpg(pod[i], retained->max_daemons_limit); + } /* This is mostly for debugging... so that we know what is still * gracefully dealing with existing request. This will break @@ -1239,6 +1281,8 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, int startup = 0; int level_flags = 0; apr_status_t rv; + int i; + int num_of_cores = 0; pconf = p; @@ -1248,6 +1292,7 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, level_flags |= APLOG_STARTUP; } + enable_default_listener = 0; if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) { ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0, (startup ? NULL : s), @@ -1255,12 +1300,36 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, return DONE; } - if ((rv = ap_mpm_pod_open(pconf, &pod))) { - ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, - (startup ? NULL : s), - "could not open pipe-of-death"); - return DONE; + enable_default_listener = 1; + if (have_so_reuseport) { +#ifdef _SC_NPROCESSORS_ONLN + num_of_cores = sysconf(_SC_NPROCESSORS_ONLN); +#else + num_of_cores = 1; +#endif + if (num_of_cores > 8) { + num_buckets = num_of_cores/8; + } + else { + num_buckets = 1; + } } + else { + num_buckets = 1; + } + + ap_duplicate_listeners(ap_server_conf, pconf, num_buckets); + + pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets); + for (i = 0; i < num_buckets; i++) { + if ((rv = ap_mpm_pod_open(pconf, &pod[i]))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? NULL : s), + "could not open pipe-of-death"); + return DONE; + } + } + return OK; } diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c index 408d317650..b90161970d 100644 --- a/server/mpm/worker/worker.c +++ b/server/mpm/worker/worker.c @@ -30,6 +30,9 @@ #include "apr_thread_mutex.h" #include "apr_proc_mutex.h" #include "apr_poll.h" + +#include <stdlib.h> + #define APR_WANT_STRFUNC #include "apr_want.h" @@ -159,7 +162,7 @@ typedef struct worker_retained_data { * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by * without the need to spawn. */ - int idle_spawn_rate; + int *idle_spawn_rate; #ifndef MAX_SPAWN_RATE #define MAX_SPAWN_RATE (32) #endif @@ -188,7 +191,8 @@ typedef struct { #define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t) -static ap_pod_t *pod; +static ap_pod_t **pod; +static ap_pod_t *child_pod; /* The worker MPM respects a couple of runtime flags that can aid * in debugging. Setting the -DNO_DETACH flag will prevent the root process @@ -218,10 +222,13 @@ static pid_t parent_pid; static apr_os_thread_t *listener_os_thread; /* Locks for accept serialization */ -static apr_proc_mutex_t *accept_mutex; +static apr_proc_mutex_t **accept_mutex; +static apr_proc_mutex_t *child_mutex; +static ap_listen_rec *child_listen; +static int *bucket; /* bucket array for the httpd child processes */ #ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT -#define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS) +#define SAFE_ACCEPT(stmt) (child_listen->next ? (stmt) : APR_SUCCESS) #else #define SAFE_ACCEPT(stmt) (stmt) #endif @@ -701,7 +708,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) clean_child_exit(APEXIT_CHILDSICK); } - for (lr = ap_listeners; lr != NULL; lr = lr->next) { + for (lr = child_listen; lr != NULL; lr = lr->next) { apr_pollfd_t pfd = { 0 }; pfd.desc_type = APR_POLL_SOCKET; @@ -758,7 +765,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) /* We've already decremented the idle worker count inside * ap_queue_info_wait_for_idler. */ - if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(accept_mutex))) + if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(child_mutex))) != APR_SUCCESS) { if (!listener_may_exit) { @@ -767,9 +774,9 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) break; /* skip the lock release */ } - if (!ap_listeners->next) { + if (!child_listen->next) { /* Only one listener, so skip the poll */ - lr = ap_listeners; + lr = child_listen; } else { while (!listener_may_exit) { @@ -839,7 +846,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) resource_shortage = 1; signal_threads(ST_GRACEFUL); } - if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex))) + if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(child_mutex))) != APR_SUCCESS) { if (listener_may_exit) { @@ -863,7 +870,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) } } else { - if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex))) + if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(child_mutex))) != APR_SUCCESS) { int level = APLOG_EMERG; @@ -1217,6 +1224,8 @@ static void child_main(int child_num_arg) thread_starter *ts; apr_threadattr_t *thread_attr; apr_thread_t *start_thread_id; + int i; + ap_listen_rec *lr; mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this * child initializes @@ -1225,11 +1234,24 @@ static void child_main(int child_num_arg) ap_fatal_signal_child_setup(ap_server_conf); apr_pool_create(&pchild, pconf); + /* close unused listeners and pods */ + for (i = 0; i < num_buckets; i++) { + if (i != bucket[child_num_arg]) { + lr = mpm_listen[i]; + while(lr) { + apr_socket_close(lr->sd); + lr = lr->next; + } + mpm_listen[i]->active = 0; + ap_mpm_podx_close(pod[i]); + } + } + /*stuff to do before we switch id's, so we have permissions.*/ ap_reopen_scoreboard(pchild, NULL, 0); - rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex, - apr_proc_mutex_lockfile(accept_mutex), + rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&child_mutex, + apr_proc_mutex_lockfile(child_mutex), pchild)); if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00280) @@ -1338,7 +1360,7 @@ static void child_main(int child_num_arg) apr_signal(SIGTERM, dummy_signal_handler); /* Watch for any messages from the parent over the POD */ while (1) { - rv = ap_mpm_podx_check(pod); + rv = ap_mpm_podx_check(child_pod); if (rv == AP_MPM_PODX_NORESTART) { /* see if termination was triggered while we slept */ switch(terminate_mode) { @@ -1391,6 +1413,10 @@ static int make_child(server_rec *s, int slot) /* NOTREACHED */ } + child_listen = mpm_listen[bucket[slot]]; + child_mutex = accept_mutex[bucket[slot]]; + child_pod = pod[bucket[slot]]; + if ((pid = fork()) == -1) { ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00283) "fork: Unable to fork new process"); @@ -1449,6 +1475,7 @@ static void startup_children(int number_to_start) if (ap_scoreboard_image->parent[i].pid != 0) { continue; } + bucket[i] = i % num_buckets; if (make_child(ap_server_conf, i) < 0) { break; } @@ -1456,7 +1483,7 @@ static void startup_children(int number_to_start) } } -static void perform_idle_server_maintenance(void) +static void perform_idle_server_maintenance(int child_bucket) { int i, j; int idle_thread_count; @@ -1485,7 +1512,7 @@ static void perform_idle_server_maintenance(void) int all_dead_threads = 1; int child_threads_active = 0; - if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate) + if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate[child_bucket]) /* short cut if all active processes have been examined and * enough empty scoreboard slots have been found */ @@ -1513,7 +1540,8 @@ static void perform_idle_server_maintenance(void) loop if no pid? not much else matters */ if (status <= SERVER_READY && !ps->quiescing && - ps->generation == retained->my_generation) { + ps->generation == retained->my_generation && + bucket[i] == child_bucket) { ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { @@ -1522,8 +1550,8 @@ static void perform_idle_server_maintenance(void) } } active_thread_count += child_threads_active; - if (any_dead_threads && totally_free_length < retained->idle_spawn_rate - && free_length < MAX_SPAWN_RATE + if (any_dead_threads && totally_free_length < retained->idle_spawn_rate[child_bucket] + && free_length < MAX_SPAWN_RATE/num_buckets && (!ps->pid /* no process in the slot */ || ps->quiescing)) { /* or at least one is going away */ if (all_dead_threads) { @@ -1579,12 +1607,12 @@ static void perform_idle_server_maintenance(void) retained->max_daemons_limit = last_non_dead + 1; - if (idle_thread_count > max_spare_threads) { + if (idle_thread_count > max_spare_threads/num_buckets) { /* Kill off one child */ - ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL); - retained->idle_spawn_rate = 1; + ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL); + retained->idle_spawn_rate[child_bucket] = 1; } - else if (idle_thread_count < min_spare_threads) { + else if (idle_thread_count < min_spare_threads/num_buckets) { /* terminate the free list */ if (free_length == 0) { /* scoreboard is full, can't fork */ @@ -1615,13 +1643,13 @@ static void perform_idle_server_maintenance(void) ap_server_conf, APLOGNO(00288) "scoreboard is full, not at MaxRequestWorkers"); } - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } else { - if (free_length > retained->idle_spawn_rate) { - free_length = retained->idle_spawn_rate; + if (free_length > retained->idle_spawn_rate[child_bucket]) { + free_length = retained->idle_spawn_rate[child_bucket]; } - if (retained->idle_spawn_rate >= 8) { + if (retained->idle_spawn_rate[child_bucket] >= 8) { ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00289) "server seems busy, (you may need " @@ -1632,6 +1660,7 @@ static void perform_idle_server_maintenance(void) idle_thread_count, total_non_dead); } for (i = 0; i < free_length; ++i) { + bucket[free_slots[i]] = child_bucket; make_child(ap_server_conf, free_slots[i]); } /* the next time around we want to spawn twice as many if this @@ -1640,13 +1669,13 @@ static void perform_idle_server_maintenance(void) if (retained->hold_off_on_exponential_spawning) { --retained->hold_off_on_exponential_spawning; } - else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) { - retained->idle_spawn_rate *= 2; + else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) { + retained->idle_spawn_rate[child_bucket] *= 2; } } } else { - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } } @@ -1702,7 +1731,7 @@ static void server_main_loop(int remaining_children_to_start) ap_scoreboard_image->parent[child_slot].quiescing = 0; if (processed_status == APEXIT_CHILDSICK) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[bucket[child_slot]] = 1; } else if (remaining_children_to_start && child_slot < ap_daemons_limit) { @@ -1719,7 +1748,9 @@ static void server_main_loop(int remaining_children_to_start) if (processed_status == APEXIT_CHILDSICK && old_gen == retained->my_generation) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + for (i = 0; i < num_buckets; i++) { + retained->idle_spawn_rate[i] = 1; + } } #if APR_HAS_OTHER_CHILD } @@ -1758,7 +1789,9 @@ static void server_main_loop(int remaining_children_to_start) continue; } - perform_idle_server_maintenance(); + for (i = 0; i < num_buckets; i++) { + perform_idle_server_maintenance(i); + } } } @@ -1766,16 +1799,25 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) { int remaining_children_to_start; apr_status_t rv; + int i; + ap_listen_rec *lr; ap_log_pid(pconf, ap_pid_fname); + bucket = apr_palloc(_pconf, sizeof(int) * ap_daemons_limit); /* Initialize cross-process accept lock */ - rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL, - s, _pconf, 0); - if (rv != APR_SUCCESS) { - mpm_state = AP_MPMQ_STOPPING; - return DONE; + accept_mutex = apr_palloc(_pconf, sizeof(apr_proc_mutex_t *) * num_buckets); + for (i = 0; i < num_buckets; i++) { + rv = ap_proc_mutex_create(&accept_mutex[i], NULL, AP_ACCEPT_MUTEX_TYPE, NULL, + s, _pconf, 0); + if (rv != APR_SUCCESS) { + mpm_state = AP_MPMQ_STOPPING; + return DONE; + } } + for (lr = ap_listeners; lr; lr = lr->next) { + apr_socket_close(lr->sd); + } if (!retained->is_graceful) { if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) { @@ -1791,8 +1833,8 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) restart_pending = shutdown_pending = 0; set_signals(); /* Don't thrash... */ - if (max_spare_threads < min_spare_threads + threads_per_child) - max_spare_threads = min_spare_threads + threads_per_child; + if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets) + max_spare_threads = min_spare_threads + threads_per_child * num_buckets; /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop @@ -1825,7 +1867,7 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_command_line(plog, s); ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00294) "Accept mutex: %s (default: %s)", - apr_proc_mutex_name(accept_mutex), + apr_proc_mutex_name(accept_mutex[0]), apr_proc_mutex_defname()); mpm_state = AP_MPMQ_RUNNING; @@ -1836,7 +1878,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) /* Time to shut down: * Kill child processes, tell them to call child_exit, etc... */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ worker_note_child_killed); @@ -1857,7 +1901,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) /* Close our listeners, and then ask our children to do same */ ap_close_listeners(); - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + } ap_relieve_child_processes(worker_note_child_killed); if (!child_fatal) { @@ -1897,7 +1943,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) * way, try and make sure that all of our processes are * really dead. */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, worker_note_child_killed); return DONE; @@ -1922,8 +1970,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00297) AP_SIG_GRACEFUL_STRING " received. Doing graceful restart"); /* wake up the children...time to die. But we'll have more soon */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); - + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + } /* This is mostly for debugging... so that we know what is still * gracefully dealing with existing request. @@ -1935,7 +1984,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) * and a SIGHUP, we may as well use the same signal, because some user * pthreads are stealing signals from us left and right. */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ worker_note_child_killed); @@ -1954,6 +2005,8 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, int startup = 0; int level_flags = 0; apr_status_t rv; + int i; + int num_of_cores = 0; pconf = p; @@ -1963,19 +2016,42 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, level_flags |= APLOG_STARTUP; } + enable_default_listener = 0; if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) { ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0, (startup ? NULL : s), "no listening sockets available, shutting down"); return DONE; } + enable_default_listener = 1; + if (have_so_reuseport) { +#ifdef _SC_NPROCESSORS_ONLN + num_of_cores = sysconf(_SC_NPROCESSORS_ONLN); +#else + num_of_cores = 1; +#endif + if (num_of_cores > 8) { + num_buckets = num_of_cores/8; + } + else { + num_buckets = 1; + } + } + else { + num_buckets = 1; + } + + ap_duplicate_listeners(ap_server_conf, pconf, num_buckets); + pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets); if (!one_process) { - if ((rv = ap_mpm_podx_open(pconf, &pod))) { - ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, - (startup ? NULL : s), - "could not open pipe-of-death"); - return DONE; + for (i = 0; i < num_buckets; i++) { + if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? NULL : s), + "could not open pipe-of-death"); + return DONE; + } } } return OK; @@ -1987,6 +2063,7 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog, int no_detach, debug, foreground; apr_status_t rv; const char *userdata_key = "mpm_worker_module"; + int i; mpm_state = AP_MPMQ_STARTING; @@ -2009,7 +2086,6 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog, if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->max_daemons_limit = -1; - retained->idle_spawn_rate = 1; } ++retained->module_loads; if (retained->module_loads == 2) { @@ -2023,6 +2099,10 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog, "apr_proc_detach failed"); return HTTP_INTERNAL_SERVER_ERROR; } + retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets); + for (i = 0; i< num_buckets; i++) { + retained->idle_spawn_rate[i] = 1; + } } } diff --git a/server/mpm_unix.c b/server/mpm_unix.c index 0000cb6672..97e3e65dff 100644 --- a/server/mpm_unix.c +++ b/server/mpm_unix.c @@ -615,6 +615,7 @@ static apr_status_t dummy_connection(ap_pod_t *pod) apr_pool_t *p; apr_size_t len; ap_listen_rec *lp; + int i; /* create a temporary pool for the socket. pconf stays around too long */ rv = apr_pool_create(&p, pod->p); @@ -626,87 +627,89 @@ static apr_status_t dummy_connection(ap_pod_t *pod) * plain-HTTP, not SSL; using an SSL port would either be * expensive to do correctly (performing a complete SSL handshake) * or cause log spam by doing incorrectly (simply sending EOF). */ - lp = ap_listeners; - while (lp && lp->protocol && strcasecmp(lp->protocol, "http") != 0) { - lp = lp->next; - } - if (!lp) { - lp = ap_listeners; - } + for (i = 0; i < num_buckets; i++) { + lp = mpm_listen[i]; + while (lp && lp->protocol && strcasecmp(lp->protocol, "http") != 0) { + lp = lp->next; + } + if (!lp) { + lp = mpm_listen[i]; + } - rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054) - "get socket to connect to listener"); - apr_pool_destroy(p); - return rv; - } + rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054) + "get socket to connect to listener"); + apr_pool_destroy(p); + return rv; + } - /* on some platforms (e.g., FreeBSD), the kernel won't accept many - * queued connections before it starts blocking local connects... - * we need to keep from blocking too long and instead return an error, - * because the MPM won't want to hold up a graceful restart for a - * long time - */ - rv = apr_socket_timeout_set(sock, apr_time_from_sec(3)); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055) - "set timeout on socket to connect to listener"); - apr_socket_close(sock); - apr_pool_destroy(p); - return rv; - } + /* on some platforms (e.g., FreeBSD), the kernel won't accept many + * queued connections before it starts blocking local connects... + * we need to keep from blocking too long and instead return an error, + * because the MPM won't want to hold up a graceful restart for a + * long time + */ + rv = apr_socket_timeout_set(sock, apr_time_from_sec(3)); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055) + "set timeout on socket to connect to listener"); + apr_socket_close(sock); + apr_pool_destroy(p); + return rv; + } - rv = apr_socket_connect(sock, lp->bind_addr); - if (rv != APR_SUCCESS) { - int log_level = APLOG_WARNING; - - if (APR_STATUS_IS_TIMEUP(rv)) { - /* probably some server processes bailed out already and there - * is nobody around to call accept and clear out the kernel - * connection queue; usually this is not worth logging - */ - log_level = APLOG_DEBUG; + rv = apr_socket_connect(sock, lp->bind_addr); + if (rv != APR_SUCCESS) { + int log_level = APLOG_WARNING; + + if (APR_STATUS_IS_TIMEUP(rv)) { + /* probably some server processes bailed out already and there + * is nobody around to call accept and clear out the kernel + * connection queue; usually this is not worth logging + */ + log_level = APLOG_DEBUG; + } + + ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056) + "connect to listener on %pI", lp->bind_addr); + apr_pool_destroy(p); + return rv; } - ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056) - "connect to listener on %pI", lp->bind_addr); - apr_pool_destroy(p); - return rv; - } + if (lp->protocol && strcasecmp(lp->protocol, "https") == 0) { + /* Send a TLS 1.0 close_notify alert. This is perhaps the + * "least wrong" way to open and cleanly terminate an SSL + * connection. It should "work" without noisy error logs if + * the server actually expects SSLv3/TLSv1. With + * SSLv23_server_method() OpenSSL's SSL_accept() fails + * ungracefully on receipt of this message, since it requires + * an 11-byte ClientHello message and this is too short. */ + static const unsigned char tls10_close_notify[7] = { + '\x15', /* TLSPlainText.type = Alert (21) */ + '\x03', '\x01', /* TLSPlainText.version = {3, 1} */ + '\x00', '\x02', /* TLSPlainText.length = 2 */ + '\x01', /* Alert.level = warning (1) */ + '\x00' /* Alert.description = close_notify (0) */ + }; + data = (const char *)tls10_close_notify; + len = sizeof(tls10_close_notify); + } + else /* ... XXX other request types here? */ { + /* Create an HTTP request string. We include a User-Agent so + * that adminstrators can track down the cause of the + * odd-looking requests in their logs. A complete request is + * used since kernel-level filtering may require that much + * data before returning from accept(). */ + data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ", + ap_get_server_description(), + " (internal dummy connection)\r\n\r\n", NULL); + len = strlen(data); + } - if (lp->protocol && strcasecmp(lp->protocol, "https") == 0) { - /* Send a TLS 1.0 close_notify alert. This is perhaps the - * "least wrong" way to open and cleanly terminate an SSL - * connection. It should "work" without noisy error logs if - * the server actually expects SSLv3/TLSv1. With - * SSLv23_server_method() OpenSSL's SSL_accept() fails - * ungracefully on receipt of this message, since it requires - * an 11-byte ClientHello message and this is too short. */ - static const unsigned char tls10_close_notify[7] = { - '\x15', /* TLSPlainText.type = Alert (21) */ - '\x03', '\x01', /* TLSPlainText.version = {3, 1} */ - '\x00', '\x02', /* TLSPlainText.length = 2 */ - '\x01', /* Alert.level = warning (1) */ - '\x00' /* Alert.description = close_notify (0) */ - }; - data = (const char *)tls10_close_notify; - len = sizeof(tls10_close_notify); - } - else /* ... XXX other request types here? */ { - /* Create an HTTP request string. We include a User-Agent so - * that adminstrators can track down the cause of the - * odd-looking requests in their logs. A complete request is - * used since kernel-level filtering may require that much - * data before returning from accept(). */ - data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ", - ap_get_server_description(), - " (internal dummy connection)\r\n\r\n", NULL); - len = strlen(data); + apr_socket_send(sock, data, &len); + apr_socket_close(sock); } - - apr_socket_send(sock, data, &len); - apr_socket_close(sock); apr_pool_destroy(p); return rv; |