From 3be04bffdaa8612f6bebe4b5dd4404f40c84aa7d Mon Sep 17 00:00:00 2001
From: Stefan Eissing <icing@apache.org>
Date: Wed, 10 Jul 2019 10:18:15 +0000
Subject:   *) mod_http2: fixed a bug that prevented proper stream cleanup when
 connection      throttling was in place. Stream resets by clients on streams
 initiated by them      are counted as possible trigger for throttling.

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1862865 13f79535-47bb-0310-9956-ffa450edef68
---
 modules/http2/h2_mplx.c    | 250 +++++++++++++++++++++++++++------------------
 modules/http2/h2_mplx.h    |  11 +-
 modules/http2/h2_session.c |  13 +++
 modules/http2/h2_task.c    |   8 +-
 modules/http2/h2_task.h    |   2 +
 5 files changed, 176 insertions(+), 108 deletions(-)

(limited to 'modules')

diff --git a/modules/http2/h2_mplx.c b/modules/http2/h2_mplx.c
index 81b063ad44..0c93d48664 100644
--- a/modules/http2/h2_mplx.c
+++ b/modules/http2/h2_mplx.c
@@ -53,8 +53,12 @@ typedef struct {
     h2_mplx *m;
     h2_stream *stream;
     apr_time_t now;
+    apr_size_t count;
 } stream_iter_ctx;
 
+static apr_status_t mplx_be_happy(h2_mplx *m);
+static apr_status_t mplx_be_annoyed(h2_mplx *m);
+
 apr_status_t h2_mplx_child_init(apr_pool_t *pool, server_rec *s)
 {
     return APR_SUCCESS;
@@ -98,7 +102,7 @@ static void stream_input_consumed(void *ctx, h2_bucket_beam *beam, apr_off_t len
 
 static void stream_joined(h2_mplx *m, h2_stream *stream)
 {
-    ap_assert(!stream->task || stream->task->worker_done);
+    ap_assert(!h2_task_has_started(stream->task) || stream->task->worker_done);
     
     h2_ihash_remove(m->shold, stream->id);
     h2_ihash_add(m->spurge, stream);
@@ -124,7 +128,7 @@ static void stream_cleanup(h2_mplx *m, h2_stream *stream)
     h2_ififo_remove(m->readyq, stream->id);
     h2_ihash_add(m->shold, stream);
     
-    if (!stream->task || stream->task->worker_done) {
+    if (!h2_task_has_started(stream->task) || stream->task->done_done) {
         stream_joined(m, stream);
     }
     else if (stream->task) {
@@ -194,7 +198,6 @@ h2_mplx *h2_mplx_create(conn_rec *c, server_rec *s, apr_pool_t *parent,
         m->stream_max_mem = h2_config_sgeti(s, H2_CONF_STREAM_MAX_MEM);
 
         m->streams = h2_ihash_create(m->pool, offsetof(h2_stream,id));
-        m->sredo = h2_ihash_create(m->pool, offsetof(h2_stream,id));
         m->shold = h2_ihash_create(m->pool, offsetof(h2_stream,id));
         m->spurge = h2_ihash_create(m->pool, offsetof(h2_stream,id));
         m->q = h2_iq_create(m->pool, m->max_streams);
@@ -208,8 +211,8 @@ h2_mplx *h2_mplx_create(conn_rec *c, server_rec *s, apr_pool_t *parent,
         m->workers = workers;
         m->max_active = workers->max_workers;
         m->limit_active = 6; /* the original h1 max parallel connections */
-        m->last_limit_change = m->last_idle_block = apr_time_now();
-        m->limit_change_interval = apr_time_from_msec(100);
+        m->last_mood_change = apr_time_now();
+        m->mood_update_interval = apr_time_from_msec(100);
         
         m->spare_slaves = apr_array_make(m->pool, 10, sizeof(conn_rec*));
     }
@@ -430,6 +433,10 @@ void h2_mplx_release_and_join(h2_mplx *m, apr_thread_cond_t *wait)
 
     /* How to shut down a h2 connection:
      * 1. cancel all streams still active */
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c, 
+                  "h2_mplx(%ld): release, %d/%d/%d streams (total/hold/purge), %d active tasks", 
+                  m->id, (int)h2_ihash_count(m->streams),
+                  (int)h2_ihash_count(m->shold), (int)h2_ihash_count(m->spurge), m->tasks_active);
     while (!h2_ihash_iter(m->streams, stream_cancel_iter, m)) {
         /* until empty */
     }
@@ -455,10 +462,10 @@ void h2_mplx_release_and_join(h2_mplx *m, apr_thread_cond_t *wait)
             h2_ihash_iter(m->shold, report_stream_iter, m);
         }
     }
-    ap_assert(m->tasks_active == 0);
     m->join_wait = NULL;
-    
+
     /* 4. With all workers done, all streams should be in spurge */
+    ap_assert(m->tasks_active == 0);
     if (!h2_ihash_empty(m->shold)) {
         ap_log_cerror(APLOG_MARK, APLOG_WARNING, 0, m->c, APLOGNO(03516)
                       "h2_mplx(%ld): unexpected %d streams in hold", 
@@ -469,8 +476,7 @@ void h2_mplx_release_and_join(h2_mplx *m, apr_thread_cond_t *wait)
     m->c->aborted = old_aborted;
     H2_MPLX_LEAVE(m);
 
-    ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c,
-                  "h2_mplx(%ld): released", m->id);
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c, "h2_mplx(%ld): released", m->id);
 }
 
 apr_status_t h2_mplx_stream_cleanup(h2_mplx *m, h2_stream *stream)
@@ -708,7 +714,6 @@ static h2_task *next_stream_task(h2_mplx *m)
             }
             
             if (!stream->task) {
-
                 if (sid > m->max_stream_started) {
                     m->max_stream_started = sid;
                 }
@@ -727,9 +732,9 @@ static h2_task *next_stream_task(h2_mplx *m)
                                   "create task"));
                     return NULL;
                 }
-                
             }
             
+            stream->task->started_at = apr_time_now();
             ++m->tasks_active;
             return stream->task;
         }
@@ -777,32 +782,18 @@ static void task_done(h2_mplx *m, h2_task *task)
                   "h2_mplx(%s): request done, %f ms elapsed", task->id, 
                   (task->done_at - task->started_at) / 1000.0);
     
-    if (task->started_at > m->last_idle_block) {
-        /* this task finished without causing an 'idle block', e.g.
-         * a block by flow control.
-         */
-        if (task->done_at- m->last_limit_change >= m->limit_change_interval
-            && m->limit_active < m->max_active) {
-            /* Well behaving stream, allow it more workers */
-            m->limit_active = H2MIN(m->limit_active * 2, 
-                                     m->max_active);
-            m->last_limit_change = task->done_at;
-            ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c,
-                          "h2_mplx(%ld): increase worker limit to %d",
-                          m->id, m->limit_active);
-        }
+    if (task->c && !task->c->aborted && task->started_at > m->last_mood_change) {
+        mplx_be_happy(m);
     }
-
+    
     ap_assert(task->done_done == 0);
 
     stream = h2_ihash_get(m->streams, task->stream_id);
     if (stream) {
         /* stream not done yet. */
-        if (!m->aborted && h2_ihash_get(m->sredo, stream->id)) {
+        if (!m->aborted && task->redo) {
             /* reset and schedule again */
-            task->worker_done = 0;
             h2_task_redo(task);
-            h2_ihash_remove(m->sredo, stream->id);
             h2_iq_add(m->q, stream->id, NULL, NULL);
             ap_log_cerror(APLOG_MARK, APLOG_INFO, 0, m->c,
                           H2_STRM_MSG(stream, "redo, added to q")); 
@@ -847,8 +838,8 @@ void h2_mplx_task_done(h2_mplx *m, h2_task *task, h2_task **ptask)
 {
     H2_MPLX_ENTER_ALWAYS(m);
 
-    task_done(m, task);
     --m->tasks_active;
+    task_done(m, task);
     
     if (m->join_wait) {
         apr_thread_cond_signal(m->join_wait);
@@ -866,94 +857,160 @@ void h2_mplx_task_done(h2_mplx *m, h2_task *task, h2_task **ptask)
  * h2_mplx DoS protection
  ******************************************************************************/
 
-static int latest_repeatable_unsubmitted_iter(void *data, void *val)
+static int timed_out_busy_iter(void *data, void *val)
 {
     stream_iter_ctx *ctx = data;
     h2_stream *stream = val;
-    
-    if (stream->task && !stream->task->worker_done 
-        && h2_task_can_redo(stream->task) 
-        && !h2_ihash_get(ctx->m->sredo, stream->id)) {
-        if (!h2_stream_is_ready(stream)) {
-            /* this task occupies a worker, the response has not been submitted 
-             * yet, not been cancelled and it is a repeatable request
-             * -> it can be re-scheduled later */
-            if (!ctx->stream 
-                || (ctx->stream->task->started_at < stream->task->started_at)) {
-                /* we did not have one or this one was started later */
-                ctx->stream = stream;
-            }
-        }
+    if (h2_task_has_started(stream->task) && !stream->task->worker_done
+        && (ctx->now - stream->task->started_at) > stream->task->timeout) {
+        /* timed out stream occupying a worker, found */
+        ctx->stream = stream;
+        return 0;
     }
     return 1;
 }
 
-static h2_stream *get_latest_repeatable_unsubmitted_stream(h2_mplx *m) 
+static h2_stream *get_timed_out_busy_stream(h2_mplx *m) 
 {
     stream_iter_ctx ctx;
     ctx.m = m;
     ctx.stream = NULL;
-    h2_ihash_iter(m->streams, latest_repeatable_unsubmitted_iter, &ctx);
+    ctx.now = apr_time_now();
+    h2_ihash_iter(m->streams, timed_out_busy_iter, &ctx);
     return ctx.stream;
 }
 
-static int timed_out_busy_iter(void *data, void *val)
+static int latest_repeatable_unsubmitted_iter(void *data, void *val)
 {
     stream_iter_ctx *ctx = data;
     h2_stream *stream = val;
-    if (stream->task && !stream->task->worker_done
-        && (ctx->now - stream->task->started_at) > stream->task->timeout) {
-        /* timed out stream occupying a worker, found */
-        ctx->stream = stream;
-        return 0;
+    
+    if (!stream->task) goto leave;
+    if (!h2_task_has_started(stream->task) || stream->task->worker_done) goto leave;
+    if (h2_stream_is_ready(stream)) goto leave;
+    if (stream->task->redo) {
+        ++ctx->count;
+        goto leave;
+    }
+    if (h2_task_can_redo(stream->task)) {
+        /* this task occupies a worker, the response has not been submitted 
+         * yet, not been cancelled and it is a repeatable request
+         * -> we could redo it later */
+        if (!ctx->stream 
+            || (ctx->stream->task->started_at < stream->task->started_at)) {
+            /* we did not have one or this one was started later */
+            ctx->stream = stream;
+        }
     }
+leave:
     return 1;
 }
 
-static h2_stream *get_timed_out_busy_stream(h2_mplx *m) 
+static apr_status_t assess_task_to_throttle(h2_task **ptask, h2_mplx *m) 
 {
     stream_iter_ctx ctx;
+    
+    /* count the running tasks already marked for redo and get one that could
+     * be throttled */
+    *ptask = NULL;
     ctx.m = m;
     ctx.stream = NULL;
-    ctx.now = apr_time_now();
-    h2_ihash_iter(m->streams, timed_out_busy_iter, &ctx);
-    return ctx.stream;
+    ctx.count = 0;
+    h2_ihash_iter(m->streams, latest_repeatable_unsubmitted_iter, &ctx);
+    if (m->tasks_active - ctx.count > m->limit_active) {
+        /* we are above the limit of running tasks, accounting for the ones
+         * already throttled. */
+        if (ctx.stream && ctx.stream->task) {
+            *ptask = ctx.stream->task;
+            return APR_EAGAIN;
+        }
+        /* above limit, be seeing no candidate for easy throttling */
+        if (get_timed_out_busy_stream(m)) {
+            /* Too many busy workers, unable to cancel enough streams
+             * and with a busy, timed out stream, we tell the client
+             * to go away... */
+            return APR_TIMEUP;
+        }
+    }
+    return APR_SUCCESS;
 }
 
 static apr_status_t unschedule_slow_tasks(h2_mplx *m) 
 {
-    h2_stream *stream;
-    int n;
+    h2_task *task;
+    apr_status_t rv;
     
     /* Try to get rid of streams that occupy workers. Look for safe requests
      * that are repeatable. If none found, fail the connection.
      */
-    n = (m->tasks_active - m->limit_active - (int)h2_ihash_count(m->sredo));
-    while (n > 0 && (stream = get_latest_repeatable_unsubmitted_stream(m))) {
+    while (APR_EAGAIN == (rv = assess_task_to_throttle(&task, m))) {
         ap_log_cerror(APLOG_MARK, APLOG_TRACE2, 0, m->c, 
                       "h2_mplx(%s): unschedule, resetting task for redo later",
-                      stream->task->id);
-        h2_task_rst(stream->task, H2_ERR_CANCEL);
-        h2_ihash_add(m->sredo, stream);
-        --n;
+                      task->id);
+        task->redo = 1;
+        h2_task_rst(task, H2_ERR_CANCEL);
     }
-    
-    if ((m->tasks_active - h2_ihash_count(m->sredo)) > m->limit_active) {
-        stream = get_timed_out_busy_stream(m);
-        if (stream) {
-            /* Too many busy workers, unable to cancel enough streams
-             * and with a busy, timed out stream, we tell the client
-             * to go away... */
-            return APR_TIMEUP;
-        }
+    return rv;
+}
+
+static apr_status_t mplx_be_happy(h2_mplx *m)
+{
+    apr_time_t now;            
+
+    --m->irritations_since;
+    now = apr_time_now();
+    if (m->limit_active < m->max_active 
+        && (now - m->last_mood_change >= m->mood_update_interval
+            || m->irritations_since < -m->limit_active)) {
+        m->limit_active = H2MIN(m->limit_active * 2, m->max_active);
+        m->last_mood_change = now;
+        m->irritations_since = 0;
+        ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c,
+                      "h2_mplx(%ld): mood update, increasing worker limit to %d",
+                      m->id, m->limit_active);
     }
     return APR_SUCCESS;
 }
 
-apr_status_t h2_mplx_idle(h2_mplx *m)
+static apr_status_t mplx_be_annoyed(h2_mplx *m)
 {
     apr_status_t status = APR_SUCCESS;
     apr_time_t now;            
+
+    ++m->irritations_since;
+    now = apr_time_now();
+    if (m->limit_active > 2 && 
+        ((now - m->last_mood_change >= m->mood_update_interval)
+         || (m->irritations_since >= m->limit_active))) {
+            
+        if (m->limit_active > 16) {
+            m->limit_active = 16;
+        }
+        else if (m->limit_active > 8) {
+            m->limit_active = 8;
+        }
+        else if (m->limit_active > 4) {
+            m->limit_active = 4;
+        }
+        else if (m->limit_active > 2) {
+            m->limit_active = 2;
+        }
+        m->last_mood_change = now;
+        m->irritations_since = 0;
+        ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c,
+                      "h2_mplx(%ld): mood update, decreasing worker limit to %d",
+                      m->id, m->limit_active);
+    }
+    
+    if (m->tasks_active > m->limit_active) {
+        status = unschedule_slow_tasks(m);
+    }
+    return status;
+}
+
+apr_status_t h2_mplx_idle(h2_mplx *m)
+{
+    apr_status_t status = APR_SUCCESS;
     apr_size_t scount;
     
     H2_MPLX_ENTER(m);
@@ -973,31 +1030,7 @@ apr_status_t h2_mplx_idle(h2_mplx *m)
              * of busy workers we allow for this connection until it
              * well behaves.
              */
-            now = apr_time_now();
-            m->last_idle_block = now;
-            if (m->limit_active > 2 
-                && now - m->last_limit_change >= m->limit_change_interval) {
-                if (m->limit_active > 16) {
-                    m->limit_active = 16;
-                }
-                else if (m->limit_active > 8) {
-                    m->limit_active = 8;
-                }
-                else if (m->limit_active > 4) {
-                    m->limit_active = 4;
-                }
-                else if (m->limit_active > 2) {
-                    m->limit_active = 2;
-                }
-                m->last_limit_change = now;
-                ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c,
-                              "h2_mplx(%ld): decrease worker limit to %d",
-                              m->id, m->limit_active);
-            }
-            
-            if (m->tasks_active > m->limit_active) {
-                status = unschedule_slow_tasks(m);
-            }
+            status = mplx_be_annoyed(m);
         }
         else if (!h2_iq_empty(m->q)) {
             ap_log_cerror(APLOG_MARK, APLOG_TRACE1, 0, m->c,
@@ -1092,11 +1125,24 @@ int h2_mplx_awaits_data(h2_mplx *m)
     if (h2_ihash_empty(m->streams)) {
         waiting = 0;
     }
-    else if (!m->tasks_active && !h2_ififo_count(m->readyq)
-             && h2_iq_empty(m->q)) {
+    else if (!m->tasks_active && !h2_ififo_count(m->readyq) && h2_iq_empty(m->q)) {
         waiting = 0;
     }
 
     H2_MPLX_LEAVE(m);
     return waiting;
 }
+
+apr_status_t h2_mplx_client_rst(h2_mplx *m, int stream_id)
+{
+    h2_stream *stream;
+    apr_status_t status = APR_SUCCESS;
+    
+    H2_MPLX_ENTER_ALWAYS(m);
+    stream = h2_ihash_get(m->streams, stream_id);
+    if (stream && stream->task) {
+        status = mplx_be_annoyed(m);
+    }
+    H2_MPLX_LEAVE(m);
+    return status;
+}
diff --git a/modules/http2/h2_mplx.h b/modules/http2/h2_mplx.h
index 575ccaf430..8a4f63f195 100644
--- a/modules/http2/h2_mplx.h
+++ b/modules/http2/h2_mplx.h
@@ -63,7 +63,6 @@ struct h2_mplx {
     unsigned int is_registered;     /* is registered at h2_workers */
 
     struct h2_ihash_t *streams;     /* all streams currently processing */
-    struct h2_ihash_t *sredo;       /* all streams that need to be re-started */
     struct h2_ihash_t *shold;       /* all streams done with task ongoing */
     struct h2_ihash_t *spurge;      /* all streams done, ready for destroy */
     
@@ -77,10 +76,10 @@ struct h2_mplx {
     int tasks_active;       /* # of tasks being processed from this mplx */
     int limit_active;       /* current limit on active tasks, dynamic */
     int max_active;         /* max, hard limit # of active tasks in a process */
-    apr_time_t last_idle_block;      /* last time, this mplx entered IDLE while
-                                      * streams were ready */
-    apr_time_t last_limit_change;    /* last time, worker limit changed */
-    apr_interval_time_t limit_change_interval;
+    
+    apr_time_t last_mood_change; /* last time, we worker limit changed */
+    apr_interval_time_t mood_update_interval; /* how frequent we update at most */
+    int irritations_since; /* irritations (>0) or happy events (<0) since last mood change */
 
     apr_thread_mutex_t *lock;
     struct apr_thread_cond_t *added_output;
@@ -205,6 +204,8 @@ typedef int h2_mplx_stream_cb(struct h2_stream *s, void *ctx);
 
 apr_status_t h2_mplx_stream_do(h2_mplx *m, h2_mplx_stream_cb *cb, void *ctx);
 
+apr_status_t h2_mplx_client_rst(h2_mplx *m, int stream_id);
+
 /*******************************************************************************
  * Output handling of streams.
  ******************************************************************************/
diff --git a/modules/http2/h2_session.c b/modules/http2/h2_session.c
index d4b152019d..4fae148cf0 100644
--- a/modules/http2/h2_session.c
+++ b/modules/http2/h2_session.c
@@ -390,9 +390,14 @@ static int on_frame_recv_cb(nghttp2_session *ng2s,
                           (int)frame->rst_stream.error_code);
             stream = get_stream(session, frame->hd.stream_id);
             if (stream && stream->initiated_on) {
+                /* A stream reset on a request we sent it. Normal, when the
+                 * client does not want it. */
                 ++session->pushes_reset;
             }
             else {
+                /* A stream reset on a request it sent us. Could happen in a browser
+                 * when the user navigates away or cancels loading - maybe. */
+                h2_mplx_client_rst(session->mplx, frame->hd.stream_id);
                 ++session->streams_reset;
             }
             break;
@@ -2170,6 +2175,14 @@ apr_status_t h2_session_process(h2_session *session, int async)
                         session->have_read = 1;
                     }
                     else if (APR_STATUS_IS_EAGAIN(status) || APR_STATUS_IS_TIMEUP(status)) {
+                        status = h2_mplx_idle(session->mplx);
+                        if (status == APR_EAGAIN) {
+                            break;
+                        }
+                        else if (status != APR_SUCCESS) {
+                            dispatch_event(session, H2_SESSION_EV_CONN_ERROR, 
+                                           H2_ERR_ENHANCE_YOUR_CALM, "less is more");
+                        }
                         status = APR_EAGAIN;
                         goto out;
                     }
diff --git a/modules/http2/h2_task.c b/modules/http2/h2_task.c
index 8d3dc6fde8..72bbc2432c 100644
--- a/modules/http2/h2_task.c
+++ b/modules/http2/h2_task.c
@@ -406,8 +406,15 @@ int h2_task_can_redo(h2_task *task) {
             || !strcmp("OPTIONS", task->request->method));
 }
 
+int h2_task_has_started(h2_task *task)
+{
+    return task && task->started_at != 0;
+}
+
 void h2_task_redo(h2_task *task)
 {
+    task->started_at = 0;
+    task->worker_done = 0;
     task->rst_error = 0;
 }
 
@@ -547,7 +554,6 @@ apr_status_t h2_task_do(h2_task *task, apr_thread_t *thread, int worker_id)
     ap_assert(task);
     c = task->c;
     task->worker_started = 1;
-    task->started_at = apr_time_now();
     
     if (c->master) {
         /* Each conn_rec->id is supposed to be unique at a point in time. Since
diff --git a/modules/http2/h2_task.h b/modules/http2/h2_task.h
index 4121d0fd69..68a66657ad 100644
--- a/modules/http2/h2_task.h
+++ b/modules/http2/h2_task.h
@@ -80,6 +80,7 @@ struct h2_task {
     
     unsigned int filters_set    : 1;
     unsigned int worker_started : 1; /* h2_worker started processing */
+    unsigned int redo : 1;           /* was throttled, should be restarted later */
     
     int worker_done;                 /* h2_worker finished */
     int done_done;                   /* task_done has been handled */
@@ -101,6 +102,7 @@ apr_status_t h2_task_do(h2_task *task, apr_thread_t *thread, int worker_id);
 
 void h2_task_redo(h2_task *task);
 int h2_task_can_redo(h2_task *task);
+int h2_task_has_started(h2_task *task);
 
 /**
  * Reset the task with the given error code, resets all input/output.
-- 
cgit v1.2.3