--- a/bin/varnishd/cache/cache_varnishd.h
+++ b/bin/varnishd/cache/cache_varnishd.h
@@ -434,6 +434,11 @@ void STV_BanExport(const uint8_t *banlis
 int STV_NewObject(struct worker *, struct objcore *,
     const struct stevedore *, unsigned len);
 
+struct stv_buffer;
+struct stv_buffer *STV_AllocBuf(struct worker *wrk, const struct stevedore *stv,
+    size_t size);
+void STV_FreeBuf(struct worker *wrk, struct stv_buffer **pstvbuf);
+void *STV_GetBufPtr(struct stv_buffer *stvbuf, size_t *psize);
 
 #if WITH_PERSISTENT_STORAGE
 /* storage_persistent.c */
--- a/bin/varnishd/http2/cache_http2.h
+++ b/bin/varnishd/http2/cache_http2.h
@@ -42,15 +42,15 @@ struct h2_error_s {
 	uint32_t			val;
 	int				stream;
 	int				connection;
+	enum sess_close			reason;
 };
 
 typedef const struct h2_error_s *h2_error;
 
-#define H2EC0(U,v,d)
-#define H2EC1(U,v,d) extern const struct h2_error_s H2CE_##U[1];
-#define H2EC2(U,v,d) extern const struct h2_error_s H2SE_##U[1];
-#define H2EC3(U,v,d) H2EC1(U,v,d) H2EC2(U,v,d)
-#define H2_ERROR(NAME, val, sc, desc) H2EC##sc(NAME, val, desc)
+#define H2EC1(U,v,r,d) extern const struct h2_error_s H2CE_##U[1];
+#define H2EC2(U,v,r,d) extern const struct h2_error_s H2SE_##U[1];
+#define H2EC3(U,v,r,d) H2EC1(U,v,r,d) H2EC2(U,v,r,d)
+#define H2_ERROR(NAME, val, sc, reason, desc) H2EC##sc(NAME, val, reason, desc)
 #include "tbl/h2_error.h"
 #undef H2EC1
 #undef H2EC2
@@ -113,6 +113,16 @@ enum h2_stream_e {
 #define H2_FRAME_FLAGS(l,u,v)   extern const uint8_t H2FF_##u;
 #include "tbl/h2_frames.h"
 
+struct h2_rxbuf {
+	unsigned			magic;
+#define H2_RXBUF_MAGIC			0x73f9fb27
+	unsigned			size;
+	uint64_t			tail;
+	uint64_t			head;
+	struct stv_buffer		*stvbuf;
+	uint8_t				data[];
+};
+
 struct h2_req {
 	unsigned			magic;
 #define H2_REQ_MAGIC			0x03411584
@@ -131,7 +141,7 @@ struct h2_req {
 	/* Where to wake this stream up */
 	struct worker			*wrk;
 
-	ssize_t				reqbody_bytes;
+	struct h2_rxbuf			*rxbuf;
 
 	VTAILQ_ENTRY(h2_req)		tx_list;
 	h2_error			error;
@@ -144,7 +154,6 @@ struct h2_sess {
 #define H2_SESS_MAGIC			0xa16f7e4b
 
 	pthread_t			rxthr;
-	struct h2_req			*mailcall;
 	pthread_cond_t			*cond;
 	pthread_cond_t			winupd_cond[1];
 
@@ -153,6 +162,7 @@ struct h2_sess {
 	uint32_t			highest_stream;
 	int				bogosity;
 	int				do_sweep;
+	unsigned			open_streams;
 
 	struct h2_req			*req0;
 
@@ -229,7 +239,7 @@ void H2_Send(struct worker *, struct h2_
 /* cache_http2_proto.c */
 struct h2_req * h2_new_req(const struct worker *, struct h2_sess *,
     unsigned stream, struct req *);
-void h2_del_req(struct worker *, const struct h2_req *);
+void h2_del_req(struct worker *, struct h2_req *);
 void h2_kill_req(struct worker *, const struct h2_sess *,
     struct h2_req *, h2_error);
 int h2_rxframe(struct worker *, struct h2_sess *);
--- a/bin/varnishd/http2/cache_http2_panic.c
+++ b/bin/varnishd/http2/cache_http2_panic.c
@@ -34,6 +34,29 @@
 #include "cache/cache_transport.h"
 #include "http2/cache_http2.h"
 
+static const char *
+h2_panic_error(const struct h2_error_s *e)
+{
+	if (e == NULL)
+		return ("(null)");
+	else
+		return (e->name);
+}
+
+static void
+h2_panic_settings(struct vsb *vsb, const struct h2_settings *s)
+{
+	int cont = 0;
+
+#define H2_SETTING(U,l,...)			\
+	if (cont)				\
+		VSB_printf(vsb, ", ");		\
+	cont = 1;				\
+	VSB_printf(vsb, "0x%x", s->l);
+#include "tbl/h2_settings.h"
+#undef H2_SETTING
+}
+
 void
 h2_sess_panic(struct vsb *vsb, const struct sess *sp)
 {
@@ -44,21 +67,65 @@ h2_sess_panic(struct vsb *vsb, const str
 	AZ(SES_Get_proto_priv(sp, &up));
 
 	h2 = (void*)*up;
-	CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC);
+	VSB_printf(vsb, "h2_sess = %p,\n", h2);
+	PAN_CheckMagic(vsb, h2, H2_SESS_MAGIC);
+	if (!VALID_OBJ(h2, H2_SESS_MAGIC))
+		return;
+	VSB_printf(vsb, "refcnt = %d, bogosity = %d, error = %s\n",
+	    h2->refcnt, h2->bogosity, h2_panic_error(h2->error));
+	VSB_printf(vsb,
+	    "open_streams = %d, highest_stream = %u,"
+	    " goaway_last_stream = %u,\n",
+	    h2->open_streams, h2->highest_stream, h2->goaway_last_stream);
+	VSB_printf(vsb, "local_settings = {");
+	h2_panic_settings(vsb, &h2->local_settings);
+	VSB_printf(vsb, "},\n");
+	VSB_printf(vsb, "remote_settings = {");
+	h2_panic_settings(vsb, &h2->remote_settings);
+	VSB_printf(vsb, "},\n");
+	VSB_printf(vsb,
+	    "{rxf_len, rxf_type, rxf_flags, rxf_stream} ="
+	    " {%u, %u, 0x%x, %u},\n",
+	    h2->rxf_len, h2->rxf_type, h2->rxf_flags, h2->rxf_stream);
 	VSB_printf(vsb, "streams {\n");
 	VSB_indent(vsb, 2);
 	VTAILQ_FOREACH(r2, &h2->streams, list) {
 		PAN_CheckMagic(vsb, r2, H2_REQ_MAGIC);
-		VSB_printf(vsb, "0x%08x", r2->stream);
+		VSB_printf(vsb, "%u %p ", r2->stream, r2);
 		switch (r2->state) {
-#define H2_STREAM(U,sd,d) case H2_S_##U: VSB_printf(vsb, " %-6s", sd); break;
+#define H2_STREAM(U,sd,d) case H2_S_##U: VSB_printf(vsb, "%s", sd); break;
 #include <tbl/h2_stream.h>
 		default:
-			VSB_printf(vsb, " State %d", r2->state);
+			VSB_printf(vsb, " 0x%x", r2->state);
 			break;
 		}
-		VSB_printf(vsb, "\n");
+		VSB_printf(vsb, " {\n");
+		VSB_indent(vsb, 2);
+
+		VSB_printf(vsb, "h2_sess = %p, scheduled = %d, error = %s,\n",
+		    r2->h2sess, r2->scheduled, h2_panic_error(r2->error));
+		VSB_printf(vsb, "t_send = %f, t_winupd = %f,\n",
+		    r2->t_send, r2->t_winupd);
+		VSB_printf(vsb, "t_window = %jd, r_window = %jd,\n",
+		    r2->t_window, r2->r_window);
+
+		VSB_printf(vsb, "rxbuf = %p", r2->rxbuf);
+		if (r2->rxbuf != NULL) {
+			VSB_printf(vsb, " {\n");
+			VSB_indent(vsb, 2);
+			PAN_CheckMagic(vsb, r2->rxbuf, H2_RXBUF_MAGIC);
+			VSB_printf(vsb, "stvbuf = %p,\n", r2->rxbuf->stvbuf);
+			VSB_printf(vsb,
+			    "{size, tail, head} = {%u, %ju, %ju},\n",
+			    r2->rxbuf->size, r2->rxbuf->tail, r2->rxbuf->head);
+			VSB_indent(vsb, -2);
+			VSB_printf(vsb, "}");
+		}
+		VSB_printf(vsb, ",\n");
+
+		VSB_indent(vsb, -2);
+		VSB_printf(vsb, "},\n");
 	}
 	VSB_indent(vsb, -2);
-	VSB_printf(vsb, "}\n");
+	VSB_printf(vsb, "},\n");
 }
--- a/bin/varnishd/http2/cache_http2_proto.c
+++ b/bin/varnishd/http2/cache_http2_proto.c
@@ -31,6 +31,7 @@
 
 #include "cache/cache_varnishd.h"
 
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -38,15 +39,16 @@
 #include "cache/cache_filter.h"
 #include "http2/cache_http2.h"
 #include "cache/cache_objhead.h"
+#include "storage/storage.h"
 
 #include "vend.h"
 #include "vtcp.h"
 #include "vtim.h"
 
-#define H2EC1(U,v,d) const struct h2_error_s H2CE_##U[1] = {{#U,d,v,0,1}};
-#define H2EC2(U,v,d) const struct h2_error_s H2SE_##U[1] = {{#U,d,v,1,0}};
-#define H2EC3(U,v,d) H2EC1(U,v,d) H2EC2(U,v,d)
-#define H2_ERROR(NAME, val, sc, desc) H2EC##sc(NAME, val, desc)
+#define H2EC1(U,v,r,d) const struct h2_error_s H2CE_##U[1] = {{#U,d,v,0,1,r}};
+#define H2EC2(U,v,r,d) const struct h2_error_s H2SE_##U[1] = {{#U,d,v,1,0,r}};
+#define H2EC3(U,v,r,d) H2EC1(U,v,r,d) H2EC2(U,v,r,d)
+#define H2_ERROR(NAME, val, sc, reason, desc) H2EC##sc(NAME, val, reason, desc)
 #include "tbl/h2_error.h"
 #undef H2EC1
 #undef H2EC2
@@ -57,7 +59,8 @@ static const struct h2_error_s H2NN_ERRO
 	"Unknown error number",
 	0xffffffff,
 	1,
-	1
+	1,
+	SC_RX_JUNK
 }};
 
 enum h2frame {
@@ -84,10 +87,10 @@ h2_framename(enum h2frame h2f)
  */
 
 static const h2_error stream_errors[] = {
-#define H2EC1(U,v,d)
-#define H2EC2(U,v,d) [v] = H2SE_##U,
-#define H2EC3(U,v,d) H2EC1(U,v,d) H2EC2(U,v,d)
-#define H2_ERROR(NAME, val, sc, desc) H2EC##sc(NAME, val, desc)
+#define H2EC1(U,v,r,d)
+#define H2EC2(U,v,r,d) [v] = H2SE_##U,
+#define H2EC3(U,v,r,d) H2EC1(U,v,r,d) H2EC2(U,v,r,d)
+#define H2_ERROR(NAME, val, sc, reason, desc) H2EC##sc(NAME, val, reason, desc)
 #include "tbl/h2_error.h"
 #undef H2EC1
 #undef H2EC2
@@ -109,10 +112,10 @@ h2_streamerror(uint32_t u)
  */
 
 static const h2_error conn_errors[] = {
-#define H2EC1(U,v,d) [v] = H2CE_##U,
-#define H2EC2(U,v,d)
-#define H2EC3(U,v,d) H2EC1(U,v,d) H2EC2(U,v,d)
-#define H2_ERROR(NAME, val, sc, desc) H2EC##sc(NAME, val, desc)
+#define H2EC1(U,v,r,d) [v] = H2CE_##U,
+#define H2EC2(U,v,r,d)
+#define H2EC3(U,v,r,d) H2EC1(U,v,r,d) H2EC2(U,v,r,d)
+#define H2_ERROR(NAME, val, sc, reason, desc) H2EC##sc(NAME, val, reason, desc)
 #include "tbl/h2_error.h"
 #undef H2EC1
 #undef H2EC2
@@ -183,10 +186,11 @@ h2_new_req(const struct worker *wrk, str
 }
 
 void
-h2_del_req(struct worker *wrk, const struct h2_req *r2)
+h2_del_req(struct worker *wrk, struct h2_req *r2)
 {
 	struct h2_sess *h2;
 	struct sess *sp;
+	struct stv_buffer *stvbuf;
 
 	CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC);
 	AZ(r2->scheduled);
@@ -201,6 +205,15 @@ h2_del_req(struct worker *wrk, const str
 	VTAILQ_REMOVE(&h2->streams, r2, list);
 	Lck_Unlock(&sp->mtx);
 	AZ(r2->req->ws->r);
+
+	CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC);
+	if (r2->rxbuf) {
+		stvbuf = r2->rxbuf->stvbuf;
+		r2->rxbuf = NULL;
+		STV_FreeBuf(wrk, &stvbuf);
+		AZ(stvbuf);
+	}
+
 	Req_Cleanup(sp, wrk, r2->req);
 	Req_Release(r2->req);
 }
@@ -544,10 +557,6 @@ h2_do_req(struct worker *wrk, void *priv
 		r2->scheduled = 0;
 		r2->state = H2_S_CLOSED;
 		r2->h2sess->do_sweep = 1;
-		if (h2->mailcall == r2) {
-			h2->mailcall = NULL;
-			AZ(pthread_cond_signal(h2->cond));
-		}
 		Lck_Unlock(&h2->sess->mtx);
 	}
 	THR_SetRequest(NULL);
@@ -594,8 +603,16 @@ h2_end_headers(struct worker *wrk, struc
 	if (req->req_body_status == REQ_BODY_INIT) {
 		if (cl == -1)
 			req->req_body_status = REQ_BODY_WITHOUT_LEN;
-		else
+		else {
+			/* Note: If cl==0 here, we still need to have
+			 * req_body_status==BS_LENGTH, so that there will
+			 * be a wait for the stream to reach H2_S_CLOS_REM
+			 * while dealing with the request body. */
 			req->req_body_status = REQ_BODY_WITH_LEN;
+		}
+		/* Set req->htc->content_length because this is used as
+		 * the hint in vrb_pull() for how large the storage
+		 * buffers need to be */
 		req->htc->content_length = cl;
 	} else {
 		/* A HEADER frame contained END_STREAM */
@@ -750,29 +767,78 @@ h2_rx_continuation(struct worker *wrk, s
 static h2_error v_matchproto_(h2_rxframe_f)
 h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2)
 {
-	int w1 = 0, w2 = 0;
 	char buf[4];
-	unsigned wi;
-	ssize_t cl;
+	uint64_t l, l2, head;
+	const uint8_t *src;
+	unsigned len;
+
+	/* XXX: Shouldn't error handling, setting of r2->error and
+	 * r2->cond signalling be handled more generally at the end of
+	 * procframe()??? */
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	ASSERT_RXTHR(h2);
 	CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC);
 
-	if (r2 == NULL || !r2->scheduled)
+	if (r2 == NULL)
 		return (0);
+
 	if (r2->state >= H2_S_CLOS_REM) {
 		r2->error = H2SE_STREAM_CLOSED;
 		return (H2SE_STREAM_CLOSED); // rfc7540,l,1766,1769
 	}
 	Lck_Lock(&h2->sess->mtx);
+	CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC);
 
-	r2->reqbody_bytes += h2->rxf_len;
-	if (h2->rxf_flags & H2FF_DATA_END_STREAM)
-		r2->state = H2_S_CLOS_REM;
-	cl = r2->req->htc->content_length;
-	if (cl >= 0 && (r2->reqbody_bytes > cl ||
-	      (r2->state >= H2_S_CLOS_REM && r2->reqbody_bytes != cl))) {
+	if (h2->error || r2->error) {
+		if (r2->cond)
+			AZ(pthread_cond_signal(r2->cond));
+		Lck_Unlock(&h2->sess->mtx);
+		return (h2->error ? h2->error : r2->error);
+	}
+
+/* Check padding if present */
+	src = h2->rxf_data;
+	len = h2->rxf_len;
+	if (h2->rxf_flags & H2FF_DATA_PADDED) {
+		if (*src >= len) {
+			VSLb(h2->vsl, SLT_Debug,
+			    "H2: stream %u: Padding larger than frame length",
+			    h2->rxf_stream);
+			r2->error = H2CE_PROTOCOL_ERROR;
+			if (r2->cond)
+				AZ(pthread_cond_signal(r2->cond));
+			Lck_Unlock(&h2->sess->mtx);
+			return (H2CE_PROTOCOL_ERROR);
+		}
+		len -= 1 + *src;
+		src += 1;
+	}
+
+	/* Check against the Content-Length header if given */
+	if (r2->req->htc->content_length >= 0) {
+		if (r2->rxbuf)
+			l = r2->rxbuf->head;
+		else
+			l = 0;
+		l += len;
+		if (l > r2->req->htc->content_length ||
+		    ((h2->rxf_flags & H2FF_DATA_END_STREAM) &&
+		     l != r2->req->htc->content_length)) {
+			VSLb(h2->vsl, SLT_Debug,
+			    "H2: stream %u: Received data and Content-Length"
+			    " mismatch", h2->rxf_stream);
+			r2->error = H2SE_PROTOCOL_ERROR;
+			if (r2->cond)
+				AZ(pthread_cond_signal(r2->cond));
+			Lck_Unlock(&h2->sess->mtx);
+			return (H2SE_PROTOCOL_ERROR);
+		}
+	}
+
+	/* Check and charge connection window. The entire frame including
+	 * padding (h2->rxf_len) counts towards the window. */
+	if (h2->rxf_len > h2->req0->r_window) {
 		VSLb(h2->vsl, SLT_Debug,
 		    "H2: stream %u: Received data and Content-Length"
 		    " mismatch", h2->rxf_stream);
@@ -780,42 +846,157 @@ h2_rx_data(struct worker *wrk, struct h2
 		if (r2->cond)
 			AZ(pthread_cond_signal(r2->cond));
 		Lck_Unlock(&h2->sess->mtx);
-		return (H2SE_PROTOCOL_ERROR);
+		return (H2CE_FLOW_CONTROL_ERROR);
 	}
 
-	AZ(h2->mailcall);
-	h2->mailcall = r2;
 	h2->req0->r_window -= h2->rxf_len;
-	r2->r_window -= h2->rxf_len;
-	// req_bodybytes accounted in CNT code.
-	if (r2->cond)
-		AZ(pthread_cond_signal(r2->cond));
-	while (h2->mailcall != NULL && h2->error == 0 && r2->error == 0)
-		AZ(Lck_CondWait(h2->cond, &h2->sess->mtx, 0));
-	wi = cache_param->h2_rx_window_increment;
 	if (h2->req0->r_window < cache_param->h2_rx_window_low_water) {
-		h2->req0->r_window += wi;
-		w1 = 1;
+		h2->req0->r_window += cache_param->h2_rx_window_increment;
+		vbe32enc(buf, cache_param->h2_rx_window_increment);
+		Lck_Unlock(&h2->sess->mtx);
+		H2_Send_Get(wrk, h2, h2->req0);
+		H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, 0, buf);
+		H2_Send_Rel(h2, h2->req0);
+		Lck_Lock(&h2->sess->mtx);
 	}
-	if (r2->r_window < cache_param->h2_rx_window_low_water) {
-		r2->r_window += wi;
-		w2 = 1;
+
+		/* Check stream window. The entire frame including padding
+	 * (h2->rxf_len) counts towards the window. */
+	if (h2->rxf_len > r2->r_window) {
+		VSLb(h2->vsl, SLT_Debug,
+		    "H2: stream %u: Exceeded stream receive window",
+		    h2->rxf_stream);
+		r2->error = H2SE_FLOW_CONTROL_ERROR;
+		if (r2->cond)
+			AZ(pthread_cond_signal(r2->cond));
+		Lck_Unlock(&h2->sess->mtx);
+		return (H2SE_FLOW_CONTROL_ERROR);
 	}
 
+	/* Handle zero size frame before starting to allocate buffers */
+	if (len == 0) {
+		r2->r_window -= h2->rxf_len;
+
+		/* Handle the specific corner case where the entire window
+		 * has been exhausted using nothing but padding
+		 * bytes. Since no bytes have been buffered, no bytes
+		 * would be consumed by the request thread and no stream
+		 * window updates sent. Unpaint ourselves from this corner
+		 * by sending a stream window update here. */
+		CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC);
+		if (r2->r_window == 0 &&
+		    (r2->rxbuf == NULL || r2->rxbuf->tail == r2->rxbuf->head)) {
+			if (r2->rxbuf)
+				l = r2->rxbuf->size;
+			else
+				l = h2->local_settings.initial_window_size;
+			r2->r_window += l;
+			Lck_Unlock(&h2->sess->mtx);
+			vbe32enc(buf, l);
+			H2_Send_Get(wrk, h2, h2->req0);
+			H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4,
+			    r2->stream, buf);
+			H2_Send_Rel(h2, h2->req0);
+			Lck_Lock(&h2->sess->mtx);
+		}
 
-	Lck_Unlock(&h2->sess->mtx);
 
-	if (w1 || w2) {
-		vbe32enc(buf, wi);
-		H2_Send_Get(wrk, h2, h2->req0);
-		if (w1)
-			H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0,
-			    4, 0, buf);
-		if (w2)
-			H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0,
-			    4, r2->stream, buf);
-		H2_Send_Rel(h2, h2->req0);
+		if (h2->rxf_flags & H2FF_DATA_END_STREAM)
+			r2->state = H2_S_CLOS_REM;
+		if (r2->cond)
+			AZ(pthread_cond_signal(r2->cond));
+		Lck_Unlock(&h2->sess->mtx);
+		return (0);
+	}
+
+	/* Make the buffer on demand */
+	if (r2->rxbuf == NULL) {
+		unsigned bufsize;
+		size_t bstest;
+		struct stv_buffer *stvbuf;
+		struct h2_rxbuf *rxbuf;
+
+		Lck_Unlock(&h2->sess->mtx);
+
+		bufsize = h2->local_settings.initial_window_size;
+		if (bufsize < r2->r_window) {
+			/* This will not happen because we do not have any
+			 * mechanism to change the initial window size on
+			 * a running session. But if we gain that ability,
+			 * this future proofs it. */
+			bufsize = r2->r_window;
+		}
+		assert(bufsize > 0);
+		if ((h2->rxf_flags & H2FF_DATA_END_STREAM) &&
+		    bufsize > len)
+			/* Cap the buffer size when we know this is the
+			 * single data frame. */
+			bufsize = len;
+		CHECK_OBJ_NOTNULL(stv_h2_rxbuf, STEVEDORE_MAGIC);
+		stvbuf = STV_AllocBuf(wrk, stv_h2_rxbuf,
+		    bufsize + sizeof *rxbuf);
+		if (stvbuf == NULL) {
+			VSLb(h2->vsl, SLT_Debug,
+			    "H2: stream %u: Failed to allocate request body"
+			    " buffer",
+			    h2->rxf_stream);
+			Lck_Lock(&h2->sess->mtx);
+			r2->error = H2SE_INTERNAL_ERROR;
+			if (r2->cond)
+				AZ(pthread_cond_signal(r2->cond));
+			Lck_Unlock(&h2->sess->mtx);
+			return (H2SE_INTERNAL_ERROR);
+		}
+		rxbuf = STV_GetBufPtr(stvbuf, &bstest);
+		AN(rxbuf);
+		assert(bstest >= bufsize + sizeof *rxbuf);
+		assert(PAOK(rxbuf));
+		INIT_OBJ(rxbuf, H2_RXBUF_MAGIC);
+		rxbuf->size = bufsize;
+		rxbuf->stvbuf = stvbuf;
+
+		r2->rxbuf = rxbuf;
+
+		Lck_Lock(&h2->sess->mtx);
 	}
+
+	CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC);
+	assert(r2->rxbuf->tail <= r2->rxbuf->head);
+	l = r2->rxbuf->head - r2->rxbuf->tail;
+	assert(l <= r2->rxbuf->size);
+	l = r2->rxbuf->size - l;
+	assert(len <= l); /* Stream window handling ensures this */
+
+	Lck_Unlock(&h2->sess->mtx);
+
+	l = len;
+	head = r2->rxbuf->head;
+	do {
+		l2 = l;
+		if ((head % r2->rxbuf->size) + l2 > r2->rxbuf->size)
+			l2 = r2->rxbuf->size - (head % r2->rxbuf->size);
+		assert(l2 > 0);
+		memcpy(&r2->rxbuf->data[head % r2->rxbuf->size], src, l2);
+		src += l2;
+		head += l2;
+		l -= l2;
+	} while (l > 0);
+
+	Lck_Lock(&h2->sess->mtx);
+
+	/* Charge stream window. The entire frame including padding
+	 * (h2->rxf_len) counts towards the window. The used padding
+	 * bytes will be included in the next connection window update
+	 * sent when the buffer bytes are consumed because that is
+	 * calculated against the available buffer space. */
+	r2->r_window -= h2->rxf_len;
+	r2->rxbuf->head += len;
+	assert(r2->rxbuf->tail <= r2->rxbuf->head);
+	if (h2->rxf_flags & H2FF_DATA_END_STREAM)
+		r2->state = H2_S_CLOS_REM;
+	if (r2->cond)
+		AZ(pthread_cond_signal(r2->cond));
+	Lck_Unlock(&h2->sess->mtx);
 	return (0);
 }
 
@@ -824,8 +1005,11 @@ h2_vfp_body(struct vfp_ctx *vc, struct v
 {
 	struct h2_req *r2;
 	struct h2_sess *h2;
-	unsigned l;
 	enum vfp_status retval;
+	uint64_t l, l2, tail;
+	uint8_t *dst;
+	char buf[4];
+	int i;
 
 	CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
 	CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC);
@@ -834,40 +1018,91 @@ h2_vfp_body(struct vfp_ctx *vc, struct v
 
 	AN(ptr);
 	AN(lp);
-	l = *lp;
-	*lp = 0;
+	assert(*lp >= 0);
 
 	Lck_Lock(&h2->sess->mtx);
+
 	r2->cond = &vc->wrk->cond;
-	while (h2->mailcall != r2 && h2->error == 0 && r2->error == 0)
-		AZ(Lck_CondWait(r2->cond, &h2->sess->mtx, 0));
-	r2->cond = NULL;
-	if (h2->error || r2->error) {
-		retval = VFP_ERROR;
-	} else {
-		assert(h2->mailcall == r2);
-		if (l > h2->rxf_len)
-			l = h2->rxf_len;
-		if (l > 0) {
-			memcpy(ptr, h2->rxf_data, l);
-			h2->rxf_data += l;
-			h2->rxf_len -= l;
-		}
-		*lp = l;
-		if (h2->rxf_len > 0) {
-			/* We ran out of storage: Have VFP call us
-			 * again with a fresh buffer */
-			Lck_Unlock(&h2->sess->mtx);
-			return (VFP_OK);
-		}
-		if (h2->rxf_len == 0 && r2->state >= H2_S_CLOS_REM)
+	while (1) {
+		CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC);
+		if (r2->rxbuf) {
+			assert(r2->rxbuf->tail <= r2->rxbuf->head);
+			l = r2->rxbuf->head - r2->rxbuf->tail;
+		} else
+			l = 0;
+
+		if (h2->error || r2->error)
+			retval = VFP_ERROR;
+		else if (r2->state >= H2_S_CLOS_REM && l <= *lp)
 			retval = VFP_END;
-		else
+		else {
+			if (l > *lp)
+				l = *lp;
 			retval = VFP_OK;
-		h2->mailcall = NULL;
-		AZ(pthread_cond_signal(h2->cond));
+		}
+
+		if (retval != VFP_OK || l > 0)
+			break;
+
+		i = Lck_CondWait(r2->cond, &h2->sess->mtx,
+		    VTIM_real() + SESS_TMO(h2->sess, timeout_idle));
+		if (i == ETIMEDOUT) {
+			retval = VFP_ERROR;
+			break;
+		}
 	}
+	r2->cond = NULL;
+
 	Lck_Unlock(&h2->sess->mtx);
+
+	if (l == 0 || retval == VFP_ERROR) {
+		*lp = 0;
+		return (retval);
+	}
+
+	*lp = l;
+	dst = ptr;
+	tail = r2->rxbuf->tail;
+	do {
+		l2 = l;
+		if ((tail % r2->rxbuf->size) + l2 > r2->rxbuf->size)
+			l2 = r2->rxbuf->size - (tail % r2->rxbuf->size);
+		assert(l2 > 0);
+		memcpy(dst, &r2->rxbuf->data[tail % r2->rxbuf->size], l2);
+		dst += l2;
+		tail += l2;
+		l -= l2;
+	} while (l > 0);
+
+	Lck_Lock(&h2->sess->mtx);
+
+	CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC);
+	r2->rxbuf->tail = tail;
+	assert(r2->rxbuf->tail <= r2->rxbuf->head);
+
+	if (r2->r_window < cache_param->h2_rx_window_low_water &&
+	    r2->state < H2_S_CLOS_REM) {
+		/* l is free buffer space */
+		/* l2 is calculated window increment */
+		l = r2->rxbuf->size - (r2->rxbuf->head - r2->rxbuf->tail);
+		assert(r2->r_window <= l);
+		l2 = cache_param->h2_rx_window_increment;
+		if (r2->r_window + l2 > l)
+			l2 = l - r2->r_window;
+		r2->r_window += l2;
+	} else
+		l2 = 0;
+
+	Lck_Unlock(&h2->sess->mtx);
+
+	if (l2 > 0) {
+		vbe32enc(buf, l2);
+		H2_Send_Get(vc->wrk, h2, r2);
+		H2_Send_Frame(vc->wrk, h2, H2_F_WINDOW_UPDATE, 0, 4,
+		    r2->stream, buf);
+		H2_Send_Rel(h2, r2);
+	}
+
 	return (retval);
 }
 
@@ -876,6 +1111,7 @@ h2_vfp_body_fini(struct vfp_ctx *vc, str
 {
 	struct h2_req *r2;
 	struct h2_sess *h2;
+	struct stv_buffer *stvbuf = NULL;
 
 	CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
 	CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC);
@@ -889,11 +1125,21 @@ h2_vfp_body_fini(struct vfp_ctx *vc, str
 		    H2SE_REFUSED_STREAM);
 		Lck_Lock(&h2->sess->mtx);
 		r2->error = H2SE_REFUSED_STREAM;
-		if (h2->mailcall == r2) {
-			h2->mailcall = NULL;
-			AZ(pthread_cond_signal(h2->cond));
+		Lck_Unlock(&h2->sess->mtx);
+	}
+
+	if (r2->state >= H2_S_CLOS_REM && r2->rxbuf != NULL) {
+		Lck_Lock(&h2->sess->mtx);
+		CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC);
+		if (r2->rxbuf != NULL) {
+			stvbuf = r2->rxbuf->stvbuf;
+			r2->rxbuf = NULL;
 		}
 		Lck_Unlock(&h2->sess->mtx);
+		if (stvbuf != NULL) {
+			STV_FreeBuf(vc->wrk, &stvbuf);
+			AZ(stvbuf);
+		}
 	}
 }
 
--- a/bin/varnishd/http2/cache_http2_session.c
+++ b/bin/varnishd/http2/cache_http2_session.c
@@ -148,6 +148,7 @@ h2_del_sess(struct worker *wrk, struct h
 	CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC);
 	AZ(h2->refcnt);
 	assert(VTAILQ_EMPTY(&h2->streams));
+	AN(reason);
 
 	VHT_Fini(h2->dectbl);
 	AZ(pthread_cond_destroy(h2->winupd_cond));
@@ -355,10 +356,14 @@ h2_new_session(struct worker *wrk, void
 	AZ(h2->htc->priv);
 	h2->htc->priv = h2;
 
+	AZ(wrk->vsl);
+	wrk->vsl = h2->vsl;
+
 	if (req->err_code == H2_OU_MARKER && !h2_ou_session(wrk, h2, req)) {
 		assert(h2->refcnt == 1);
 		h2_del_req(wrk, h2->req0);
 		h2_del_sess(wrk, h2, SC_RX_JUNK);
+		wrk->vsl = NULL;
 		return;
 	}
 	assert(HTC_S_COMPLETE == H2_prism_complete(h2->htc));
@@ -428,8 +433,8 @@ h2_new_session(struct worker *wrk, void
 	h2->cond = NULL;
 	assert(h2->refcnt == 1);
 	h2_del_req(wrk, h2->req0);
-	/* TODO: proper sess close reason */
-	h2_del_sess(wrk, h2, SC_RX_JUNK);
+	h2_del_sess(wrk, h2, h2->error->reason);
+	wrk->vsl = NULL;
 }
 
 struct transport H2_transport = {
--- a/bin/varnishd/mgt/mgt.h
+++ b/bin/varnishd/mgt/mgt.h
@@ -202,6 +202,7 @@ char **MGT_NamedArg(const char *, const
 
 
 /* stevedore_mgt.c */
+extern const char *mgt_stv_h2_rxbuf;
 void STV_Config(const char *spec);
 void STV_Config_Transient(void);
 
--- a/bin/varnishd/mgt/mgt_param.h
+++ b/bin/varnishd/mgt/mgt_param.h
@@ -32,6 +32,9 @@ struct parspec;
 
 typedef int tweak_t(struct vsb *, const struct parspec *, const char *arg);
 
+/* Sentinel for the arg position of tweak_t to ask for JSON formatting. */
+extern const char * const JSON_FMT;
+
 struct parspec {
 	const char	*name;
 	tweak_t		*func;
@@ -64,6 +67,7 @@ tweak_t tweak_timeout;
 tweak_t tweak_uint;
 tweak_t tweak_vsl_buffer;
 tweak_t tweak_vsl_reclen;
+tweak_t tweak_h2_rxbuf_storage;
 
 int tweak_generic_uint(struct vsb *vsb, volatile unsigned *dest,
     const char *arg, const char *min, const char *max);
--- a/bin/varnishd/mgt/mgt_param_bits.c
+++ b/bin/varnishd/mgt/mgt_param_bits.c
@@ -135,6 +135,7 @@ tweak_vsl_mask(struct vsb *vsb, const st
 			(void)bit(mgt_param.vsl_mask, SLT_ObjProtocol, BSET);
 			(void)bit(mgt_param.vsl_mask, SLT_ObjReason, BSET);
 			(void)bit(mgt_param.vsl_mask, SLT_ObjStatus, BSET);
+			(void)bit(mgt_param.vsl_mask, SLT_Debug, BSET);
 		} else {
 			return (bit_tweak(vsb, mgt_param.vsl_mask,
 			    SLT__Reserved, arg, VSL_tags,
--- a/bin/varnishd/mgt/mgt_param_tbl.c
+++ b/bin/varnishd/mgt/mgt_param_tbl.c
@@ -152,6 +152,12 @@ struct parspec mgt_parspec[] = {
 		0,
 		"255b",
 		"bytes" },
+	{ "h2_rxbuf_storage", tweak_h2_rxbuf_storage, &mgt_stv_h2_rxbuf,
+		NULL, NULL,
+		"The name of the storage backend that HTTP/2 receive buffers"
+		" should be allocated from.",
+		MUST_RESTART,
+		"Transient", "" },
 
 	{ NULL, NULL, NULL }
 };
--- a/bin/varnishd/mgt/mgt_param_tweak.c
+++ b/bin/varnishd/mgt/mgt_param_tweak.c
@@ -40,9 +40,12 @@
 #include "mgt/mgt.h"
 
 #include "mgt/mgt_param.h"
+#include "storage/storage.h"
 #include "vav.h"
 #include "vnum.h"
 
+const char * const JSON_FMT = (const char *)&JSON_FMT;
+
 /*--------------------------------------------------------------------
  * Generic handling of double typed parameters
  */
@@ -424,3 +427,43 @@ tweak_poolparam(struct vsb *vsb, const s
 	}
 	return (retval);
 }
+
+/*--------------------------------------------------------------------
+ * Tweak 'h2_rxbuf_storage'
+ *
+ */
+
+int v_matchproto_(tweak_t)
+tweak_h2_rxbuf_storage(struct vsb *vsb, const struct parspec *par,
+    const char *arg)
+{
+	struct stevedore *stv;
+
+	/* XXX: If we want to remove the MUST_RESTART flag from the
+	 * h2_rxbuf_storage parameter, we could have a mechanism here
+	 * that when the child is running calls out through CLI to change
+	 * the stevedore being used. */
+
+	if (arg == NULL || arg == JSON_FMT)
+		return (tweak_string(vsb, par, arg));
+
+	if (!strcmp(arg, "Transient")) {
+		/* Always allow setting to the special name
+		 * "Transient". There will always be a stevedore with this
+		 * name, but it may not have been configured at the time
+		 * this is called. */
+	} else {
+		/* Only allow setting the value to a known configured
+		 * stevedore */
+		STV_Foreach(stv) {
+			CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+			if (!strcmp(stv->ident, arg))
+				break;
+		}
+		if (stv == NULL) {
+			VSB_printf(vsb, "unknown storage backend '%s'", arg);
+			return (-1);
+		}
+	}
+	return (tweak_string(vsb, par, arg));
+}
--- a/bin/varnishd/storage/mgt_stevedore.c
+++ b/bin/varnishd/storage/mgt_stevedore.c
@@ -52,6 +52,8 @@ static VTAILQ_HEAD(, stevedore) stevedor
 
 struct stevedore *stv_transient;
 
+const char *mgt_stv_h2_rxbuf;
+
 /*--------------------------------------------------------------------*/
 
 int
@@ -219,7 +221,6 @@ STV_Config(const char *spec)
 void
 STV_Config_Transient(void)
 {
-
 	ASSERT_MGT();
 
 	VCLS_AddFunc(mgt_cls, MCF_AUTH, cli_stv);
--- a/bin/varnishd/storage/stevedore.c
+++ b/bin/varnishd/storage/stevedore.c
@@ -41,6 +41,8 @@
 #include "storage/storage.h"
 #include "vrt_obj.h"
 
+extern const char *mgt_stv_h2_rxbuf;
+struct stevedore *stv_h2_rxbuf = NULL;
 
 static pthread_mutex_t stv_mtx;
 
@@ -95,6 +97,74 @@ STV_NewObject(struct worker *wrk, struct
 
 /*-------------------------------------------------------------------*/
 
+struct stv_buffer {
+	unsigned		magic;
+#define STV_BUFFER_MAGIC	0xf39cb6c2
+	const struct stevedore	*stv;
+	size_t			size;
+	uintptr_t		priv;
+};
+
+struct stv_buffer *
+STV_AllocBuf(struct worker *wrk, const struct stevedore *stv, size_t size)
+{
+	struct stv_buffer *stvbuf;
+	uint8_t *buf;
+	uintptr_t priv = 0;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+
+	if (size == 0)
+		return (NULL);
+	if (stv->allocbuf == NULL)
+		return (NULL);
+
+	buf = stv->allocbuf(wrk, stv, size + PRNDUP(sizeof *stvbuf), &priv);
+	if (buf == NULL)
+		return (NULL);
+
+	assert(PAOK(buf));
+	stvbuf = (void *)buf;
+	INIT_OBJ(stvbuf, STV_BUFFER_MAGIC);
+	stvbuf->stv = stv;
+	stvbuf->priv = priv;
+	stvbuf->size = size;
+
+	return (stvbuf);
+}
+
+void
+STV_FreeBuf(struct worker *wrk, struct stv_buffer **pstvbuf)
+{
+	struct stv_buffer *stvbuf;
+	const struct stevedore *stv;
+	uintptr_t priv;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	TAKE_OBJ_NOTNULL(stvbuf, pstvbuf, STV_BUFFER_MAGIC);
+	CHECK_OBJ_NOTNULL(stvbuf->stv, STEVEDORE_MAGIC);
+
+	stv = stvbuf->stv;
+	priv = stvbuf->priv;
+	CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+	ZERO_OBJ(stvbuf, sizeof *stvbuf);
+
+	AN(stv->freebuf);
+	stv->freebuf(wrk, stv, priv);
+}
+
+void *
+STV_GetBufPtr(struct stv_buffer *stvbuf, size_t *psize)
+{
+	CHECK_OBJ_NOTNULL(stvbuf, STV_BUFFER_MAGIC);
+	if (psize)
+		*psize = stvbuf->size;
+	return (&stvbuf[1]);
+}
+
+/*-------------------------------------------------------------------*/
+
 void
 STV_open(void)
 {
@@ -103,13 +173,23 @@ STV_open(void)
 
 	ASSERT_CLI();
 	AZ(pthread_mutex_init(&stv_mtx, NULL));
+
+	/* This string was prepared for us before the fork, and should
+	 * point to a configured stevedore. */
+	AN(mgt_stv_h2_rxbuf);
+
+	stv_h2_rxbuf = NULL;
 	STV_Foreach(stv) {
+		CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
 		bprintf(buf, "storage.%s", stv->ident);
 		stv->vclname = strdup(buf);
 		AN(stv->vclname);
 		if (stv->open != NULL)
 			stv->open(stv);
+		if (!strcmp(stv->ident, mgt_stv_h2_rxbuf))
+			stv_h2_rxbuf = stv;
 	}
+	AN(stv_h2_rxbuf);
 }
 
 void
--- a/bin/varnishd/storage/storage.h
+++ b/bin/varnishd/storage/storage.h
@@ -73,6 +73,10 @@ typedef void storage_banexport_f(const s
     unsigned len);
 typedef void storage_panic_f(struct vsb *vsb, const struct objcore *oc);
 
+typedef void *storage_allocbuf_f(struct worker *, const struct stevedore *,
+    size_t size, uintptr_t *ppriv);
+typedef void storage_freebuf_f(struct worker *, const struct stevedore *,
+    uintptr_t priv);
 
 typedef struct object *sml_getobj_f(struct worker *, struct objcore *);
 typedef struct storage *sml_alloc_f(const struct stevedore *, size_t size);
@@ -100,6 +104,8 @@ struct stevedore {
 	storage_baninfo_f	*baninfo;
 	storage_banexport_f	*banexport;
 	storage_panic_f		*panic;
+	storage_allocbuf_f	*allocbuf;
+	storage_freebuf_f	*freebuf;
 
 	/* Only if SML is used */
 	sml_alloc_f		*sml_alloc;
@@ -124,6 +130,7 @@ struct stevedore {
 };
 
 extern struct stevedore *stv_transient;
+extern struct stevedore *stv_h2_rxbuf;
 
 /*--------------------------------------------------------------------*/
 
--- a/bin/varnishd/storage/storage_file.c
+++ b/bin/varnishd/storage/storage_file.c
@@ -496,6 +496,8 @@ const struct stevedore smf_stevedore = {
 	.allocobj	=	SML_allocobj,
 	.panic		=	SML_panic,
 	.methods	=	&SML_methods,
+	.allocbuf	=	SML_AllocBuf,
+	.freebuf	=	SML_FreeBuf,
 };
 
 #ifdef INCLUDE_TEST_DRIVER
--- a/bin/varnishd/storage/storage_malloc.c
+++ b/bin/varnishd/storage/storage_malloc.c
@@ -230,4 +230,6 @@ const struct stevedore sma_stevedore = {
 	.methods	=	&SML_methods,
 	.var_free_space =	sma_free_space,
 	.var_used_space =	sma_used_space,
+	.allocbuf	=	SML_AllocBuf,
+	.freebuf	=	SML_FreeBuf,
 };
--- a/bin/varnishd/storage/storage_simple.c
+++ b/bin/varnishd/storage/storage_simple.c
@@ -45,6 +45,10 @@
 /*-------------------------------------------------------------------*/
 
 static struct storage *
+objallocwithnuke(struct worker *, const struct stevedore *, size_t size,
+    int flags);
+
+static struct storage *
 sml_stv_alloc(const struct stevedore *stv, size_t size, int flags)
 {
 	struct storage *st;
@@ -159,6 +163,39 @@ SML_allocobj(struct worker *wrk, const s
 	return (1);
 }
 
+void * v_matchproto_(storage_allocbuf_t)
+SML_AllocBuf(struct worker *wrk, const struct stevedore *stv, size_t size,
+    uintptr_t *ppriv)
+{
+	struct storage *st;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+	AN(ppriv);
+
+	if (size > UINT_MAX)
+		return (NULL);
+	st = objallocwithnuke(wrk, stv, size, 0);
+	if (st == NULL)
+		return (NULL);
+	assert(st->space >= size);
+	st->len = size;
+	*ppriv = (uintptr_t)st;
+	return (st->ptr);
+}
+
+void v_matchproto_(storage_freebuf_t)
+SML_FreeBuf(struct worker *wrk, const struct stevedore *stv, uintptr_t priv)
+{
+	struct storage *st;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+
+	CAST_OBJ_NOTNULL(st, (void *)priv, STORAGE_MAGIC);
+	sml_stv_free(stv, st);
+}
+
 /*---------------------------------------------------------------------
  */
 
--- a/bin/varnishd/storage/storage_simple.h
+++ b/bin/varnishd/storage/storage_simple.h
@@ -66,6 +66,9 @@ extern const struct obj_methods SML_meth
 
 struct object *SML_MkObject(const struct stevedore *, struct objcore *,
     void *ptr);
+void *SML_AllocBuf(struct worker *, const struct stevedore *, size_t,
+    uintptr_t *);
+void SML_FreeBuf(struct worker *, const struct stevedore *, uintptr_t);
 
 storage_allocobj_f SML_allocobj;
 storage_panic_f SML_panic;
--- a/bin/varnishtest/tests/r02305.vtc
+++ b/bin/varnishtest/tests/r02305.vtc
@@ -1,13 +1,14 @@
 varnishtest "#2305: h/2 reembark with a request body"
 
 barrier b1 cond 2
-barrier b2 sock 2
+barrier b2 cond 2
+barrier b3 cond 2
 
 server s1 {
 	rxreq
 	expect req.url == "/"
 	barrier b1 sync
-	delay 2
+	barrier b2 sync
 	txresp
 } -start
 
@@ -16,15 +17,9 @@ varnish v1 -cliok "param.set debug +sync
 varnish v1 -cliok "param.set debug +waitinglist"
 
 varnish v1 -vcl+backend {
-	import vtc;
 	sub vcl_recv {
 		return (hash);
 	}
-	sub vcl_deliver {
-		if (req.http.sync) {
-			vtc.barrier_sync("${b2_sock}");
-		}
-	}
 } -start
 
 client c1 {
@@ -32,14 +27,13 @@ client c1 {
 		txreq
 		rxresp
 		expect resp.status == 200
+		barrier b3 sync
 	} -start
 	stream 3 {
 		barrier b1 sync
-		txreq -req POST -hdr sync 1 -body "foo"
-		rxwinup
-		# barrier b2 is here to make HEADERS vs WINDOW_UPDATE
-		# less racy
+		txreq -req POST -body "foo"
 		barrier b2 sync
+		barrier b3 sync
 		rxresp
 		expect resp.status == 200
 	} -run
--- a/bin/varnishtest/tests/r02679.vtc
+++ b/bin/varnishtest/tests/r02679.vtc
@@ -16,12 +16,12 @@ varnish v1 -vcl+backend {
 
 varnish v1 -cliok "param.set feature +http2"
 varnish v1 -cliok "param.set h2_rx_window_low_water 65535"
+varnish v1 -cliok "param.reset h2_initial_window_size"
 
 client c1 {
 	stream 1 {
 		txreq -req POST -hdr "content-length" "31469" -nostrend
 		txdata -datalen 1550 -nostrend
-		rxwinup
 		txdata -datalen 16000 -nostrend
 		txdata -datalen 13919
 		rxresp
--- a/bin/varnishtest/tests/t02000.vtc
+++ b/bin/varnishtest/tests/t02000.vtc
@@ -33,6 +33,7 @@ client c1 {
 } -run
 
 varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
 
 client c1 {
 	stream 1 {
@@ -67,8 +68,10 @@ varnish v1 -expect MEMPOOL.sess1.live ==
 
 process p1 -stop
 # shell {cat ${tmpdir}/vlog}
-shell -match {1001 H2TxHdr        c \[000006040000000000\]} \
-	{cat ${tmpdir}/vlog}
+# SETTINGS with default initial window size
+shell -match {1001 H2TxHdr        c \[000006040000000000\]} {
+	cat ${tmpdir}/vlog
+}
 
 # While we're here, test sess.xid over H2 as well
 
--- a/bin/varnishtest/tests/t02005.vtc
+++ b/bin/varnishtest/tests/t02005.vtc
@@ -1,7 +1,5 @@
 varnishtest "H2 POST"
 
-barrier b1 cond 2
-
 barrier b2 sock 2
 barrier b3 sock 2
 
@@ -9,7 +7,6 @@ server s1 {
 	rxreq
 	expect req.http.content-length == 7
 	expect req.http.transfer-encoding == <undef>
-	barrier b1 sync
 	txresp -hdr "Content-Type: text/plain" -body response
 
 	rxreq
@@ -30,7 +27,7 @@ varnish v1 -cliok "param.set debug +sync
 
 logexpect l1 -v v1 -g raw {
 	expect	* 1001 ReqAcct	"80 7 87 106 8 114"
-	expect	* 1000 ReqAcct	"45 8 53 72 22 94"
+	expect	* 1000 ReqAcct	"45 8 53 54 20 74"
 } -start
 
 client c1 {
@@ -38,14 +35,8 @@ client c1 {
 		txping
 		rxping
 	} -run
-	stream 0 {
-		rxwinup
-	} -start
 	stream 1 {
 		txreq -req POST -hdr content-type text/plain -hdr content-length 7 -body request
-
-		rxwinup
-		barrier b1 sync
 		# First, HTTP checks
 		rxresp
 		expect resp.http.content-Type == "text/plain"
@@ -53,21 +44,18 @@ client c1 {
 		# Then, payload checks
 		expect resp.body == response
 	} -run
-	stream 0 -wait
 } -run
 
-client c1 {
+client c2 {
 	stream 0 {
 		barrier b2 sync
 		delay 1
 		barrier b3 sync
-		rxwinup
 	} -start
 	stream 1 {
 		txreq -url "/a" -req POST -nostrend
 		txdata -datalen 100
 		rxresp
-		rxwinup
 		expect resp.status == 503
 	} -run
 	stream 3 {
--- a/bin/varnishtest/tests/t02006.vtc
+++ b/bin/varnishtest/tests/t02006.vtc
@@ -1,13 +1,10 @@
 varnishtest "H2 POST w/ 100 Continue"
 
-barrier b1 cond 2
-
 server s1 {
 	rxreq
 	expect req.http.content-length == <undef>
 	expect req.http.transfer-encoding == chunked
 	expect req.proto == HTTP/1.1
-	barrier b1 sync
 	txresp -hdr "Content-Type: text/plain" -body response
 } -start
 
@@ -31,8 +28,6 @@ client c1 {
 		txdata \
 			-data request
 
-		rxwinup
-		barrier b1 sync
 		rxresp
 		expect resp.status == 200
 		expect resp.http.content-Type == "text/plain"
--- a/bin/varnishtest/tests/t02007.vtc
+++ b/bin/varnishtest/tests/t02007.vtc
@@ -1,7 +1,5 @@
 varnishtest "H2 Huge response headers"
 
-barrier b1 sock 2
-
 server s1 {
 	rxreq
 	expect req.proto == HTTP/1.1
@@ -25,14 +23,7 @@ varnish v1 -cliok "param.set feature +ht
 varnish v1 -cliok "param.set debug +syncvsl"
 varnish v1 -cliok "param.set debug +h2_nocheck"
 
-varnish v1 -vcl+backend {
-	import vtc;
-	sub vcl_deliver {
-		if (req.url == "/1") {
-			vtc.barrier_sync("${b1_sock}");
-		}
-	}
-} -start
+varnish v1 -vcl+backend {} -start
 
 client c1 {
 	stream 0 {
@@ -61,8 +52,6 @@ client c1 {
 		txdata \
 			-data request
 
-		rxwinup
-		barrier b1 sync
 		rxresp
 		expect resp.status == 200
 		expect resp.http.content-Type == "text/plain"
--- a/bin/varnishtest/tests/t02014.vtc
+++ b/bin/varnishtest/tests/t02014.vtc
@@ -2,7 +2,13 @@ varnishtest "Exercise h/2 sender flow co
 
 barrier b1 sock 3 -cyclic
 
-server s1 -repeat 2 {
+server s1 {
+	rxreq
+	txresp -bodylen 66300
+} -start
+
+server s2 {
+	non_fatal
 	rxreq
 	txresp -bodylen 66300
 } -start
@@ -10,6 +16,12 @@ server s1 -repeat 2 {
 varnish v1 -vcl+backend {
 	import vtc;
 
+	sub vcl_backend_fetch {
+		if (bereq.method == "POST") {
+			set bereq.backend = s2;
+		}
+	}
+
 	sub vcl_deliver {
 		vtc.barrier_sync("${b1_sock}");
 	}
@@ -17,6 +29,7 @@ varnish v1 -vcl+backend {
 
 varnish v1 -cliok "param.set debug +syncvsl"
 varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
 
 client c1 {
 	stream 0 {
@@ -46,7 +59,7 @@ client c1 {
 	stream 0 -wait
 } -run
 
-client c1 {
+client c2 {
 	stream 0 {
 		barrier b1 sync
 	} -start
@@ -62,7 +75,7 @@ client c1 {
 	stream 0 -wait
 } -run
 
-client c1 {
+client c3 {
 	stream 0 {
 		barrier b1 sync
 		barrier b1 sync
@@ -78,7 +91,6 @@ client c1 {
 	stream 1 {
 		txreq -req "POST" -nostrend
 		txdata -data "ok"
-		rxwinup
 		txdata -data "fail"
 		rxrst
 		expect rst.err == STREAM_CLOSED
--- /dev/null
+++ b/bin/varnishtest/tests/t02017.vtc
@@ -0,0 +1,46 @@
+varnishtest "H/2 stream data head of line blocking"
+
+barrier b1 cond 2
+barrier b2 cond 2
+barrier b3 cond 2
+barrier b4 cond 2
+
+server s1 {
+	rxreq
+	barrier b4 sync
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+	sub vcl_recv {
+		if (req.url == "/2") {
+			return (synth(700));
+		}
+	}
+} -start
+
+varnish v1 -cliok "param.set feature +http2"
+
+client c1 {
+	stream 1 {
+		txreq -req GET -url /1 -hdr "content-length" "1" -nostrend
+		barrier b1 sync
+		barrier b2 sync
+		txdata -data 1
+#		rxwinup
+		barrier b3 sync
+		rxresp
+		expect resp.status == 200
+	} -start
+	stream 3 {
+		barrier b1 sync
+		txreq -req GET -url /2 -hdr "content-length" "1" -nostrend
+		barrier b2 sync
+		barrier b3 sync
+		txdata -data 2
+#		rxwinup
+		rxresp
+		expect resp.status == 700
+		barrier b4 sync
+	} -start
+} -run
--- /dev/null
+++ b/bin/varnishtest/tests/t02018.vtc
@@ -0,0 +1,36 @@
+varnishtest "H/2 stream multiple buffer exhaustion"
+
+server s1 {
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+} -start
+
+varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
+varnish v1 -cliok "param.reset h2_rx_window_low_water"
+
+client c1 {
+	stream 1 {
+		txreq -req GET -url /1 -hdr "content-length" "131072" -nostrend
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384
+		rxresp
+		expect resp.status == 200
+	} -start
+} -run
--- /dev/null
+++ b/bin/varnishtest/tests/t02019.vtc
@@ -0,0 +1,43 @@
+varnishtest "H/2 stream early buffer exhaustion"
+
+barrier b1 sock 2
+
+server s1 {
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+	import vtc;
+	sub vcl_recv {
+		vtc.barrier_sync("${b1_sock}");
+		vtc.sleep(0.1s);
+	}
+} -start
+
+varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
+varnish v1 -cliok "param.reset h2_rx_window_low_water"
+
+client c1 {
+	stream 1 {
+		txreq -req POST -url /1 -hdr "content-length" "131072" -nostrend
+		txdata -datalen 16384 -nostrend
+		txdata -datalen 16384 -nostrend
+		txdata -datalen 16384 -nostrend
+		txdata -datalen 16383 -nostrend
+		barrier b1 sync
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 16384 -nostrend
+		rxwinup
+		txdata -datalen 1
+		rxresp
+		expect resp.status == 200
+	} -run
+} -run
--- /dev/null
+++ b/bin/varnishtest/tests/t02020.vtc
@@ -0,0 +1,66 @@
+varnishtest "H/2 received data frames with padding"
+
+barrier b1 sock 2
+
+server s1 {
+	rxreq
+	expect req.url == /1
+	expect req.body == abcde
+	txresp
+	rxreq
+	txresp
+	rxreq
+	txresp
+	expect req.body == a
+} -start
+
+varnish v1 -vcl+backend {
+	import vtc;
+	sub vcl_recv {
+		if (req.url == "/3") {
+			vtc.barrier_sync("${b1_sock}");
+		}
+	}
+} -start
+
+varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
+varnish v1 -cliok "param.reset h2_rx_window_low_water"
+varnish v1 -cliok "param.set debug +syncvsl"
+
+client c1 {
+	stream 1 {
+		txreq -req POST -url /1 -hdr "content-length" "5" -nostrend
+		txdata -data abcde -padlen 1
+		rxresp
+		expect resp.status == 200
+	} -run
+
+	stream 3 {
+		txreq -req POST -url /3 -hdr "content-length" "131072" -nostrend
+		txdata -datalen 16300 -padlen 83 -nostrend
+		txdata -datalen 16300 -padlen 83 -nostrend
+		txdata -datalen 16300 -padlen 83 -nostrend
+		txdata -datalen 16300 -padlen 82 -nostrend
+		barrier b1 sync
+		rxwinup
+		txdata -datalen 16300 -padlen 83 -nostrend
+		rxwinup
+		txdata -datalen 16300 -padlen 83 -nostrend
+		rxwinup
+		txdata -datalen 16300 -padlen 83 -nostrend
+		rxwinup
+		txdata -datalen 16300 -padlen 83 -nostrend
+		rxwinup
+		txdata -datalen 672
+		rxresp
+		expect resp.status == 200
+	} -run
+
+	stream 5 {
+		txreq -req POST -url /5 -nostrend
+		txdata -data a -padlen 255
+		rxresp
+		expect resp.status == 200
+	} -run
+} -run
--- /dev/null
+++ b/bin/varnishtest/tests/t02021.vtc
@@ -0,0 +1,36 @@
+varnishtest "H/2 data frame padding exhaust window"
+
+server s1 {
+	rxreq
+	expect req.body == abcde
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+} -start
+
+varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
+varnish v1 -cliok "param.reset h2_rx_window_low_water"
+
+client c1 {
+	stream 1 {
+		txreq -req POST -url /1 -hdr "content-length" "5" -nostrend
+
+		# Fill 65535 bytes of stream window using padding only
+		# Note that each frame consumes 256 bytes of window (padlen + 1)
+
+		loop 255 {
+			txdata -padlen 255 -nostrend
+		}
+		txdata -padlen 254 -nostrend
+
+		# Here the window have been exhausted, so we should receive
+		# a window update
+		rxwinup
+
+		txdata -data abcde
+		rxresp
+		expect resp.status == 200
+	} -run
+} -run
--- /dev/null
+++ b/bin/varnishtest/tests/t02022.vtc
@@ -0,0 +1,86 @@
+varnishtest "Test non-transient rxbuf stevedore with LRU nuking"
+
+barrier b1 sock 2 -cyclic
+
+server s1 {
+	rxreq
+	txresp -body asdf
+	rxreq
+	txresp -bodylen 1048000
+	rxreq
+	txresp -body ASDF
+} -start
+
+varnish v1 -arg "-srxbuf=malloc,1m -smain=malloc,1m" -vcl+backend {
+	import vtc;
+	sub vcl_recv {
+		if (req.url == "/1") {
+			vtc.barrier_sync("${b1_sock}");
+		}
+	}
+	sub vcl_backend_response {
+		if (bereq.url == "/2") {
+			set beresp.storage = storage.rxbuf;
+		} else {
+			set beresp.storage = storage.main;
+		}
+	}
+}
+
+varnish v1 -cliok "param.set feature +http2"
+varnish v1 -cliok "param.reset h2_initial_window_size"
+varnish v1 -cliok "param.reset h2_rx_window_low_water"
+varnish v1 -cliok "param.set h2_rxbuf_storage rxbuf"
+varnish v1 -cliok "param.set debug +syncvsl"
+
+varnish v1 -start
+
+client c1 {
+	stream 1 {
+		txreq -req POST -url /1 -hdr "content-length" "2048" -nostrend
+		txdata -datalen 2048
+		rxresp
+		expect resp.status == 200
+	} -start
+} -start
+
+varnish v1 -expect SMA.rxbuf.g_bytes >= 2048
+varnish v1 -expect SMA.Transient.g_bytes == 0
+varnish v1 -expect MAIN.n_lru_nuked == 0
+
+barrier b1 sync
+client c1 -wait
+
+varnish v1 -expect SMA.rxbuf.g_bytes == 0
+varnish v1 -expect SMA.Transient.g_bytes == 0
+varnish v1 -expect MAIN.n_lru_nuked == 0
+
+client c2 {
+	txreq -url /2
+	rxresp
+	expect resp.status == 200
+	expect resp.bodylen == 1048000
+} -run
+
+varnish v1 -expect SMA.rxbuf.g_bytes >= 1048000
+varnish v1 -expect MAIN.n_lru_nuked == 0
+
+client c3 {
+	stream 1 {
+		txreq -req POST -url /1 -hdr "content-length" "2048" -nostrend
+		txdata -datalen 2048
+		rxresp
+		expect resp.status == 200
+	} -start
+} -start
+
+varnish v1 -expect SMA.rxbuf.g_bytes >= 2048
+varnish v1 -expect SMA.rxbuf.g_bytes < 3000
+varnish v1 -expect SMA.Transient.g_bytes == 0
+varnish v1 -expect MAIN.n_lru_nuked == 1
+
+barrier b1 sync
+client c3 -wait
+
+varnish v1 -expect SMA.rxbuf.g_bytes == 0
+varnish v1 -expect SMA.Transient.g_bytes == 0
--- a/bin/varnishtest/vtc.c
+++ b/bin/varnishtest/vtc.c
@@ -299,6 +299,7 @@ parse_string(const char *spec, const str
 	char *e, *p, *q, *f, *buf;
 	int nest_brace;
 	int tn;
+	unsigned n, m;
 	const struct cmds *cp;
 
 	AN(spec);
@@ -406,6 +407,24 @@ parse_string(const char *spec, const str
 			}
 		}
 
+
+/* SECTION: loop loop
+ *
+ * loop NUMBER STRING
+ *         Process STRING as a specification, NUMBER times.
+ *
+ * This works inside all specification strings
+ */
+
+		if (!strcmp(token_s[0], "loop")) {
+			n = strtoul(token_s[1], NULL, 0);
+			for (m = 0; m < n; m++) {
+				vtc_log(vl, 4, "Loop #%u", m);
+				parse_string(token_s[2], cmd, priv, vl);
+			}
+			continue;
+		}
+
 		for (cp = cmd; cp->name != NULL; cp++)
 			if (!strcmp(token_s[0], cp->name))
 				break;
--- a/bin/varnishtest/vtc_http.c
+++ b/bin/varnishtest/vtc_http.c
@@ -1613,29 +1613,6 @@ cmd_http_accept(CMD_ARGS)
 	vtc_log(vl, 3, "Accepted socket fd is %d", hp->fd);
 }
 
-/* SECTION: client-server.spec.loop
- *
- * loop NUMBER STRING
- *         Process STRING as a specification, NUMBER times.
- */
-
-static void
-cmd_http_loop(CMD_ARGS)
-{
-	struct http *hp;
-	unsigned n, m;
-
-	CAST_OBJ_NOTNULL(hp, priv, HTTP_MAGIC);
-	AN(av[1]);
-	AN(av[2]);
-	AZ(av[3]);
-	n = strtoul(av[1], NULL, 0);
-	for (m = 1 ; m <= n; m++) {
-		vtc_log(vl, 4, "Loop #%u", m);
-		parse_string(av[2], cmd, hp, vl);
-	}
-}
-
 /* SECTION: client-server.spec.fatal
  *
  * fatal|non_fatal
@@ -1829,7 +1806,6 @@ const struct cmds http_cmds[] = {
 
 	/* spec */
 	CMD_HTTP(fatal)
-	CMD_HTTP(loop)
 	CMD_HTTP(non_fatal)
 
 	/* body */
--- a/bin/varnishtest/vtc_http2.c
+++ b/bin/varnishtest/vtc_http2.c
@@ -51,7 +51,7 @@
 #define BUF_SIZE	(1024*2048)
 
 static const char *const h2_errs[] = {
-#define H2_ERROR(n,v,sc,t) [v] = #n,
+#define H2_ERROR(n,v,sc,r,t) [v] = #n,
 #include <tbl/h2_error.h>
 	NULL
 };
@@ -1204,7 +1204,7 @@ cmd_var_resolve(const struct stream *s,
 		else
 			return (NULL);
 	}
-#define H2_ERROR(U,v,sc,t) \
+#define H2_ERROR(U,v,sc,r,t) \
 	if (!strcmp(spec, #U)) { return (#v); }
 #include "tbl/h2_error.h"
 	return (spec);
@@ -1565,8 +1565,8 @@ cmd_tx11obj(CMD_ARGS)
 			exclusive_stream_dependency(s);
 	}
 	if (pad) {
-		if (strlen(pad) >= 128)
-			vtc_fatal(vl, "Padding is limited to 128 bytes");
+		if (strlen(pad) > 255)
+			vtc_fatal(vl, "Padding is limited to 255 bytes");
 		f.flags |= PADDED;
 		assert(f.size + strlen(pad) < BUF_SIZE);
 		memmove(buf + 1, buf, f.size);
@@ -1659,8 +1659,8 @@ cmd_txdata(CMD_ARGS)
 
 	if (pad) {
 		f.flags |= PADDED;
-		if (strlen(pad) >= 128)
-			vtc_fatal(vl, "Padding is limited to 128 bytes");
+		if (strlen(pad) > 255)
+			vtc_fatal(vl, "Padding is limited to 255 bytes");
 		data = malloc( 1 + strlen(body) + strlen(pad));
 		AN(data);
 		*((uint8_t *)data) = strlen(pad);
@@ -2243,9 +2243,11 @@ cmd_rxmsg(CMD_ARGS)
 	else
 		ONLY_H2_CLIENT(s->hp, av);
 
-	f = rxstuff(s);
-	if (!f)
-		return;
+	do {
+		f = rxstuff(s);
+		if (!f)
+			return;
+	} while (f->type == TYPE_WINDOW_UPDATE);
 
 	rcv++;
 	CHKFRAME(f->type, TYPE_HEADERS, rcv, *av);
--- a/bin/varnishtest/vtc_log.c
+++ b/bin/varnishtest/vtc_log.c
@@ -245,7 +245,7 @@ vtc_hexdump(struct vtclog *vl, int lvl,
 	else {
 		for (l = 0; l < len; l++, ss++) {
 			if (l > 512) {
-				VSB_printf(vl->vsb, "...");
+				VSB_cat(vl->vsb, "...");
 				break;
 			}
 			if (nl) {
@@ -254,13 +254,13 @@ vtc_hexdump(struct vtclog *vl, int lvl,
 			}
 			VSB_printf(vl->vsb, " %02x", *ss);
 			if ((l & 0xf) == 0xf) {
-				VSB_printf(vl->vsb, "\n");
+				VSB_cat(vl->vsb, "\n");
 				nl = 1;
 			}
 		}
 	}
 	if (!nl)
-		VSB_printf(vl->vsb, "\n");
+		VSB_cat(vl->vsb, "\n");
 	REL_VL(vl);
 	if (lvl == 0)
 		vtc_logfail();
--- a/bin/varnishtest/vtc_main.c
+++ b/bin/varnishtest/vtc_main.c
@@ -52,6 +52,17 @@
 #include "vtim.h"
 #include "vct.h"
 
+struct buf {
+	unsigned		magic;
+#define BUF_MAGIC		0x39d1258a
+	VTAILQ_ENTRY(buf)	list;
+	char			*buf;
+	struct vsb		*diag;
+	size_t			bufsiz;
+};
+
+static VTAILQ_HEAD(, buf) free_bufs = VTAILQ_HEAD_INITIALIZER(free_bufs);
+
 struct vtc_tst {
 	unsigned		magic;
 #define TST_MAGIC		0x618d8b88
@@ -68,14 +79,13 @@ struct vtc_job {
 	pid_t			child;
 	struct vev		*ev;
 	struct vev		*evt;
-	char			*buf;
+	struct buf		*bp;
 	char			*tmpdir;
-	unsigned		bufsiz;
 	double			t0;
-	struct vsb		*diag;
 	int			killed;
 };
 
+
 int iflg = 0;
 unsigned vtc_maxdur = 60;
 static unsigned vtc_bufsiz = 1024 * 1024;
@@ -95,6 +105,43 @@ char *vmod_path = NULL;
 struct vsb *params_vsb = NULL;
 int leave_temp;
 int vtc_witness = 0;
+static struct vsb *cbvsb;
+
+static int cleaner_fd = -1;
+static pid_t cleaner_pid;
+
+static struct buf *
+get_buf(void)
+{
+	struct buf *bp;
+
+	bp = VTAILQ_FIRST(&free_bufs);
+	CHECK_OBJ_ORNULL(bp, BUF_MAGIC);
+	if (bp != NULL) {
+		VTAILQ_REMOVE(&free_bufs, bp, list);
+		VSB_clear(bp->diag);
+	} else {
+		ALLOC_OBJ(bp, BUF_MAGIC);
+		AN(bp);
+		bp->bufsiz = vtc_bufsiz;
+		bp->buf = mmap(NULL, bp->bufsiz, PROT_READ|PROT_WRITE,
+		    MAP_ANON | MAP_SHARED, -1, 0);
+		assert(bp->buf != MAP_FAILED);
+		bp->diag = VSB_new_auto();
+		AN(bp->diag);
+	}
+	memset(bp->buf, 0, bp->bufsiz);
+	return (bp);
+}
+
+static void
+rel_buf(struct buf **bp)
+{
+	CHECK_OBJ_NOTNULL(*bp, BUF_MAGIC);
+
+	VTAILQ_INSERT_HEAD(&free_bufs, (*bp), list);
+	*bp = NULL;
+}
 
 /**********************************************************************
  * Parse a -D option argument into a name/val pair, and insert
@@ -127,6 +174,7 @@ usage(void)
 #define FMT "    %-28s # %s\n"
 	fprintf(stderr, FMT, "-b size",
 	    "Set internal buffer size (default: 1M)");
+	fprintf(stderr, FMT, "-C", "Use cleaner subprocess");
 	fprintf(stderr, FMT, "-D name=val", "Define macro");
 	fprintf(stderr, FMT, "-i", "Find varnish binaries in build tree");
 	fprintf(stderr, FMT, "-j jobs", "Run this many tests in parallel");
@@ -143,6 +191,86 @@ usage(void)
 }
 
 /**********************************************************************
+ * When running many tests, cleaning the tmpdir with "rm -rf" becomes
+ * chore which limits our performance.
+ * When the number of tests are above 100, we spawn a child-process
+ * to do that for us.
+ */
+
+static void
+cleaner_do(const char *dirname)
+{
+	char buf[BUFSIZ];
+
+	AZ(memcmp(dirname, tmppath, strlen(tmppath)));
+	if (cleaner_pid > 0) {
+		bprintf(buf, "%s\n", dirname);
+		assert(write(cleaner_fd, buf, strlen(buf)) == strlen(buf));
+		return;
+	}
+	bprintf(buf, "exec /bin/rm -rf %s\n", dirname);
+	AZ(system(buf));
+}
+
+static void
+cleaner_setup(void)
+{
+	int p[2], st;
+	char buf[BUFSIZ];
+	char *q;
+	pid_t pp;
+
+	AZ(pipe(p));
+	assert(p[0] > STDERR_FILENO);
+	assert(p[1] > STDERR_FILENO);
+	cleaner_pid = fork();
+	assert(cleaner_pid >= 0);
+	if (cleaner_pid == 0) {
+		closefd(&p[1]);
+		AZ(nice(1));
+		setbuf(stdin, NULL);
+		AZ(dup2(p[0], STDIN_FILENO));
+		while (fgets(buf, sizeof buf, stdin)) {
+			AZ(memcmp(buf, tmppath, strlen(tmppath)));
+			q = buf + strlen(buf);
+			assert(q > buf);
+			assert(q[-1] == '\n');
+			q[-1] = '\0';
+
+			/* Dont expend a shell on running /bin/rm */
+			pp = fork();
+			assert(pp >= 0);
+			if (pp == 0)
+				exit(execl("/bin/rm", "rm", "-rf", buf, NULL));
+			assert(waitpid(pp, &st, 0) == pp);
+			AZ(st);
+		}
+		exit(0);
+	}
+	closefd(&p[0]);
+	cleaner_fd = p[1];
+}
+
+static void
+cleaner_neuter(void)
+{
+	if (cleaner_pid > 0)
+		closefd(&cleaner_fd);
+}
+
+static void
+cleaner_finish(void)
+{
+	int st;
+
+	if (cleaner_pid > 0) {
+		closefd(&cleaner_fd);
+		assert(waitpid(cleaner_pid, &st, 0) == cleaner_pid);
+		AZ(st);
+	}
+}
+
+/**********************************************************************
  * CallBack
  */
 
@@ -157,7 +285,6 @@ tst_cb(const struct vev *ve, int what)
 	double t;
 	FILE *f;
 	char *p;
-	struct vsb *v;
 
 	CAST_OBJ_NOTNULL(jp, ve->priv, JOB_MAGIC);
 
@@ -172,8 +299,9 @@ tst_cb(const struct vev *ve, int what)
 	*buf = '\0';
 	i = read(ve->fd, buf, sizeof buf);
 	if (i > 0)
-		VSB_bcat(jp->diag, buf, i);
+		VSB_bcat(jp->bp->diag, buf, i);
 	if (i == 0) {
+
 		njob--;
 		px = wait4(jp->child, &stx, 0, NULL);
 		assert(px == jp->child);
@@ -184,21 +312,20 @@ tst_cb(const struct vev *ve, int what)
 		if (ecode == 0)
 			ecode = WEXITSTATUS(stx);
 
-		AZ(VSB_finish(jp->diag));
-		v = VSB_new_auto();
-		AN(v);
-		VSB_cat(v, jp->buf);
-		p = strchr(jp->buf, '\0');
-		if (p > jp->buf && p[-1] != '\n')
-			VSB_putc(v, '\n');
-		VSB_quote_pfx(v, "*    diag  0.0 ",
-		    VSB_data(jp->diag), -1, VSB_QUOTE_NONL);
-		AZ(VSB_finish(v));
-		VSB_destroy(&jp->diag);
-		AZ(munmap(jp->buf, jp->bufsiz));
+		AZ(VSB_finish(jp->bp->diag));
+
+		VSB_clear(cbvsb);
+		VSB_cat(cbvsb, jp->bp->buf);
+		p = strchr(jp->bp->buf, '\0');
+		if (p > jp->bp->buf && p[-1] != '\n')
+			VSB_putc(cbvsb, '\n');
+		VSB_quote_pfx(cbvsb, "*    diag  0.0 ",
+		    VSB_data(jp->bp->diag), -1, VSB_QUOTE_NONL);
+		AZ(VSB_finish(cbvsb));
+		rel_buf(&jp->bp);
 
 		if ((ecode > 1 && vtc_verbosity) || vtc_verbosity > 1)
-			printf("%s", VSB_data(v));
+			printf("%s", VSB_data(cbvsb));
 
 		if (!ecode)
 			vtc_good++;
@@ -208,17 +335,15 @@ tst_cb(const struct vev *ve, int what)
 			vtc_fail++;
 
 		if (leave_temp == 0 || (leave_temp == 1 && ecode <= 1)) {
-			bprintf(buf, "rm -rf %s", jp->tmpdir);
-			AZ(system(buf));
+			cleaner_do(jp->tmpdir);
 		} else {
 			bprintf(buf, "%s/LOG", jp->tmpdir);
 			f = fopen(buf, "w");
 			AN(f);
-			(void)fprintf(f, "%s\n", VSB_data(v));
+			(void)fprintf(f, "%s\n", VSB_data(cbvsb));
 			AZ(fclose(f));
 		}
 		free(jp->tmpdir);
-		VSB_destroy(&v);
 
 		if (jp->killed)
 			printf("#    top  TEST %s TIMED OUT (kill -9)\n",
@@ -264,17 +389,8 @@ start_test(void)
 	ALLOC_OBJ(jp, JOB_MAGIC);
 	AN(jp);
 
-	jp->diag = VSB_new_auto();
-	AN(jp->diag);
-
-	jp->bufsiz = vtc_bufsiz;
+	jp->bp = get_buf();
 
-	jp->buf = mmap(NULL, jp->bufsiz, PROT_READ|PROT_WRITE,
-	    MAP_ANON | MAP_SHARED, -1, 0);
-	assert(jp->buf != MAP_FAILED);
-	memset(jp->buf, 0, jp->bufsiz);
-
-	VRND_SeedAll();
 	bprintf(tmpdir, "%s/vtc.%d.%08x", tmppath, (int)getpid(),
 		(unsigned)random());
 	AZ(mkdir(tmpdir, 0711));
@@ -284,7 +400,7 @@ start_test(void)
 	AN(tp->ntodo);
 	tp->ntodo--;
 	VTAILQ_REMOVE(&tst_head, tp, list);
-	if (tp->ntodo >0)
+	if (tp->ntodo > 0)
 		VTAILQ_INSERT_TAIL(&tst_head, tp, list);
 
 	jp->tst = tp;
@@ -298,13 +414,14 @@ start_test(void)
 	jp->child = fork();
 	assert(jp->child >= 0);
 	if (jp->child == 0) {
+		cleaner_neuter();	// Too dangerous to have around
 		AZ(setpgid(getpid(), 0));
 		VFIL_null_fd(STDIN_FILENO);
 		assert(dup2(p[1], STDOUT_FILENO) == STDOUT_FILENO);
 		assert(dup2(p[1], STDERR_FILENO) == STDERR_FILENO);
 		VSUB_closefrom(STDERR_FILENO + 1);
 		retval = exec_file(jp->tst->filename, jp->tst->script,
-		    jp->tmpdir, jp->buf, jp->bufsiz);
+		    jp->tmpdir, jp->bp->buf, jp->bp->bufsiz);
 		exit(retval);
 	}
 	closefd(&p[1]);
@@ -388,7 +505,7 @@ i_mode(void)
 	/*
 	 * Build $PATH which can find all programs in the build tree
 	 */
-	VSB_printf(vsb, "PATH=");
+	VSB_cat(vsb, "PATH=");
 	sep = "";
 #define VTC_PROG(l)							\
 	do {								\
@@ -538,6 +655,8 @@ main(int argc, char * const *argv)
 {
 	int ch, i;
 	int ntest = 1;			/* Run tests this many times */
+	int nstart = 0;
+	int use_cleaner = 0;
 	uintmax_t bufsiz;
 	const char *p;
 
@@ -557,9 +676,12 @@ main(int argc, char * const *argv)
 	if (p != NULL)
 		vtc_maxdur = atoi(p);
 
+	VRND_SeedAll();
+	cbvsb = VSB_new_auto();
+	AN(cbvsb);
 	setbuf(stdout, NULL);
 	setbuf(stderr, NULL);
-	while ((ch = getopt(argc, argv, "b:D:hij:kLln:p:qt:vW")) != -1) {
+	while ((ch = getopt(argc, argv, "b:CD:hij:kLln:p:qt:v")) != -1) {
 		switch (ch) {
 		case 'b':
 			if (VNUM_2bytes(optarg, &bufsiz, 0)) {
@@ -574,6 +696,9 @@ main(int argc, char * const *argv)
 			}
 			vtc_bufsiz = (unsigned)bufsiz;
 			break;
+		case 'C':
+			use_cleaner = !use_cleaner;
+			break;
 		case 'D':
 			if (!parse_D_opt(optarg)) {
 				fprintf(stderr, "Cannot parse D opt '%s'\n",
@@ -600,7 +725,7 @@ main(int argc, char * const *argv)
 			ntest = strtoul(optarg, NULL, 0);
 			break;
 		case 'p':
-			VSB_printf(params_vsb, " -p ");
+			VSB_cat(params_vsb, " -p ");
 			VSB_quote(params_vsb, optarg, -1, 0);
 			break;
 		case 'q':
@@ -643,19 +768,23 @@ main(int argc, char * const *argv)
 
 	vb = VEV_New();
 
+	if (use_cleaner)
+		cleaner_setup();
+
 	i = 0;
 	while (!VTAILQ_EMPTY(&tst_head) || i) {
 		if (!VTAILQ_EMPTY(&tst_head) && njob < npar) {
 			start_test();
 			njob++;
 			/* Stagger ramp-up */
-			if (njob < npar)
+			if (nstart++ < npar)
 				(void)usleep(random() % 100000L);
 			i = 1;
 			continue;
 		}
 		i = VEV_Once(vb);
 	}
+	cleaner_finish();
 	if (vtc_continue)
 		fprintf(stderr,
 		    "%d tests failed, %d tests skipped, %d tests passed\n",
--- a/bin/varnishtest/vtc_proxy.c
+++ b/bin/varnishtest/vtc_proxy.c
@@ -100,9 +100,9 @@ vtc_send_proxy(int fd, int version, cons
 	if (version == 1) {
 		VSB_bcat(vsb, vpx1_sig, sizeof(vpx1_sig));
 		if (proto == PF_INET6)
-			VSB_printf(vsb, " TCP6 ");
+			VSB_cat(vsb, " TCP6 ");
 		else if (proto == PF_INET)
-			VSB_printf(vsb, " TCP4 ");
+			VSB_cat(vsb, " TCP4 ");
 		VTCP_name(sac, hc, sizeof(hc), pc, sizeof(pc));
 		VTCP_name(sas, hs, sizeof(hs), ps, sizeof(ps));
 		VSB_printf(vsb, "%s %s %s %s\r\n", hc, hs, pc, ps);
--- a/bin/varnishtest/vtc_varnish.c
+++ b/bin/varnishtest/vtc_varnish.c
@@ -406,7 +406,7 @@ varnish_launch(struct varnish *v)
 	vtc_log(v->vl, 2, "Launch");
 	vsb = VSB_new_auto();
 	AN(vsb);
-	VSB_printf(vsb, "cd ${pwd} &&");
+	VSB_cat(vsb, "cd ${pwd} &&");
 	VSB_printf(vsb, " exec varnishd %s -d -n %s",
 	    v->jail, v->workdir);
 	VSB_cat(vsb, VSB_data(params_vsb));
@@ -416,12 +416,15 @@ varnish_launch(struct varnish *v)
 		VSB_cat(vsb, " -p debug=+vsm_keep");
 		VSB_cat(vsb, " -p debug=+vmod_so_keep");
 	}
-	VSB_printf(vsb, " -l 2m");
-	VSB_printf(vsb, " -p auto_restart=off");
-	VSB_printf(vsb, " -p syslog_cli_traffic=off");
-	VSB_printf(vsb, " -p sigsegv_handler=on");
-	VSB_printf(vsb, " -p thread_pool_min=10");
-	VSB_printf(vsb, " -p debug=+vtc_mode");
+	VSB_cat(vsb, " -l 2m");
+	VSB_cat(vsb, " -p auto_restart=off");
+	VSB_cat(vsb, " -p syslog_cli_traffic=off");
+	VSB_cat(vsb, " -p sigsegv_handler=on");
+	VSB_cat(vsb, " -p thread_pool_min=10");
+	VSB_cat(vsb, " -p debug=+vtc_mode");
+	VSB_cat(vsb, " -p vsl_mask=+Debug,+H2RxHdr,+H2RxBody");
+	VSB_cat(vsb, " -p h2_initial_window_size=1m");
+	VSB_cat(vsb, " -p h2_rx_window_low_water=64k");
 	if (!v->has_a_arg) {
 		VSB_printf(vsb, " -a '%s'", "127.0.0.1:0");
 		if (v->proto != NULL)
--- a/include/tbl/h2_error.h
+++ b/include/tbl/h2_error.h
@@ -27,25 +27,123 @@
  *
  * RFC7540 section 11.4
  *
- * Fields: Upper, value, conn=1|stream=2, description
+ * Types: conn=1|stream=2
+ * Reason: enum sess_close
  */
 
 /*lint -save -e525 -e539 */
 
-H2_ERROR(NO_ERROR,	       0,3, "Graceful shutdown")
-H2_ERROR(PROTOCOL_ERROR,       1,3, "Protocol error detected")
-H2_ERROR(INTERNAL_ERROR,       2,3, "Implementation fault")
-H2_ERROR(FLOW_CONTROL_ERROR,   3,3, "Flow-control limits exceeded")
-H2_ERROR(SETTINGS_TIMEOUT,     4,1, "Settings not acknowledged")
-H2_ERROR(STREAM_CLOSED,	       5,2, "Frame received for closed stream")
-H2_ERROR(FRAME_SIZE_ERROR,     6,3, "Frame size incorrect")
-H2_ERROR(REFUSED_STREAM,       7,2, "Stream not processed")
-H2_ERROR(CANCEL,	       8,2, "Stream cancelled")
-H2_ERROR(COMPRESSION_ERROR,    9,1, "Compression state not updated")
-H2_ERROR(CONNECT_ERROR,	      10,2, "TCP connection error for CONNECT method")
-H2_ERROR(ENHANCE_YOUR_CALM,   11,3, "Processing capacity exceeded")
-H2_ERROR(INADEQUATE_SECURITY, 12,1, "Negotiated TLS parameters not acceptable")
-H2_ERROR(HTTP_1_1_REQUIRED,   13,1, "Use HTTP/1.1 for the request")
-#undef H2_ERROR
+H2_ERROR(
+	/* name */	NO_ERROR,
+	/* val */	0,
+	/* types */	3,
+	/* reason */	SC_REM_CLOSE,
+	/* descr */	"Graceful shutdown"
+)
+
+H2_ERROR(
+	/* name */	PROTOCOL_ERROR,
+	/* val */	1,
+	/* types */	3,
+	/* reason */	SC_RX_JUNK,
+	/* descr */	"Protocol error detected"
+)
+
+H2_ERROR(
+	/* name */	INTERNAL_ERROR,
+	/* val */	2,
+	/* types */	3,
+	/* reason */	SC_VCL_FAILURE,
+	/* descr */	"Implementation fault"
+)
+
+H2_ERROR(
+	/* name */	FLOW_CONTROL_ERROR,
+	/* val */	3,
+	/* types */	3,
+	/* reason */	SC_OVERLOAD,
+	/* descr */	"Flow-control limits exceeded"
+)
+
+H2_ERROR(
+	/* name */	SETTINGS_TIMEOUT,
+	/* val */	4,
+	/* types */	1,
+	/* reason */	SC_RX_TIMEOUT,
+	/* descr */	"Settings not acknowledged"
+)
+
+H2_ERROR(
+	/* name */	STREAM_CLOSED,
+	/* val */	5,
+	/* types */	2,
+	/* reason */	SC_NULL,
+	/* descr */	"Frame received for closed stream"
+)
+
+H2_ERROR(
+	/* name */	FRAME_SIZE_ERROR,
+	/* val */	6,
+	/* types */	3,
+	/* reason */	SC_RX_JUNK,
+	/* descr */	"Frame size incorrect"
+)
+
+H2_ERROR(
+	/* name */	REFUSED_STREAM,
+	/* val */	7,
+	/* types */	2,
+	/* reason */	SC_NULL,
+	/* descr */	"Stream not processed"
+)
 
+H2_ERROR(
+	/* name */	CANCEL,
+	/* val */	8,
+	/* types */	2,
+	/* reason */	SC_NULL,
+	/* descr */	"Stream cancelled"
+)
+
+H2_ERROR(
+	/* name */	COMPRESSION_ERROR,
+	/* val */	9,
+	/* types */	1,
+	/* reason */	SC_RX_JUNK,
+	/* descr */	"Compression state not updated"
+)
+
+H2_ERROR(
+	/* name */	CONNECT_ERROR,
+	/* val */	10,
+	/* types */	2,
+	/* reason */	SC_NULL,
+	/* descr */	"TCP connection error for CONNECT method"
+)
+
+H2_ERROR(
+	/* name */	ENHANCE_YOUR_CALM,
+	/* val */	11,
+	/* types */	3,
+	/* reason */	SC_OVERLOAD,
+	/* descr */	"Processing capacity exceeded"
+)
+
+H2_ERROR(
+	/* name */	INADEQUATE_SECURITY,
+	/* val */	12,
+	/* types */	1,
+	/* reason */	SC_RX_JUNK,
+	/* descr */	"Negotiated TLS parameters not acceptable"
+)
+
+H2_ERROR(
+	/* name */	HTTP_1_1_REQUIRED,
+	/* val */	13,
+	/* types */	1,
+	/* reason */	SC_REQ_HTTP20,
+	/* descr */	"Use HTTP/1.1 for the request"
+)
+
+#undef H2_ERROR
 /*lint -restore */
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -1859,10 +1859,14 @@ PARAM(
        /* func */      NULL
 )
 
+/* We have a strict min at the protocol default here. This is because we
+ * don't have the 'use settings only after peer ack' in place yet. If the
+ * value is lower than the protocol default, the very first stream could
+ * get a flow control error. */
 PARAM(
 	/* name */      h2_initial_window_size,
 	/* typ */       bytes_u,
-	/* min */       "0",
+	/* min */       "65535b",
 	/* max */       "2147483647b",
 	/* default */   "65535b",
 	/* units */     "bytes",
@@ -1901,6 +1905,24 @@ PARAM(
 	/* func */      NULL
 )
 
+#if 0
+/* actual location mgt_param_tbl.c */
+PARAM(
+	/* name */	h2_rxbuf_storage,
+	/* typ */	h2_rxbuf_storage,
+	/* min */	NULL,
+	/* max */	NULL,
+	/* default */	"Transient",
+	/* units */	NULL,
+	/* flags */	MUST_RESTART,
+	/* s-text */
+	"The name of the storage backend that HTTP/2 receive buffers"
+	" should be allocated from.",
+	/* l-text */	"",
+	/* func */	NULL
+)
+#endif
+
 #undef PARAM
 
 /*lint -restore */
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -566,9 +566,14 @@ struct sess {
 
 	vtim_real		t_open;		/* fd accepted */
 	vtim_real		t_idle;		/* fd accepted or resp sent */
-
+	vtim_dur		timeout_idle;
+	vtim_dur		timeout_linger;
+	vtim_dur		send_timeout;
+	vtim_dur		idle_send_timeout;
 };
 
+#define SESS_TMO(sp, tmo)					\
+	(isnan((sp)->tmo) ? cache_param->tmo : (sp)->tmo)
 /* Prototypes etc ----------------------------------------------------*/
 
 
