diff options
author | NeilBrown <neilb@suse.de> | 2007-02-08 23:20:30 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-09 18:25:47 +0100 |
commit | aaf68cfbf2241d24d46583423f6bff5c47e088b3 (patch) | |
tree | 65ca14b85d28b12da097d7d187cebfef88b5ba3a /net/sunrpc/svcsock.c | |
parent | [PATCH] md: fix various bugs with aligned reads in RAID5 (diff) | |
download | linux-aaf68cfbf2241d24d46583423f6bff5c47e088b3.tar.xz linux-aaf68cfbf2241d24d46583423f6bff5c47e088b3.zip |
[PATCH] knfsd: fix a race in closing NFSd connections
If you lose this race, it can iput a socket inode twice and you get a BUG
in fs/inode.c
When I added the option for user-space to close a socket, I added some
cruft to svc_delete_socket so that I could call that function when closing
a socket per user-space request.
This was the wrong thing to do. I should have just set SK_CLOSE and let
normal mechanisms do the work.
Not only wrong, but buggy. The locking is all wrong and it openned up a
race where-by a socket could be closed twice.
So this patch:
Introduces svc_close_socket which sets SK_CLOSE then either leave
the close up to a thread, or calls svc_delete_socket if it can
get SK_BUSY.
Adds a bias to sk_busy which is removed when SK_DEAD is set,
This avoid races around shutting down the socket.
Changes several 'spin_lock' to 'spin_lock_bh' where the _bh
was missing.
Bugzilla-url: http://bugzilla.kernel.org/show_bug.cgi?id=7916
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r-- | net/sunrpc/svcsock.c | 52 |
1 files changed, 38 insertions, 14 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ff1f8bf680aa..cf93cd1d857b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -62,6 +62,12 @@ * after a clear, the socket must be read/accepted * if this succeeds, it must be set again. * SK_CLOSE can set at any time. It is never cleared. + * sk_inuse contains a bias of '1' until SK_DEAD is set. + * so when sk_inuse hits zero, we know the socket is dead + * and no-one is using it. + * SK_DEAD can only be set while SK_BUSY is held which ensures + * no other thread will be using the socket or will try to + * set SK_DEAD. * */ @@ -70,6 +76,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int *errp, int pmap_reg); +static void svc_delete_socket(struct svc_sock *svsk); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); @@ -329,8 +336,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space) static inline void svc_sock_put(struct svc_sock *svsk) { - if (atomic_dec_and_test(&svsk->sk_inuse) && - test_bit(SK_DEAD, &svsk->sk_flags)) { + if (atomic_dec_and_test(&svsk->sk_inuse)) { + BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); + dprintk("svc: releasing dead socket\n"); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); @@ -520,7 +528,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) if (!serv) return 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { int onelen = one_sock_name(buf+len, svsk); if (toclose && strcmp(toclose, buf+len) == 0) @@ -528,12 +536,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) else len += onelen; } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... */ - svc_delete_socket(closesk); + svc_close_socket(closesk); else if (toclose) return -ENOENT; return len; @@ -683,6 +691,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } + if (test_bit(SK_CLOSE, &svsk->sk_flags)) { + svc_delete_socket(svsk); + return 0; + } + clear_bit(SK_DATA, &svsk->sk_flags); while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { @@ -1176,7 +1189,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp) rqstp->rq_sock->sk_server->sv_name, (sent<0)?"got error":"sent only", sent, xbufp->len); - svc_delete_socket(rqstp->rq_sock); + set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); + svc_sock_enqueue(rqstp->rq_sock); sent = -EAGAIN; } return sent; @@ -1495,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; svsk->sk_server = serv; - atomic_set(&svsk->sk_inuse, 0); + atomic_set(&svsk->sk_inuse, 1); svsk->sk_lastrecv = get_seconds(); spin_lock_init(&svsk->sk_defer_lock); INIT_LIST_HEAD(&svsk->sk_deferred); @@ -1618,7 +1632,7 @@ bummer: /* * Remove a dead socket */ -void +static void svc_delete_socket(struct svc_sock *svsk) { struct svc_serv *serv; @@ -1644,16 +1658,26 @@ svc_delete_socket(struct svc_sock *svsk) * while still attached to a queue, the queue itself * is about to be destroyed (in svc_destroy). */ - if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) + if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { + BUG_ON(atomic_read(&svsk->sk_inuse)<2); + atomic_dec(&svsk->sk_inuse); if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; + } - /* This atomic_inc should be needed - svc_delete_socket - * should have the semantic of dropping a reference. - * But it doesn't yet.... - */ - atomic_inc(&svsk->sk_inuse); spin_unlock_bh(&serv->sv_lock); +} + +void svc_close_socket(struct svc_sock *svsk) +{ + set_bit(SK_CLOSE, &svsk->sk_flags); + if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) + /* someone else will have to effect the close */ + return; + + atomic_inc(&svsk->sk_inuse); + svc_delete_socket(svsk); + clear_bit(SK_BUSY, &svsk->sk_flags); svc_sock_put(svsk); } |