summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2016-09-25 23:23:57 +0200
committerPablo Neira Ayuso <pablo@netfilter.org>2016-09-25 23:34:19 +0200
commitf20fbc0717f9f007c94b2641134b19228d0ce9ed (patch)
tree1404248ebbec552a3fb7928b75322b65d74de1bd /fs
parentnetfilter: nf_log: get rid of XT_LOG_* macros (diff)
parentMerge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirshe... (diff)
downloadlinux-f20fbc0717f9f007c94b2641134b19228d0ce9ed.tar.xz
linux-f20fbc0717f9f007c94b2641134b19228d0ce9ed.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Conflicts: net/netfilter/core.c net/netfilter/nf_tables_netdev.c Resolve two conflicts before pull request for David's net-next tree: 1) Between c73c24849011 ("netfilter: nf_tables_netdev: remove redundant ip_hdr assignment") from the net tree and commit ddc8b6027ad0 ("netfilter: introduce nft_set_pktinfo_{ipv4, ipv6}_validate()"). 2) Between e8bffe0cf964 ("net: Add _nf_(un)register_hooks symbols") and Aaron Conole's patches to replace list_head with single linked list. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/rxrpc.c105
-rw-r--r--fs/aio.c7
-rw-r--r--fs/autofs4/expire.c55
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/extent-tree.c33
-rw-r--r--fs/btrfs/relocation.c8
-rw-r--r--fs/btrfs/send.c8
-rw-r--r--fs/btrfs/tree-log.c1
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/cifs/cifsfs.c29
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/connect.c31
-rw-r--r--fs/crypto/policy.c41
-rw-r--r--fs/devpts/inode.c3
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/f2fs/file.c9
-rw-r--r--fs/fuse/file.c7
-rw-r--r--fs/ioctl.c6
-rw-r--r--fs/iomap.c5
-rw-r--r--fs/kernfs/file.c28
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.h3
-rw-r--r--fs/nfs/blocklayout/extent_tree.c10
-rw-r--r--fs/nfs/callback.c1
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c10
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c45
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c23
-rw-r--r--fs/nfs/internal.h5
-rw-r--r--fs/nfs/nfs42proc.c34
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4proc.c110
-rw-r--r--fs/nfs/nfs4session.c53
-rw-r--r--fs/nfs/nfs4session.h7
-rw-r--r--fs/nfs/pnfs.c44
-rw-r--r--fs/nfs/super.c19
-rw-r--r--fs/notify/fanotify/fanotify.c13
-rw-r--r--fs/notify/fanotify/fanotify_user.c36
-rw-r--r--fs/notify/group.c19
-rw-r--r--fs/notify/notification.c23
-rw-r--r--fs/ocfs2/alloc.c56
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h5
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c12
-rw-r--r--fs/ocfs2/file.c34
-rw-r--r--fs/ocfs2/suballoc.c14
-rw-r--r--fs/overlayfs/copy_up.c2
-rw-r--r--fs/overlayfs/dir.c58
-rw-r--r--fs/overlayfs/inode.c108
-rw-r--r--fs/overlayfs/overlayfs.h17
-rw-r--r--fs/overlayfs/readdir.c63
-rw-r--r--fs/overlayfs/super.c93
-rw-r--r--fs/proc/base.c7
-rw-r--r--fs/proc/kcore.c31
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/ramfs/file-mmu.c9
-rw-r--r--fs/sysfs/file.c8
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c2
-rw-r--r--fs/xfs/libxfs/xfs_btree.c14
-rw-r--r--fs/xfs/libxfs/xfs_defer.c17
-rw-r--r--fs/xfs/libxfs/xfs_defer.h2
-rw-r--r--fs/xfs/libxfs/xfs_format.h6
-rw-r--r--fs/xfs/libxfs/xfs_sb.c3
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_super.c9
-rw-r--r--fs/xfs/xfs_trace.h2
68 files changed, 971 insertions, 467 deletions
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 37608be52abd..59bdaa7527b6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,6 +18,7 @@
struct socket *afs_socket; /* my RxRPC socket */
static struct workqueue_struct *afs_async_calls;
+static struct afs_call *afs_spare_incoming_call;
static atomic_t afs_outstanding_calls;
static void afs_free_call(struct afs_call *);
@@ -26,7 +27,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static int afs_dont_wait_for_call_to_complete(struct afs_call *);
static void afs_process_async_call(struct work_struct *);
-static void afs_rx_new_call(struct sock *);
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
static int afs_deliver_cm_op_id(struct afs_call *);
/* synchronous call management */
@@ -53,9 +55,9 @@ static const struct afs_call_type afs_RXCMxxxx = {
.abort_to_error = afs_abort_to_error,
};
-static void afs_collect_incoming_call(struct work_struct *);
+static void afs_charge_preallocation(struct work_struct *);
-static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
+static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
static int afs_wait_atomic_t(atomic_t *p)
{
@@ -100,13 +102,15 @@ int afs_open_socket(void)
if (ret < 0)
goto error_2;
- rxrpc_kernel_new_call_notification(socket, afs_rx_new_call);
+ rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
+ afs_rx_discard_new_call);
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
goto error_2;
afs_socket = socket;
+ afs_charge_preallocation(NULL);
_leave(" = 0");
return 0;
@@ -126,12 +130,20 @@ void afs_close_socket(void)
{
_enter("");
+ if (afs_spare_incoming_call) {
+ atomic_inc(&afs_outstanding_calls);
+ afs_free_call(afs_spare_incoming_call);
+ afs_spare_incoming_call = NULL;
+ }
+
_debug("outstanding %u", atomic_read(&afs_outstanding_calls));
wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
TASK_UNINTERRUPTIBLE);
_debug("no outstanding calls");
flush_workqueue(afs_async_calls);
+ kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+ flush_workqueue(afs_async_calls);
sock_release(afs_socket);
_debug("dework");
@@ -377,7 +389,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
return wait_mode->wait(call);
error_do_abort:
- rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT);
+ rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
error_kill_call:
afs_end_call(call);
_leave(" = %d", ret);
@@ -425,12 +437,12 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ENOTCONN:
abort_code = RX_CALL_DEAD;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- abort_code);
+ abort_code, -ret, "KNC");
goto do_abort;
case -ENOTSUPP:
abort_code = RX_INVALID_OPERATION;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- abort_code);
+ abort_code, -ret, "KIV");
goto do_abort;
case -ENODATA:
case -EBADMSG:
@@ -440,7 +452,7 @@ static void afs_deliver_to_call(struct afs_call *call)
if (call->state != AFS_CALL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- abort_code);
+ abort_code, EBADMSG, "KUM");
goto do_abort;
}
}
@@ -463,6 +475,7 @@ do_abort:
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
+ const char *abort_why;
int ret;
DECLARE_WAITQUEUE(myself, current);
@@ -481,9 +494,11 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
continue;
}
+ abort_why = "KWC";
ret = call->error;
if (call->state == AFS_CALL_COMPLETE)
break;
+ abort_why = "KWI";
ret = -EINTR;
if (signal_pending(current))
break;
@@ -497,7 +512,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
if (call->state < AFS_CALL_COMPLETE) {
_debug("call incomplete");
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_CALL_DEAD);
+ RX_CALL_DEAD, -ret, abort_why);
}
_debug("call complete");
@@ -587,57 +602,65 @@ static void afs_process_async_call(struct work_struct *work)
_leave("");
}
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
+{
+ struct afs_call *call = (struct afs_call *)user_call_ID;
+
+ call->rxcall = rxcall;
+}
+
/*
- * accept the backlog of incoming calls
+ * Charge the incoming call preallocation.
*/
-static void afs_collect_incoming_call(struct work_struct *work)
+static void afs_charge_preallocation(struct work_struct *work)
{
- struct rxrpc_call *rxcall;
- struct afs_call *call = NULL;
-
- _enter("");
+ struct afs_call *call = afs_spare_incoming_call;
- do {
+ for (;;) {
if (!call) {
call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
- if (!call) {
- rxrpc_kernel_reject_call(afs_socket);
- return;
- }
+ if (!call)
+ break;
INIT_WORK(&call->async_work, afs_process_async_call);
call->wait_mode = &afs_async_incoming_call;
call->type = &afs_RXCMxxxx;
init_waitqueue_head(&call->waitq);
call->state = AFS_CALL_AWAIT_OP_ID;
-
- _debug("CALL %p{%s} [%d]",
- call, call->type->name,
- atomic_read(&afs_outstanding_calls));
- atomic_inc(&afs_outstanding_calls);
}
- rxcall = rxrpc_kernel_accept_call(afs_socket,
- (unsigned long)call,
- afs_wake_up_async_call);
- if (!IS_ERR(rxcall)) {
- call->rxcall = rxcall;
- call->need_attention = true;
- queue_work(afs_async_calls, &call->async_work);
- call = NULL;
- }
- } while (!call);
+ if (rxrpc_kernel_charge_accept(afs_socket,
+ afs_wake_up_async_call,
+ afs_rx_attach,
+ (unsigned long)call,
+ GFP_KERNEL) < 0)
+ break;
+ call = NULL;
+ }
+ afs_spare_incoming_call = call;
+}
- if (call)
- afs_free_call(call);
+/*
+ * Discard a preallocated call when a socket is shut down.
+ */
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ struct afs_call *call = (struct afs_call *)user_call_ID;
+
+ atomic_inc(&afs_outstanding_calls);
+ call->rxcall = NULL;
+ afs_free_call(call);
}
/*
* Notification of an incoming call.
*/
-static void afs_rx_new_call(struct sock *sk)
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
{
- queue_work(afs_wq, &afs_collect_incoming_call_work);
+ atomic_inc(&afs_outstanding_calls);
+ queue_work(afs_wq, &afs_charge_preallocation_work);
}
/*
@@ -695,7 +718,7 @@ void afs_send_empty_reply(struct afs_call *call)
case -ENOMEM:
_debug("oom");
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_USER_ABORT);
+ RX_USER_ABORT, ENOMEM, "KOO");
default:
afs_end_call(call);
_leave(" [error]");
@@ -734,7 +757,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
if (n == -ENOMEM) {
_debug("oom");
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_USER_ABORT);
+ RX_USER_ABORT, ENOMEM, "KOO");
}
afs_end_call(call);
_leave(" [error]");
diff --git a/fs/aio.c b/fs/aio.c
index fb8e45b88cd4..4fe81d1c60f9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
static const struct dentry_operations ops = {
.d_dname = simple_dname,
};
- return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
+ struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+ AIO_RING_MAGIC);
+
+ if (!IS_ERR(root))
+ root->d_sb->s_iflags |= SB_I_NOEXEC;
+ return root;
}
/* aio_setup
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b493909e7492..d8e6d421c27f 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry,
}
return NULL;
}
+
/*
* Find an eligible tree to time-out
* A tree is eligible if :-
@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
struct dentry *root = sb->s_root;
struct dentry *dentry;
struct dentry *expired;
+ struct dentry *found;
struct autofs_info *ino;
if (!root)
@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
+ int flags = how;
+
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
- if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
- expired = NULL;
- else
- expired = should_expire(dentry, mnt, timeout, how);
- if (!expired) {
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
spin_unlock(&sbi->fs_lock);
continue;
}
+ spin_unlock(&sbi->fs_lock);
+
+ expired = should_expire(dentry, mnt, timeout, flags);
+ if (!expired)
+ continue;
+
+ spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(expired);
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- spin_lock(&sbi->fs_lock);
- if (should_expire(expired, mnt, timeout, how)) {
- if (expired != dentry)
- dput(dentry);
- goto found;
- }
+ /* Make sure a reference is not taken on found if
+ * things have changed.
+ */
+ flags &= ~AUTOFS_EXP_LEAVES;
+ found = should_expire(expired, mnt, timeout, how);
+ if (!found || found != expired)
+ /* Something has changed, continue */
+ goto next;
+
+ if (expired != dentry)
+ dput(dentry);
+
+ spin_lock(&sbi->fs_lock);
+ goto found;
+next:
+ spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+ spin_unlock(&sbi->fs_lock);
if (expired != dentry)
dput(expired);
- spin_unlock(&sbi->fs_lock);
}
return NULL;
@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
+ int state;
/* Block on any pending expire */
if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
if (rcu_walk)
return -ECHILD;
+retry:
spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_EXPIRING) {
+ state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+ if (state == AUTOFS_INF_WANT_EXPIRE) {
+ spin_unlock(&sbi->fs_lock);
+ /*
+ * Possibly being selected for expire, wait until
+ * it's selected or not.
+ */
+ schedule_timeout_uninterruptible(HZ/10);
+ goto retry;
+ }
+ if (state & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7f6aff3f72eb..e5495f37c6ed 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -853,6 +853,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->flags |= PF_RANDOMIZE;
setup_new_exec(bprm);
+ install_exec_creds(bprm);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
@@ -1044,7 +1045,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
- install_exec_creds(bprm);
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eff3993c77b3..33fe03551105 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -427,6 +427,7 @@ struct btrfs_space_info {
struct list_head ro_bgs;
struct list_head priority_tickets;
struct list_head tickets;
+ u64 tickets_id;
struct rw_semaphore groups_sem;
/* for block groups in our same type */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0450dc410533..38c2df84cabd 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4901,11 +4901,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
u64 expected;
u64 to_reclaim = 0;
- to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
- if (can_overcommit(root, space_info, to_reclaim,
- BTRFS_RESERVE_FLUSH_ALL))
- return 0;
-
list_for_each_entry(ticket, &space_info->tickets, list)
to_reclaim += ticket->bytes;
list_for_each_entry(ticket, &space_info->priority_tickets, list)
@@ -4913,6 +4908,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
if (to_reclaim)
return to_reclaim;
+ to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
+ if (can_overcommit(root, space_info, to_reclaim,
+ BTRFS_RESERVE_FLUSH_ALL))
+ return 0;
+
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
@@ -4966,12 +4966,12 @@ static void wake_all_tickets(struct list_head *head)
*/
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
- struct reserve_ticket *last_ticket = NULL;
struct btrfs_fs_info *fs_info;
struct btrfs_space_info *space_info;
u64 to_reclaim;
int flush_state;
int commit_cycles = 0;
+ u64 last_tickets_id;
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
@@ -4984,8 +4984,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
spin_unlock(&space_info->lock);
return;
}
- last_ticket = list_first_entry(&space_info->tickets,
- struct reserve_ticket, list);
+ last_tickets_id = space_info->tickets_id;
spin_unlock(&space_info->lock);
flush_state = FLUSH_DELAYED_ITEMS_NR;
@@ -5005,10 +5004,10 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
space_info);
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
- if (last_ticket == ticket) {
+ if (last_tickets_id == space_info->tickets_id) {
flush_state++;
} else {
- last_ticket = ticket;
+ last_tickets_id = space_info->tickets_id;
flush_state = FLUSH_DELAYED_ITEMS_NR;
if (commit_cycles)
commit_cycles--;
@@ -5384,6 +5383,7 @@ again:
list_del_init(&ticket->list);
num_bytes -= ticket->bytes;
ticket->bytes = 0;
+ space_info->tickets_id++;
wake_up(&ticket->wait);
} else {
ticket->bytes -= num_bytes;
@@ -5426,6 +5426,7 @@ again:
num_bytes -= ticket->bytes;
space_info->bytes_may_use += ticket->bytes;
ticket->bytes = 0;
+ space_info->tickets_id++;
wake_up(&ticket->wait);
} else {
trace_btrfs_space_reservation(fs_info, "space_info",
@@ -8216,6 +8217,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
{
int ret;
struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
/*
* Mixed block groups will exclude before processing the log so we only
@@ -8231,9 +8233,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
if (!block_group)
return -EINVAL;
- ret = btrfs_add_reserved_bytes(block_group, ins->offset,
- ins->offset, 0);
- BUG_ON(ret); /* logic error */
+ space_info = block_group->space_info;
+ spin_lock(&space_info->lock);
+ spin_lock(&block_group->lock);
+ space_info->bytes_reserved += ins->offset;
+ block_group->reserved += ins->offset;
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8a2c2a07987b..c0c13dc6fe12 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4200,9 +4200,11 @@ restart:
err = PTR_ERR(trans);
goto out_free;
}
- err = qgroup_fix_relocated_data_extents(trans, rc);
- if (err < 0) {
- btrfs_abort_transaction(trans, err);
+ ret = qgroup_fix_relocated_data_extents(trans, rc);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ if (!err)
+ err = ret;
goto out_free;
}
btrfs_commit_transaction(trans, rc->extent_root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index efe129fe2678..a87675ffd02b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4268,10 +4268,12 @@ static int process_all_refs(struct send_ctx *sctx,
}
btrfs_release_path(path);
+ /*
+ * We don't actually care about pending_move as we are simply
+ * re-creating this inode and will be rename'ing it into place once we
+ * rename the parent directory.
+ */
ret = process_recorded_refs(sctx, &pending_move);
- /* Only applicable to an incremental send. */
- ASSERT(pending_move == 0);
-
out:
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e935035ac034..ef9c55bc7907 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2867,6 +2867,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
blk_finish_plug(&plug);
+ list_del_init(&root_log_ctx.list);
mutex_unlock(&log_root_tree->log_mutex);
ret = root_log_ctx.log_ret;
goto out;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index c64a0b794d49..df4b3e6fa563 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -597,7 +597,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
if (is_hash_order(new_pos)) {
/* no need to reset last_name for a forward seek when
* dentries are sotred in hash order */
- } else if (fi->frag |= fpos_frag(new_pos)) {
+ } else if (fi->frag != fpos_frag(new_pos)) {
return true;
}
rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6bbec5e784cd..14ae4b8e1a3c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -609,6 +609,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
char *s, *p;
char sep;
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ return dget(sb->s_root);
+
full_path = cifs_build_path_to_root(vol, cifs_sb,
cifs_sb_master_tcon(cifs_sb));
if (full_path == NULL)
@@ -686,26 +689,22 @@ cifs_do_mount(struct file_system_type *fs_type,
cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
if (cifs_sb->mountdata == NULL) {
root = ERR_PTR(-ENOMEM);
- goto out_cifs_sb;
+ goto out_free;
}
- if (volume_info->prepath) {
- cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL);
- if (cifs_sb->prepath == NULL) {
- root = ERR_PTR(-ENOMEM);
- goto out_cifs_sb;
- }
+ rc = cifs_setup_cifs_sb(volume_info, cifs_sb);
+ if (rc) {
+ root = ERR_PTR(rc);
+ goto out_free;
}
- cifs_setup_cifs_sb(volume_info, cifs_sb);
-
rc = cifs_mount(cifs_sb, volume_info);
if (rc) {
if (!(flags & MS_SILENT))
cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
rc);
root = ERR_PTR(rc);
- goto out_mountdata;
+ goto out_free;
}
mnt_data.vol = volume_info;
@@ -735,11 +734,7 @@ cifs_do_mount(struct file_system_type *fs_type,
sb->s_flags |= MS_ACTIVE;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
- root = dget(sb->s_root);
- else
- root = cifs_get_root(volume_info, sb);
-
+ root = cifs_get_root(volume_info, sb);
if (IS_ERR(root))
goto out_super;
@@ -752,9 +747,9 @@ out:
cifs_cleanup_volume_info(volume_info);
return root;
-out_mountdata:
+out_free:
+ kfree(cifs_sb->prepath);
kfree(cifs_sb->mountdata);
-out_cifs_sb:
kfree(cifs_sb);
out_nls:
unload_nls(volume_info->local_nls);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1243bd326591..95dab43646f0 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -184,7 +184,7 @@ extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read);
extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
struct page *page, unsigned int to_read);
-extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
+extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
struct cifs_sb_info *cifs_sb);
extern int cifs_match_super(struct super_block *, void *);
extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7ae03283bd61..2e4f4bad8b1e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2781,6 +2781,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
return 1;
}
+static int
+match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data)
+{
+ struct cifs_sb_info *old = CIFS_SB(sb);
+ struct cifs_sb_info *new = mnt_data->cifs_sb;
+
+ if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) {
+ if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH))
+ return 0;
+ /* The prepath should be null terminated strings */
+ if (strcmp(new->prepath, old->prepath))
+ return 0;
+
+ return 1;
+ }
+ return 0;
+}
+
int
cifs_match_super(struct super_block *sb, void *data)
{
@@ -2808,7 +2826,8 @@ cifs_match_super(struct super_block *sb, void *data)
if (!match_server(tcp_srv, volume_info) ||
!match_session(ses, volume_info) ||
- !match_tcon(tcon, volume_info->UNC)) {
+ !match_tcon(tcon, volume_info->UNC) ||
+ !match_prepath(sb, mnt_data)) {
rc = 0;
goto out;
}
@@ -3222,7 +3241,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
}
}
-void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
+int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
struct cifs_sb_info *cifs_sb)
{
INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
@@ -3316,6 +3335,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n");
+
+ if (pvolume_info->prepath) {
+ cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL);
+ if (cifs_sb->prepath == NULL)
+ return -ENOMEM;
+ }
+
+ return 0;
}
static void
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 0f9961eede1e..ed115acb5dee 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -11,6 +11,7 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/fscrypto.h>
+#include <linux/mount.h>
static int inode_has_encryption_context(struct inode *inode)
{
@@ -92,26 +93,42 @@ static int create_encryption_context_from_policy(struct inode *inode,
return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
}
-int fscrypt_process_policy(struct inode *inode,
+int fscrypt_process_policy(struct file *filp,
const struct fscrypt_policy *policy)
{
+ struct inode *inode = file_inode(filp);
+ int ret;
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
if (policy->version != 0)
return -EINVAL;
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
if (!inode_has_encryption_context(inode)) {
- if (!inode->i_sb->s_cop->empty_dir)
- return -EOPNOTSUPP;
- if (!inode->i_sb->s_cop->empty_dir(inode))
- return -ENOTEMPTY;
- return create_encryption_context_from_policy(inode, policy);
+ if (!S_ISDIR(inode->i_mode))
+ ret = -EINVAL;
+ else if (!inode->i_sb->s_cop->empty_dir)
+ ret = -EOPNOTSUPP;
+ else if (!inode->i_sb->s_cop->empty_dir(inode))
+ ret = -ENOTEMPTY;
+ else
+ ret = create_encryption_context_from_policy(inode,
+ policy);
+ } else if (!is_encryption_context_consistent_with_policy(inode,
+ policy)) {
+ printk(KERN_WARNING
+ "%s: Policy inconsistent with encryption context\n",
+ __func__);
+ ret = -EINVAL;
}
- if (is_encryption_context_consistent_with_policy(inode, policy))
- return 0;
-
- printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
- __func__);
- return -EINVAL;
+ mnt_drop_write_file(filp);
+ return ret;
}
EXPORT_SYMBOL(fscrypt_process_policy);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d116453b0276..79a5941c2474 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -585,7 +585,8 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
*/
void *devpts_get_priv(struct dentry *dentry)
{
- WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
+ if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC)
+ return NULL;
return dentry->d_fsdata;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 10686fd67fb4..1bb7df5e4536 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -776,7 +776,7 @@ resizefs_out:
(struct fscrypt_policy __user *)arg,
sizeof(policy)))
return -EFAULT;
- return fscrypt_process_policy(inode, &policy);
+ return fscrypt_process_policy(filp, &policy);
#else
return -EOPNOTSUPP;
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 47abb96098e4..28f4f4cbb8d8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1757,21 +1757,14 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
- int ret;
if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
sizeof(policy)))
return -EFAULT;
- ret = mnt_want_write_file(filp);
- if (ret)
- return ret;
-
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
- ret = fscrypt_process_policy(inode, &policy);
- mnt_drop_write_file(filp);
- return ret;
+ return fscrypt_process_policy(filp, &policy);
}
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f394aff59c36..3988b43c2f5a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -530,13 +530,13 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
req->out.args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_req *req, int write)
+static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
{
unsigned i;
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
- if (write)
+ if (should_dirty)
set_page_dirty_lock(page);
put_page(page);
}
@@ -1320,6 +1320,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
loff_t *ppos, int flags)
{
int write = flags & FUSE_DIO_WRITE;
+ bool should_dirty = !write && iter_is_iovec(iter);
int cuse = flags & FUSE_DIO_CUSE;
struct file *file = io->file;
struct inode *inode = file->f_mapping->host;
@@ -1363,7 +1364,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
nres = fuse_send_read(req, io, pos, nbytes, owner);
if (!io->async)
- fuse_release_user_pages(req, !write);
+ fuse_release_user_pages(req, should_dirty);
if (req->out.h.error) {
err = req->out.h.error;
break;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 0f56deb24ce6..c415668c86d4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -568,7 +568,7 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
-static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+static int ioctl_file_dedupe_range(struct file *file, void __user *arg)
{
struct file_dedupe_range __user *argp = arg;
struct file_dedupe_range *same = NULL;
@@ -582,6 +582,10 @@ static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
}
size = offsetof(struct file_dedupe_range __user, info[count]);
+ if (size > PAGE_SIZE) {
+ ret = -ENOMEM;
+ goto out;
+ }
same = memdup_user(argp, size);
if (IS_ERR(same)) {
diff --git a/fs/iomap.c b/fs/iomap.c
index 0342254646e3..706270f21b35 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -428,9 +428,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
break;
}
+ if (iomap->flags & IOMAP_F_MERGED)
+ flags |= FIEMAP_EXTENT_MERGED;
+
return fiemap_fill_next_extent(fi, iomap->offset,
iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
- iomap->length, flags | FIEMAP_EXTENT_MERGED);
+ iomap->length, flags);
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e1574008adc9..2bcb86e6e6ca 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -840,21 +840,35 @@ repeat:
mutex_lock(&kernfs_mutex);
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
+ struct kernfs_node *parent;
struct inode *inode;
- struct dentry *dentry;
+ /*
+ * We want fsnotify_modify() on @kn but as the
+ * modifications aren't originating from userland don't
+ * have the matching @file available. Look up the inodes
+ * and generate the events manually.
+ */
inode = ilookup(info->sb, kn->ino);
if (!inode)
continue;
- dentry = d_find_any_alias(inode);
- if (dentry) {
- fsnotify_parent(NULL, dentry, FS_MODIFY);
- fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
- NULL, 0);
- dput(dentry);
+ parent = kernfs_get_parent(kn);
+ if (parent) {
+ struct inode *p_inode;
+
+ p_inode = ilookup(info->sb, parent->ino);
+ if (p_inode) {
+ fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
+ inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
+ iput(p_inode);
+ }
+
+ kernfs_put(parent);
}
+ fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
+ kn->name, 0);
iput(inode);
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f55a4e756047..217847679f0e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work)
PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
- (end - start) >> SECTOR_SHIFT);
+ (end - start) >> SECTOR_SHIFT, end);
}
pnfs_ld_write_done(hdr);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 18e6fd0b9506..efc007f00742 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -141,6 +141,7 @@ struct pnfs_block_layout {
struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */
bool bl_scsi_layout;
+ u64 bl_lwb;
};
static inline struct pnfs_block_layout *
@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl,
int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
sector_t end);
int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
- sector_t len);
+ sector_t len, u64 lwb);
bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent *ret, bool rw);
int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 992bcb19c11e..c85fbfd2d0d9 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
int
ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
- sector_t len)
+ sector_t len, u64 lwb)
{
struct rb_root *root = &bl->bl_ext_rw;
sector_t end = start + len;
@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
}
}
out:
+ if (bl->bl_lwb < lwb)
+ bl->bl_lwb = lwb;
spin_unlock(&bl->bl_ext_lock);
__ext_put_deviceids(&tmp);
@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
}
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
- size_t buffer_size, size_t *count)
+ size_t buffer_size, size_t *count, __u64 *lastbyte)
{
struct pnfs_block_extent *be;
int ret = 0;
@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
}
+ *lastbyte = bl->bl_lwb - 1;
+ bl->bl_lwb = 0;
spin_unlock(&bl->bl_ext_lock);
return ret;
@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
arg->layoutupdate_pages = &arg->layoutupdate_page;
retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a7f2e6e33305..52a28311e2a4 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
err_socks:
svc_rpcb_cleanup(serv, net);
err_bind:
+ nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
"net = %p\n", ret, net);
return ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c92a75e066a6..f953ef6b2f2e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -454,11 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp,
((u32 *)&rclist->rcl_sessionid.data)[3],
ref->rc_sequenceid, ref->rc_slotid);
- spin_lock(&tbl->slot_tbl_lock);
- status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
- tbl->slots[ref->rc_slotid].seq_nr ==
- ref->rc_sequenceid);
- spin_unlock(&tbl->slot_tbl_lock);
+ status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
+ ref->rc_sequenceid, HZ >> 1) < 0;
if (status)
goto out;
}
@@ -487,7 +484,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
goto out;
tbl = &clp->cl_session->bc_slot_table;
- slot = tbl->slots + args->csa_slotid;
/* Set up res before grabbing the spinlock */
memcpy(&res->csr_sessionid, &args->csa_sessionid,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 003ebce4bbc4..1e106780a237 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -426,7 +426,7 @@ EXPORT_SYMBOL_GPL(nfs_mark_client_ready);
* Initialise the timeout values for a connection
*/
void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
- unsigned int timeo, unsigned int retrans)
+ int timeo, int retrans)
{
to->to_initval = timeo * HZ / 10;
to->to_retries = retrans;
@@ -434,9 +434,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
switch (proto) {
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
- if (to->to_retries == 0)
+ if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
- if (to->to_initval == 0)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
@@ -449,9 +449,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
to->to_exponential = 0;
break;
case XPRT_TRANSPORT_UDP:
- if (to->to_retries == 0)
+ if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
- if (!to->to_initval)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
to->to_initval = NFS_MAX_UDP_TIMEOUT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7d620970f2e1..ca699ddc11c1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
if (result <= 0)
goto out;
- written = generic_write_sync(iocb, result);
+ result = generic_write_sync(iocb, result);
+ if (result < 0)
+ goto out;
+ written = result;
iocb->ki_pos += written;
/* Return error values */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index e6206eaf2bdf..51b51369704c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
if (ffl) {
INIT_LIST_HEAD(&ffl->error_list);
INIT_LIST_HEAD(&ffl->mirrors);
+ ffl->last_report_time = ktime_get();
return &ffl->generic_hdr;
} else
return NULL;
@@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
{
static const ktime_t notime = {0};
s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
+ struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
if (ktime_equal(mirror->start_time, notime))
mirror->start_time = now;
- if (ktime_equal(mirror->last_report_time, notime))
- mirror->last_report_time = now;
if (mirror->report_interval != 0)
report_interval = (s64)mirror->report_interval * 1000LL;
else if (layoutstats_timer != 0)
report_interval = (s64)layoutstats_timer * 1000LL;
- if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+ if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
report_interval) {
- mirror->last_report_time = now;
+ ffl->last_report_time = now;
return true;
}
@@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_pnfs_ds *ds;
+ bool fail_return = false;
int idx;
/* mirrors are sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
+ if (idx+1 == fls->mirror_array_cnt)
+ fail_return = true;
+ ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
if (ds) {
*best_idx = idx;
return ds;
@@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs4_pnfs_ds *ds;
int ds_idx;
+retry:
/* Use full layout for now */
if (!pgio->pg_lseg)
ff_layout_pg_get_read(pgio, req, false);
@@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
if (!ds) {
- if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
- goto out_pnfs;
- else
+ if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
}
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
@@ -890,12 +897,6 @@ out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_read_mds(pgio);
- return;
-
-out_pnfs:
- pnfs_set_lo_fail(pgio->pg_lseg);
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
}
static void
@@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int i;
int status;
+retry:
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
for (i = 0; i < pgio->pg_mirror_count; i++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
if (!ds) {
- if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
- goto out_pnfs;
- else
+ if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
}
pgm = &pgio->pg_mirrors[i];
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -956,12 +961,6 @@ out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_write_mds(pgio);
- return;
-
-out_pnfs:
- pnfs_set_lo_fail(pgio->pg_lseg);
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
}
static unsigned int
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 1bcdb15d0c41..3ee0c9fcea76 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror {
struct nfs4_ff_layoutstat read_stat;
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
- ktime_t last_report_time;
u32 report_interval;
};
@@ -101,6 +100,7 @@ struct nfs4_flexfile_layout {
struct pnfs_ds_commit_info commit_info;
struct list_head mirrors;
struct list_head error_list; /* nfs4_ff_layout_ds_err */
+ ktime_t last_report_time; /* Layoutstat report times */
};
static inline struct nfs4_flexfile_layout *
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 0aa36be71fce..f7a3f6b05369 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -17,8 +17,8 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
-static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_retrans;
void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
{
@@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
devid = &mirror->mirror_ds->id_node;
if (ff_layout_test_devid_unavailable(devid))
- goto out;
+ goto out_fail;
ds = mirror->mirror_ds->ds;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror->mirror_ds->ds_versions[0].rsize = max_payload;
if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
mirror->mirror_ds->ds_versions[0].wsize = max_payload;
- } else {
- ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
- mirror, lseg->pls_range.offset,
- lseg->pls_range.length, NFS4ERR_NXIO,
- OP_ILLEGAL, GFP_NOIO);
- if (fail_return || !ff_layout_has_available_ds(lseg))
- pnfs_error_mark_layout_for_return(ino, lseg);
- ds = NULL;
+ goto out;
}
+ ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
+ mirror, lseg->pls_range.offset,
+ lseg->pls_range.length, NFS4ERR_NXIO,
+ OP_ILLEGAL, GFP_NOIO);
+out_fail:
+ if (fail_return || !ff_layout_has_available_ds(lseg))
+ pnfs_error_mark_layout_for_return(ino, lseg);
+ ds = NULL;
out:
return ds;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7ce5e023c3c3..74935a19e4bf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -58,6 +58,9 @@ struct nfs_clone_mount {
*/
#define NFS_UNSPEC_PORT (-1)
+#define NFS_UNSPEC_RETRANS (UINT_MAX)
+#define NFS_UNSPEC_TIMEO (UINT_MAX)
+
/*
* Maximum number of pages that readdir can use for creating
* a vmapped array of pages.
@@ -156,7 +159,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
void nfs_server_insert_lists(struct nfs_server *);
void nfs_server_remove_lists(struct nfs_server *);
-void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
+void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans);
int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
rpc_authflavor_t);
struct nfs_server *nfs_alloc_server(void);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6f4752734804..64b43b4ad9dd 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -318,10 +318,22 @@ static void
nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
- struct nfs_server *server = NFS_SERVER(data->args.inode);
+ struct inode *inode = data->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layout_hdr *lo;
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (!pnfs_layout_is_valid(lo)) {
+ spin_unlock(&inode->i_lock);
+ rpc_exit(task, 0);
+ return;
+ }
+ nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid);
+ spin_unlock(&inode->i_lock);
nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
&data->res.seq_res, task);
+
}
static void
@@ -341,11 +353,11 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
- if (lo && nfs4_stateid_match(&data->args.stateid,
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match(&data->args.stateid,
&lo->plh_stateid)) {
LIST_HEAD(head);
@@ -359,11 +371,23 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
} else
spin_unlock(&inode->i_lock);
break;
+ case -NFS4ERR_OLD_STATEID:
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&data->args.stateid,
+ &lo->plh_stateid)) {
+ /* Do we need to delay before resending? */
+ if (!nfs4_stateid_is_newer(&lo->plh_stateid,
+ &data->args.stateid))
+ rpc_delay(task, HZ);
+ rpc_restart_call_prepare(task);
+ }
+ spin_unlock(&inode->i_lock);
+ break;
case -ENOTSUPP:
case -EOPNOTSUPP:
NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
- default:
- break;
}
dprintk("%s server returns %d\n", __func__, task->tk_status);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8d7d08d4f95f..cd3b7cfdde16 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -817,6 +817,11 @@ static int nfs4_set_client(struct nfs_server *server,
goto error;
}
+ if (server->nfs_client == clp) {
+ error = -ELOOP;
+ goto error;
+ }
+
/*
* Query for the lease time on clientid setup or renewal
*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1949bbd806eb..a9dec32ba9ba 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -634,15 +634,11 @@ out_sleep:
}
EXPORT_SYMBOL_GPL(nfs40_setup_sequence);
-static int nfs40_sequence_done(struct rpc_task *task,
- struct nfs4_sequence_res *res)
+static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res)
{
struct nfs4_slot *slot = res->sr_slot;
struct nfs4_slot_table *tbl;
- if (slot == NULL)
- goto out;
-
tbl = slot->table;
spin_lock(&tbl->slot_tbl_lock);
if (!nfs41_wake_and_assign_slot(tbl, slot))
@@ -650,7 +646,13 @@ static int nfs40_sequence_done(struct rpc_task *task,
spin_unlock(&tbl->slot_tbl_lock);
res->sr_slot = NULL;
-out:
+}
+
+static int nfs40_sequence_done(struct rpc_task *task,
+ struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot != NULL)
+ nfs40_sequence_free_slot(res);
return 1;
}
@@ -666,6 +668,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
tbl = slot->table;
session = tbl->session;
+ /* Bump the slot sequence number */
+ if (slot->seq_done)
+ slot->seq_nr++;
+ slot->seq_done = 0;
+
spin_lock(&tbl->slot_tbl_lock);
/* Be nice to the server: try to ensure that the last transmitted
* value for highest_user_slotid <= target_highest_slotid
@@ -686,9 +693,12 @@ out_unlock:
res->sr_slot = NULL;
if (send_new_highest_used_slotid)
nfs41_notify_server(session->clp);
+ if (waitqueue_active(&tbl->slot_waitq))
+ wake_up_all(&tbl->slot_waitq);
}
-int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+static int nfs41_sequence_process(struct rpc_task *task,
+ struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
struct nfs4_slot *slot = res->sr_slot;
@@ -714,7 +724,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
switch (res->sr_status) {
case 0:
/* Update the slot's sequence and clientid lease timer */
- ++slot->seq_nr;
+ slot->seq_done = 1;
clp = session->clp;
do_renew_lease(clp, res->sr_timestamp);
/* Check sequence flags */
@@ -769,16 +779,16 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
goto retry_nowait;
default:
/* Just update the slot sequence no. */
- ++slot->seq_nr;
+ slot->seq_done = 1;
}
out:
/* The session may be reset by one of the error handlers. */
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
- nfs41_sequence_free_slot(res);
out_noaction:
return ret;
retry_nowait:
if (rpc_restart_call_prepare(task)) {
+ nfs41_sequence_free_slot(res);
task->tk_status = 0;
ret = 0;
}
@@ -789,8 +799,37 @@ out_retry:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return 0;
}
+
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+ if (!nfs41_sequence_process(task, res))
+ return 0;
+ if (res->sr_slot != NULL)
+ nfs41_sequence_free_slot(res);
+ return 1;
+
+}
EXPORT_SYMBOL_GPL(nfs41_sequence_done);
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot == NULL)
+ return 1;
+ if (res->sr_slot->table->session != NULL)
+ return nfs41_sequence_process(task, res);
+ return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot != NULL) {
+ if (res->sr_slot->table->session != NULL)
+ nfs41_sequence_free_slot(res);
+ else
+ nfs40_sequence_free_slot(res);
+ }
+}
+
int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
{
if (res->sr_slot == NULL)
@@ -920,6 +959,17 @@ static int nfs4_setup_sequence(const struct nfs_server *server,
args, res, task);
}
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+ return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot != NULL)
+ nfs40_sequence_free_slot(res);
+}
+
int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -1197,6 +1247,7 @@ static void nfs4_opendata_free(struct kref *kref)
struct super_block *sb = p->dentry->d_sb;
nfs_free_seqid(p->o_arg.seqid);
+ nfs4_sequence_free_slot(&p->o_res.seq_res);
if (p->state != NULL)
nfs4_put_open_state(p->state);
nfs4_put_state_owner(p->owner);
@@ -1656,9 +1707,14 @@ err:
static struct nfs4_state *
nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
{
+ struct nfs4_state *ret;
+
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
- return _nfs4_opendata_reclaim_to_nfs4_state(data);
- return _nfs4_opendata_to_nfs4_state(data);
+ ret =_nfs4_opendata_reclaim_to_nfs4_state(data);
+ else
+ ret = _nfs4_opendata_to_nfs4_state(data);
+ nfs4_sequence_free_slot(&data->o_res.seq_res);
+ return ret;
}
static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -2056,7 +2112,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
- if (!nfs4_sequence_done(task, &data->o_res.seq_res))
+ if (!nfs4_sequence_process(task, &data->o_res.seq_res))
return;
if (task->tk_status == 0) {
@@ -7514,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
trace_nfs4_create_session(clp, status);
+ switch (status) {
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_DELAY:
+ case -ETIMEDOUT:
+ case -EACCES:
+ case -EAGAIN:
+ goto out;
+ };
+
+ clp->cl_seqid++;
if (!status) {
/* Verify the session's negotiated channel_attrs values */
status = nfs4_verify_channel_attrs(&args, &res);
/* Increment the clientid slot sequence id */
- if (clp->cl_seqid == res.seqid)
- clp->cl_seqid++;
if (status)
goto out;
nfs4_update_session(session, &res);
@@ -7864,7 +7928,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs4_layoutget *lgp = calldata;
dprintk("--> %s\n", __func__);
- nfs41_sequence_done(task, &lgp->res.seq_res);
+ nfs41_sequence_process(task, &lgp->res.seq_res);
dprintk("<-- %s\n", __func__);
}
@@ -8080,6 +8144,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
@@ -8106,7 +8171,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
dprintk("--> %s\n", __func__);
- if (!nfs41_sequence_done(task, &lrp->res.seq_res))
+ if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;
server = NFS_SERVER(lrp->args.inode);
@@ -8118,6 +8183,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
rpc_restart_call_prepare(task);
return;
}
@@ -8132,12 +8198,16 @@ static void nfs4_layoutreturn_release(void *calldata)
dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock);
- pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
- be32_to_cpu(lrp->args.stateid.seqid));
- if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
+ if (lrp->res.lrs_present) {
+ pnfs_mark_matching_lsegs_invalid(lo, &freeme,
+ &lrp->args.range,
+ be32_to_cpu(lrp->args.stateid.seqid));
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+ } else
+ pnfs_mark_layout_stateid_invalid(lo, &freeme);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&lo->plh_inode->i_lock);
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
pnfs_free_lseg_list(&freeme);
pnfs_put_layout_hdr(lrp->args.layout);
nfs_iput_and_deactive(lrp->inode);
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 332d06e64fa9..b62973045a3e 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -28,6 +28,7 @@ static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue)
tbl->highest_used_slotid = NFS4_NO_SLOT;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
+ init_waitqueue_head(&tbl->slot_waitq);
init_completion(&tbl->complete);
}
@@ -172,6 +173,58 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
return ERR_PTR(-E2BIG);
}
+static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid,
+ u32 *seq_nr)
+ __must_hold(&tbl->slot_tbl_lock)
+{
+ struct nfs4_slot *slot;
+
+ slot = nfs4_lookup_slot(tbl, slotid);
+ if (IS_ERR(slot))
+ return PTR_ERR(slot);
+ *seq_nr = slot->seq_nr;
+ return 0;
+}
+
+/*
+ * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use
+ *
+ * Given a slot table, slot id and sequence number, determine if the
+ * RPC call in question is still in flight. This function is mainly
+ * intended for use by the callback channel.
+ */
+static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl,
+ u32 slotid, u32 seq_nr)
+{
+ u32 cur_seq;
+ bool ret = false;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 &&
+ cur_seq == seq_nr && test_bit(slotid, tbl->used_slots))
+ ret = true;
+ spin_unlock(&tbl->slot_tbl_lock);
+ return ret;
+}
+
+/*
+ * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete
+ *
+ * Given a slot table, slot id and sequence number, wait until the
+ * corresponding RPC call completes. This function is mainly
+ * intended for use by the callback channel.
+ */
+int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+ u32 slotid, u32 seq_nr,
+ unsigned long timeout)
+{
+ if (wait_event_timeout(tbl->slot_waitq,
+ !nfs4_slot_seqid_in_use(tbl, slotid, seq_nr),
+ timeout) == 0)
+ return -ETIMEDOUT;
+ return 0;
+}
+
/*
* nfs4_alloc_slot - efficiently look for a free slot
*
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 5b51298d1d03..f703b755351b 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -21,7 +21,8 @@ struct nfs4_slot {
unsigned long generation;
u32 slot_nr;
u32 seq_nr;
- unsigned int interrupted : 1;
+ unsigned int interrupted : 1,
+ seq_done : 1;
};
/* Sessions */
@@ -36,6 +37,7 @@ struct nfs4_slot_table {
unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
spinlock_t slot_tbl_lock;
struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */
+ wait_queue_head_t slot_waitq; /* Completion wait on slot */
u32 max_slots; /* # slots in table */
u32 max_slotid; /* Max allowed slotid value */
u32 highest_used_slotid; /* sent to server on each SEQ.
@@ -78,6 +80,9 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+ u32 slotid, u32 seq_nr,
+ unsigned long timeout);
extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 70806cae0d36..2c93a85eda51 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
atomic_dec(&lo->plh_refcount);
if (list_empty(&lo->plh_segs)) {
- set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ if (atomic_read(&lo->plh_outstanding) == 0)
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
@@ -768,17 +769,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
pnfs_destroy_layouts_byclid(clp, false);
}
+static void
+pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
+{
+ lo->plh_return_iomode = 0;
+ lo->plh_return_seq = 0;
+ clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+}
+
/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
bool update_barrier)
{
u32 oldseq, newseq, new_barrier = 0;
- bool invalid = !pnfs_layout_is_valid(lo);
oldseq = be32_to_cpu(lo->plh_stateid.seqid);
newseq = be32_to_cpu(new->seqid);
- if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
+
+ if (!pnfs_layout_is_valid(lo)) {
+ nfs4_stateid_copy(&lo->plh_stateid, new);
+ lo->plh_barrier = newseq;
+ pnfs_clear_layoutreturn_info(lo);
+ clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ return;
+ }
+ if (pnfs_seqid_is_newer(newseq, oldseq)) {
nfs4_stateid_copy(&lo->plh_stateid, new);
/*
* Because of wraparound, we want to keep the barrier
@@ -790,7 +806,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
new_barrier = be32_to_cpu(new->seqid);
else if (new_barrier == 0)
return;
- if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
+ if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
lo->plh_barrier = new_barrier;
}
@@ -886,19 +902,14 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}
-static void
-pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
-{
- lo->plh_return_iomode = 0;
- lo->plh_return_seq = 0;
- clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
-}
-
static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
nfs4_stateid *stateid,
enum pnfs_iomode *iomode)
{
+ /* Serialise LAYOUTGET/LAYOUTRETURN */
+ if (atomic_read(&lo->plh_outstanding) != 0)
+ return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return false;
pnfs_get_layout_hdr(lo);
@@ -1555,6 +1566,7 @@ pnfs_update_layout(struct inode *ino,
}
lookup_again:
+ nfs4_client_recover_expired_lease(clp);
first = false;
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
@@ -1797,16 +1809,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
*/
pnfs_mark_layout_stateid_invalid(lo, &free_me);
- nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
- lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
+ pnfs_set_layout_stateid(lo, &res->stateid, true);
}
pnfs_get_lseg(lseg);
pnfs_layout_insert_lseg(lo, lseg, &free_me);
- if (!pnfs_layout_is_valid(lo)) {
- pnfs_clear_layoutreturn_info(lo);
- clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
- }
if (res->return_on_close)
@@ -2510,7 +2517,6 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
data->args.fh = NFS_FH(inode);
data->args.inode = inode;
- nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
status = ld->prepare_layoutstats(&data->args);
if (status)
goto out_free;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 18d446e1a82b..d39601381adf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -923,6 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data) {
+ data->timeo = NFS_UNSPEC_TIMEO;
+ data->retrans = NFS_UNSPEC_RETRANS;
data->acregmin = NFS_DEF_ACREGMIN;
data->acregmax = NFS_DEF_ACREGMAX;
data->acdirmin = NFS_DEF_ACDIRMIN;
@@ -1189,6 +1191,19 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option)
return rc;
}
+static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
+ unsigned long l_bound, unsigned long u_bound)
+{
+ int ret;
+
+ ret = nfs_get_option_ul(args, option);
+ if (ret != 0)
+ return ret;
+ if (*option < l_bound || *option > u_bound)
+ return -ERANGE;
+ return 0;
+}
+
/*
* Error-check and convert a string of mount options from user space into
* a data structure. The whole mount string is processed; bad options are
@@ -1352,12 +1367,12 @@ static int nfs_parse_mount_options(char *raw,
mnt->bsize = option;
break;
case Opt_timeo:
- if (nfs_get_option_ul(args, &option) || option == 0)
+ if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
goto out_invalid_value;
mnt->timeo = option;
break;
case Opt_retrans:
- if (nfs_get_option_ul(args, &option) || option == 0)
+ if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
goto out_invalid_value;
mnt->retrans = option;
break;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ecca6a5..e0e5f7c3c99f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- wait_event(group->fanotify_data.access_waitq, event->response ||
- atomic_read(&group->fanotify_data.bypass_perm));
-
- if (!event->response) { /* bypass_perm set */
- /*
- * Event was canceled because group is being destroyed. Remove
- * it from group's event list because we are responsible for
- * freeing the permission event.
- */
- fsnotify_remove_event(group, &event->fae.fse);
- return 0;
- }
+ wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bcd1d43..a64313868d3a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_perm_event_info *event, *next;
+ struct fsnotify_event *fsn_event;
/*
- * There may be still new events arriving in the notification queue
- * but since userspace cannot use fanotify fd anymore, no event can
- * enter or leave access_list by now.
+ * Stop new events from arriving in the notification queue. since
+ * userspace cannot use fanotify fd anymore, no event can enter or
+ * leave access_list by now either.
*/
- spin_lock(&group->fanotify_data.access_lock);
-
- atomic_inc(&group->fanotify_data.bypass_perm);
+ fsnotify_group_stop_queueing(group);
+ /*
+ * Process all permission events on access_list and notification queue
+ * and simulate reply from userspace.
+ */
+ spin_lock(&group->fanotify_data.access_lock);
list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
fae.fse.list) {
pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
spin_unlock(&group->fanotify_data.access_lock);
/*
- * Since bypass_perm is set, newly queued events will not wait for
- * access response. Wake up the already sleeping ones now.
- * synchronize_srcu() in fsnotify_destroy_group() will wait for all
- * processes sleeping in fanotify_handle_event() waiting for access
- * response and thus also for all permission events to be freed.
+ * Destroy all non-permission events. For permission events just
+ * dequeue them and set the response. They will be freed once the
+ * response is consumed and fanotify_get_response() returns.
*/
+ mutex_lock(&group->notification_mutex);
+ while (!fsnotify_notify_queue_is_empty(group)) {
+ fsn_event = fsnotify_remove_first_event(group);
+ if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+ fsnotify_destroy_event(group, fsn_event);
+ else
+ FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+ }
+ mutex_unlock(&group->notification_mutex);
+
+ /* Response for all permission events it set, wakeup waiters */
wake_up(&group->fanotify_data.access_waitq);
#endif
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
spin_lock_init(&group->fanotify_data.access_lock);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- atomic_set(&group->fanotify_data.bypass_perm, 0);
#endif
switch (flags & FAN_ALL_CLASS_BITS) {
case FAN_CLASS_NOTIF:
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 3e2dd85be5dd..b47f7cfdcaa4 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
}
/*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+ mutex_lock(&group->notification_mutex);
+ group->shutdown = true;
+ mutex_unlock(&group->notification_mutex);
+}
+
+/*
* Trying to get rid of a group. Remove all marks, flush all events and release
* the group reference.
* Note that another thread calling fsnotify_clear_marks_by_group() may still
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
+ /*
+ * Stop queueing new events. The code below is careful enough to not
+ * require this but fanotify needs to stop queuing events even before
+ * fsnotify_destroy_group() is called and this makes the other callers
+ * of fsnotify_destroy_group() to see the same behavior.
+ */
+ fsnotify_group_stop_queueing(group);
+
/* clear all inode marks for this group, attach them to destroy_list */
fsnotify_detach_group_marks(group);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e037aeb..e455e83ceeeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. The function returns 0 if the event was
* added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
*/
int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event,
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
mutex_lock(&group->notification_mutex);
+ if (group->shutdown) {
+ mutex_unlock(&group->notification_mutex);
+ return 2;
+ }
+
if (group->q_len >= group->max_events) {
ret = 2;
/* Queue overflow event only if it isn't already queued */
@@ -126,21 +132,6 @@ queue:
}
/*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
- struct fsnotify_event *event)
-{
- mutex_lock(&group->notification_mutex);
- if (!list_empty(&event->list)) {
- list_del_init(&event->list);
- group->q_len--;
- }
- mutex_unlock(&group->notification_mutex);
-}
-
-/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
*/
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7dabbc31060e..f165f867f332 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5922,7 +5922,6 @@ bail:
}
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
- handle_t *handle,
struct inode *data_alloc_inode,
struct buffer_head *data_alloc_bh)
{
@@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
struct ocfs2_truncate_log *tl;
struct inode *tl_inode = osb->osb_tl_inode;
struct buffer_head *tl_bh = osb->osb_tl_bh;
+ handle_t *handle;
di = (struct ocfs2_dinode *) tl_bh->b_data;
tl = &di->id2.i_dealloc;
i = le16_to_cpu(tl->tl_used) - 1;
while (i >= 0) {
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail;
+ }
+
/* Caller has given us at least enough credits to
* update the truncate log dinode */
status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
@@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
}
}
- status = ocfs2_extend_trans(handle,
- OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_commit_trans(osb, handle);
i--;
}
@@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
int status;
unsigned int num_to_flush;
- handle_t *handle;
struct inode *tl_inode = osb->osb_tl_inode;
struct inode *data_alloc_inode = NULL;
struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_unlock;
- }
-
- status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+ status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
data_alloc_bh);
if (status < 0)
mlog_errno(status);
- ocfs2_commit_trans(osb, handle);
-
-out_unlock:
brelse(data_alloc_bh);
ocfs2_inode_unlock(data_alloc_inode, 1);
@@ -6413,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out_unlock;
- }
-
while (head) {
if (head->free_bg)
bg_blkno = head->free_bg;
else
bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
head->free_bit);
+ handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
trace_ocfs2_free_cached_blocks(
(unsigned long long)head->free_blk, head->free_bit);
ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
head->free_bit, bg_blkno, 1);
- if (ret) {
+ if (ret)
mlog_errno(ret);
- goto out_journal;
- }
- ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
- if (ret) {
- mlog_errno(ret);
- goto out_journal;
- }
+ ocfs2_commit_trans(osb, handle);
tmp = head;
head = head->free_next;
kfree(tmp);
}
-out_journal:
- ocfs2_commit_trans(osb, handle);
-
out_unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 94b18369b1cc..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,9 +44,6 @@
* version here in tcp_internal.h should not need to be bumped for
* filesystem locking changes.
*
- * New in version 12
- * - Negotiate hb timeout when storage is down.
- *
* New in version 11
* - Negotiation of filesystem locking in the dlm join.
*
@@ -78,7 +75,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
-#define O2NET_PROTOCOL_VERSION 12ULL
+#define O2NET_PROTOCOL_VERSION 11ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index cdeafb4e7ed6..0bb128659d4b 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
struct dlm_lock *lock, int flags, int type)
{
enum dlm_status status;
- u8 old_owner = res->owner;
mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- lock->convert_pending = 0;
/* if it failed, move it back to granted queue.
* if master returns DLM_NORMAL and then down before sending ast,
* it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
if (status != DLM_NOTQUEUED)
dlm_error(status);
dlm_revert_pending_convert(res, lock);
- } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
- (old_owner != res->owner)) {
- mlog(0, "res %.*s is in recovering or has been recovered.\n",
- res->lockname.len, res->lockname.name);
+ } else if (!lock->convert_pending) {
+ mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+ "to granted list, retry convert.\n",
+ dlm->name, res->lockname.len, res->lockname.name);
status = DLM_RECOVERING;
}
+
+ lock->convert_pending = 0;
bail:
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4e7b0dc22450..0b055bfb8e86 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
int ret = 0;
- u64 tmpend, end = start + len;
+ u64 tmpend = 0;
+ u64 end = start + len;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
}
/*
- * We want to get the byte offset of the end of the 1st cluster.
+ * If start is on a cluster boundary and end is somewhere in another
+ * cluster, we have not COWed the cluster starting at start, unless
+ * end is also within the same cluster. So, in this case, we skip this
+ * first call to ocfs2_zero_range_for_truncate() truncate and move on
+ * to the next one.
*/
- tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
- if (tmpend > end)
- tmpend = end;
+ if ((start & (csize - 1)) != 0) {
+ /*
+ * We want to get the byte offset of the end of the 1st
+ * cluster.
+ */
+ tmpend = (u64)osb->s_clustersize +
+ (start & ~(osb->s_clustersize - 1));
+ if (tmpend > end)
+ tmpend = end;
- trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
- (unsigned long long)tmpend);
+ trace_ocfs2_zero_partial_clusters_range1(
+ (unsigned long long)start,
+ (unsigned long long)tmpend);
- ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
- if (ret)
- mlog_errno(ret);
+ ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+ tmpend);
+ if (ret)
+ mlog_errno(ret);
+ }
if (tmpend < end) {
/*
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea47120a85ff..6ad3533940ba 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1199,14 +1199,24 @@ retry:
inode_unlock((*ac)->ac_inode);
ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
- if (ret == 1)
+ if (ret == 1) {
+ iput((*ac)->ac_inode);
+ (*ac)->ac_inode = NULL;
goto retry;
+ }
if (ret < 0)
mlog_errno(ret);
inode_lock((*ac)->ac_inode);
- ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+ ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ inode_unlock((*ac)->ac_inode);
+ iput((*ac)->ac_inode);
+ (*ac)->ac_inode = NULL;
+ goto bail;
+ }
}
if (status < 0) {
if (status != -ENOSPC)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 54e5d6681786..43fdc2765aea 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -80,6 +80,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
}
for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+ if (ovl_is_private_xattr(name))
+ continue;
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 12bcd07b9e32..1560fdc09a5f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -12,6 +12,8 @@
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/cred.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
#include "overlayfs.h"
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -186,6 +188,9 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry;
int err;
+ if (!hardlink && !IS_POSIXACL(udir))
+ stat->mode &= ~current_umask();
+
inode_lock_nested(udir, I_MUTEX_PARENT);
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
@@ -335,6 +340,32 @@ out_free:
return ret;
}
+static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
+ const struct posix_acl *acl)
+{
+ void *buffer;
+ size_t size;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
+ return 0;
+
+ size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+ buffer = kmalloc(size, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+ err = size;
+ if (err < 0)
+ goto out_free;
+
+ err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+out_free:
+ kfree(buffer);
+ return err;
+}
+
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct kstat *stat, const char *link,
struct dentry *hardlink)
@@ -346,10 +377,18 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct dentry *upper;
struct dentry *newdentry;
int err;
+ struct posix_acl *acl, *default_acl;
if (WARN_ON(!workdir))
return -EROFS;
+ if (!hardlink) {
+ err = posix_acl_create(dentry->d_parent->d_inode,
+ &stat->mode, &default_acl, &acl);
+ if (err)
+ return err;
+ }
+
err = ovl_lock_rename_workdir(workdir, upperdir);
if (err)
goto out;
@@ -384,6 +423,17 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out_cleanup;
}
+ if (!hardlink) {
+ err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
+ acl);
+ if (err)
+ goto out_cleanup;
+
+ err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
+ default_acl);
+ if (err)
+ goto out_cleanup;
+ }
if (!hardlink && S_ISDIR(stat->mode)) {
err = ovl_set_opaque(newdentry);
@@ -410,6 +460,10 @@ out_dput:
out_unlock:
unlock_rename(workdir, upperdir);
out:
+ if (!hardlink) {
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
+ }
return err;
out_cleanup:
@@ -950,9 +1004,9 @@ const struct inode_operations ovl_dir_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_dir_getattr,
.setxattr = generic_setxattr,
- .getxattr = ovl_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = ovl_listxattr,
- .removexattr = ovl_removexattr,
+ .removexattr = generic_removexattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 1b885c156028..c75625c1efa3 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -10,6 +10,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/xattr.h>
+#include <linux/posix_acl.h>
#include "overlayfs.h"
static int ovl_copy_up_truncate(struct dentry *dentry)
@@ -191,32 +192,44 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
return err;
}
-static bool ovl_is_private_xattr(const char *name)
+bool ovl_is_private_xattr(const char *name)
{
-#define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "."
- return strncmp(name, OVL_XATTR_PRE_NAME,
- sizeof(OVL_XATTR_PRE_NAME) - 1) == 0;
+ return strncmp(name, OVL_XATTR_PREFIX,
+ sizeof(OVL_XATTR_PREFIX) - 1) == 0;
}
-int ovl_setxattr(struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
{
int err;
- struct dentry *upperdentry;
+ struct path realpath;
+ enum ovl_path_type type = ovl_path_real(dentry, &realpath);
const struct cred *old_cred;
err = ovl_want_write(dentry);
if (err)
goto out;
+ if (!value && !OVL_TYPE_UPPER(type)) {
+ err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+ if (err < 0)
+ goto out_drop_write;
+ }
+
err = ovl_copy_up(dentry);
if (err)
goto out_drop_write;
- upperdentry = ovl_dentry_upper(dentry);
+ if (!OVL_TYPE_UPPER(type))
+ ovl_path_upper(dentry, &realpath);
+
old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_setxattr(upperdentry, name, value, size, flags);
+ if (value)
+ err = vfs_setxattr(realpath.dentry, name, value, size, flags);
+ else {
+ WARN_ON(flags != XATTR_REPLACE);
+ err = vfs_removexattr(realpath.dentry, name);
+ }
revert_creds(old_cred);
out_drop_write:
@@ -225,16 +238,13 @@ out:
return err;
}
-ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
- const char *name, void *value, size_t size)
+int ovl_xattr_get(struct dentry *dentry, const char *name,
+ void *value, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
const struct cred *old_cred;
- if (ovl_is_private_xattr(name))
- return -ENODATA;
-
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_getxattr(realdentry, name, value, size);
revert_creds(old_cred);
@@ -245,7 +255,8 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
- int off;
+ size_t len;
+ char *s;
const struct cred *old_cred;
old_cred = ovl_override_creds(dentry->d_sb);
@@ -255,73 +266,39 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return res;
/* filter out private xattrs */
- for (off = 0; off < res;) {
- char *s = list + off;
- size_t slen = strlen(s) + 1;
+ for (s = list, len = res; len;) {
+ size_t slen = strnlen(s, len) + 1;
- BUG_ON(off + slen > res);
+ /* underlying fs providing us with an broken xattr list? */
+ if (WARN_ON(slen > len))
+ return -EIO;
+ len -= slen;
if (ovl_is_private_xattr(s)) {
res -= slen;
- memmove(s, s + slen, res - off);
+ memmove(s, s + slen, len);
} else {
- off += slen;
+ s += slen;
}
}
return res;
}
-int ovl_removexattr(struct dentry *dentry, const char *name)
-{
- int err;
- struct path realpath;
- enum ovl_path_type type = ovl_path_real(dentry, &realpath);
- const struct cred *old_cred;
-
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
- err = -ENODATA;
- if (ovl_is_private_xattr(name))
- goto out_drop_write;
-
- if (!OVL_TYPE_UPPER(type)) {
- err = vfs_getxattr(realpath.dentry, name, NULL, 0);
- if (err < 0)
- goto out_drop_write;
-
- err = ovl_copy_up(dentry);
- if (err)
- goto out_drop_write;
-
- ovl_path_upper(dentry, &realpath);
- }
-
- old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_removexattr(realpath.dentry, name);
- revert_creds(old_cred);
-out_drop_write:
- ovl_drop_write(dentry);
-out:
- return err;
-}
-
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
struct inode *realinode = ovl_inode_real(inode, NULL);
const struct cred *old_cred;
struct posix_acl *acl;
- if (!IS_POSIXACL(realinode))
+ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
return NULL;
if (!realinode->i_op->get_acl)
return NULL;
old_cred = ovl_override_creds(inode->i_sb);
- acl = realinode->i_op->get_acl(realinode, type);
+ acl = get_acl(realinode, type);
revert_creds(old_cred);
return acl;
@@ -391,9 +368,9 @@ static const struct inode_operations ovl_file_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.setxattr = generic_setxattr,
- .getxattr = ovl_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = ovl_listxattr,
- .removexattr = ovl_removexattr,
+ .removexattr = generic_removexattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
};
@@ -404,9 +381,9 @@ static const struct inode_operations ovl_symlink_inode_operations = {
.readlink = ovl_readlink,
.getattr = ovl_getattr,
.setxattr = generic_setxattr,
- .getxattr = ovl_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = ovl_listxattr,
- .removexattr = ovl_removexattr,
+ .removexattr = generic_removexattr,
.update_time = ovl_update_time,
};
@@ -415,6 +392,9 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode)
inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_flags |= S_NOCMTIME;
+#ifdef CONFIG_FS_POSIX_ACL
+ inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
+#endif
mode &= S_IFMT;
switch (mode) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e4f5c9536bfe..5813ccff8cd9 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -24,8 +24,8 @@ enum ovl_path_type {
(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))
-#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay"
-#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX ".opaque"
+#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
+#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
#define OVL_ISUPPER_MASK 1UL
@@ -179,20 +179,21 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);
int ovl_check_d_type_supported(struct path *realpath);
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+ struct dentry *dentry, int level);
/* inode.c */
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
int ovl_permission(struct inode *inode, int mask);
-int ovl_setxattr(struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags);
-ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
- const char *name, void *value, size_t size);
+int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, const char *name,
+ void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
-int ovl_removexattr(struct dentry *dentry, const char *name);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
+bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode);
struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index cf37fc76fc9f..f241b4ee3d8a 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath,
err = rdd->err;
} while (!err && rdd->count);
- if (!err && rdd->first_maybe_whiteout)
+ if (!err && rdd->first_maybe_whiteout && rdd->dentry)
err = ovl_check_whiteouts(realpath->dentry, rdd);
fput(realfile);
@@ -606,3 +606,64 @@ int ovl_check_d_type_supported(struct path *realpath)
return rdd.d_type_supported;
}
+
+static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+{
+ int err;
+ struct inode *dir = path->dentry->d_inode;
+ LIST_HEAD(list);
+ struct ovl_cache_entry *p;
+ struct ovl_readdir_data rdd = {
+ .ctx.actor = ovl_fill_merge,
+ .dentry = NULL,
+ .list = &list,
+ .root = RB_ROOT,
+ .is_lowest = false,
+ };
+
+ err = ovl_dir_read(path, &rdd);
+ if (err)
+ goto out;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ list_for_each_entry(p, &list, l_node) {
+ struct dentry *dentry;
+
+ if (p->name[0] == '.') {
+ if (p->len == 1)
+ continue;
+ if (p->len == 2 && p->name[1] == '.')
+ continue;
+ }
+ dentry = lookup_one_len(p->name, path->dentry, p->len);
+ if (IS_ERR(dentry))
+ continue;
+ if (dentry->d_inode)
+ ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+ dput(dentry);
+ }
+ inode_unlock(dir);
+out:
+ ovl_cache_free(&list);
+}
+
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+ struct dentry *dentry, int level)
+{
+ int err;
+
+ if (!d_is_dir(dentry) || level > 1) {
+ ovl_cleanup(dir, dentry);
+ return;
+ }
+
+ err = ovl_do_rmdir(dir, dentry);
+ if (err) {
+ struct path path = { .mnt = mnt, .dentry = dentry };
+
+ inode_unlock(dir);
+ ovl_workdir_cleanup_recurse(&path, level + 1);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ ovl_cleanup(dir, dentry);
+ }
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4036132842b5..e2a94a26767b 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -814,6 +814,10 @@ retry:
struct kstat stat = {
.mode = S_IFDIR | 0,
};
+ struct iattr attr = {
+ .ia_valid = ATTR_MODE,
+ .ia_mode = stat.mode,
+ };
if (work->d_inode) {
err = -EEXIST;
@@ -821,7 +825,7 @@ retry:
goto out_dput;
retried = true;
- ovl_cleanup(dir, work);
+ ovl_workdir_cleanup(dir, mnt, work, 0);
dput(work);
goto retry;
}
@@ -829,6 +833,21 @@ retry:
err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
if (err)
goto out_dput;
+
+ err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+ if (err && err != -ENODATA && err != -EOPNOTSUPP)
+ goto out_dput;
+
+ err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+ if (err && err != -ENODATA && err != -EOPNOTSUPP)
+ goto out_dput;
+
+ /* Clear any inherited mode bits */
+ inode_lock(work->d_inode);
+ err = notify_change(work, &attr, NULL);
+ inode_unlock(work->d_inode);
+ if (err)
+ goto out_dput;
}
out_unlock:
inode_unlock(dir);
@@ -967,10 +986,19 @@ static unsigned int ovl_split_lowerdirs(char *str)
return ctr;
}
-static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+static int __maybe_unused
+ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return ovl_xattr_get(dentry, handler->name, buffer, size);
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
{
struct dentry *workdir = ovl_workdir(dentry);
struct inode *realinode = ovl_inode_real(inode, NULL);
@@ -998,19 +1026,22 @@ static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
posix_acl_release(acl);
- return ovl_setxattr(dentry, inode, handler->name, value, size, flags);
+ err = ovl_xattr_set(dentry, handler->name, value, size, flags);
+ if (!err)
+ ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+
+ return err;
out_acl_release:
posix_acl_release(acl);
return err;
}
-static int ovl_other_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
{
- return ovl_setxattr(dentry, inode, name, value, size, flags);
+ return -EPERM;
}
static int ovl_own_xattr_set(const struct xattr_handler *handler,
@@ -1021,42 +1052,59 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler,
return -EPERM;
}
-static const struct xattr_handler ovl_posix_acl_access_xattr_handler = {
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return ovl_xattr_get(dentry, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ return ovl_xattr_set(dentry, name, value, size, flags);
+}
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_access_xattr_handler = {
.name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
+ .get = ovl_posix_acl_xattr_get,
.set = ovl_posix_acl_xattr_set,
};
-static const struct xattr_handler ovl_posix_acl_default_xattr_handler = {
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_default_xattr_handler = {
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
+ .get = ovl_posix_acl_xattr_get,
.set = ovl_posix_acl_xattr_set,
};
static const struct xattr_handler ovl_own_xattr_handler = {
.prefix = OVL_XATTR_PREFIX,
+ .get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
static const struct xattr_handler ovl_other_xattr_handler = {
.prefix = "", /* catch all */
+ .get = ovl_other_xattr_get,
.set = ovl_other_xattr_set,
};
static const struct xattr_handler *ovl_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
&ovl_posix_acl_access_xattr_handler,
&ovl_posix_acl_default_xattr_handler,
+#endif
&ovl_own_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
-static const struct xattr_handler *ovl_xattr_noacl_handlers[] = {
- &ovl_own_xattr_handler,
- &ovl_other_xattr_handler,
- NULL,
-};
-
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
struct path upperpath = { NULL, NULL };
@@ -1132,7 +1180,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
if (stacklen > OVL_MAX_STACK) {
- pr_err("overlayfs: too many lower directries, limit is %d\n",
+ pr_err("overlayfs: too many lower directories, limit is %d\n",
OVL_MAX_STACK);
goto out_free_lowertmp;
} else if (!ufs->config.upperdir && stacklen == 1) {
@@ -1269,10 +1317,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
sb->s_op = &ovl_super_operations;
- if (IS_ENABLED(CONFIG_FS_POSIX_ACL))
- sb->s_xattr = ovl_xattr_handlers;
- else
- sb->s_xattr = ovl_xattr_noacl_handlers;
+ sb->s_xattr = ovl_xattr_handlers;
sb->s_root = root_dentry;
sb->s_fs_info = ufs;
sb->s_flags |= MS_POSIXACL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 54e270262979..ac0df4dde823 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1556,18 +1556,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
struct task_struct *task;
- struct mm_struct *mm;
struct file *exe_file;
task = get_proc_task(d_inode(dentry));
if (!task)
return -ENOENT;
- mm = get_task_mm(task);
+ exe_file = get_task_exe_file(task);
put_task_struct(task);
- if (!mm)
- return -ENOENT;
- exe_file = get_mm_exe_file(mm);
- mmput(mm);
if (exe_file) {
*exe_path = exe_file->f_path;
path_get(&exe_file->f_path);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a939f5ed7f89..5c89a07e3d7f 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
+ char *buf = file->private_data;
ssize_t acc = 0;
size_t size, tsz;
size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
+ vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
- }
- kfree(elf_buf);
} else {
if (kern_addr_valid(start)) {
unsigned long n;
- n = copy_to_user(buffer, (char *)start, tsz);
+ /*
+ * Using bounce buffer to bypass the
+ * hardened user copy kernel text checks.
+ */
+ memcpy(buf, (char *) start, tsz);
+ n = copy_to_user(buffer, buf, tsz);
/*
* We cannot distinguish between fault on source
* and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
+ .release = release_kcore,
.llseek = default_llseek,
};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 187d84ef9de9..f6fa99eca515 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -581,6 +581,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
mss->anonymous_thp += HPAGE_PMD_SIZE;
else if (PageSwapBacked(page))
mss->shmem_thp += HPAGE_PMD_SIZE;
+ else if (is_zone_device_page(page))
+ /* pass */;
else
VM_BUG_ON_PAGE(1, page);
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 183a212694bf..12af0490322f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -27,9 +27,17 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/ramfs.h>
+#include <linux/sched.h>
#include "internal.h"
+static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
const struct file_operations ramfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
@@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
+ .get_unmapped_area = ramfs_mmu_get_unmapped_area,
};
const struct inode_operations ramfs_file_inode_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f35523d4fa3a..b803213d1307 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -114,9 +114,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
* If buf != of->prealloc_buf, we don't know how
* large it is, so cannot safely pass it to ->show
*/
- if (pos || WARN_ON_ONCE(buf != of->prealloc_buf))
+ if (WARN_ON_ONCE(buf != of->prealloc_buf))
return 0;
len = ops->show(kobj, of->kn->priv, buf);
+ if (pos) {
+ if (len <= pos)
+ return 0;
+ len -= pos;
+ memmove(buf, buf + pos, len);
+ }
return min(count, len);
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3dd8f1d54498..05b5243d89f6 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2278,6 +2278,8 @@ xfs_alloc_log_agf(
offsetof(xfs_agf_t, agf_btreeblks),
offsetof(xfs_agf_t, agf_uuid),
offsetof(xfs_agf_t, agf_rmap_blocks),
+ /* needed so that we don't log the whole rest of the structure: */
+ offsetof(xfs_agf_t, agf_spare64),
sizeof(xfs_agf_t)
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index b5c213a051cd..08569792fe20 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1814,6 +1814,10 @@ xfs_btree_lookup(
XFS_BTREE_STATS_INC(cur, lookup);
+ /* No such thing as a zero-level tree. */
+ if (cur->bc_nlevels == 0)
+ return -EFSCORRUPTED;
+
block = NULL;
keyno = 0;
@@ -4554,15 +4558,22 @@ xfs_btree_simple_query_range(
if (error)
goto out;
+ /* Nothing? See if there's anything to the right. */
+ if (!stat) {
+ error = xfs_btree_increment(cur, 0, &stat);
+ if (error)
+ goto out;
+ }
+
while (stat) {
/* Find the record. */
error = xfs_btree_get_rec(cur, &recp, &stat);
if (error || !stat)
break;
- cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
/* Skip if high_key(rec) < low_key. */
if (firstrec) {
+ cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
firstrec = false;
diff = cur->bc_ops->diff_two_keys(cur, low_key,
&rec_key);
@@ -4571,6 +4582,7 @@ xfs_btree_simple_query_range(
}
/* Stop if high_key < low_key(rec). */
+ cur->bc_ops->init_key_from_rec(&rec_key, recp);
diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
if (diff > 0)
break;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 054a2032fdb3..c221d0ecd52e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -194,7 +194,7 @@ xfs_defer_trans_abort(
/* Abort intent items. */
list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
- if (dfp->dfp_committed)
+ if (!dfp->dfp_done)
dfp->dfp_type->abort_intent(dfp->dfp_intent);
}
@@ -290,7 +290,6 @@ xfs_defer_finish(
struct xfs_defer_pending *dfp;
struct list_head *li;
struct list_head *n;
- void *done_item = NULL;
void *state;
int error = 0;
void (*cleanup_fn)(struct xfs_trans *, void *, int);
@@ -309,19 +308,11 @@ xfs_defer_finish(
if (error)
goto out;
- /* Mark all pending intents as committed. */
- list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) {
- if (dfp->dfp_committed)
- break;
- trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp);
- dfp->dfp_committed = true;
- }
-
/* Log an intent-done item for the first pending item. */
dfp = list_first_entry(&dop->dop_pending,
struct xfs_defer_pending, dfp_list);
trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
- done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
+ dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
dfp->dfp_count);
cleanup_fn = dfp->dfp_type->finish_cleanup;
@@ -331,7 +322,7 @@ xfs_defer_finish(
list_del(li);
dfp->dfp_count--;
error = dfp->dfp_type->finish_item(*tp, dop, li,
- done_item, &state);
+ dfp->dfp_done, &state);
if (error) {
/*
* Clean up after ourselves and jump out.
@@ -428,8 +419,8 @@ xfs_defer_add(
dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
KM_SLEEP | KM_NOFS);
dfp->dfp_type = defer_op_types[type];
- dfp->dfp_committed = false;
dfp->dfp_intent = NULL;
+ dfp->dfp_done = NULL;
dfp->dfp_count = 0;
INIT_LIST_HEAD(&dfp->dfp_work);
list_add_tail(&dfp->dfp_list, &dop->dop_intake);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index cc3981c48296..e96533d178cf 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -30,8 +30,8 @@ struct xfs_defer_op_type;
struct xfs_defer_pending {
const struct xfs_defer_op_type *dfp_type; /* function pointers */
struct list_head dfp_list; /* pending items */
- bool dfp_committed; /* committed trans? */
void *dfp_intent; /* log intent item */
+ void *dfp_done; /* log done item */
struct list_head dfp_work; /* work items */
unsigned int dfp_count; /* # extent items */
};
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e6a8bea0f7ba..270fb5cf4fa1 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -674,7 +674,8 @@ typedef struct xfs_agf {
#define XFS_AGF_BTREEBLKS 0x00000800
#define XFS_AGF_UUID 0x00001000
#define XFS_AGF_RMAP_BLOCKS 0x00002000
-#define XFS_AGF_NUM_BITS 14
+#define XFS_AGF_SPARE64 0x00004000
+#define XFS_AGF_NUM_BITS 15
#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
#define XFS_AGF_FLAGS \
@@ -691,7 +692,8 @@ typedef struct xfs_agf {
{ XFS_AGF_LONGEST, "LONGEST" }, \
{ XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \
{ XFS_AGF_UUID, "UUID" }, \
- { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }
+ { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }, \
+ { XFS_AGF_SPARE64, "SPARE64" }
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 0e3d4f5ec33c..4aecc5fefe96 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -583,7 +583,8 @@ xfs_sb_verify(
* Only check the in progress field for the primary superblock as
* mkfs.xfs doesn't clear it from secondary superblocks.
*/
- return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
+ return xfs_mount_validate_sb(mp, &sb,
+ bp->b_maps[0].bm_bn == XFS_SB_DADDR,
check_version);
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 607cc29bba21..b5b9bffe3520 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1611,7 +1611,7 @@ xfs_wait_buftarg(
*/
while (percpu_counter_sum(&btp->bt_io_count))
delay(100);
- drain_workqueue(btp->bt_mount->m_buf_workqueue);
+ flush_workqueue(btp->bt_mount->m_buf_workqueue);
/* loop until there is nothing left on the lru list. */
while (list_lru_count(&btp->bt_lru)) {
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 24ef83ef04de..fd6be45b3a1e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1574,9 +1574,16 @@ xfs_fs_fill_super(
}
}
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ if (mp->m_sb.sb_rblocks) {
+ xfs_alert(mp,
+ "EXPERIMENTAL reverse mapping btree not compatible with realtime device!");
+ error = -EINVAL;
+ goto out_filestream_unmount;
+ }
xfs_alert(mp,
"EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
+ }
error = xfs_mountfs(mp);
if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7e88bec3f359..d303a665dba9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2295,7 +2295,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__entry->dev = mp ? mp->m_super->s_dev : 0;
__entry->type = dfp->dfp_type->type;
__entry->intent = dfp->dfp_intent;
- __entry->committed = dfp->dfp_committed;
+ __entry->committed = dfp->dfp_done != NULL;
__entry->nr = dfp->dfp_count;
),
TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",