From d46966013bb4f1fbed7cf995828523a3d1f265c6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:50 +0100
Subject: afs: Add file locking tracepoints

Add two tracepoints for monitoring AFS file locking.  Firstly, add one that
follows the operational part:

    echo 1 >/sys/kernel/debug/tracing/events/afs/afs_flock_op/enable

And add a second that follows the more event-driven part:

    echo 1 >/sys/kernel/debug/tracing/events/afs/afs_flock_ev/enable

Individual file_lock structs seen by afs are tagged with debugging IDs that
are displayed in the trace log to make it easier to see what's going on,
especially as setting the first lock always seems to involve copying the
file_lock twice.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/afs.h | 146 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)

(limited to 'include/trace')

diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index e3f005eae1f7..24c058a93e8f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -152,6 +152,40 @@ enum afs_file_error {
 	afs_file_error_writeback_fail,
 };
 
+enum afs_flock_event {
+	afs_flock_acquired,
+	afs_flock_callback_break,
+	afs_flock_defer_unlock,
+	afs_flock_fail_other,
+	afs_flock_fail_perm,
+	afs_flock_no_lockers,
+	afs_flock_timestamp,
+	afs_flock_try_to_lock,
+	afs_flock_vfs_lock,
+	afs_flock_vfs_locking,
+	afs_flock_waited,
+	afs_flock_waiting,
+	afs_flock_work_extending,
+	afs_flock_work_retry,
+	afs_flock_work_unlocking,
+	afs_flock_would_block,
+};
+
+enum afs_flock_operation {
+	afs_flock_op_copy_lock,
+	afs_flock_op_flock,
+	afs_flock_op_grant,
+	afs_flock_op_lock,
+	afs_flock_op_release_lock,
+	afs_flock_op_return_ok,
+	afs_flock_op_return_eagain,
+	afs_flock_op_return_edeadlk,
+	afs_flock_op_return_error,
+	afs_flock_op_set_lock,
+	afs_flock_op_unlock,
+	afs_flock_op_wake,
+};
+
 #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -277,6 +311,52 @@ enum afs_file_error {
 	EM(afs_file_error_mntpt,		"MNTPT_READ_FAILED")	\
 	E_(afs_file_error_writeback_fail,	"WRITEBACK_FAILED")
 
+#define afs_flock_types							\
+	EM(F_RDLCK,				"RDLCK")		\
+	EM(F_WRLCK,				"WRLCK")		\
+	E_(F_UNLCK,				"UNLCK")
+
+#define afs_flock_states						\
+	EM(AFS_VNODE_LOCK_NONE,			"NONE")			\
+	EM(AFS_VNODE_LOCK_WAITING_FOR_CB,	"WAIT_FOR_CB")		\
+	EM(AFS_VNODE_LOCK_SETTING,		"SETTING")		\
+	EM(AFS_VNODE_LOCK_GRANTED,		"GRANTED")		\
+	EM(AFS_VNODE_LOCK_EXTENDING,		"EXTENDING")		\
+	EM(AFS_VNODE_LOCK_NEED_UNLOCK,		"NEED_UNLOCK")		\
+	E_(AFS_VNODE_LOCK_UNLOCKING,		"UNLOCKING")		\
+
+#define afs_flock_events						\
+	EM(afs_flock_acquired,			"Acquired")		\
+	EM(afs_flock_callback_break,		"Callback")		\
+	EM(afs_flock_defer_unlock,		"D-Unlock")		\
+	EM(afs_flock_fail_other,		"ErrOther")		\
+	EM(afs_flock_fail_perm,			"ErrPerm ")		\
+	EM(afs_flock_no_lockers,		"NoLocker")		\
+	EM(afs_flock_timestamp,			"Timestmp")		\
+	EM(afs_flock_try_to_lock,		"TryToLck")		\
+	EM(afs_flock_vfs_lock,			"VFSLock ")		\
+	EM(afs_flock_vfs_locking,		"VFSLking")		\
+	EM(afs_flock_waited,			"Waited  ")		\
+	EM(afs_flock_waiting,			"Waiting ")		\
+	EM(afs_flock_work_extending,		"Extendng")		\
+	EM(afs_flock_work_retry,		"Retry   ")		\
+	EM(afs_flock_work_unlocking,		"Unlcking")		\
+	E_(afs_flock_would_block,		"EWOULDBL")
+
+#define afs_flock_operations						\
+	EM(afs_flock_op_copy_lock,		"COPY    ")		\
+	EM(afs_flock_op_flock,			"->flock ")		\
+	EM(afs_flock_op_grant,			"GRANT   ")		\
+	EM(afs_flock_op_lock,			"->lock  ")		\
+	EM(afs_flock_op_release_lock,		"RELEASE ")		\
+	EM(afs_flock_op_return_ok,		"<-OK    ")		\
+	EM(afs_flock_op_return_edeadlk,		"<-EDEADL")		\
+	EM(afs_flock_op_return_eagain,		"<-EAGAIN")		\
+	EM(afs_flock_op_return_error,		"<-ERROR ")		\
+	EM(afs_flock_op_set_lock,		"SET     ")		\
+	EM(afs_flock_op_unlock,			"UNLOCK  ")		\
+	E_(afs_flock_op_wake,			"WAKE    ")
+
 /*
  * Export enum symbols via userspace.
  */
@@ -293,6 +373,8 @@ afs_edit_dir_reasons;
 afs_eproto_causes;
 afs_io_errors;
 afs_file_errors;
+afs_flock_types;
+afs_flock_operations;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -796,6 +878,70 @@ TRACE_EVENT(afs_cm_no_server_u,
 		      __entry->call, __entry->op_id, &__entry->uuid)
 	    );
 
+TRACE_EVENT(afs_flock_ev,	/* AFS file-lock event; fl may be NULL */
+	    TP_PROTO(struct afs_vnode *vnode, struct file_lock *fl,
+		     enum afs_flock_event event, int error),
+
+	    TP_ARGS(vnode, fl, event, error),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(enum afs_flock_event,	event		)
+		    __field(enum afs_lock_state,	state		)
+		    __field(int,			error		)
+		    __field(unsigned int,		debug_id	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+		    __entry->event = event;
+		    __entry->state = vnode->lock_state;	/* sampled when the event fires */
+		    __entry->error = error;
+		    __entry->debug_id = fl ? fl->fl_u.afs.debug_id : 0;	/* 0 if no file_lock given */
+			   ),
+
+	    TP_printk("%llx:%llx:%x %04x %s s=%s e=%d",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->debug_id,
+		      __print_symbolic(__entry->event, afs_flock_events),
+		      __print_symbolic(__entry->state, afs_flock_states),
+		      __entry->error)
+	    );
+
+TRACE_EVENT(afs_flock_op,	/* AFS file-lock operation; fl is dereferenced unchecked */
+	    TP_PROTO(struct afs_vnode *vnode, struct file_lock *fl,
+		     enum afs_flock_operation op),
+
+	    TP_ARGS(vnode, fl, op),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(loff_t,			from		)
+		    __field(loff_t,			len		)
+		    __field(enum afs_flock_operation,	op		)
+		    __field(unsigned char,		type		)
+		    __field(unsigned int,		flags		)
+		    __field(unsigned int,		debug_id	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+		    __entry->from = fl->fl_start;
+		    __entry->len = fl->fl_end - fl->fl_start + 1;	/* NOTE(review): wraps if fl_end is the maximum offset - confirm */
+		    __entry->op = op;
+		    __entry->type = fl->fl_type;
+		    __entry->flags = fl->fl_flags;
+		    __entry->debug_id = fl->fl_u.afs.debug_id;	/* unlike afs_flock_ev, no NULL check on fl */
+			   ),
+
+	    TP_printk("%llx:%llx:%x %04x %s t=%s R=%llx/%llx f=%x",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->debug_id,
+		      __print_symbolic(__entry->op, afs_flock_operations),
+		      __print_symbolic(__entry->type, afs_flock_types),
+		      __entry->from, __entry->len, __entry->flags)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From cdfb26b40dfa51127d22d171cef4fe8993cbfb55 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Handle lock rpc ops failing on a file that got deleted

Holding a file lock on an AFS file does not prevent it from being deleted
on the server, so we need to handle an error resulting from that when we
try setting, extending or releasing a lock.

Fix this by adding a "deleted" lock state and cancelling the lock extension
process for that file and aborting all waiters for the lock.

Fixes: 0fafdc9f888b ("afs: Fix file locking")
Reported-by: Jonathan Billings <jsbillin@umich.edu>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c             | 62 +++++++++++++++++++++++++++++++++++++++++++---
 fs/afs/internal.h          |  1 +
 include/trace/events/afs.h |  7 +++++-
 3 files changed, 66 insertions(+), 4 deletions(-)

(limited to 'include/trace')

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3e06a560f66b..742038a21ef7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -157,6 +157,28 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
 	_leave("");
 }
 
+/*
+ * Kill off all waiters in the pending lock queue due to the vnode being
+ * deleted: each is unqueued, given state -ENOENT and woken; lock_key is put.
+ */
+static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
+{
+	struct file_lock *p;
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED);
+
+	while (!list_empty(&vnode->pending_locks)) {
+		p = list_entry(vnode->pending_locks.next,
+			       struct file_lock, fl_u.afs.link);
+		list_del_init(&p->fl_u.afs.link);
+		p->fl_u.afs.state = -ENOENT;
+		wake_up(&p->fl_wait);
+	}
+
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;	/* cleared so the key is not put a second time */
+}
+
 /*
  * Get a lock on a file
  */
@@ -278,13 +300,19 @@ again:
 		/* attempt to release the server lock; if it fails, we just
 		 * wait 5 minutes and it'll expire anyway */
 		ret = afs_release_lock(vnode, vnode->lock_key);
-		if (ret < 0)
+		if (ret < 0) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
+					   ret);
 			printk(KERN_WARNING "AFS:"
 			       " Failed to release lock on {%llx:%llx} error %d\n",
 			       vnode->fid.vid, vnode->fid.vnode, ret);
+		}
 
 		spin_lock(&vnode->lock);
-		afs_next_locker(vnode, 0);
+		if (ret == -ENOENT)
+			afs_kill_lockers_enoent(vnode);
+		else
+			afs_next_locker(vnode, 0);
 		spin_unlock(&vnode->lock);
 		return;
 
@@ -304,12 +332,21 @@ again:
 		ret = afs_extend_lock(vnode, key); /* RPC */
 		key_put(key);
 
-		if (ret < 0)
+		if (ret < 0) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail,
+					   ret);
 			pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
 				   vnode->fid.vid, vnode->fid.vnode, ret);
+		}
 
 		spin_lock(&vnode->lock);
 
+		if (ret == -ENOENT) {
+			afs_kill_lockers_enoent(vnode);
+			spin_unlock(&vnode->lock);
+			return;
+		}
+
 		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
 			goto again;
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
@@ -333,6 +370,11 @@ again:
 		spin_unlock(&vnode->lock);
 		return;
 
+	case AFS_VNODE_LOCK_DELETED:
+		afs_kill_lockers_enoent(vnode);
+		spin_unlock(&vnode->lock);
+		return;
+
 	default:
 		/* Looks like a lock request was withdrawn. */
 		spin_unlock(&vnode->lock);
@@ -435,6 +477,10 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	spin_lock(&vnode->lock);
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
+	ret = -ENOENT;
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		goto error_unlock;
+
 	/* If we've already got a lock on the server then try to move to having
 	 * the VFS grant the requested lock.  Note that this means that other
 	 * clients may get starved out.
@@ -489,6 +535,13 @@ try_to_lock:
 		afs_next_locker(vnode, ret);
 		goto error_unlock;
 
+	case -ENOENT:
+		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
+		list_del_init(&fl->fl_u.afs.link);
+		afs_kill_lockers_enoent(vnode);
+		goto error_unlock;
+
 	default:
 		fl->fl_u.afs.state = ret;
 		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
@@ -638,6 +691,9 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 
 	_enter("");
 
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		return -ENOENT;
+
 	fl->fl_type = F_UNLCK;
 
 	/* check local lock records first */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6e680783f59f..5eb6be3f73b2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -599,6 +599,7 @@ enum afs_lock_state {
 	AFS_VNODE_LOCK_EXTENDING,	/* We're extending a lock on the server */
 	AFS_VNODE_LOCK_NEED_UNLOCK,	/* We need to unlock on the server */
 	AFS_VNODE_LOCK_UNLOCKING,	/* We're telling the server to unlock */
+	AFS_VNODE_LOCK_DELETED,		/* The vnode has been deleted whilst we have a lock */
 };
 
 /*
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 24c058a93e8f..21b896fabb2f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -156,9 +156,11 @@ enum afs_flock_event {
 	afs_flock_acquired,
 	afs_flock_callback_break,
 	afs_flock_defer_unlock,
+	afs_flock_extend_fail,
 	afs_flock_fail_other,
 	afs_flock_fail_perm,
 	afs_flock_no_lockers,
+	afs_flock_release_fail,
 	afs_flock_timestamp,
 	afs_flock_try_to_lock,
 	afs_flock_vfs_lock,
@@ -323,15 +325,18 @@ enum afs_flock_operation {
 	EM(AFS_VNODE_LOCK_GRANTED,		"GRANTED")		\
 	EM(AFS_VNODE_LOCK_EXTENDING,		"EXTENDING")		\
 	EM(AFS_VNODE_LOCK_NEED_UNLOCK,		"NEED_UNLOCK")		\
-	E_(AFS_VNODE_LOCK_UNLOCKING,		"UNLOCKING")		\
+	EM(AFS_VNODE_LOCK_UNLOCKING,		"UNLOCKING")		\
+	E_(AFS_VNODE_LOCK_DELETED,		"DELETED")
 
 #define afs_flock_events						\
 	EM(afs_flock_acquired,			"Acquired")		\
 	EM(afs_flock_callback_break,		"Callback")		\
 	EM(afs_flock_defer_unlock,		"D-Unlock")		\
+	EM(afs_flock_extend_fail,		"Ext_Fail")		\
 	EM(afs_flock_fail_other,		"ErrOther")		\
 	EM(afs_flock_fail_perm,			"ErrPerm ")		\
 	EM(afs_flock_no_lockers,		"NoLocker")		\
+	EM(afs_flock_release_fail,		"Rel_Fail")		\
 	EM(afs_flock_timestamp,			"Timestmp")		\
 	EM(afs_flock_try_to_lock,		"TryToLck")		\
 	EM(afs_flock_vfs_lock,			"VFSLock ")		\
-- 
cgit v1.2.3


From 99987c560046ea178eb5aea793043deea255f185 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Add directory reload tracepoint

Add a tracepoint (afs_reload_dir) to indicate when a directory is being
reloaded.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c               |  1 +
 include/trace/events/afs.h | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/trace')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 378a96a1116e..be5d2f932b77 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -309,6 +309,7 @@ retry:
 		goto error;
 
 	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+		trace_afs_reload_dir(dvnode);
 		ret = afs_fetch_data(dvnode, key, req);
 		if (ret < 0)
 			goto error_unlock;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 21b896fabb2f..8da9dd5bc2b6 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -947,6 +947,23 @@ TRACE_EVENT(afs_flock_op,
 		      __entry->from, __entry->len, __entry->flags)
 	    );
 
+TRACE_EVENT(afs_reload_dir,	/* records the fid of the vnode passed in */
+	    TP_PROTO(struct afs_vnode *vnode),
+
+	    TP_ARGS(vnode),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+			   ),
+
+	    TP_printk("%llx:%llx:%x",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 79ddbfa500b37a94fa7501e65ebdd5c0e4c7592d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Implement sillyrename for unlink and rename

Implement sillyrename for AFS unlink and rename, using the NFS variant
implementation as a basis.

Note that the asynchronous file locking extender/releaser has to be
notified with a state change to stop it complaining if there's a race
between that and the actual file deletion.

A tracepoint, afs_silly_rename, is also added to note the silly rename and
the cleanup.  The afs_edit_dir tracepoint is given some extra reason
indicators and the afs_flock_ev tracepoint is given a silly-delete file
lock cancellation indicator.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile            |   1 +
 fs/afs/dir.c               | 116 ++++++++++++++++++++--
 fs/afs/dir_silly.c         | 239 +++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/flock.c             |   2 +-
 fs/afs/inode.c             |   2 +
 fs/afs/internal.h          |  10 ++
 fs/afs/super.c             |   4 +-
 include/trace/events/afs.h |  34 ++++++-
 8 files changed, 395 insertions(+), 13 deletions(-)
 create mode 100644 fs/afs/dir_silly.c

(limited to 'include/trace')

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 0738e2bf5193..cbf31f6cd177 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -13,6 +13,7 @@ kafs-y := \
 	cmservice.o \
 	dir.o \
 	dir_edit.o \
+	dir_silly.o \
 	dynroot.o \
 	file.o \
 	flock.o \
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index be5d2f932b77..6c8523501639 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -26,6 +26,7 @@ static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
+static void afs_d_iput(struct dentry *dentry, struct inode *inode);
 static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
@@ -85,6 +86,7 @@ const struct dentry_operations afs_fs_dentry_operations = {
 	.d_delete	= afs_d_delete,
 	.d_release	= afs_d_release,
 	.d_automount	= afs_d_automount,
+	.d_iput		= afs_d_iput,
 };
 
 struct afs_lookup_one_cookie {
@@ -1083,6 +1085,16 @@ zap:
 	return 1;
 }
 
+/*
+ * Clean up sillyrename files on dentry removal, then drop the inode ref.
+ */
+static void afs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		afs_silly_iput(dentry, inode);	/* return value is ignored */
+	iput(inode);
+}
+
 /*
  * handle dentry release
  */
@@ -1225,6 +1237,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 			goto error_key;
 	}
 
+	if (vnode) {
+		ret = down_write_killable(&vnode->rmdir_lock);
+		if (ret < 0)
+			goto error_key;
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1243,6 +1261,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		}
 	}
 
+	if (vnode)
+		up_write(&vnode->rmdir_lock);
 error_key:
 	key_put(key);
 error:
@@ -1259,9 +1279,9 @@ error:
  * However, if we didn't have a callback promise outstanding, or it was
  * outstanding on a different server, then it won't break it either...
  */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
-			       unsigned long d_version_before,
-			       unsigned long d_version_after)
+int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+			unsigned long d_version_before,
+			unsigned long d_version_after)
 {
 	bool dir_valid;
 	int ret = 0;
@@ -1308,6 +1328,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
 	struct key *key;
 	unsigned long d_version = (unsigned long)dentry->d_fsdata;
+	bool need_rehash = false;
 	u64 data_version = dvnode->status.data_version;
 	int ret;
 
@@ -1331,6 +1352,21 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 			goto error_key;
 	}
 
+	spin_lock(&dentry->d_lock);
+	if (vnode && d_count(dentry) > 1) {
+		spin_unlock(&dentry->d_lock);
+		/* Start asynchronous writeout of the inode */
+		write_inode_now(d_inode(dentry), 0);
+		ret = afs_sillyrename(dvnode, vnode, dentry, key);
+		goto error_key;
+	}
+	if (!d_unhashed(dentry)) {
+		/* Prevent a race with RCU lookup. */
+		__d_drop(dentry);
+		need_rehash = true;
+	}
+	spin_unlock(&dentry->d_lock);
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1362,6 +1398,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 					    afs_edit_dir_for_unlink);
 	}
 
+	if (need_rehash && ret < 0 && ret != -ENOENT)
+		d_rehash(dentry);
+
 error_key:
 	key_put(key);
 error:
@@ -1582,6 +1621,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct afs_fs_cursor fc;
 	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+	struct dentry *tmp = NULL, *rehash = NULL;
+	struct inode *new_inode;
 	struct key *key;
 	u64 orig_data_version, new_data_version;
 	bool new_negative = d_is_negative(new_dentry);
@@ -1590,6 +1631,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (flags)
 		return -EINVAL;
 
+	/* Don't allow silly-rename files be moved around. */
+	if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EINVAL;
+
 	vnode = AFS_FS_I(d_inode(old_dentry));
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
@@ -1608,12 +1653,48 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto error;
 	}
 
+	/* For non-directories, check whether the target is busy and if so,
+	 * make a copy of the dentry and then do a silly-rename.  If the
+	 * silly-rename succeeds, the copied dentry is hashed and becomes the
+	 * new target.
+	 */
+	if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+		/* To prevent any new references to the target during the
+		 * rename, we unhash the dentry in advance.
+		 */
+		if (!d_unhashed(new_dentry)) {
+			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
+
+		if (d_count(new_dentry) > 2) {
+			/* copy the target dentry's name */
+			ret = -ENOMEM;
+			tmp = d_alloc(new_dentry->d_parent,
+				      &new_dentry->d_name);
+			if (!tmp)
+				goto error_rehash;
+
+			ret = afs_sillyrename(new_dvnode,
+					      AFS_FS_I(d_inode(new_dentry)),
+					      new_dentry, key);
+			if (ret)
+				goto error_rehash;
+
+			new_dentry = tmp;
+			rehash = NULL;
+			new_negative = true;
+			orig_data_version = orig_dvnode->status.data_version;
+			new_data_version = new_dvnode->status.data_version;
+		}
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
 		if (orig_dvnode != new_dvnode) {
 			if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
 				afs_end_vnode_operation(&fc);
-				goto error_key;
+				goto error_rehash;
 			}
 		}
 		while (afs_select_fileserver(&fc)) {
@@ -1630,25 +1711,42 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			mutex_unlock(&new_dvnode->io_lock);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
-			goto error_key;
+			goto error_rehash;
 	}
 
 	if (ret == 0) {
+		if (rehash)
+			d_rehash(rehash);
 		if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
 		    afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
-					afs_edit_dir_for_rename);
+					afs_edit_dir_for_rename_0);
 
 		if (!new_negative &&
 		    test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
 			afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
-					    afs_edit_dir_for_rename);
+					    afs_edit_dir_for_rename_1);
 
 		if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
 			afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
-					 &vnode->fid,  afs_edit_dir_for_rename);
+					 &vnode->fid, afs_edit_dir_for_rename_2);
+
+		new_inode = d_inode(new_dentry);
+		if (new_inode) {
+			spin_lock(&new_inode->i_lock);
+			if (new_inode->i_nlink > 0)
+				drop_nlink(new_inode);
+			spin_unlock(&new_inode->i_lock);
+		}
+		d_move(old_dentry, new_dentry);
+		goto error_tmp;
 	}
 
-error_key:
+error_rehash:
+	if (rehash)
+		d_rehash(rehash);
+error_tmp:
+	if (tmp)
+		dput(tmp);
 	key_put(key);
 error:
 	_leave(" = %d", ret);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
new file mode 100644
index 000000000000..f6f89fdab6b2
--- /dev/null
+++ b/fs/afs/dir_silly.c
@@ -0,0 +1,239 @@
+/* AFS silly rename handling
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from NFS's sillyrename.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include "internal.h"
+
+/*
+ * Actually perform the silly rename step: rename @old to @new within @dvnode.
+ */
+static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *old, struct dentry *new,
+			       struct key *key)
+{
+	struct afs_fs_cursor fc;
+	u64 dir_data_version = dvnode->status.data_version;
+	int ret = -ERESTARTSYS;	/* result if the vnode operation can't begin */
+
+	_enter("%pd,%pd", old, new);
+
+	trace_afs_silly_rename(vnode, false);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+			afs_fs_rename(&fc, old->d_name.name,
+				      dvnode, new->d_name.name,
+				      dir_data_version, dir_data_version);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	if (ret == 0) {
+		spin_lock(&old->d_lock);
+		old->d_flags |= DCACHE_NFSFS_RENAMED;	/* afs_d_iput() tests this flag */
+		spin_unlock(&old->d_lock);
+		if (dvnode->silly_key != key) {
+			key_put(dvnode->silly_key);
+			dvnode->silly_key = key_get(key);	/* used by afs_silly_iput()'s unlink */
+		}
+
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &old->d_name,
+					    afs_edit_dir_for_silly_0);
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_add(dvnode, &new->d_name,
+					 &vnode->fid, afs_edit_dir_for_silly_1);
+
+		/* vfs_unlink and the like do not issue this when a file is
+		 * sillyrenamed, so do it here.
+		 */
+		fsnotify_nameremove(old, 0);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * afs_sillyrename - Perform a silly-rename of a dentry
+ *
+ * AFS is stateless and the server doesn't know when the client is holding a
+ * file open.  To prevent application problems when a file is unlinked while
+ * it's still open, the client performs a "silly-rename".  That is, it renames
+ * the file to a hidden file in the same directory, and only performs the
+ * unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ */
+int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+		    struct dentry *dentry, struct key *key)
+{
+	static unsigned int sillycounter;
+	struct dentry *sdentry = NULL;
+	unsigned char silly[16];
+	int ret = -EBUSY;	/* also the result if the name lookup below fails */
+
+	_enter("");
+
+	/* We don't allow a dentry to be silly-renamed twice. */
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EBUSY;
+
+	sdentry = NULL;	/* already NULL from its initialiser */
+	do {
+		int slen;
+
+		dput(sdentry);	/* drop the candidate rejected on the previous pass */
+		sillycounter++;
+
+		/* Create a silly name.  Note that the ".__afs" prefix is
+		 * understood by the salvager and must not be changed.
+		 */
+		slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+
+		/* N.B. Better to return EBUSY here ... it could be dangerous
+		 * to delete the file while it's in use.
+		 */
+		if (IS_ERR(sdentry))
+			goto out;
+	} while (!d_is_negative(sdentry));	/* retry until the name is unused */
+
+	ihold(&vnode->vfs_inode);	/* pinned across the rename; put below */
+
+	ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
+	switch (ret) {
+	case 0:
+		/* The rename succeeded. */
+		d_move(dentry, sdentry);
+		break;
+	case -ERESTARTSYS:
+		/* The result of the rename is unknown. Play it safe by forcing
+		 * a new lookup.
+		 */
+		d_drop(dentry);
+		d_drop(sdentry);
+	}	/* any other error: both dentries are left untouched */
+
+	iput(&vnode->vfs_inode);
+	dput(sdentry);
+out:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Tell the server to remove a sillyrename file; fix up nlink and dir on success.
+ */
+static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *dentry, struct key *key)
+{
+	struct afs_fs_cursor fc;
+	u64 dir_data_version = dvnode->status.data_version;
+	int ret = -ERESTARTSYS;	/* result if the vnode operation can't begin */
+
+	_enter("");
+
+	trace_afs_silly_rename(vnode, true);	/* afs_do_silly_rename() passes false */
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+
+			if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+			    !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+				yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+						    dir_data_version);
+				if (fc.ac.error != -ECONNABORTED ||
+				    fc.ac.abort_code != RXGEN_OPCODE)
+					continue;	/* only an RXGEN_OPCODE abort falls through */
+				set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);	/* skip RemoveFile2 next time */
+			}
+
+			afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
+				      dir_data_version);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret == 0) {
+			drop_nlink(&vnode->vfs_inode);
+			if (vnode->vfs_inode.i_nlink == 0) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+			}
+		}
+		if (ret == 0 &&
+		    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &dentry->d_name,
+					    afs_edit_dir_for_unlink);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Remove sillyrename file on iput; the result is 0 or 1, never an error code.
+ */
+int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent));
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct dentry *alias;
+	int ret;
+
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+	_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+	down_read(&dvnode->rmdir_lock);	/* afs_rmdir() takes rmdir_lock for writing */
+
+	alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+	if (IS_ERR(alias)) {
+		up_read(&dvnode->rmdir_lock);
+		return 0;	/* the allocation error itself is not passed back */
+	}
+
+	if (!d_in_lookup(alias)) {
+		/* We raced with lookup...  See if we need to transfer the
+		 * sillyrename information to the aliased dentry.
+		 */
+		ret = 0;
+		spin_lock(&alias->d_lock);
+		if (d_really_is_positive(alias) &&
+		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+			alias->d_flags |= DCACHE_NFSFS_RENAMED;
+			ret = 1;	/* flag handed over to the alias */
+		}
+		spin_unlock(&alias->d_lock);
+		up_read(&dvnode->rmdir_lock);
+		dput(alias);
+		return ret;
+	}
+
+	/* Stop lock-release from complaining. */
+	spin_lock(&vnode->lock);
+	vnode->lock_state = AFS_VNODE_LOCK_DELETED;
+	trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0);
+	spin_unlock(&vnode->lock);
+
+	afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key);	/* result is discarded */
+	up_read(&dvnode->rmdir_lock);
+	d_lookup_done(alias);
+	dput(alias);
+	return 1;
+}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 742038a21ef7..325bf731d8dd 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -300,7 +300,7 @@ again:
 		/* attempt to release the server lock; if it fails, we just
 		 * wait 5 minutes and it'll expire anyway */
 		ret = afs_release_lock(vnode, vnode->lock_key);
-		if (ret < 0) {
+		if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) {
 			trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
 					   ret);
 			printk(KERN_WARNING "AFS:"
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 9cedc3fc1b77..3eef20ff285b 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -543,6 +543,8 @@ void afs_evict_inode(struct inode *inode)
 #endif
 
 	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+	key_put(vnode->silly_key);
+	vnode->silly_key = NULL;
 	key_put(vnode->lock_key);
 	vnode->lock_key = NULL;
 	_leave("");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5eb6be3f73b2..20fd44de26ac 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -621,6 +621,8 @@ struct afs_vnode {
 	struct afs_permits __rcu *permit_cache;	/* cache of permits so far obtained */
 	struct mutex		io_lock;	/* Lock for serialising I/O on this mutex */
 	struct rw_semaphore	validate_lock;	/* lock for validating this vnode */
+	struct rw_semaphore	rmdir_lock;	/* Lock for rmdir vs sillyrename */
+	struct key		*silly_key;	/* Silly rename key */
 	spinlock_t		wb_lock;	/* lock for wb_keys */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
@@ -866,6 +868,7 @@ extern const struct address_space_operations afs_dir_aops;
 extern const struct dentry_operations afs_fs_dentry_operations;
 
 extern void afs_d_release(struct dentry *);
+extern int afs_dir_remove_link(struct dentry *, struct key *, unsigned long, unsigned long);
 
 /*
  * dir_edit.c
@@ -874,6 +877,13 @@ extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *
 			     enum afs_edit_dir_reason);
 extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
 
+/*
+ * dir_silly.c
+ */
+extern int afs_sillyrename(struct afs_vnode *, struct afs_vnode *,
+			   struct dentry *, struct key *);
+extern int afs_silly_iput(struct dentry *, struct inode *);
+
 /*
  * dynroot.c
  */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 5adf012b8e27..6438849a75c4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -45,7 +45,7 @@ struct file_system_type afs_fs_type = {
 	.init_fs_context	= afs_init_fs_context,
 	.parameters		= &afs_fs_parameters,
 	.kill_sb		= afs_kill_super,
-	.fs_flags		= 0,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE,
 };
 MODULE_ALIAS_FS("afs");
 
@@ -656,6 +656,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	vnode->cb_type		= 0;
 	vnode->lock_state	= AFS_VNODE_LOCK_NONE;
 
+	init_rwsem(&vnode->rmdir_lock);
+
 	_leave(" = %p", &vnode->vfs_inode);
 	return &vnode->vfs_inode;
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 8da9dd5bc2b6..f67815ebb1b9 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -108,8 +108,12 @@ enum afs_edit_dir_reason {
 	afs_edit_dir_for_create,
 	afs_edit_dir_for_link,
 	afs_edit_dir_for_mkdir,
-	afs_edit_dir_for_rename,
+	afs_edit_dir_for_rename_0,
+	afs_edit_dir_for_rename_1,
+	afs_edit_dir_for_rename_2,
 	afs_edit_dir_for_rmdir,
+	afs_edit_dir_for_silly_0,
+	afs_edit_dir_for_silly_1,
 	afs_edit_dir_for_symlink,
 	afs_edit_dir_for_unlink,
 };
@@ -161,6 +165,7 @@ enum afs_flock_event {
 	afs_flock_fail_perm,
 	afs_flock_no_lockers,
 	afs_flock_release_fail,
+	afs_flock_silly_delete,
 	afs_flock_timestamp,
 	afs_flock_try_to_lock,
 	afs_flock_vfs_lock,
@@ -273,8 +278,12 @@ enum afs_flock_operation {
 	EM(afs_edit_dir_for_create,		"Create") \
 	EM(afs_edit_dir_for_link,		"Link  ") \
 	EM(afs_edit_dir_for_mkdir,		"MkDir ") \
-	EM(afs_edit_dir_for_rename,		"Rename") \
+	EM(afs_edit_dir_for_rename_0,		"Renam0") \
+	EM(afs_edit_dir_for_rename_1,		"Renam1") \
+	EM(afs_edit_dir_for_rename_2,		"Renam2") \
 	EM(afs_edit_dir_for_rmdir,		"RmDir ") \
+	EM(afs_edit_dir_for_silly_0,		"S_Ren0") \
+	EM(afs_edit_dir_for_silly_1,		"S_Ren1") \
 	EM(afs_edit_dir_for_symlink,		"Symlnk") \
 	E_(afs_edit_dir_for_unlink,		"Unlink")
 
@@ -337,6 +346,7 @@ enum afs_flock_operation {
 	EM(afs_flock_fail_perm,			"ErrPerm ")		\
 	EM(afs_flock_no_lockers,		"NoLocker")		\
 	EM(afs_flock_release_fail,		"Rel_Fail")		\
+	EM(afs_flock_silly_delete,		"SillyDel")		\
 	EM(afs_flock_timestamp,			"Timestmp")		\
 	EM(afs_flock_try_to_lock,		"TryToLck")		\
 	EM(afs_flock_vfs_lock,			"VFSLock ")		\
@@ -964,6 +974,26 @@ TRACE_EVENT(afs_reload_dir,
 		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique)
 	    );
 
+TRACE_EVENT(afs_silly_rename,	/* logs a vnode's fid plus a caller-supplied flag */
+	    TP_PROTO(struct afs_vnode *vnode, bool done),
+
+	    TP_ARGS(vnode, done),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(bool,			done		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;	/* whole fid copied by value */
+		    __entry->done = done;
+			   ),
+
+	    TP_printk("%llx:%llx:%x done=%u",	/* vid:vnode:unique, then the flag */
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->done)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 80548b03991f58758a336424a90bf9f988e3b077 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Add more tracepoints

Add four more tracepoints:

 (1) afs_make_fs_call1 - Split from afs_make_fs_call but takes a filename
     to log also.

 (2) afs_make_fs_call2 - Like the above but takes two filenames to log.

 (3) afs_lookup - Log the result of doing a successful lookup, including a
     negative result (fid 0:0).

 (4) afs_get_tree - Log the set up of a volume for mounting.

It also extends the name buffer on the afs_edit_dir tracepoint to 24 chars
and puts quotes around the filename in the text representation.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c               |   8 ++-
 fs/afs/fsclient.c          |  10 ++--
 fs/afs/super.c             |   1 +
 fs/afs/yfsclient.c         |  14 ++---
 include/trace/events/afs.h | 146 ++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 163 insertions(+), 16 deletions(-)

(limited to 'include/trace')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 6c8523501639..48706eefc63b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -908,8 +908,14 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 			(void *)(unsigned long)dvnode->status.data_version;
 	}
 	d = d_splice_alias(inode, dentry);
-	if (!IS_ERR_OR_NULL(d))
+	if (!IS_ERR_OR_NULL(d)) {
 		d->d_fsdata = dentry->d_fsdata;
+		trace_afs_lookup(dvnode, &d->d_name,
+				 inode ? AFS_FS_I(inode) : NULL);
+	} else {
+		trace_afs_lookup(dvnode, &dentry->d_name,
+				 inode ? AFS_FS_I(inode) : NULL);
+	}
 	return d;
 }
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 9d405f96cc91..be4520eb4965 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -830,7 +830,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	*bp++ = 0; /* segment size */
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -926,7 +926,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	}
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1019,7 +1019,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	*bp++ = htonl(vnode->fid.unique);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1134,7 +1134,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	*bp++ = 0; /* segment size */
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1253,7 +1253,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 	}
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &orig_dvnode->fid);
+	trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 6438849a75c4..ce85ae61f12d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -550,6 +550,7 @@ static int afs_get_tree(struct fs_context *fc)
 	}
 
 	fc->root = dget(sb->s_root);
+	trace_afs_get_tree(as->cell, as->volume);
 	_leave(" = 0 [%p]", sb);
 	return 0;
 
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 73546c2d89ee..5ea0350dc9dd 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -809,7 +809,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -873,7 +873,7 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -964,7 +964,7 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1052,7 +1052,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1138,7 +1138,7 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1235,7 +1235,7 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1334,7 +1334,7 @@ int yfs_fs_rename(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &orig_dvnode->fid);
+	trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index f67815ebb1b9..e81d6a50781f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -539,6 +539,88 @@ TRACE_EVENT(afs_make_fs_call,
 		      __print_symbolic(__entry->op, afs_fs_operations))
 	    );
 
+TRACE_EVENT(afs_make_fs_call1,	/* like afs_make_fs_call, plus one name */
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     const char *name),
+
+	    TP_ARGS(call, fid, name),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+		    __array(char,			name, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = strlen(name);
+		    __len = min(__len, 23);	/* keep one byte of name[24] for the NUL */
+		    __entry->call = call->debug_id;
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {	/* no fid supplied: record an all-zero fid */
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+		    memcpy(__entry->name, name, __len);	/* longer names are truncated */
+		    __entry->name[__len] = 0;
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\"",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->name)
+	    );
+
+TRACE_EVENT(afs_make_fs_call2,	/* like afs_make_fs_call, plus two names */
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     const char *name, const char *name2),
+
+	    TP_ARGS(call, fid, name, name2),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+		    __array(char,			name, 24	)
+		    __array(char,			name2, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = strlen(name);
+		    int __len2 = strlen(name2);
+		    __len = min(__len, 23);	/* keep one byte of name[24] for the NUL */
+		    __len2 = min(__len2, 23);	/* likewise for name2[24] */
+		    __entry->call = call->debug_id;
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {	/* no fid supplied: record an all-zero fid */
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+		    memcpy(__entry->name, name, __len);	/* longer names are truncated */
+		    __entry->name[__len] = 0;
+		    memcpy(__entry->name2, name2, __len2);
+		    __entry->name2[__len2] = 0;
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\" \"%s\"",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->name,
+		      __entry->name2)
+	    );
+
 TRACE_EVENT(afs_make_vl_call,
 	    TP_PROTO(struct afs_call *call),
 
@@ -736,6 +818,38 @@ TRACE_EVENT(afs_call_state,
 		      __entry->ret, __entry->abort)
 	    );
 
+TRACE_EVENT(afs_lookup,	/* logs directory fid, looked-up name and result fid */
+	    TP_PROTO(struct afs_vnode *dvnode, const struct qstr *name,
+		     struct afs_vnode *vnode),
+
+	    TP_ARGS(dvnode, name, vnode),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	dfid		)
+		    __field_struct(struct afs_fid,	fid		)
+		    __array(char,			name, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = min_t(int, name->len, 23);	/* room for the NUL */
+		    __entry->dfid = dvnode->fid;
+		    if (vnode) {
+			    __entry->fid = vnode->fid;
+		    } else {	/* NULL vnode: record an all-zero fid */
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+		    memcpy(__entry->name, name->name, __len);	/* may truncate */
+		    __entry->name[__len] = 0;
+			   ),
+
+	    TP_printk("d=%llx:%llx:%x \"%s\" f=%llx:%x",
+		      __entry->dfid.vid, __entry->dfid.vnode, __entry->dfid.unique,
+		      __entry->name,
+		      __entry->fid.vnode, __entry->fid.unique)	/* fid.vid not printed */
+	    );
+
 TRACE_EVENT(afs_edit_dir,
 	    TP_PROTO(struct afs_vnode *dvnode,
 		     enum afs_edit_dir_reason why,
@@ -757,12 +871,12 @@ TRACE_EVENT(afs_edit_dir,
 		    __field(unsigned short,		slot		)
 		    __field(unsigned int,		f_vnode		)
 		    __field(unsigned int,		f_unique	)
-		    __array(char,			name, 18	)
+		    __array(char,			name, 24	)
 			     ),
 
 	    TP_fast_assign(
 		    int __len = strlen(name);
-		    __len = min(__len, 17);
+		    __len = min(__len, 23);
 		    __entry->vnode	= dvnode->fid.vnode;
 		    __entry->unique	= dvnode->fid.unique;
 		    __entry->why	= why;
@@ -775,7 +889,7 @@ TRACE_EVENT(afs_edit_dir,
 		    __entry->name[__len] = 0;
 			   ),
 
-	    TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x %s",
+	    TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x \"%s\"",
 		      __entry->vnode, __entry->unique,
 		      __print_symbolic(__entry->why, afs_edit_dir_reasons),
 		      __print_symbolic(__entry->op, afs_edit_dir_ops),
@@ -994,6 +1108,32 @@ TRACE_EVENT(afs_silly_rename,
 		      __entry->done)
 	    );
 
+TRACE_EVENT(afs_get_tree,	/* logs cell name, volume name and volume ID */
+	    TP_PROTO(struct afs_cell *cell, struct afs_volume *volume),
+
+	    TP_ARGS(cell, volume),
+
+	    TP_STRUCT__entry(
+		    __field(u64,			vid		)
+		    __array(char,			cell, 24	)
+		    __array(char,			volume, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len;	/* reused for both name copies below */
+		    __entry->vid = volume->vid;
+		    __len = min_t(int, cell->name_len, 23);	/* room for the NUL */
+		    memcpy(__entry->cell, cell->name, __len);
+		    __entry->cell[__len] = 0;
+		    __len = min_t(int, volume->name_len, 23);	/* room for the NUL */
+		    memcpy(__entry->volume, volume->name, __len);
+		    __entry->volume[__len] = 0;
+			   ),
+
+	    TP_printk("--- MOUNT %s:%s %llx",
+		      __entry->cell, __entry->volume, __entry->vid)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 6c6c1d63c243025956f061e67fff3a615aa0f6be Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: Provide mount-time configurable byte-range file locking
 emulation

Provide byte-range file locking emulation that can be configured at mount
time to one of four modes:

 (1) flock=local.  Locking is done locally only and no reference is made to
     the server.

 (2) flock=openafs.  Byte-range locking is done locally only; whole-file
     locking is done with reference to the server.  Whole-file locks cannot
     be upgraded unless the client holds an exclusive lock.

 (3) flock=strict.  Byte-range and whole-file locking both require a
     sufficient whole-file lock on the server.

 (4) flock=write.  As strict, but the client always gets an exclusive
     whole-file lock on the server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c             | 49 +++++++++++++++++++++++++++++++++++++++-------
 fs/afs/fsclient.c          |  2 +-
 fs/afs/internal.h          | 14 +++++++++++++
 fs/afs/super.c             | 27 +++++++++++++++++++++++++
 fs/afs/yfsclient.c         |  2 +-
 include/trace/events/afs.h | 35 +++++++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 9 deletions(-)

(limited to 'include/trace')

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 325bf731d8dd..ef313f4c1d11 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -409,7 +409,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode)
  * whether we think that we have a locking permit.
  */
 static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
-			      afs_lock_type_t type, bool can_sleep)
+			      enum afs_flock_mode mode, afs_lock_type_t type)
 {
 	afs_access_t access;
 	int ret;
@@ -437,13 +437,9 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
 	if (type == AFS_LOCK_READ) {
 		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
 			return -EACCES;
-		if (vnode->status.lock_count == -1 && !can_sleep)
-			return -EAGAIN; /* Write locked */
 	} else {
 		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
 			return -EACCES;
-		if (vnode->status.lock_count != 0 && !can_sleep)
-			return -EAGAIN; /* Locked */
 	}
 
 	return 0;
@@ -456,24 +452,48 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
 	struct inode *inode = locks_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
 	afs_lock_type_t type;
 	struct key *key = afs_file_key(file);
+	bool partial, no_server_lock = false;
 	int ret;
 
-	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	if (mode == afs_flock_mode_unset)
+		mode = afs_flock_mode_openafs;
+
+	_enter("{%llx:%llu},%llu-%llu,%u,%u",
+	       vnode->fid.vid, vnode->fid.vnode,
+	       fl->fl_start, fl->fl_end, fl->fl_type, mode);
 
 	fl->fl_ops = &afs_lock_ops;
 	INIT_LIST_HEAD(&fl->fl_u.afs.link);
 	fl->fl_u.afs.state = AFS_LOCK_PENDING;
 
+	partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
 	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+	if (mode == afs_flock_mode_write && partial)
+		type = AFS_LOCK_WRITE;
 
-	ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
+	ret = afs_do_setlk_check(vnode, key, mode, type);
 	if (ret < 0)
 		return ret;
 
 	trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
 
+	/* AFS3 protocol only supports full-file locks and doesn't provide any
+	 * method of upgrade/downgrade, so we need to emulate for partial-file
+	 * locks.
+	 *
+	 * The OpenAFS client only gets a server lock for a full-file lock and
+	 * keeps partial-file locks local.  Allow this behaviour to be emulated
+	 * (as the default).
+	 */
+	if (mode == afs_flock_mode_local ||
+	    (partial && mode == afs_flock_mode_openafs)) {
+		no_server_lock = true;
+		goto skip_server_lock;
+	}
+
 	spin_lock(&vnode->lock);
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
@@ -502,6 +522,18 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 		}
 	}
 
+	if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
+	    !(fl->fl_flags & FL_SLEEP)) {
+		ret = -EAGAIN;
+		if (type == AFS_LOCK_READ) {
+			if (vnode->status.lock_count == -1)
+				goto lock_is_contended; /* Write locked */
+		} else {
+			if (vnode->status.lock_count != 0)
+				goto lock_is_contended; /* Locked */
+		}
+	}
+
 	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 		goto need_to_wait;
 
@@ -571,6 +603,7 @@ vnode_is_locked:
 	/* the lock has been granted by the server... */
 	ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
 
+skip_server_lock:
 	/* ... but the VFS still needs to distribute access on this client. */
 	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
 	ret = locks_lock_file_wait(file, fl);
@@ -649,6 +682,8 @@ vfs_rejected_lock:
 	 * deal with.
 	 */
 	_debug("vfs refused %d", ret);
+	if (no_server_lock)
+		goto error;
 	spin_lock(&vnode->lock);
 	list_del_init(&fl->fl_u.afs.link);
 	afs_defer_unlock(vnode);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index be4520eb4965..9b73a57aa5cb 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1902,7 +1902,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	*bp++ = htonl(type);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_calli(call, &vnode->fid, type);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 20fd44de26ac..91204e1428f2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -36,11 +36,24 @@
 struct pagevec;
 struct afs_call;
 
+/*
+ * Partial file-locking emulation mode.  (The problem being that AFS3 only
+ * allows whole-file locks and no upgrading/downgrading).
+ */
+enum afs_flock_mode {
+	afs_flock_mode_unset,	/* No mode given; afs_do_setlk() uses openafs */
+	afs_flock_mode_local,	/* Local locking only */
+	afs_flock_mode_openafs,	/* Don't get server lock for a partial lock */
+	afs_flock_mode_strict,	/* Always get a server lock for a partial lock */
+	afs_flock_mode_write,	/* Get an exclusive server lock for a partial lock */
+};
+
 struct afs_fs_context {
 	bool			force;		/* T to force cell type */
 	bool			autocell;	/* T if set auto mount operation */
 	bool			dyn_root;	/* T if dynamic root */
 	bool			no_cell;	/* T if the source is "none" (for dynroot) */
+	enum afs_flock_mode	flock_mode;	/* Partial file-locking emulation mode */
 	afs_voltype_t		type;		/* type of volume requested */
 	unsigned int		volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
@@ -221,6 +234,7 @@ struct afs_super_info {
 	struct net		*net_ns;	/* Network namespace */
 	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
+	enum afs_flock_mode	flock_mode:8;	/* File locking emulation mode */
 	bool			dyn_root;	/* True if dynamic root */
 };
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ce85ae61f12d..18334fa1a0d2 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -67,19 +67,30 @@ static atomic_t afs_count_active_inodes;
 enum afs_param {
 	Opt_autocell,
 	Opt_dyn,
+	Opt_flock,
 	Opt_source,
 };
 
 static const struct fs_parameter_spec afs_param_specs[] = {
 	fsparam_flag  ("autocell",	Opt_autocell),
 	fsparam_flag  ("dyn",		Opt_dyn),
+	fsparam_enum  ("flock",		Opt_flock),
 	fsparam_string("source",	Opt_source),
 	{}
 };
 
+static const struct fs_parameter_enum afs_param_enums[] = { /* "flock=" values */
+	{ Opt_flock,	"local",	afs_flock_mode_local },
+	{ Opt_flock,	"openafs",	afs_flock_mode_openafs },
+	{ Opt_flock,	"strict",	afs_flock_mode_strict },
+	{ Opt_flock,	"write",	afs_flock_mode_write },
+	{}
+};
+
 static const struct fs_parameter_description afs_fs_parameters = {
 	.name		= "kAFS",
 	.specs		= afs_param_specs,
+	.enums		= afs_param_enums,
 };
 
 /*
@@ -182,11 +193,22 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 static int afs_show_options(struct seq_file *m, struct dentry *root)
 {
 	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	const char *p = NULL;
 
 	if (as->dyn_root)
 		seq_puts(m, ",dyn");
 	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
 		seq_puts(m, ",autocell");
+	switch (as->flock_mode) {
+	case afs_flock_mode_unset:	break;
+	case afs_flock_mode_local:	p = "local";	break;
+	case afs_flock_mode_openafs:	p = "openafs";	break;
+	case afs_flock_mode_strict:	p = "strict";	break;
+	case afs_flock_mode_write:	p = "write";	break;
+	}
+	if (p)
+		seq_printf(m, ",flock=%s", p);
+
 	return 0;
 }
 
@@ -315,6 +337,10 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		ctx->dyn_root = true;
 		break;
 
+	case Opt_flock:
+		ctx->flock_mode = result.uint_32;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -466,6 +492,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (as) {
 		as->net_ns = get_net(fc->net_ns);
+		as->flock_mode = ctx->flock_mode;
 		if (ctx->dyn_root) {
 			as->dyn_root = true;
 		} else {
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 5ea0350dc9dd..055840aa07f6 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1860,7 +1860,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_calli(call, &vnode->fid, type);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index e81d6a50781f..f1373c29bf7d 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -539,6 +539,41 @@ TRACE_EVENT(afs_make_fs_call,
 		      __print_symbolic(__entry->op, afs_fs_operations))
 	    );
 
+TRACE_EVENT(afs_make_fs_calli,	/* like afs_make_fs_call, plus one integer */
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     unsigned int i),
+
+	    TP_ARGS(call, fid, i),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(unsigned int,		i		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call->debug_id;
+		    __entry->i = i;	/* the set-lock callers pass the lock type here */
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {	/* no fid supplied: record an all-zero fid */
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->i)
+	    );
+
 TRACE_EVENT(afs_make_fs_call1,
 	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
 		     const char *name),
-- 
cgit v1.2.3


From 260f082bae6dcf70aeae2cc3e24aecb55bdb1c99 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: Get an AFS3 ACL as an xattr

Implement an xattr on AFS files called "afs.acl" that retrieves a file's
ACL.  It returns the raw AFS3 ACL from the result of calling FS.FetchACL,
leaving any interpretation to userspace.

Note that whilst YFS servers will respond to FS.FetchACL, this will render
a more-advanced YFS ACL down.  Use "afs.yfs.acl" instead for that.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs_fs.h            |   1 +
 fs/afs/fsclient.c          | 122 +++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/internal.h          |   7 +++
 fs/afs/xattr.c             |  53 ++++++++++++++++++++
 include/trace/events/afs.h |   1 +
 5 files changed, 184 insertions(+)

(limited to 'include/trace')

diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index ddfa88a7a9c0..4df1f1eec0ab 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -17,6 +17,7 @@
 
 enum AFS_FS_Operations {
 	FSFETCHDATA		= 130,	/* AFS Fetch file data */
+	FSFETCHACL		= 131,	/* AFS Fetch file ACL */
 	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSSTOREDATA		= 133,	/* AFS Store file data */
 	FSSTORESTATUS		= 135,	/* AFS Store file status */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 9b73a57aa5cb..283f486c59f4 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -2391,3 +2391,125 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
+
+/*
+ * Deliver the reply to an FS.FetchACL: data length, ACL data, then metadata.
+ */
+static int afs_deliver_fs_fetch_acl(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[1];
+	struct afs_acl *acl;
+	const __be32 *bp;
+	unsigned int size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {	/* each stage advances and falls into the next */
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* Fall through - extract the returned data length */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);	/* length from the reply */
+		size = round_up(size, 4);	/* buffer/extract length padded to 4 bytes */
+
+		acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+		if (!acl)
+			return -ENOMEM;
+		call->reply[0] = acl;	/* freed by afs_destroy_fs_fetch_acl() */
+		acl->size = call->count2;	/* the unpadded length */
+		afs_extract_begin(call, acl->data, size);
+		call->unmarshall++;
+
+		/* Fall through - extract the returned data */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_buf(call, (21 + 6) * 4);	/* presumably status + volsync words - TODO confirm */
+		call->unmarshall++;
+
+		/* Fall through - extract the metadata */
+	case 3:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+					&vnode->status.data_version, NULL);
+		if (ret < 0)
+			return ret;
+		xdr_decode_AFSVolSync(&bp, call->reply[2]);
+
+		call->unmarshall++;	/* Fall through */
+
+	case 4:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void afs_destroy_fs_fetch_acl(struct afs_call *call)
+{
+	kfree(call->reply[0]);	/* ACL buffer from the deliver routine, or NULL */
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * FS.FetchACL operation type: binds the op ID to its deliver/destructor hooks
+ */
+static const struct afs_call_type afs_RXFSFetchACL = {
+	.name		= "FS.FetchACL",
+	.op		= afs_FS_FetchACL,
+	.deliver	= afs_deliver_fs_fetch_acl,
+	.destructor	= afs_destroy_fs_fetch_acl,
+};
+
+/*
+ * Fetch the ACL for a file; ERR_PTR(-ENOMEM) if the call can't be allocated.
+ */
+struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchACL, 16, (21 + 6) * 4);	/* 4 request words; reply size matches the deliver routine */
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL;	/* set to the ACL buffer by the deliver routine */
+	call->reply[1] = vnode;	/* the deliver routine decodes status into this */
+	call->reply[2] = NULL; /* volsync */
+	call->ret_reply0 = true;	/* presumably returns reply[0] as the result - TODO confirm */
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSFETCHACL);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	afs_make_call(&fc->ac, call, GFP_KERNEL);
+	return (struct afs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 585a5952f608..683b802c20ea 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -977,6 +977,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
 			       struct afs_fid *, struct afs_file_status *,
 			       struct afs_callback *, struct afs_volsync *);
 
+struct afs_acl {	/* raw ACL as returned by FS.FetchACL */
+	u32	size;	/* data length taken from the reply (unpadded) */
+	u8	data[];	/* ACL bytes; allocated rounded up to a multiple of 4 */
+};
+
+extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *);
+
 /*
  * fs_probe.c
  */
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index e729ee3d4b02..b7d3d714d8ff 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -16,6 +16,7 @@
 #include "internal.h"
 
 static const char afs_xattr_list[] =
+	"afs.acl\0"
 	"afs.cell\0"
 	"afs.fid\0"
 	"afs.volume";
@@ -33,6 +34,57 @@ ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	return sizeof(afs_xattr_list);
 }
 
+/* Get a file's raw ACL.  Returns the ACL size (data is copied out only when
+ * size is non-zero), -ERANGE if a non-zero buffer is too small, or an error.
+ */
+static int afs_xattr_get_acl(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_acl *acl = NULL;
+	struct key *key;
+	int ret;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	ret = -ERESTARTSYS;	/* result if the vnode operation can't begin */
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(vnode);
+			acl = afs_fs_fetch_acl(&fc);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	if (ret == 0) {
+		ret = acl->size;	/* size == 0 is a length probe: report only */
+		if (size > 0) {
+			/* Too small: fail, but still free acl and drop the key. */
+			if (acl->size > size)
+				ret = -ERANGE;
+			else
+				memcpy(buffer, acl->data, acl->size);
+		}
+		kfree(acl);
+	}
+
+	key_put(key);
+	return ret;
+}
+
+static const struct xattr_handler afs_xattr_afs_acl_handler = {
+	.name	= "afs.acl",	/* also listed in afs_xattr_list */
+	.get	= afs_xattr_get_acl,	/* only a getter is wired up here */
+};
+
 /*
  * Get the name of the cell on which a file resides.
  */
@@ -123,6 +175,7 @@ static const struct xattr_handler afs_xattr_afs_volume_handler = {
 };
 
 const struct xattr_handler *afs_xattr_handlers[] = {
+	&afs_xattr_afs_acl_handler,
 	&afs_xattr_afs_cell_handler,
 	&afs_xattr_afs_fid_handler,
 	&afs_xattr_afs_volume_handler,
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index f1373c29bf7d..25c2e089c6ea 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -33,6 +33,7 @@ enum afs_call_trace {
 
 enum afs_fs_operation {
 	afs_FS_FetchData		= 130,	/* AFS Fetch file data */
+	afs_FS_FetchACL			= 131,	/* AFS Fetch file ACL */
 	afs_FS_FetchStatus		= 132,	/* AFS Fetch file status */
 	afs_FS_StoreData		= 133,	/* AFS Store file data */
 	afs_FS_StoreStatus		= 135,	/* AFS Store file status */
-- 
cgit v1.2.3


From b10494af4989d2d20679d0e3b7d1a45c2f8f8f1a Mon Sep 17 00:00:00 2001
From: Joe Gorse <jhgorse@gmail.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: implement acl setting

Implements the setting of ACLs in AFS by means of setting the
afs.acl extended attribute on the file.

Signed-off-by: Joe Gorse <jhgorse@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs_fs.h            |  1 +
 fs/afs/fsclient.c          | 61 +++++++++++++++++++++++++++++++++++++++++++---
 fs/afs/internal.h          |  1 +
 fs/afs/xattr.c             | 52 +++++++++++++++++++++++++++++++++++++--
 include/trace/events/afs.h |  1 +
 5 files changed, 110 insertions(+), 6 deletions(-)

(limited to 'include/trace')

diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index 4df1f1eec0ab..18a54ca422f8 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -20,6 +20,7 @@ enum AFS_FS_Operations {
 	FSFETCHACL		= 131,	/* AFS Fetch file ACL */
 	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSSTOREDATA		= 133,	/* AFS Store file data */
+	FSSTOREACL		= 134,	/* AFS Store file ACL */
 	FSSTORESTATUS		= 135,	/* AFS Store file status */
 	FSREMOVEFILE		= 136,	/* AFS Remove a file */
 	FSCREATEFILE		= 137,	/* AFS Create a file */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 283f486c59f4..7f1722b9e432 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -836,9 +836,10 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 }
 
 /*
- * deliver reply data to an FS.RemoveFile or FS.RemoveDir
+ * Deliver reply data to any operation that returns file status and volume
+ * sync.
  */
-static int afs_deliver_fs_remove(struct afs_call *call)
+static int afs_deliver_fs_status_and_vol(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
@@ -868,14 +869,14 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 static const struct afs_call_type afs_RXFSRemoveFile = {
 	.name		= "FS.RemoveFile",
 	.op		= afs_FS_RemoveFile,
-	.deliver	= afs_deliver_fs_remove,
+	.deliver	= afs_deliver_fs_status_and_vol,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSRemoveDir = {
 	.name		= "FS.RemoveDir",
 	.op		= afs_FS_RemoveDir,
-	.deliver	= afs_deliver_fs_remove,
+	.deliver	= afs_deliver_fs_status_and_vol,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -2513,3 +2514,55 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
 	afs_make_call(&fc->ac, call, GFP_KERNEL);
 	return (struct afs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
 }
+
+/*
+ * FS.StoreACL operation type; the reply is decoded as status + volsync.
+ */
+static const struct afs_call_type afs_RXFSStoreACL = {
+	.name		= "FS.StoreACL",
+	.op		= afs_FS_StoreACL,
+	.deliver	= afs_deliver_fs_status_and_vol,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Store the ACL for a file.
+ */
+int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	size_t size;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	size = round_up(acl->size, 4);	/* ACL length padded to 4 bytes */
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreACL,
+				   5 * 4 + size, (21 + 6) * 4);
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[2] = NULL; /* volsync */
+
+	/* marshall the parameters: opcode, fid (3 words), length, ACL data */
+	bp = call->request;
+	bp[0] = htonl(FSSTOREACL);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+	bp[4] = htonl(acl->size);
+	memcpy(&bp[5], acl->data, acl->size);
+	if (acl->size != size)	/* zero the padding after the ACL */
+		memset((void *)&bp[5] + acl->size, 0, size - acl->size);
+
+	trace_afs_make_fs_call(call, &vnode->fid);
+	afs_make_call(&fc->ac, call, GFP_KERNEL);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 683b802c20ea..5269824244c6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -983,6 +983,7 @@ struct afs_acl {
 };
 
 extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *);
+extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *);
 
 /*
  * fs_probe.c
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index b7d3d714d8ff..31db360947a6 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -80,9 +80,57 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler,
 	return ret;
 }
 
+/*
+ * Set a file's AFS3 ACL from the value written to the afs.acl xattr.
+ */
+static int afs_xattr_set_acl(const struct xattr_handler *handler,
+                             struct dentry *dentry,
+                             struct inode *inode, const char *name,
+                             const void *buffer, size_t size, int flags)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_acl *acl = NULL;
+	struct key *key;
+	int ret;
+
+	if (flags == XATTR_CREATE)	/* create-only requests are refused */
+		return -EINVAL;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
+	if (!acl) {
+		key_put(key);
+		return -ENOMEM;
+	}
+
+	acl->size = size;
+	memcpy(acl->data, buffer, size);	/* value is passed on verbatim */
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(vnode);
+			afs_fs_store_acl(&fc, acl);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	kfree(acl);
+	key_put(key);
+	return ret;
+}
+
 static const struct xattr_handler afs_xattr_afs_acl_handler = {
-	.name	= "afs.acl",
-	.get	= afs_xattr_get_acl,
+	.name   = "afs.acl",
+	.get    = afs_xattr_get_acl,
+	.set    = afs_xattr_set_acl,
 };
 
 /*
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 25c2e089c6ea..562f854ac4bf 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -36,6 +36,7 @@ enum afs_fs_operation {
 	afs_FS_FetchACL			= 131,	/* AFS Fetch file ACL */
 	afs_FS_FetchStatus		= 132,	/* AFS Fetch file status */
 	afs_FS_StoreData		= 133,	/* AFS Store file data */
+	afs_FS_StoreACL			= 134,	/* AFS Store file ACL */
 	afs_FS_StoreStatus		= 135,	/* AFS Store file status */
 	afs_FS_RemoveFile		= 136,	/* AFS Remove a file */
 	afs_FS_CreateFile		= 137,	/* AFS Create a file */
-- 
cgit v1.2.3