summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-11-13 07:27:00 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2013-11-13 07:27:00 +0100
commita7fa20a594fadf1e37cb3469c880ce6a544d3c3b (patch)
tree4be4e98648ff9afa398be947ca8fa716d6701686
parentMerge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jac... (diff)
parentfuse: writepages: protect secondary requests from fuse file release (diff)
downloadlinux-a7fa20a594fadf1e37cb3469c880ce6a544d3c3b.tar.xz
linux-a7fa20a594fadf1e37cb3469c880ce6a544d3c3b.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: "This adds a ->writepage() implementation to fuse, improving mmaped writeout and paving the way for buffered writeback. And there's a patch to add a fix minor number for /dev/cuse, similarly to /dev/fuse" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: fuse: writepages: protect secondary requests from fuse file release fuse: writepages: update bdi writeout when deleting secondary request fuse: writepages: crop secondary requests fuse: writepages: roll back changes if request not found cuse: add fix minor number to /dev/cuse fuse: writepage: skip already in flight fuse: writepages: handle same page rewrites fuse: writepages: fix aggregation fuse: fix race in fuse_writepages() fuse: Implement writepages callback fuse: don't BUG on no write file fuse: lock page in mkwrite fuse: Prepare to handle multiple pages in writeback fuse: Getting file for writeback helper
-rw-r--r--Documentation/devices.txt1
-rw-r--r--fs/fuse/cuse.c5
-rw-r--r--fs/fuse/file.c361
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--include/linux/miscdevice.h1
5 files changed, 342 insertions, 27 deletions
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 23721d3be3e6..80b72419ffd8 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -414,6 +414,7 @@ Your cooperation is appreciated.
200 = /dev/net/tun TAP/TUN network device
201 = /dev/button/gulpb Transmeta GULP-B buttons
202 = /dev/emd/ctl Enhanced Metadisk RAID (EMD) control
+ 203 = /dev/cuse Cuse (character device in user-space)
204 = /dev/video/em8300 EM8300 DVD decoder control
205 = /dev/video/em8300_mv EM8300 DVD decoder video
206 = /dev/video/em8300_ma EM8300 DVD decoder audio
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index adbfd66b380f..24da581cb52b 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
ATTRIBUTE_GROUPS(cuse_class_dev);
static struct miscdevice cuse_miscdev = {
- .minor = MISC_DYNAMIC_MINOR,
+ .minor = CUSE_MINOR,
.name = "cuse",
.fops = &cuse_channel_fops,
};
+MODULE_ALIAS_MISCDEV(CUSE_MINOR);
+MODULE_ALIAS("devname:cuse");
+
static int __init cuse_init(void)
{
int i, rc;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4598345ab87d..7e70506297bc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
BUG_ON(req->inode != inode);
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
- if (curr_index == index) {
+ if (curr_index <= index &&
+ index < curr_index + req->num_pages) {
found = true;
break;
}
@@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
- __free_page(req->pages[0]);
- fuse_file_put(req->ff, false);
+ int i;
+
+ for (i = 0; i < req->num_pages; i++)
+ __free_page(req->pages[i]);
+
+ if (req->ff)
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
struct inode *inode = req->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+ int i;
list_del(&req->writepages_entry);
- dec_bdi_stat(bdi, BDI_WRITEBACK);
- dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
- bdi_writeout_inc(bdi);
+ for (i = 0; i < req->num_pages; i++) {
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
+ }
wake_up(&fi->page_waitq);
}
/* Called under fc->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
+ loff_t size)
__releases(fc->lock)
__acquires(fc->lock)
{
struct fuse_inode *fi = get_fuse_inode(req->inode);
- loff_t size = i_size_read(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in;
+ __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
if (!fc->connected)
goto out_free;
- if (inarg->offset + PAGE_CACHE_SIZE <= size) {
- inarg->size = PAGE_CACHE_SIZE;
+ if (inarg->offset + data_size <= size) {
+ inarg->size = data_size;
} else if (inarg->offset < size) {
- inarg->size = size & (PAGE_CACHE_SIZE - 1);
+ inarg->size = size - inarg->offset;
} else {
/* Got truncated off completely */
goto out_free;
@@ -1472,12 +1482,13 @@ __acquires(fc->lock)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ size_t crop = i_size_read(inode);
struct fuse_req *req;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
req = list_entry(fi->queued_writes.next, struct fuse_req, list);
list_del_init(&req->list);
- fuse_send_writepage(fc, req);
+ fuse_send_writepage(fc, req, crop);
}
}
@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
mapping_set_error(inode->i_mapping, req->out.h.error);
spin_lock(&fc->lock);
+ while (req->misc.write.next) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_write_in *inarg = &req->misc.write.in;
+ struct fuse_req *next = req->misc.write.next;
+ req->misc.write.next = next->misc.write.next;
+ next->misc.write.next = NULL;
+ next->ff = fuse_file_get(req->ff);
+ list_add(&next->writepages_entry, &fi->writepages);
+
+ /*
+ * Skip fuse_flush_writepages() to make it easy to crop requests
+ * based on primary request size.
+ *
+ * 1st case (trivial): there are no concurrent activities using
+ * fuse_set/release_nowrite. Then we're on safe side because
+ * fuse_flush_writepages() would call fuse_send_writepage()
+ * anyway.
+ *
+ * 2nd case: someone called fuse_set_nowrite and it is waiting
+ * now for completion of all in-flight requests. This happens
+ * rarely and no more than once per page, so this should be
+ * okay.
+ *
+ * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
+ * of fuse_set_nowrite..fuse_release_nowrite section. The fact
+ * that fuse_set_nowrite returned implies that all in-flight
+ * requests were completed along with all of their secondary
+ * requests. Further primary requests are blocked by negative
+ * writectr. Hence there cannot be any in-flight requests and
+ * no invocations of fuse_writepage_end() while we're in
+ * fuse_set_nowrite..fuse_release_nowrite section.
+ */
+ fuse_send_writepage(fc, next, inarg->offset + inarg->size);
+ }
fi->writectr--;
fuse_writepage_finish(fc, req);
spin_unlock(&fc->lock);
fuse_writepage_free(fc, req);
}
+static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
+ struct fuse_inode *fi)
+{
+ struct fuse_file *ff = NULL;
+
+ spin_lock(&fc->lock);
+ if (!WARN_ON(list_empty(&fi->write_files))) {
+ ff = list_entry(fi->write_files.next, struct fuse_file,
+ write_entry);
+ fuse_file_get(ff);
+ }
+ spin_unlock(&fc->lock);
+
+ return ff;
+}
+
static int fuse_writepage_locked(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
- struct fuse_file *ff;
struct page *tmp_page;
+ int error = -ENOMEM;
set_page_writeback(page);
@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
if (!tmp_page)
goto err_free;
- spin_lock(&fc->lock);
- BUG_ON(list_empty(&fi->write_files));
- ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
- req->ff = fuse_file_get(ff);
- spin_unlock(&fc->lock);
+ error = -EIO;
+ req->ff = fuse_write_file_get(fc, fi);
+ if (!req->ff)
+ goto err_free;
- fuse_write_fill(req, ff, page_offset(page), 0);
+ fuse_write_fill(req, req->ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+ req->misc.write.next = NULL;
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
@@ -1550,19 +1611,263 @@ err_free:
fuse_request_free(req);
err:
end_page_writeback(page);
- return -ENOMEM;
+ return error;
}
static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
int err;
+ if (fuse_page_is_writeback(page->mapping->host, page->index)) {
+ /*
+ * ->writepages() should be called for sync() and friends. We
+ * should only get here on direct reclaim and then we are
+ * allowed to skip a page which is already in flight
+ */
+ WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
+
+ redirty_page_for_writepage(wbc, page);
+ return 0;
+ }
+
err = fuse_writepage_locked(page);
unlock_page(page);
return err;
}
+struct fuse_fill_wb_data {
+ struct fuse_req *req;
+ struct fuse_file *ff;
+ struct inode *inode;
+ struct page **orig_pages;
+};
+
+static void fuse_writepages_send(struct fuse_fill_wb_data *data)
+{
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ int num_pages = req->num_pages;
+ int i;
+
+ req->ff = fuse_file_get(data->ff);
+ spin_lock(&fc->lock);
+ list_add_tail(&req->list, &fi->queued_writes);
+ fuse_flush_writepages(inode);
+ spin_unlock(&fc->lock);
+
+ for (i = 0; i < num_pages; i++)
+ end_page_writeback(data->orig_pages[i]);
+}
+
+static bool fuse_writepage_in_flight(struct fuse_req *new_req,
+ struct page *page)
+{
+ struct fuse_conn *fc = get_fuse_conn(new_req->inode);
+ struct fuse_inode *fi = get_fuse_inode(new_req->inode);
+ struct fuse_req *tmp;
+ struct fuse_req *old_req;
+ bool found = false;
+ pgoff_t curr_index;
+
+ BUG_ON(new_req->num_pages != 0);
+
+ spin_lock(&fc->lock);
+ list_del(&new_req->writepages_entry);
+ list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
+ BUG_ON(old_req->inode != new_req->inode);
+ curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ if (curr_index <= page->index &&
+ page->index < curr_index + old_req->num_pages) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ list_add(&new_req->writepages_entry, &fi->writepages);
+ goto out_unlock;
+ }
+
+ new_req->num_pages = 1;
+ for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
+ BUG_ON(tmp->inode != new_req->inode);
+ curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ if (tmp->num_pages == 1 &&
+ curr_index == page->index) {
+ old_req = tmp;
+ }
+ }
+
+ if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
+ old_req->state == FUSE_REQ_PENDING)) {
+ struct backing_dev_info *bdi = page->mapping->backing_dev_info;
+
+ copy_highpage(old_req->pages[0], page);
+ spin_unlock(&fc->lock);
+
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
+ fuse_writepage_free(fc, new_req);
+ fuse_request_free(new_req);
+ goto out;
+ } else {
+ new_req->misc.write.next = old_req->misc.write.next;
+ old_req->misc.write.next = new_req;
+ }
+out_unlock:
+ spin_unlock(&fc->lock);
+out:
+ return found;
+}
+
+static int fuse_writepages_fill(struct page *page,
+ struct writeback_control *wbc, void *_data)
+{
+ struct fuse_fill_wb_data *data = _data;
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct page *tmp_page;
+ bool is_writeback;
+ int err;
+
+ if (!data->ff) {
+ err = -EIO;
+ data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
+ if (!data->ff)
+ goto out_unlock;
+ }
+
+ /*
+ * Being under writeback is unlikely but possible. For example direct
+ * read to an mmaped fuse file will set the page dirty twice; once when
+ * the pages are faulted with get_user_pages(), and then after the read
+ * completed.
+ */
+ is_writeback = fuse_page_is_writeback(inode, page->index);
+
+ if (req && req->num_pages &&
+ (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+ data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
+ fuse_writepages_send(data);
+ data->req = NULL;
+ }
+ err = -ENOMEM;
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (!tmp_page)
+ goto out_unlock;
+
+ /*
+ * The page must not be redirtied until the writeout is completed
+ * (i.e. userspace has sent a reply to the write request). Otherwise
+ * there could be more than one temporary page instance for each real
+ * page.
+ *
+ * This is ensured by holding the page lock in page_mkwrite() while
+ * checking fuse_page_is_writeback(). We already hold the page lock
+ * since clear_page_dirty_for_io() and keep it held until we add the
+ * request to the fi->writepages list and increment req->num_pages.
+ * After this fuse_page_is_writeback() will indicate that the page is
+ * under writeback, so we can release the page lock.
+ */
+ if (data->req == NULL) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ err = -ENOMEM;
+ req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+ if (!req) {
+ __free_page(tmp_page);
+ goto out_unlock;
+ }
+
+ fuse_write_fill(req, data->ff, page_offset(page), 0);
+ req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+ req->misc.write.next = NULL;
+ req->in.argpages = 1;
+ req->background = 1;
+ req->num_pages = 0;
+ req->end = fuse_writepage_end;
+ req->inode = inode;
+
+ spin_lock(&fc->lock);
+ list_add(&req->writepages_entry, &fi->writepages);
+ spin_unlock(&fc->lock);
+
+ data->req = req;
+ }
+ set_page_writeback(page);
+
+ copy_highpage(tmp_page, page);
+ req->pages[req->num_pages] = tmp_page;
+ req->page_descs[req->num_pages].offset = 0;
+ req->page_descs[req->num_pages].length = PAGE_SIZE;
+
+ inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
+ inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+
+ err = 0;
+ if (is_writeback && fuse_writepage_in_flight(req, page)) {
+ end_page_writeback(page);
+ data->req = NULL;
+ goto out_unlock;
+ }
+ data->orig_pages[req->num_pages] = page;
+
+ /*
+ * Protected by fc->lock against concurrent access by
+ * fuse_page_is_writeback().
+ */
+ spin_lock(&fc->lock);
+ req->num_pages++;
+ spin_unlock(&fc->lock);
+
+out_unlock:
+ unlock_page(page);
+
+ return err;
+}
+
+static int fuse_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_fill_wb_data data;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out;
+
+ data.inode = inode;
+ data.req = NULL;
+ data.ff = NULL;
+
+ err = -ENOMEM;
+ data.orig_pages = kzalloc(sizeof(struct page *) *
+ FUSE_MAX_PAGES_PER_REQ,
+ GFP_NOFS);
+ if (!data.orig_pages)
+ goto out;
+
+ err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
+ if (data.req) {
+ /* Ignore errors if we can write at least one page */
+ BUG_ON(!data.req->num_pages);
+ fuse_writepages_send(&data);
+ err = 0;
+ }
+ if (data.ff)
+ fuse_file_put(data.ff, false);
+
+ kfree(data.orig_pages);
+out:
+ return err;
+}
+
static int fuse_launder_page(struct page *page)
{
int err = 0;
@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
- /*
- * Don't use page->mapping as it may become NULL from a
- * concurrent truncate.
- */
- struct inode *inode = vma->vm_file->f_mapping->host;
+ struct inode *inode = file_inode(vma->vm_file);
+
+ file_update_time(vma->vm_file);
+ lock_page(page);
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ return VM_FAULT_NOPAGE;
+ }
fuse_wait_on_page_writeback(inode, page->index);
- return 0;
+ return VM_FAULT_LOCKED;
}
static const struct vm_operations_struct fuse_file_vm_ops = {
@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage,
.writepage = fuse_writepage,
+ .writepages = fuse_writepages,
.launder_page = fuse_launder_page,
.readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5b9e6f3b6aef..643274852c8b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -321,6 +321,7 @@ struct fuse_req {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
+ struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
struct fuse_lk_in lk_in;
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index cb358355ef43..f7eaf2d60083 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,7 @@
#define I2O_MINOR 166
#define MICROCODE_MINOR 184
#define TUN_MINOR 200
+#define CUSE_MINOR 203
#define MWAVE_MINOR 219 /* ACP/Mwave Modem */
#define MPT_MINOR 220
#define MPT2SAS_MINOR 221