summaryrefslogtreecommitdiffstats
path: root/fs/logfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/logfs')
-rw-r--r--fs/logfs/dev_bdev.c16
-rw-r--r--fs/logfs/dev_mtd.c26
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/logfs/file.c16
-rw-r--r--fs/logfs/gc.c58
-rw-r--r--fs/logfs/inode.c7
-rw-r--r--fs/logfs/journal.c44
-rw-r--r--fs/logfs/logfs.h31
-rw-r--r--fs/logfs/logfs_abi.h10
-rw-r--r--fs/logfs/readwrite.c106
-rw-r--r--fs/logfs/segment.c70
-rw-r--r--fs/logfs/super.c45
12 files changed, 281 insertions, 156 deletions
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9718c22f186d..9bd2ce2a3040 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -9,6 +9,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
+#include <linux/gfp.h>
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
@@ -80,6 +81,7 @@ static void writeseg_end_io(struct bio *bio, int err)
prefetchw(&bvec->bv_page->flags);
end_page_writeback(page);
+ page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
if (atomic_dec_and_test(&super->s_pending_writes))
@@ -97,8 +99,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
int i;
+ if (max_pages > BIO_MAX_PAGES)
+ max_pages = BIO_MAX_PAGES;
bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio); /* FIXME: handle this */
+ BUG_ON(!bio);
for (i = 0; i < nr_pages; i++) {
if (i >= max_pages) {
@@ -191,8 +195,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
int i;
+ if (max_pages > BIO_MAX_PAGES)
+ max_pages = BIO_MAX_PAGES;
bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio); /* FIXME: handle this */
+ BUG_ON(!bio);
for (i = 0; i < nr_pages; i++) {
if (i >= max_pages) {
@@ -297,6 +303,11 @@ static void bdev_put_device(struct super_block *sb)
close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
}
+static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
+{
+ return 0;
+}
+
static const struct logfs_device_ops bd_devops = {
.find_first_sb = bdev_find_first_sb,
.find_last_sb = bdev_find_last_sb,
@@ -304,6 +315,7 @@ static const struct logfs_device_ops bd_devops = {
.readpage = bdev_readpage,
.writeseg = bdev_writeseg,
.erase = bdev_erase,
+ .can_write_buf = bdev_can_write_buf,
.sync = bdev_sync,
.put_device = bdev_put_device,
};
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index cafb6ef2e05b..a85d47d13e4b 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -9,6 +9,7 @@
#include <linux/completion.h>
#include <linux/mount.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
@@ -126,7 +127,8 @@ static int mtd_readpage(void *_sb, struct page *page)
err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
page_address(page));
- if (err == -EUCLEAN) {
+ if (err == -EUCLEAN || err == -EBADMSG) {
+ /* -EBADMSG happens regularly on power failures */
err = 0;
/* FIXME: force GC this segment */
}
@@ -233,12 +235,32 @@ static void mtd_put_device(struct super_block *sb)
put_mtd_device(logfs_super(sb)->s_mtd);
}
+static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
+{
+ struct logfs_super *super = logfs_super(sb);
+ void *buf;
+ int err;
+
+ buf = kmalloc(super->s_writesize, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ err = mtd_read(sb, ofs, super->s_writesize, buf);
+ if (err)
+ goto out;
+ if (memchr_inv(buf, 0xff, super->s_writesize))
+ err = -EIO;
+ kfree(buf);
+out:
+ return err;
+}
+
static const struct logfs_device_ops mtd_devops = {
.find_first_sb = mtd_find_first_sb,
.find_last_sb = mtd_find_last_sb,
.readpage = mtd_readpage,
.writeseg = mtd_writeseg,
.erase = mtd_erase,
+ .can_write_buf = mtd_can_write_buf,
.sync = mtd_sync,
.put_device = mtd_put_device,
};
@@ -250,5 +272,7 @@ int logfs_get_sb_mtd(struct file_system_type *type, int flags,
const struct logfs_device_ops *devops = &mtd_devops;
mtd = get_mtd_device(NULL, mtdnr);
+ if (IS_ERR(mtd))
+ return PTR_ERR(mtd);
return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 56a8bfbb0120..72d1893ddd36 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -6,13 +6,13 @@
* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
*/
#include "logfs.h"
-
+#include <linux/slab.h>
/*
* Atomic dir operations
*
* Directory operations are by default not atomic. Dentries and Inodes are
- * created/removed/altered in seperate operations. Therefore we need to do
+ * created/removed/altered in separate operations. Therefore we need to do
* a small amount of journaling.
*
* Create, link, mkdir, mknod and symlink all share the same function to do
@@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
(filler_t *)logfs_readpage, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
- dd = kmap_atomic(page, KM_USER0);
+ dd = kmap(page);
BUG_ON(dd->namelen == 0);
full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen),
pos, be64_to_cpu(dd->ino), dd->type);
- kunmap_atomic(dd, KM_USER0);
+ kunmap(page);
page_cache_release(page);
if (full)
break;
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 370f367a933e..0de524071870 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -161,7 +161,17 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc)
static void logfs_invalidatepage(struct page *page, unsigned long offset)
{
- move_page_to_btree(page);
+ struct logfs_block *block = logfs_block(page);
+
+ if (block->reserved_bytes) {
+ struct super_block *sb = page->mapping->host->i_sb;
+ struct logfs_super *super = logfs_super(sb);
+
+ super->s_dirty_pages -= block->reserved_bytes;
+ block->ops->free_block(sb, block);
+ BUG_ON(bitmap_weight(block->alias_map, LOGFS_BLOCK_FACTOR));
+ } else
+ move_page_to_btree(page);
BUG_ON(PagePrivate(page) || page->private);
}
@@ -212,10 +222,8 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
int logfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
struct super_block *sb = dentry->d_inode->i_sb;
- struct logfs_super *super = logfs_super(sb);
- /* FIXME: write anchor */
- super->s_devops->sync(sb);
+ logfs_write_anchor(sb);
return 0;
}
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 92949f95a901..caa4419285dc 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -7,6 +7,7 @@
*/
#include "logfs.h"
#include <linux/sched.h>
+#include <linux/slab.h>
/*
* Wear leveling needs to kick in when the difference between low erase
@@ -121,7 +122,7 @@ static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
logfs_safe_iput(inode, cookie);
}
-static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist)
+static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
{
struct logfs_super *super = logfs_super(sb);
struct logfs_segment_header sh;
@@ -400,7 +401,7 @@ static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
segno, (u64)segno << super->s_segshift,
dist, no_free_segments(sb), valid,
super->s_free_bytes);
- cleaned = logfs_gc_segment(sb, segno, dist);
+ cleaned = logfs_gc_segment(sb, segno);
log_gc("GC segment #%02x complete - now %x valid\n", segno,
valid - cleaned);
BUG_ON(cleaned != valid);
@@ -458,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
struct logfs_block *block;
int round, progress, last_progress = 0;
+ /*
+ * Doing too many changes to the segfile at once would result
+ * in a large number of aliases. Write the journal before
+ * things get out of hand.
+ */
+ if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
+ logfs_write_anchor(sb);
+
if (no_free_segments(sb) >= target &&
super->s_no_object_aliases < MAX_OBJ_ALIASES)
return;
@@ -623,38 +632,31 @@ static int check_area(struct super_block *sb, int i)
{
struct logfs_super *super = logfs_super(sb);
struct logfs_area *area = super->s_area[i];
- struct logfs_object_header oh;
+ gc_level_t gc_level;
+ u32 cleaned, valid, ec;
u32 segno = area->a_segno;
- u32 ofs = area->a_used_bytes;
- __be32 crc;
- int err;
+ u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
if (!area->a_is_open)
return 0;
- for (ofs = area->a_used_bytes;
- ofs <= super->s_segsize - sizeof(oh);
- ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
- err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
- if (err)
- return err;
-
- if (!memchr_inv(&oh, 0xff, sizeof(oh)))
- break;
+ if (super->s_devops->can_write_buf(sb, ofs) == 0)
+ return 0;
- crc = logfs_crc32(&oh, sizeof(oh) - 4, 4);
- if (crc != oh.crc) {
- printk(KERN_INFO "interrupted header at %llx\n",
- dev_ofs(sb, segno, ofs));
- return 0;
- }
- }
- if (ofs != area->a_used_bytes) {
- printk(KERN_INFO "%x bytes unaccounted data found at %llx\n",
- ofs - area->a_used_bytes,
- dev_ofs(sb, segno, area->a_used_bytes));
- area->a_used_bytes = ofs;
- }
+ printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
+ /*
+ * The device cannot write back the write buffer. Most likely the
+ * wbuf was already written out and the system crashed at some point
+ * before the journal commit happened. In that case we wouldn't have
+ * to do anything. But if the crash happened before the wbuf was
+ * written out correctly, we must GC this segment. So assume the
+ * worst and always do the GC run.
+ */
+ area->a_is_open = 0;
+ valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
+ cleaned = logfs_gc_segment(sb, segno);
+ if (cleaned != valid)
+ return -EIO;
return 0;
}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 33ec1aeaeec4..755a92e8daa7 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -6,6 +6,7 @@
* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
*/
#include "logfs.h"
+#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
@@ -192,6 +193,7 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
inode->i_nlink = 1;
+ li->li_refcount = 1;
INIT_LIST_HEAD(&li->li_freeing_list);
for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
@@ -325,7 +327,7 @@ static void logfs_set_ino_generation(struct super_block *sb,
u64 ino;
mutex_lock(&super->s_journal_mutex);
- ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino);
+ ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino + 1);
super->s_last_ino = ino;
super->s_inos_till_wrap--;
if (super->s_inos_till_wrap < 0) {
@@ -385,8 +387,7 @@ static void logfs_init_once(void *_li)
static int logfs_sync_fs(struct super_block *sb, int wait)
{
- /* FIXME: write anchor */
- logfs_super(sb)->s_devops->sync(sb);
+ logfs_write_anchor(sb);
return 0;
}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 6ad30a4c9052..4b0e0616b357 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -6,6 +6,7 @@
* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
*/
#include "logfs.h"
+#include <linux/slab.h>
static void logfs_calc_free(struct super_block *sb)
{
@@ -131,10 +132,9 @@ static int read_area(struct super_block *sb, struct logfs_je_area *a)
ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
if (super->s_writesize > 1)
- logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
+ return logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
else
- logfs_buf_recover(area, ofs, NULL, 0);
- return 0;
+ return logfs_buf_recover(area, ofs, NULL, 0);
}
static void *unpack(void *from, void *to)
@@ -244,7 +244,7 @@ static int read_je(struct super_block *sb, u64 ofs)
read_erasecount(sb, unpack(jh, scratch));
break;
case JE_AREA:
- read_area(sb, unpack(jh, scratch));
+ err = read_area(sb, unpack(jh, scratch));
break;
case JE_OBJ_ALIAS:
err = logfs_load_object_aliases(sb, unpack(jh, scratch),
@@ -388,7 +388,10 @@ static void journal_get_erase_count(struct logfs_area *area)
static int journal_erase_segment(struct logfs_area *area)
{
struct super_block *sb = area->a_sb;
- struct logfs_segment_header sh;
+ union {
+ struct logfs_segment_header sh;
+ unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
+ } u;
u64 ofs;
int err;
@@ -396,20 +399,21 @@ static int journal_erase_segment(struct logfs_area *area)
if (err)
return err;
- sh.pad = 0;
- sh.type = SEG_JOURNAL;
- sh.level = 0;
- sh.segno = cpu_to_be32(area->a_segno);
- sh.ec = cpu_to_be32(area->a_erase_count);
- sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
- sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
+ memset(&u, 0, sizeof(u));
+ u.sh.pad = 0;
+ u.sh.type = SEG_JOURNAL;
+ u.sh.level = 0;
+ u.sh.segno = cpu_to_be32(area->a_segno);
+ u.sh.ec = cpu_to_be32(area->a_erase_count);
+ u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
+ u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
/* This causes a bug in segment.c. Not yet. */
//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
ofs = dev_ofs(sb, area->a_segno, 0);
- area->a_used_bytes = ALIGN(sizeof(sh), 16);
- logfs_buf_write(area, ofs, &sh, sizeof(sh));
+ area->a_used_bytes = sizeof(u);
+ logfs_buf_write(area, ofs, &u, sizeof(u));
return 0;
}
@@ -493,6 +497,8 @@ static void account_shadows(struct super_block *sb)
btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
+ btree_grim_visitor32(&tree->segment_map, 0, NULL);
+ tree->no_shadowed_segments = 0;
if (li->li_block) {
/*
@@ -606,9 +612,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
if (len == 0)
return logfs_write_header(super, header, 0, type);
+ BUG_ON(len > sb->s_blocksize);
compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
if (compr_len < 0 || type == JE_ANCHOR) {
- BUG_ON(len > sb->s_blocksize);
memcpy(data, buf, len);
compr_len = len;
compr = COMPR_NONE;
@@ -660,6 +666,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
if (ofs < 0)
return ofs;
logfs_buf_write(area, ofs, super->s_compressed_je, len);
+ BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
return 0;
}
@@ -800,6 +807,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
struct logfs_area *area = super->s_journal_area;
+ struct btree_head32 *head = &super->s_reserved_segments;
u32 segno, ec;
int i, err;
@@ -807,6 +815,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
/* Drop old segments */
journal_for_each(i)
if (super->s_journal_seg[i]) {
+ btree_remove32(head, super->s_journal_seg[i]);
logfs_set_segment_unreserved(sb,
super->s_journal_seg[i],
super->s_journal_ec[i]);
@@ -819,8 +828,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
super->s_journal_seg[i] = segno;
super->s_journal_ec[i] = ec;
logfs_set_segment_reserved(sb, segno);
+ err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+ BUG_ON(err); /* mempool should prevent this */
+ err = logfs_erase_segment(sb, segno, 1);
+ BUG_ON(err); /* FIXME: remount-ro would be nicer */
}
/* Manually move journal_area */
+ freeseg(sb, area->a_segno);
area->a_segno = super->s_journal_seg[0];
area->a_is_open = 0;
area->a_used_bytes = 0;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 129779431373..1a9db84f8d8f 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -144,6 +144,7 @@ struct logfs_area_ops {
* @erase: erase one segment
* @read: read from the device
* @erase: erase part of the device
+ * @can_write_buf: decide whether wbuf can be written to ofs
*/
struct logfs_device_ops {
struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
@@ -153,6 +154,7 @@ struct logfs_device_ops {
void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
int ensure_write);
+ int (*can_write_buf)(struct super_block *sb, u64 ofs);
void (*sync)(struct super_block *sb);
void (*put_device)(struct super_block *sb);
};
@@ -257,10 +259,14 @@ struct logfs_shadow {
* struct shadow_tree
* @new: shadows where old_ofs==0, indexed by new_ofs
* @old: shadows where old_ofs!=0, indexed by old_ofs
+ * @segment_map: bitfield of segments containing shadows
+ * @no_shadowed_segment: number of segments containing shadows
*/
struct shadow_tree {
struct btree_head64 new;
struct btree_head64 old;
+ struct btree_head32 segment_map;
+ int no_shadowed_segments;
};
struct object_alias_item {
@@ -305,13 +311,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
level_t level, int child_no, __be64 val);
struct logfs_block_ops {
void (*write_block)(struct logfs_block *block);
- gc_level_t (*block_level)(struct logfs_block *block);
void (*free_block)(struct super_block *sb, struct logfs_block*block);
int (*write_alias)(struct super_block *sb,
struct logfs_block *block,
write_alias_t *write_one_alias);
};
+#define MAX_JOURNAL_ENTRIES 256
+
struct logfs_super {
struct mtd_info *s_mtd; /* underlying device */
struct block_device *s_bdev; /* underlying device */
@@ -378,7 +385,7 @@ struct logfs_super {
u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
u64 s_last_version;
struct logfs_area *s_journal_area; /* open journal segment */
- __be64 s_je_array[64];
+ __be64 s_je_array[MAX_JOURNAL_ENTRIES];
int s_no_je;
int s_sum_index; /* for the 12 summaries */
@@ -389,6 +396,7 @@ struct logfs_super {
int s_lock_count;
mempool_t *s_block_pool; /* struct logfs_block pool */
mempool_t *s_shadow_pool; /* struct logfs_shadow pool */
+ struct list_head s_writeback_list; /* writeback pages */
/*
* Space accounting:
* - s_used_bytes specifies space used to store valid data objects.
@@ -587,24 +595,25 @@ void move_page_to_btree(struct page *page);
int logfs_init_mapping(struct super_block *sb);
void logfs_sync_area(struct logfs_area *area);
void logfs_sync_segments(struct super_block *sb);
+void freeseg(struct super_block *sb, u32 segno);
/* area handling */
int logfs_init_areas(struct super_block *sb);
void logfs_cleanup_areas(struct super_block *sb);
int logfs_open_area(struct logfs_area *area, size_t bytes);
-void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
+int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
int use_filler);
-static inline void logfs_buf_write(struct logfs_area *area, u64 ofs,
+static inline int logfs_buf_write(struct logfs_area *area, u64 ofs,
void *buf, size_t len)
{
- __logfs_buf_write(area, ofs, buf, len, 0);
+ return __logfs_buf_write(area, ofs, buf, len, 0);
}
-static inline void logfs_buf_recover(struct logfs_area *area, u64 ofs,
+static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs,
void *buf, size_t len)
{
- __logfs_buf_write(area, ofs, buf, len, 1);
+ return __logfs_buf_write(area, ofs, buf, len, 1);
}
/* super.c */
@@ -698,7 +707,7 @@ static inline gc_level_t expand_level(u64 ino, level_t __level)
u8 level = (__force u8)__level;
if (ino == LOGFS_INO_MASTER) {
- /* ifile has seperate areas */
+ /* ifile has separate areas */
level += LOGFS_MAX_LEVELS;
}
return (__force gc_level_t)level;
@@ -721,4 +730,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
return logfs_super(sb)->s_area[(__force u8)gc_level];
}
+static inline void logfs_mempool_destroy(mempool_t *pool)
+{
+ if (pool)
+ mempool_destroy(pool);
+}
+
#endif
diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h
index f674725663fe..ae960519c54a 100644
--- a/fs/logfs/logfs_abi.h
+++ b/fs/logfs/logfs_abi.h
@@ -50,9 +50,9 @@ static inline void check_##type(void) \
* 12 - gc recycled blocks, long-lived data
* 13 - replacement blocks, short-lived data
*
- * Levels 1-11 are necessary for robust gc operations and help seperate
+ * Levels 1-11 are necessary for robust gc operations and help separate
* short-lived metadata from longer-lived file data. In the future,
- * file data should get seperated into several segments based on simple
+ * file data should get separated into several segments based on simple
* heuristics. Old data recycled during gc operation is expected to be
* long-lived. New data is of uncertain life expectancy. New data
* used to replace older blocks in existing files is expected to be
@@ -117,7 +117,7 @@ static inline void check_##type(void) \
#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED)
/*
- * LogFS needs to seperate data into levels. Each level is defined as the
+ * LogFS needs to separate data into levels. Each level is defined as the
* maximal possible distance from the master inode (inode of the inode file).
* Data blocks reside on level 0, 1x indirect block on level 1, etc.
* Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
@@ -204,7 +204,7 @@ SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
* @ds_crc: crc32 of structure starting with the next field
* @ds_ifile_levels: maximum number of levels for ifile
* @ds_iblock_levels: maximum number of levels for regular files
- * @ds_data_levels: number of seperate levels for data
+ * @ds_data_levels: number of separate levels for data
* @pad0: reserved, must be 0
* @ds_feature_incompat: incompatible filesystem features
* @ds_feature_ro_compat: read-only compatible filesystem features
@@ -456,7 +456,7 @@ enum logfs_vim {
* @vim: life expectancy of data
*
* "Areas" are segments currently being used for writing. There is at least
- * one area per GC level. Several may be used to seperate long-living from
+ * one area per GC level. Several may be used to separate long-living from
* short-living data. If an area with unknown vim is encountered, it can
* simply be closed.
* The write buffer immediately follow this header.
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 7a23b3e7c0a7..0718d112a1a5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -18,6 +18,7 @@
*/
#include "logfs.h"
#include <linux/sched.h>
+#include <linux/slab.h>
static u64 adjust_bix(u64 bix, level_t level)
{
@@ -429,25 +430,6 @@ static void inode_write_block(struct logfs_block *block)
}
}
-static gc_level_t inode_block_level(struct logfs_block *block)
-{
- BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
- return GC_LEVEL(LOGFS_MAX_LEVELS);
-}
-
-static gc_level_t indirect_block_level(struct logfs_block *block)
-{
- struct page *page;
- struct inode *inode;
- u64 bix;
- level_t level;
-
- page = block->page;
- inode = page->mapping->host;
- logfs_unpack_index(page->index, &bix, &level);
- return expand_level(inode->i_ino, level);
-}
-
/*
* This silences a false, yet annoying gcc warning. I hate it when my editor
* jumps into bitops.h each time I recompile this file.
@@ -586,14 +568,12 @@ static void indirect_free_block(struct super_block *sb,
static struct logfs_block_ops inode_block_ops = {
.write_block = inode_write_block,
- .block_level = inode_block_level,
.free_block = inode_free_block,
.write_alias = inode_write_alias,
};
struct logfs_block_ops indirect_block_ops = {
.write_block = indirect_write_block,
- .block_level = indirect_block_level,
.free_block = indirect_free_block,
.write_alias = indirect_write_alias,
};
@@ -912,6 +892,8 @@ u64 logfs_seek_hole(struct inode *inode, u64 bix)
return bix;
else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED)
bix = maxbix(li->li_height);
+ else if (bix >= maxbix(li->li_height))
+ return bix;
else {
bix = seek_holedata_loop(inode, bix, 0);
if (bix < maxbix(li->li_height))
@@ -1113,17 +1095,25 @@ static int logfs_reserve_bytes(struct inode *inode, int bytes)
int get_page_reserve(struct inode *inode, struct page *page)
{
struct logfs_super *super = logfs_super(inode->i_sb);
+ struct logfs_block *block = logfs_block(page);
int ret;
- if (logfs_block(page) && logfs_block(page)->reserved_bytes)
+ if (block && block->reserved_bytes)
return 0;
logfs_get_wblocks(inode->i_sb, page, WF_LOCK);
- ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE);
+ while ((ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE)) &&
+ !list_empty(&super->s_writeback_list)) {
+ block = list_entry(super->s_writeback_list.next,
+ struct logfs_block, alias_list);
+ block->ops->write_block(block);
+ }
if (!ret) {
alloc_data_block(inode, page);
- logfs_block(page)->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE;
+ block = logfs_block(page);
+ block->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE;
super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE;
+ list_move_tail(&block->alias_list, &super->s_writeback_list);
}
logfs_put_wblocks(inode->i_sb, page, WF_LOCK);
return ret;
@@ -1240,6 +1230,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
mempool_free(shadow, super->s_shadow_pool);
}
+static void mark_segment(struct shadow_tree *tree, u32 segno)
+{
+ int err;
+
+ if (!btree_lookup32(&tree->segment_map, segno)) {
+ err = btree_insert32(&tree->segment_map, segno, (void *)1,
+ GFP_NOFS);
+ BUG_ON(err);
+ tree->no_shadowed_segments++;
+ }
+}
+
/**
* fill_shadow_tree - Propagate shadow tree changes due to a write
* @inode: Inode owning the page
@@ -1287,6 +1289,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
super->s_dirty_used_bytes += shadow->new_len;
super->s_dirty_free_bytes += shadow->old_len;
+ mark_segment(tree, shadow->old_ofs >> super->s_segshift);
+ mark_segment(tree, shadow->new_ofs >> super->s_segshift);
}
}
@@ -1594,7 +1598,6 @@ int logfs_delete(struct inode *inode, pgoff_t index,
return ret;
}
-/* Rewrite cannot mark the inode dirty but has to write it immediatly. */
int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
gc_level_t gc_level, long flags)
{
@@ -1611,6 +1614,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
if (level != 0)
alloc_indirect_block(inode, page, 0);
err = logfs_write_buf(inode, page, flags);
+ if (!err && shrink_level(gc_level) == 0) {
+ /* Rewrite cannot mark the inode dirty but has to
+ * write it immediatly.
+ * Q: Can't we just create an alias for the inode
+ * instead? And if not, why not?
+ */
+ if (inode->i_ino == LOGFS_INO_MASTER)
+ logfs_write_anchor(inode->i_sb);
+ else {
+ err = __logfs_write_inode(inode, flags);
+ }
+ }
}
logfs_put_write_page(page);
return err;
@@ -1833,19 +1848,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
return logfs_truncate_direct(inode, size);
}
-int logfs_truncate(struct inode *inode, u64 size)
+/*
+ * Truncate, by changing the segment file, can consume a fair amount
+ * of resources. So back off from time to time and do some GC.
+ * 8 or 2048 blocks should be well within safety limits even if
+ * every single block resided in a different segment.
+ */
+#define TRUNCATE_STEP (8 * 1024 * 1024)
+int logfs_truncate(struct inode *inode, u64 target)
{
struct super_block *sb = inode->i_sb;
- int err;
+ u64 size = i_size_read(inode);
+ int err = 0;
- logfs_get_wblocks(sb, NULL, 1);
- err = __logfs_truncate(inode, size);
- if (!err)
- err = __logfs_write_inode(inode, 0);
- logfs_put_wblocks(sb, NULL, 1);
+ size = ALIGN(size, TRUNCATE_STEP);
+ while (size > target) {
+ if (size > TRUNCATE_STEP)
+ size -= TRUNCATE_STEP;
+ else
+ size = 0;
+ if (size < target)
+ size = target;
+
+ logfs_get_wblocks(sb, NULL, 1);
+ err = __logfs_truncate(inode, size);
+ if (!err)
+ err = __logfs_write_inode(inode, 0);
+ logfs_put_wblocks(sb, NULL, 1);
+ }
if (!err)
- err = vmtruncate(inode, size);
+ err = vmtruncate(inode, target);
/* I don't trust error recovery yet. */
WARN_ON(err);
@@ -2226,6 +2259,7 @@ int logfs_init_rw(struct super_block *sb)
int min_fill = 3 * super->s_no_blocks;
INIT_LIST_HEAD(&super->s_object_alias);
+ INIT_LIST_HEAD(&super->s_writeback_list);
mutex_init(&super->s_write_mutex);
super->s_block_pool = mempool_create_kmalloc_pool(min_fill,
sizeof(struct logfs_block));
@@ -2239,8 +2273,6 @@ void logfs_cleanup_rw(struct super_block *sb)
struct logfs_super *super = logfs_super(sb);
destroy_meta_inode(super->s_segfile_inode);
- if (super->s_block_pool)
- mempool_destroy(super->s_block_pool);
- if (super->s_shadow_pool)
- mempool_destroy(super->s_shadow_pool);
+ logfs_mempool_destroy(super->s_block_pool);
+ logfs_mempool_destroy(super->s_shadow_pool);
}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 1a14f9910d55..a9657afb70ad 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -10,6 +10,7 @@
* three kinds of objects: inodes, dentries and blocks, both data and indirect.
*/
#include "logfs.h"
+#include <linux/slab.h>
static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
@@ -66,7 +67,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
return page;
}
-void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
+int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
int use_filler)
{
pgoff_t index = ofs >> PAGE_SHIFT;
@@ -80,8 +81,10 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
copylen = min((ulong)len, PAGE_SIZE - offset);
page = get_mapping_page(area->a_sb, index, use_filler);
- SetPageUptodate(page);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
BUG_ON(!page); /* FIXME: reserve a pool */
+ SetPageUptodate(page);
memcpy(page_address(page) + offset, buf, copylen);
SetPagePrivate(page);
page_cache_release(page);
@@ -91,52 +94,61 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
offset = 0;
index++;
} while (len);
+ return 0;
}
-/*
- * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
- */
-static void pad_wbuf(struct logfs_area *area, int final)
+static void pad_partial_page(struct logfs_area *area)
{
struct super_block *sb = area->a_sb;
- struct logfs_super *super = logfs_super(sb);
struct page *page;
u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
pgoff_t index = ofs >> PAGE_SHIFT;
long offset = ofs & (PAGE_SIZE-1);
u32 len = PAGE_SIZE - offset;
- if (len == PAGE_SIZE) {
- /* The math in this function can surely use some love */
- len = 0;
- }
- if (len) {
- BUG_ON(area->a_used_bytes >= super->s_segsize);
-
- page = get_mapping_page(area->a_sb, index, 0);
+ if (len % PAGE_SIZE) {
+ page = get_mapping_page(sb, index, 0);
BUG_ON(!page); /* FIXME: reserve a pool */
memset(page_address(page) + offset, 0xff, len);
SetPagePrivate(page);
page_cache_release(page);
}
+}
- if (!final)
- return;
+static void pad_full_pages(struct logfs_area *area)
+{
+ struct super_block *sb = area->a_sb;
+ struct logfs_super *super = logfs_super(sb);
+ u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
+ u32 len = super->s_segsize - area->a_used_bytes;
+ pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
+ pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
+ struct page *page;
- area->a_used_bytes += len;
- for ( ; area->a_used_bytes < super->s_segsize;
- area->a_used_bytes += PAGE_SIZE) {
- /* Memset another page */
- index++;
- page = get_mapping_page(area->a_sb, index, 0);
+ while (no_indizes) {
+ page = get_mapping_page(sb, index, 0);
BUG_ON(!page); /* FIXME: reserve a pool */
- memset(page_address(page), 0xff, PAGE_SIZE);
+ SetPageUptodate(page);
+ memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
SetPagePrivate(page);
page_cache_release(page);
+ index++;
+ no_indizes--;
}
}
/*
+ * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
+ * Also make sure we allocate (and memset) all pages for final writeout.
+ */
+static void pad_wbuf(struct logfs_area *area, int final)
+{
+ pad_partial_page(area);
+ if (final)
+ pad_full_pages(area);
+}
+
+/*
* We have to be careful with the alias tree. Since lookup is done by bix,
* it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
* indirect blocks. So always use it through accessor functions.
@@ -174,14 +186,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
return 0;
}
-static gc_level_t btree_block_level(struct logfs_block *block)
-{
- return expand_level(block->ino, block->level);
-}
-
static struct logfs_block_ops btree_block_ops = {
.write_block = btree_write_block,
- .block_level = btree_block_level,
.free_block = __free_block,
.write_alias = btree_write_alias,
};
@@ -683,7 +689,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
return 0;
}
-static void freeseg(struct super_block *sb, u32 segno)
+void freeseg(struct super_block *sb, u32 segno)
{
struct logfs_super *super = logfs_super(sb);
struct address_space *mapping = super->s_mapping_inode->i_mapping;
@@ -910,7 +916,7 @@ err:
for (i--; i >= 0; i--)
free_area(super->s_area[i]);
free_area(super->s_journal_area);
- mempool_destroy(super->s_alias_pool);
+ logfs_mempool_destroy(super->s_alias_pool);
return -ENOMEM;
}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c66beab78dee..d651e10a1e9c 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -11,6 +11,8 @@
*/
#include "logfs.h"
#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
#include <linux/mtd/mtd.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
@@ -136,6 +138,14 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
sb->s_fs_info = super;
sb->s_mtd = super->s_mtd;
sb->s_bdev = super->s_bdev;
+#ifdef CONFIG_BLOCK
+ if (sb->s_bdev)
+ sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
+#endif
+#ifdef CONFIG_MTD
+ if (sb->s_mtd)
+ sb->s_bdi = sb->s_mtd->backing_dev_info;
+#endif
return 0;
}
@@ -277,7 +287,7 @@ static int logfs_recover_sb(struct super_block *sb)
}
if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
printk(KERN_INFO"Superblocks don't match - fixing.\n");
- return write_one_sb(sb, super->s_devops->find_last_sb);
+ return logfs_write_sb(sb);
}
/* If neither is valid now, something's wrong. Didn't we properly
* check them before?!? */
@@ -289,6 +299,10 @@ static int logfs_make_writeable(struct super_block *sb)
{
int err;
+ err = logfs_open_segfile(sb);
+ if (err)
+ return err;
+
/* Repair any broken superblock copies */
err = logfs_recover_sb(sb);
if (err)
@@ -299,10 +313,6 @@ static int logfs_make_writeable(struct super_block *sb)
if (err)
return err;
- err = logfs_open_segfile(sb);
- if (err)
- return err;
-
/* Do one GC pass before any data gets dirtied */
logfs_gc_pass(sb);
@@ -327,27 +337,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
goto fail;
sb->s_root = d_alloc_root(rootdir);
- if (!sb->s_root)
+ if (!sb->s_root) {
+ iput(rootdir);
goto fail;
+ }
super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
if (!super->s_erase_page)
- goto fail2;
+ goto fail;
memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
/* FIXME: check for read-only mounts */
err = logfs_make_writeable(sb);
if (err)
- goto fail3;
+ goto fail1;
log_super("LogFS: Finished mounting\n");
simple_set_mnt(mnt, sb);
return 0;
-fail3:
+fail1:
__free_page(super->s_erase_page);
-fail2:
- iput(rootdir);
fail:
iput(logfs_super(sb)->s_master_inode);
return -EIO;
@@ -376,7 +386,7 @@ static struct page *find_super_block(struct super_block *sb)
if (!first || IS_ERR(first))
return NULL;
last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
- if (!last || IS_ERR(first)) {
+ if (!last || IS_ERR(last)) {
page_cache_release(first);
return NULL;
}
@@ -407,7 +417,7 @@ static int __logfs_read_sb(struct super_block *sb)
page = find_super_block(sb);
if (!page)
- return -EIO;
+ return -EINVAL;
ds = page_address(page);
super->s_size = be64_to_cpu(ds->ds_filesystem_size);
@@ -451,6 +461,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
+ btree_init_mempool32(&super->s_shadow_tree.segment_map,
+ super->s_btree_pool);
ret = logfs_init_mapping(sb);
if (ret)
@@ -515,8 +527,8 @@ static void logfs_kill_sb(struct super_block *sb)
if (super->s_erase_page)
__free_page(super->s_erase_page);
super->s_devops->put_device(sb);
- mempool_destroy(super->s_btree_pool);
- mempool_destroy(super->s_alias_pool);
+ logfs_mempool_destroy(super->s_btree_pool);
+ logfs_mempool_destroy(super->s_alias_pool);
kfree(super);
log_super("LogFS: Finished unmounting\n");
}
@@ -572,8 +584,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
return 0;
err1:
- up_write(&sb->s_umount);
- deactivate_super(sb);
+ deactivate_locked_super(sb);
return err;
err0:
kfree(super);