diff options
Diffstat (limited to 'fs/logfs')
-rw-r--r-- | fs/logfs/dev_bdev.c | 16 | ||||
-rw-r--r-- | fs/logfs/dev_mtd.c | 26 | ||||
-rw-r--r-- | fs/logfs/dir.c | 8 | ||||
-rw-r--r-- | fs/logfs/file.c | 16 | ||||
-rw-r--r-- | fs/logfs/gc.c | 58 | ||||
-rw-r--r-- | fs/logfs/inode.c | 7 | ||||
-rw-r--r-- | fs/logfs/journal.c | 44 | ||||
-rw-r--r-- | fs/logfs/logfs.h | 31 | ||||
-rw-r--r-- | fs/logfs/logfs_abi.h | 10 | ||||
-rw-r--r-- | fs/logfs/readwrite.c | 106 | ||||
-rw-r--r-- | fs/logfs/segment.c | 70 | ||||
-rw-r--r-- | fs/logfs/super.c | 45 |
12 files changed, 281 insertions, 156 deletions
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 9718c22f186d..9bd2ce2a3040 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -9,6 +9,7 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> +#include <linux/gfp.h> #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) @@ -80,6 +81,7 @@ static void writeseg_end_io(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); end_page_writeback(page); + page_cache_release(page); } while (bvec >= bio->bi_io_vec); bio_put(bio); if (atomic_dec_and_test(&super->s_pending_writes)) @@ -97,8 +99,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); int i; + if (max_pages > BIO_MAX_PAGES) + max_pages = BIO_MAX_PAGES; bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); /* FIXME: handle this */ + BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { @@ -191,8 +195,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); int i; + if (max_pages > BIO_MAX_PAGES) + max_pages = BIO_MAX_PAGES; bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); /* FIXME: handle this */ + BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { @@ -297,6 +303,11 @@ static void bdev_put_device(struct super_block *sb) close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); } +static int bdev_can_write_buf(struct super_block *sb, u64 ofs) +{ + return 0; +} + static const struct logfs_device_ops bd_devops = { .find_first_sb = bdev_find_first_sb, .find_last_sb = bdev_find_last_sb, @@ -304,6 +315,7 @@ static const struct logfs_device_ops bd_devops = { .readpage = bdev_readpage, .writeseg = bdev_writeseg, .erase = bdev_erase, + .can_write_buf = bdev_can_write_buf, .sync = bdev_sync, .put_device = bdev_put_device, }; diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index cafb6ef2e05b..a85d47d13e4b 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -9,6 +9,7 @@ #include <linux/completion.h> #include <linux/mount.h> #include <linux/sched.h> +#include <linux/slab.h> #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) @@ -126,7 +127,8 @@ static int mtd_readpage(void *_sb, struct page *page) err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, page_address(page)); - if (err == -EUCLEAN) { + if (err == -EUCLEAN || err == -EBADMSG) { + /* -EBADMSG happens regularly on power failures */ err = 0; /* FIXME: force GC this segment */ } @@ -233,12 +235,32 @@ static void mtd_put_device(struct super_block *sb) put_mtd_device(logfs_super(sb)->s_mtd); } +static int mtd_can_write_buf(struct super_block *sb, u64 ofs) +{ + struct logfs_super *super = logfs_super(sb); + void *buf; + int err; + + buf = kmalloc(super->s_writesize, GFP_KERNEL); + if (!buf) + return -ENOMEM; + err = mtd_read(sb, ofs, super->s_writesize, buf); + if (err) + goto out; + if (memchr_inv(buf, 0xff, super->s_writesize)) + err = -EIO; + kfree(buf); +out: + return err; +} + static const struct logfs_device_ops mtd_devops = { .find_first_sb = mtd_find_first_sb, .find_last_sb = mtd_find_last_sb, .readpage = mtd_readpage, .writeseg = mtd_writeseg, .erase = mtd_erase, + .can_write_buf = mtd_can_write_buf, .sync = mtd_sync, .put_device = mtd_put_device, }; @@ -250,5 +272,7 @@ int logfs_get_sb_mtd(struct file_system_type *type, int flags, const struct logfs_device_ops *devops = &mtd_devops; mtd = get_mtd_device(NULL, mtdnr); + if (IS_ERR(mtd)) + return PTR_ERR(mtd); return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); } diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 56a8bfbb0120..72d1893ddd36 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -6,13 +6,13 @@ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> */ #include "logfs.h" - +#include <linux/slab.h> /* * Atomic dir operations * * Directory operations are by default not atomic. Dentries and Inodes are - * created/removed/altered in seperate operations. Therefore we need to do + * created/removed/altered in separate operations. Therefore we need to do * a small amount of journaling. * * Create, link, mkdir, mknod and symlink all share the same function to do @@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) (filler_t *)logfs_readpage, NULL); if (IS_ERR(page)) return PTR_ERR(page); - dd = kmap_atomic(page, KM_USER0); + dd = kmap(page); BUG_ON(dd->namelen == 0); full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), pos, be64_to_cpu(dd->ino), dd->type); - kunmap_atomic(dd, KM_USER0); + kunmap(page); page_cache_release(page); if (full) break; diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 370f367a933e..0de524071870 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -161,7 +161,17 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc) static void logfs_invalidatepage(struct page *page, unsigned long offset) { - move_page_to_btree(page); + struct logfs_block *block = logfs_block(page); + + if (block->reserved_bytes) { + struct super_block *sb = page->mapping->host->i_sb; + struct logfs_super *super = logfs_super(sb); + + super->s_dirty_pages -= block->reserved_bytes; + block->ops->free_block(sb, block); + BUG_ON(bitmap_weight(block->alias_map, LOGFS_BLOCK_FACTOR)); + } else + move_page_to_btree(page); BUG_ON(PagePrivate(page) || page->private); } @@ -212,10 +222,8 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct super_block *sb = dentry->d_inode->i_sb; - struct logfs_super *super = logfs_super(sb); - /* FIXME: write anchor */ - super->s_devops->sync(sb); + logfs_write_anchor(sb); return 0; } diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index 92949f95a901..caa4419285dc 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c @@ -7,6 +7,7 @@ */ #include "logfs.h" #include <linux/sched.h> +#include <linux/slab.h> /* * Wear leveling needs to kick in when the difference between low erase @@ -121,7 +122,7 @@ static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino, logfs_safe_iput(inode, cookie); } -static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist) +static u32 logfs_gc_segment(struct super_block *sb, u32 segno) { struct logfs_super *super = logfs_super(sb); struct logfs_segment_header sh; @@ -400,7 +401,7 @@ static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand) segno, (u64)segno << super->s_segshift, dist, no_free_segments(sb), valid, super->s_free_bytes); - cleaned = logfs_gc_segment(sb, segno, dist); + cleaned = logfs_gc_segment(sb, segno); log_gc("GC segment #%02x complete - now %x valid\n", segno, valid - cleaned); BUG_ON(cleaned != valid); @@ -458,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target) struct logfs_block *block; int round, progress, last_progress = 0; + /* + * Doing too many changes to the segfile at once would result + * in a large number of aliases. Write the journal before + * things get out of hand. + */ + if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES) + logfs_write_anchor(sb); + if (no_free_segments(sb) >= target && super->s_no_object_aliases < MAX_OBJ_ALIASES) return; @@ -623,38 +632,31 @@ static int check_area(struct super_block *sb, int i) { struct logfs_super *super = logfs_super(sb); struct logfs_area *area = super->s_area[i]; - struct logfs_object_header oh; + gc_level_t gc_level; + u32 cleaned, valid, ec; u32 segno = area->a_segno; - u32 ofs = area->a_used_bytes; - __be32 crc; - int err; + u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); if (!area->a_is_open) return 0; - for (ofs = area->a_used_bytes; - ofs <= super->s_segsize - sizeof(oh); - ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) { - err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh); - if (err) - return err; - - if (!memchr_inv(&oh, 0xff, sizeof(oh))) - break; + if (super->s_devops->can_write_buf(sb, ofs) == 0) + return 0; - crc = logfs_crc32(&oh, sizeof(oh) - 4, 4); - if (crc != oh.crc) { - printk(KERN_INFO "interrupted header at %llx\n", - dev_ofs(sb, segno, ofs)); - return 0; - } - } - if (ofs != area->a_used_bytes) { - printk(KERN_INFO "%x bytes unaccounted data found at %llx\n", - ofs - area->a_used_bytes, - dev_ofs(sb, segno, area->a_used_bytes)); - area->a_used_bytes = ofs; - } + printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs); + /* + * The device cannot write back the write buffer. Most likely the + * wbuf was already written out and the system crashed at some point + * before the journal commit happened. In that case we wouldn't have + * to do anything. But if the crash happened before the wbuf was + * written out correctly, we must GC this segment. So assume the + * worst and always do the GC run. + */ + area->a_is_open = 0; + valid = logfs_valid_bytes(sb, segno, &ec, &gc_level); + cleaned = logfs_gc_segment(sb, segno); + if (cleaned != valid) + return -EIO; return 0; } diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 33ec1aeaeec4..755a92e8daa7 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -6,6 +6,7 @@ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> */ #include "logfs.h" +#include <linux/slab.h> #include <linux/writeback.h> #include <linux/backing-dev.h> @@ -192,6 +193,7 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode) inode->i_ctime = CURRENT_TIME; inode->i_mtime = CURRENT_TIME; inode->i_nlink = 1; + li->li_refcount = 1; INIT_LIST_HEAD(&li->li_freeing_list); for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) @@ -325,7 +327,7 @@ static void logfs_set_ino_generation(struct super_block *sb, u64 ino; mutex_lock(&super->s_journal_mutex); - ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino); + ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino + 1); super->s_last_ino = ino; super->s_inos_till_wrap--; if (super->s_inos_till_wrap < 0) { @@ -385,8 +387,7 @@ static void logfs_init_once(void *_li) static int logfs_sync_fs(struct super_block *sb, int wait) { - /* FIXME: write anchor */ - logfs_super(sb)->s_devops->sync(sb); + logfs_write_anchor(sb); return 0; } diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 6ad30a4c9052..4b0e0616b357 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -6,6 +6,7 @@ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> */ #include "logfs.h" +#include <linux/slab.h> static void logfs_calc_free(struct super_block *sb) { @@ -131,10 +132,9 @@ static int read_area(struct super_block *sb, struct logfs_je_area *a) ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); if (super->s_writesize > 1) - logfs_buf_recover(area, ofs, a + 1, super->s_writesize); + return logfs_buf_recover(area, ofs, a + 1, super->s_writesize); else - logfs_buf_recover(area, ofs, NULL, 0); - return 0; + return logfs_buf_recover(area, ofs, NULL, 0); } static void *unpack(void *from, void *to) @@ -244,7 +244,7 @@ static int read_je(struct super_block *sb, u64 ofs) read_erasecount(sb, unpack(jh, scratch)); break; case JE_AREA: - read_area(sb, unpack(jh, scratch)); + err = read_area(sb, unpack(jh, scratch)); break; case JE_OBJ_ALIAS: err = logfs_load_object_aliases(sb, unpack(jh, scratch), @@ -388,7 +388,10 @@ static void journal_get_erase_count(struct logfs_area *area) static int journal_erase_segment(struct logfs_area *area) { struct super_block *sb = area->a_sb; - struct logfs_segment_header sh; + union { + struct logfs_segment_header sh; + unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)]; + } u; u64 ofs; int err; @@ -396,20 +399,21 @@ static int journal_erase_segment(struct logfs_area *area) if (err) return err; - sh.pad = 0; - sh.type = SEG_JOURNAL; - sh.level = 0; - sh.segno = cpu_to_be32(area->a_segno); - sh.ec = cpu_to_be32(area->a_erase_count); - sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); - sh.crc = logfs_crc32(&sh, sizeof(sh), 4); + memset(&u, 0, sizeof(u)); + u.sh.pad = 0; + u.sh.type = SEG_JOURNAL; + u.sh.level = 0; + u.sh.segno = cpu_to_be32(area->a_segno); + u.sh.ec = cpu_to_be32(area->a_erase_count); + u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); + u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4); /* This causes a bug in segment.c. Not yet. */ //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); ofs = dev_ofs(sb, area->a_segno, 0); - area->a_used_bytes = ALIGN(sizeof(sh), 16); - logfs_buf_write(area, ofs, &sh, sizeof(sh)); + area->a_used_bytes = sizeof(u); + logfs_buf_write(area, ofs, &u, sizeof(u)); return 0; } @@ -493,6 +497,8 @@ static void account_shadows(struct super_block *sb) btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); + btree_grim_visitor32(&tree->segment_map, 0, NULL); + tree->no_shadowed_segments = 0; if (li->li_block) { /* @@ -606,9 +612,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, if (len == 0) return logfs_write_header(super, header, 0, type); + BUG_ON(len > sb->s_blocksize); compr_len = logfs_compress(buf, data, len, sb->s_blocksize); if (compr_len < 0 || type == JE_ANCHOR) { - BUG_ON(len > sb->s_blocksize); memcpy(data, buf, len); compr_len = len; compr = COMPR_NONE; @@ -660,6 +666,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type, if (ofs < 0) return ofs; logfs_buf_write(area, ofs, super->s_compressed_je, len); + BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES); super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); return 0; } @@ -800,6 +807,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); struct logfs_area *area = super->s_journal_area; + struct btree_head32 *head = &super->s_reserved_segments; u32 segno, ec; int i, err; @@ -807,6 +815,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) /* Drop old segments */ journal_for_each(i) if (super->s_journal_seg[i]) { + btree_remove32(head, super->s_journal_seg[i]); logfs_set_segment_unreserved(sb, super->s_journal_seg[i], super->s_journal_ec[i]); @@ -819,8 +828,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb) super->s_journal_seg[i] = segno; super->s_journal_ec[i] = ec; logfs_set_segment_reserved(sb, segno); + err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); + BUG_ON(err); /* mempool should prevent this */ + err = logfs_erase_segment(sb, segno, 1); + BUG_ON(err); /* FIXME: remount-ro would be nicer */ } /* Manually move journal_area */ + freeseg(sb, area->a_segno); area->a_segno = super->s_journal_seg[0]; area->a_is_open = 0; area->a_used_bytes = 0; diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 129779431373..1a9db84f8d8f 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -144,6 +144,7 @@ struct logfs_area_ops { * @erase: erase one segment * @read: read from the device * @erase: erase part of the device + * @can_write_buf: decide whether wbuf can be written to ofs */ struct logfs_device_ops { struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs); @@ -153,6 +154,7 @@ struct logfs_device_ops { void (*writeseg)(struct super_block *sb, u64 ofs, size_t len); int (*erase)(struct super_block *sb, loff_t ofs, size_t len, int ensure_write); + int (*can_write_buf)(struct super_block *sb, u64 ofs); void (*sync)(struct super_block *sb); void (*put_device)(struct super_block *sb); }; @@ -257,10 +259,14 @@ struct logfs_shadow { * struct shadow_tree * @new: shadows where old_ofs==0, indexed by new_ofs * @old: shadows where old_ofs!=0, indexed by old_ofs + * @segment_map: bitfield of segments containing shadows + * @no_shadowed_segment: number of segments containing shadows */ struct shadow_tree { struct btree_head64 new; struct btree_head64 old; + struct btree_head32 segment_map; + int no_shadowed_segments; }; struct object_alias_item { @@ -305,13 +311,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix, level_t level, int child_no, __be64 val); struct logfs_block_ops { void (*write_block)(struct logfs_block *block); - gc_level_t (*block_level)(struct logfs_block *block); void (*free_block)(struct super_block *sb, struct logfs_block*block); int (*write_alias)(struct super_block *sb, struct logfs_block *block, write_alias_t *write_one_alias); }; +#define MAX_JOURNAL_ENTRIES 256 + struct logfs_super { struct mtd_info *s_mtd; /* underlying device */ struct block_device *s_bdev; /* underlying device */ @@ -378,7 +385,7 @@ struct logfs_super { u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ u64 s_last_version; struct logfs_area *s_journal_area; /* open journal segment */ - __be64 s_je_array[64]; + __be64 s_je_array[MAX_JOURNAL_ENTRIES]; int s_no_je; int s_sum_index; /* for the 12 summaries */ @@ -389,6 +396,7 @@ struct logfs_super { int s_lock_count; mempool_t *s_block_pool; /* struct logfs_block pool */ mempool_t *s_shadow_pool; /* struct logfs_shadow pool */ + struct list_head s_writeback_list; /* writeback pages */ /* * Space accounting: * - s_used_bytes specifies space used to store valid data objects. @@ -587,24 +595,25 @@ void move_page_to_btree(struct page *page); int logfs_init_mapping(struct super_block *sb); void logfs_sync_area(struct logfs_area *area); void logfs_sync_segments(struct super_block *sb); +void freeseg(struct super_block *sb, u32 segno); /* area handling */ int logfs_init_areas(struct super_block *sb); void logfs_cleanup_areas(struct super_block *sb); int logfs_open_area(struct logfs_area *area, size_t bytes); -void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, +int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, int use_filler); -static inline void logfs_buf_write(struct logfs_area *area, u64 ofs, +static inline int logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len) { - __logfs_buf_write(area, ofs, buf, len, 0); + return __logfs_buf_write(area, ofs, buf, len, 0); } -static inline void logfs_buf_recover(struct logfs_area *area, u64 ofs, +static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs, void *buf, size_t len) { - __logfs_buf_write(area, ofs, buf, len, 1); + return __logfs_buf_write(area, ofs, buf, len, 1); } /* super.c */ @@ -698,7 +707,7 @@ static inline gc_level_t expand_level(u64 ino, level_t __level) u8 level = (__force u8)__level; if (ino == LOGFS_INO_MASTER) { - /* ifile has seperate areas */ + /* ifile has separate areas */ level += LOGFS_MAX_LEVELS; } return (__force gc_level_t)level; @@ -721,4 +730,10 @@ static inline struct logfs_area *get_area(struct super_block *sb, return logfs_super(sb)->s_area[(__force u8)gc_level]; } +static inline void logfs_mempool_destroy(mempool_t *pool) +{ + if (pool) + mempool_destroy(pool); +} + #endif diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h index f674725663fe..ae960519c54a 100644 --- a/fs/logfs/logfs_abi.h +++ b/fs/logfs/logfs_abi.h @@ -50,9 +50,9 @@ static inline void check_##type(void) \ * 12 - gc recycled blocks, long-lived data * 13 - replacement blocks, short-lived data * - * Levels 1-11 are necessary for robust gc operations and help seperate + * Levels 1-11 are necessary for robust gc operations and help separate * short-lived metadata from longer-lived file data. In the future, - * file data should get seperated into several segments based on simple + * file data should get separated into several segments based on simple * heuristics. Old data recycled during gc operation is expected to be * long-lived. New data is of uncertain life expectancy. New data * used to replace older blocks in existing files is expected to be @@ -117,7 +117,7 @@ static inline void check_##type(void) \ #define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED) /* - * LogFS needs to seperate data into levels. Each level is defined as the + * LogFS needs to separate data into levels. Each level is defined as the * maximal possible distance from the master inode (inode of the inode file). * Data blocks reside on level 0, 1x indirect block on level 1, etc. * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11. @@ -204,7 +204,7 @@ SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE); * @ds_crc: crc32 of structure starting with the next field * @ds_ifile_levels: maximum number of levels for ifile * @ds_iblock_levels: maximum number of levels for regular files - * @ds_data_levels: number of seperate levels for data + * @ds_data_levels: number of separate levels for data * @pad0: reserved, must be 0 * @ds_feature_incompat: incompatible filesystem features * @ds_feature_ro_compat: read-only compatible filesystem features @@ -456,7 +456,7 @@ enum logfs_vim { * @vim: life expectancy of data * * "Areas" are segments currently being used for writing. There is at least - * one area per GC level. Several may be used to seperate long-living from + * one area per GC level. Several may be used to separate long-living from * short-living data. If an area with unknown vim is encountered, it can * simply be closed. * The write buffer immediately follow this header. diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7a23b3e7c0a7..0718d112a1a5 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -18,6 +18,7 @@ */ #include "logfs.h" #include <linux/sched.h> +#include <linux/slab.h> static u64 adjust_bix(u64 bix, level_t level) { @@ -429,25 +430,6 @@ static void inode_write_block(struct logfs_block *block) } } -static gc_level_t inode_block_level(struct logfs_block *block) -{ - BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER); - return GC_LEVEL(LOGFS_MAX_LEVELS); -} - -static gc_level_t indirect_block_level(struct logfs_block *block) -{ - struct page *page; - struct inode *inode; - u64 bix; - level_t level; - - page = block->page; - inode = page->mapping->host; - logfs_unpack_index(page->index, &bix, &level); - return expand_level(inode->i_ino, level); -} - /* * This silences a false, yet annoying gcc warning. I hate it when my editor * jumps into bitops.h each time I recompile this file. @@ -586,14 +568,12 @@ static void indirect_free_block(struct super_block *sb, static struct logfs_block_ops inode_block_ops = { .write_block = inode_write_block, - .block_level = inode_block_level, .free_block = inode_free_block, .write_alias = inode_write_alias, }; struct logfs_block_ops indirect_block_ops = { .write_block = indirect_write_block, - .block_level = indirect_block_level, .free_block = indirect_free_block, .write_alias = indirect_write_alias, }; @@ -912,6 +892,8 @@ u64 logfs_seek_hole(struct inode *inode, u64 bix) return bix; else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED) bix = maxbix(li->li_height); + else if (bix >= maxbix(li->li_height)) + return bix; else { bix = seek_holedata_loop(inode, bix, 0); if (bix < maxbix(li->li_height)) @@ -1113,17 +1095,25 @@ static int logfs_reserve_bytes(struct inode *inode, int bytes) int get_page_reserve(struct inode *inode, struct page *page) { struct logfs_super *super = logfs_super(inode->i_sb); + struct logfs_block *block = logfs_block(page); int ret; - if (logfs_block(page) && logfs_block(page)->reserved_bytes) + if (block && block->reserved_bytes) return 0; logfs_get_wblocks(inode->i_sb, page, WF_LOCK); - ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE); + while ((ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE)) && + !list_empty(&super->s_writeback_list)) { + block = list_entry(super->s_writeback_list.next, + struct logfs_block, alias_list); + block->ops->write_block(block); + } if (!ret) { alloc_data_block(inode, page); - logfs_block(page)->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE; + block = logfs_block(page); + block->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE; super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE; + list_move_tail(&block->alias_list, &super->s_writeback_list); } logfs_put_wblocks(inode->i_sb, page, WF_LOCK); return ret; @@ -1240,6 +1230,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow) mempool_free(shadow, super->s_shadow_pool); } +static void mark_segment(struct shadow_tree *tree, u32 segno) +{ + int err; + + if (!btree_lookup32(&tree->segment_map, segno)) { + err = btree_insert32(&tree->segment_map, segno, (void *)1, + GFP_NOFS); + BUG_ON(err); + tree->no_shadowed_segments++; + } +} + /** * fill_shadow_tree - Propagate shadow tree changes due to a write * @inode: Inode owning the page @@ -1287,6 +1289,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page, super->s_dirty_used_bytes += shadow->new_len; super->s_dirty_free_bytes += shadow->old_len; + mark_segment(tree, shadow->old_ofs >> super->s_segshift); + mark_segment(tree, shadow->new_ofs >> super->s_segshift); } } @@ -1594,7 +1598,6 @@ int logfs_delete(struct inode *inode, pgoff_t index, return ret; } -/* Rewrite cannot mark the inode dirty but has to write it immediatly. */ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, gc_level_t gc_level, long flags) { @@ -1611,6 +1614,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, if (level != 0) alloc_indirect_block(inode, page, 0); err = logfs_write_buf(inode, page, flags); + if (!err && shrink_level(gc_level) == 0) { + /* Rewrite cannot mark the inode dirty but has to + * write it immediatly. + * Q: Can't we just create an alias for the inode + * instead? And if not, why not? + */ + if (inode->i_ino == LOGFS_INO_MASTER) + logfs_write_anchor(inode->i_sb); + else { + err = __logfs_write_inode(inode, flags); + } + } } logfs_put_write_page(page); return err; @@ -1833,19 +1848,37 @@ static int __logfs_truncate(struct inode *inode, u64 size) return logfs_truncate_direct(inode, size); } -int logfs_truncate(struct inode *inode, u64 size) +/* + * Truncate, by changing the segment file, can consume a fair amount + * of resources. So back off from time to time and do some GC. + * 8 or 2048 blocks should be well within safety limits even if + * every single block resided in a different segment. + */ +#define TRUNCATE_STEP (8 * 1024 * 1024) +int logfs_truncate(struct inode *inode, u64 target) { struct super_block *sb = inode->i_sb; - int err; + u64 size = i_size_read(inode); + int err = 0; - logfs_get_wblocks(sb, NULL, 1); - err = __logfs_truncate(inode, size); - if (!err) - err = __logfs_write_inode(inode, 0); - logfs_put_wblocks(sb, NULL, 1); + size = ALIGN(size, TRUNCATE_STEP); + while (size > target) { + if (size > TRUNCATE_STEP) + size -= TRUNCATE_STEP; + else + size = 0; + if (size < target) + size = target; + + logfs_get_wblocks(sb, NULL, 1); + err = __logfs_truncate(inode, size); + if (!err) + err = __logfs_write_inode(inode, 0); + logfs_put_wblocks(sb, NULL, 1); + } if (!err) - err = vmtruncate(inode, size); + err = vmtruncate(inode, target); /* I don't trust error recovery yet. */ WARN_ON(err); @@ -2226,6 +2259,7 @@ int logfs_init_rw(struct super_block *sb) int min_fill = 3 * super->s_no_blocks; INIT_LIST_HEAD(&super->s_object_alias); + INIT_LIST_HEAD(&super->s_writeback_list); mutex_init(&super->s_write_mutex); super->s_block_pool = mempool_create_kmalloc_pool(min_fill, sizeof(struct logfs_block)); @@ -2239,8 +2273,6 @@ void logfs_cleanup_rw(struct super_block *sb) struct logfs_super *super = logfs_super(sb); destroy_meta_inode(super->s_segfile_inode); - if (super->s_block_pool) - mempool_destroy(super->s_block_pool); - if (super->s_shadow_pool) - mempool_destroy(super->s_shadow_pool); + logfs_mempool_destroy(super->s_block_pool); + logfs_mempool_destroy(super->s_shadow_pool); } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 1a14f9910d55..a9657afb70ad 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -10,6 +10,7 @@ * three kinds of objects: inodes, dentries and blocks, both data and indirect. */ #include "logfs.h" +#include <linux/slab.h> static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) { @@ -66,7 +67,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, return page; } -void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, +int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, int use_filler) { pgoff_t index = ofs >> PAGE_SHIFT; @@ -80,8 +81,10 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, copylen = min((ulong)len, PAGE_SIZE - offset); page = get_mapping_page(area->a_sb, index, use_filler); - SetPageUptodate(page); + if (IS_ERR(page)) + return PTR_ERR(page); BUG_ON(!page); /* FIXME: reserve a pool */ + SetPageUptodate(page); memcpy(page_address(page) + offset, buf, copylen); SetPagePrivate(page); page_cache_release(page); @@ -91,52 +94,61 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, offset = 0; index++; } while (len); + return 0; } -/* - * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. - */ -static void pad_wbuf(struct logfs_area *area, int final) +static void pad_partial_page(struct logfs_area *area) { struct super_block *sb = area->a_sb; - struct logfs_super *super = logfs_super(sb); struct page *page; u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); pgoff_t index = ofs >> PAGE_SHIFT; long offset = ofs & (PAGE_SIZE-1); u32 len = PAGE_SIZE - offset; - if (len == PAGE_SIZE) { - /* The math in this function can surely use some love */ - len = 0; - } - if (len) { - BUG_ON(area->a_used_bytes >= super->s_segsize); - - page = get_mapping_page(area->a_sb, index, 0); + if (len % PAGE_SIZE) { + page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ memset(page_address(page) + offset, 0xff, len); SetPagePrivate(page); page_cache_release(page); } +} - if (!final) - return; +static void pad_full_pages(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); + u32 len = super->s_segsize - area->a_used_bytes; + pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; + pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; + struct page *page; - area->a_used_bytes += len; - for ( ; area->a_used_bytes < super->s_segsize; - area->a_used_bytes += PAGE_SIZE) { - /* Memset another page */ - index++; - page = get_mapping_page(area->a_sb, index, 0); + while (no_indizes) { + page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ - memset(page_address(page), 0xff, PAGE_SIZE); + SetPageUptodate(page); + memset(page_address(page), 0xff, PAGE_CACHE_SIZE); SetPagePrivate(page); page_cache_release(page); + index++; + no_indizes--; } } /* + * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. + * Also make sure we allocate (and memset) all pages for final writeout. + */ +static void pad_wbuf(struct logfs_area *area, int final) +{ + pad_partial_page(area); + if (final) + pad_full_pages(area); +} + +/* * We have to be careful with the alias tree. Since lookup is done by bix, * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with * indirect blocks. So always use it through accessor functions. @@ -174,14 +186,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block, return 0; } -static gc_level_t btree_block_level(struct logfs_block *block) -{ - return expand_level(block->ino, block->level); -} - static struct logfs_block_ops btree_block_ops = { .write_block = btree_write_block, - .block_level = btree_block_level, .free_block = __free_block, .write_alias = btree_write_alias, }; @@ -683,7 +689,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) return 0; } -static void freeseg(struct super_block *sb, u32 segno) +void freeseg(struct super_block *sb, u32 segno) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; @@ -910,7 +916,7 @@ err: for (i--; i >= 0; i--) free_area(super->s_area[i]); free_area(super->s_journal_area); - mempool_destroy(super->s_alias_pool); + logfs_mempool_destroy(super->s_alias_pool); return -ENOMEM; } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index c66beab78dee..d651e10a1e9c 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -11,6 +11,8 @@ */ #include "logfs.h" #include <linux/bio.h> +#include <linux/slab.h> +#include <linux/blkdev.h> #include <linux/mtd/mtd.h> #include <linux/statfs.h> #include <linux/buffer_head.h> @@ -136,6 +138,14 @@ static int logfs_sb_set(struct super_block *sb, void *_super) sb->s_fs_info = super; sb->s_mtd = super->s_mtd; sb->s_bdev = super->s_bdev; +#ifdef CONFIG_BLOCK + if (sb->s_bdev) + sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; +#endif +#ifdef CONFIG_MTD + if (sb->s_mtd) + sb->s_bdi = sb->s_mtd->backing_dev_info; +#endif return 0; } @@ -277,7 +287,7 @@ static int logfs_recover_sb(struct super_block *sb) } if (valid0 && valid1 && ds_cmp(ds0, ds1)) { printk(KERN_INFO"Superblocks don't match - fixing.\n"); - return write_one_sb(sb, super->s_devops->find_last_sb); + return logfs_write_sb(sb); } /* If neither is valid now, something's wrong. Didn't we properly * check them before?!? */ @@ -289,6 +299,10 @@ static int logfs_make_writeable(struct super_block *sb) { int err; + err = logfs_open_segfile(sb); + if (err) + return err; + /* Repair any broken superblock copies */ err = logfs_recover_sb(sb); if (err) @@ -299,10 +313,6 @@ static int logfs_make_writeable(struct super_block *sb) if (err) return err; - err = logfs_open_segfile(sb); - if (err) - return err; - /* Do one GC pass before any data gets dirtied */ logfs_gc_pass(sb); @@ -327,27 +337,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) goto fail; sb->s_root = d_alloc_root(rootdir); - if (!sb->s_root) + if (!sb->s_root) { + iput(rootdir); goto fail; + } super->s_erase_page = alloc_pages(GFP_KERNEL, 0); if (!super->s_erase_page) - goto fail2; + goto fail; memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); /* FIXME: check for read-only mounts */ err = logfs_make_writeable(sb); if (err) - goto fail3; + goto fail1; log_super("LogFS: Finished mounting\n"); simple_set_mnt(mnt, sb); return 0; -fail3: +fail1: __free_page(super->s_erase_page); -fail2: - iput(rootdir); fail: iput(logfs_super(sb)->s_master_inode); return -EIO; @@ -376,7 +386,7 @@ static struct page *find_super_block(struct super_block *sb) if (!first || IS_ERR(first)) return NULL; last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]); - if (!last || IS_ERR(first)) { + if (!last || IS_ERR(last)) { page_cache_release(first); return NULL; } @@ -407,7 +417,7 @@ static int __logfs_read_sb(struct super_block *sb) page = find_super_block(sb); if (!page) - return -EIO; + return -EINVAL; ds = page_address(page); super->s_size = be64_to_cpu(ds->ds_filesystem_size); @@ -451,6 +461,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only) btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); + btree_init_mempool32(&super->s_shadow_tree.segment_map, + super->s_btree_pool); ret = logfs_init_mapping(sb); if (ret) @@ -515,8 +527,8 @@ static void logfs_kill_sb(struct super_block *sb) if (super->s_erase_page) __free_page(super->s_erase_page); super->s_devops->put_device(sb); - mempool_destroy(super->s_btree_pool); - mempool_destroy(super->s_alias_pool); + logfs_mempool_destroy(super->s_btree_pool); + logfs_mempool_destroy(super->s_alias_pool); kfree(super); log_super("LogFS: Finished unmounting\n"); } @@ -572,8 +584,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, return 0; err1: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; err0: kfree(super); |