diff options
Diffstat (limited to 'fs')
89 files changed, 3266 insertions, 2068 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index dfebdbe7440e..3031e3233dd6 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -26,7 +26,6 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/idr.h> -#include <asm/semaphore.h> #include <net/9p/9p.h> #include <net/9p/client.h> diff --git a/fs/Kconfig b/fs/Kconfig index 028ae38ecc52..8b18a8758677 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -689,6 +689,7 @@ config ZISOFS config UDF_FS tristate "UDF file system support" + select CRC_ITU_T help This is the new file system used on some CD-ROMs and DVDs. Say Y if you intend to mount DVD discs or CDRW's written in packet mode, or diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index b5c3b6114add..853845abcca6 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -62,7 +62,7 @@ config BINFMT_SHARED_FLAT config BINFMT_AOUT tristate "Kernel support for a.out and ECOFF binaries" depends on ARCH_SUPPORTS_AOUT && \ - (X86_32 || ALPHA || ARM || M68K || SPARC32) + (X86_32 || ALPHA || ARM || M68K) ---help--- A.out (Assembler.OUTput) is a set of formats for libraries and executables used in the earliest versions of UNIX. Linux used @@ -444,22 +444,27 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, struct bio_map_data { struct bio_vec *iovecs; - void __user *userptr; + int nr_sgvecs; + struct sg_iovec *sgvecs; }; -static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio) +static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, + struct sg_iovec *iov, int iov_count) { memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); + memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); + bmd->nr_sgvecs = iov_count; bio->bi_private = bmd; } static void bio_free_map_data(struct bio_map_data *bmd) { kfree(bmd->iovecs); + kfree(bmd->sgvecs); kfree(bmd); } -static struct bio_map_data *bio_alloc_map_data(int nr_segs) +static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) { struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); @@ -467,13 +472,71 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs) return NULL; bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); - if (bmd->iovecs) + if (!bmd->iovecs) { + kfree(bmd); + return NULL; + } + + bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); + if (bmd->sgvecs) return bmd; + kfree(bmd->iovecs); kfree(bmd); return NULL; } +static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, + int uncopy) +{ + int ret = 0, i; + struct bio_vec *bvec; + int iov_idx = 0; + unsigned int iov_off = 0; + int read = bio_data_dir(bio) == READ; + + __bio_for_each_segment(bvec, bio, i, 0) { + char *bv_addr = page_address(bvec->bv_page); + unsigned int bv_len = bvec->bv_len; + + while (bv_len && iov_idx < iov_count) { + unsigned int bytes; + char *iov_addr; + + bytes = min_t(unsigned int, + iov[iov_idx].iov_len - iov_off, bv_len); + iov_addr = iov[iov_idx].iov_base + iov_off; + + if (!ret) { + if (!read && !uncopy) + ret = copy_from_user(bv_addr, iov_addr, + bytes); + if (read && uncopy) + ret = copy_to_user(iov_addr, bv_addr, + bytes); + + if (ret) + ret = -EFAULT; + } + + bv_len -= bytes; + bv_addr += bytes; + iov_addr += bytes; + iov_off += bytes; + + if (iov[iov_idx].iov_len == iov_off) { + iov_idx++; + iov_off = 0; + } + } + + if (uncopy) + __free_page(bvec->bv_page); + } + + return ret; +} + /** * bio_uncopy_user - finish previously mapped bio * @bio: bio being terminated @@ -484,55 +547,56 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs) int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - const int read = bio_data_dir(bio) == READ; - struct bio_vec *bvec; - int i, ret = 0; + int ret; - __bio_for_each_segment(bvec, bio, i, 0) { - char *addr = page_address(bvec->bv_page); - unsigned int len = bmd->iovecs[i].bv_len; + ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); - if (read && !ret && copy_to_user(bmd->userptr, addr, len)) - ret = -EFAULT; - - __free_page(bvec->bv_page); - bmd->userptr += len; - } bio_free_map_data(bmd); bio_put(bio); return ret; } /** - * bio_copy_user - copy user data to bio + * bio_copy_user_iov - copy user data to bio * @q: destination block queue - * @uaddr: start of user address - * @len: length in bytes + * @iov: the iovec. + * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, - unsigned int len, int write_to_vm) +struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, + int iov_count, int write_to_vm) { - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; struct bio_map_data *bmd; struct bio_vec *bvec; struct page *page; struct bio *bio; int i, ret; + int nr_pages = 0; + unsigned int len = 0; - bmd = bio_alloc_map_data(end - start); + for (i = 0; i < iov_count; i++) { + unsigned long uaddr; + unsigned long end; + unsigned long start; + + uaddr = (unsigned long)iov[i].iov_base; + end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = uaddr >> PAGE_SHIFT; + + nr_pages += end - start; + len += iov[i].iov_len; + } + + bmd = bio_alloc_map_data(nr_pages, iov_count); if (!bmd) return ERR_PTR(-ENOMEM); - bmd->userptr = (void __user *) uaddr; - ret = -ENOMEM; - bio = bio_alloc(GFP_KERNEL, end - start); + bio = bio_alloc(GFP_KERNEL, nr_pages); if (!bio) goto out_bmd; @@ -564,22 +628,12 @@ struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, * success */ if (!write_to_vm) { - char __user *p = (char __user *) uaddr; - - /* - * for a write, copy in data to kernel pages - */ - ret = -EFAULT; - bio_for_each_segment(bvec, bio, i) { - char *addr = page_address(bvec->bv_page); - - if (copy_from_user(addr, p, bvec->bv_len)) - goto cleanup; - p += bvec->bv_len; - } + ret = __bio_copy_iov(bio, iov, iov_count, 0); + if (ret) + goto cleanup; } - bio_set_map_data(bmd, bio); + bio_set_map_data(bmd, bio, iov, iov_count); return bio; cleanup: bio_for_each_segment(bvec, bio, i) @@ -591,6 +645,28 @@ out_bmd: return ERR_PTR(ret); } +/** + * bio_copy_user - copy user data to bio + * @q: destination block queue + * @uaddr: start of user address + * @len: length in bytes + * @write_to_vm: bool indicating writing to pages or not + * + * Prepares and returns a bio for indirect user io, bouncing data + * to/from kernel pages as necessary. Must be paired with + * call bio_uncopy_user() on io completion. + */ +struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, + unsigned int len, int write_to_vm) +{ + struct sg_iovec iov; + + iov.iov_base = (void __user *)uaddr; + iov.iov_len = len; + + return bio_copy_user_iov(q, &iov, 1, write_to_vm); +} + static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 350680fd7da7..0c3b618c15b3 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -23,7 +23,6 @@ #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/mutex.h> -#include <asm/semaphore.h> #include <asm/uaccess.h> diff --git a/fs/dcache.c b/fs/dcache.c index 43455776711e..3ee588d5f585 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1746,12 +1746,21 @@ shouldnt_be_hashed: goto shouldnt_be_hashed; } +static int prepend(char **buffer, int *buflen, const char *str, + int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + /** * d_path - return the path of a dentry - * @dentry: dentry to report - * @vfsmnt: vfsmnt to which the dentry belongs - * @root: root dentry - * @rootmnt: vfsmnt to which the root dentry belongs + * @path: the dentry/vfsmount to report + * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: buffer to return value in * @buflen: buffer length * @@ -1761,23 +1770,22 @@ shouldnt_be_hashed: * Returns the buffer or an error code if the path was too long. * * "buflen" should be positive. Caller holds the dcache_lock. + * + * If path is not reachable from the supplied root, then the value of + * root is changed (without modifying refcounts). */ -static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, - struct path *root, char *buffer, int buflen) +char *__d_path(const struct path *path, struct path *root, + char *buffer, int buflen) { + struct dentry *dentry = path->dentry; + struct vfsmount *vfsmnt = path->mnt; char * end = buffer+buflen; char * retval; - int namelen; - - *--end = '\0'; - buflen--; - if (!IS_ROOT(dentry) && d_unhashed(dentry)) { - buflen -= 10; - end -= 10; - if (buflen < 0) + + prepend(&end, &buflen, "\0", 1); + if (!IS_ROOT(dentry) && d_unhashed(dentry) && + (prepend(&end, &buflen, " (deleted)", 10) != 0)) goto Elong; - memcpy(end, " (deleted)", 10); - } if (buflen < 1) goto Elong; @@ -1804,13 +1812,10 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, } parent = dentry->d_parent; prefetch(parent); - namelen = dentry->d_name.len; - buflen -= namelen + 1; - if (buflen < 0) + if ((prepend(&end, &buflen, dentry->d_name.name, + dentry->d_name.len) != 0) || + (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; - end -= namelen; - memcpy(end, dentry->d_name.name, namelen); - *--end = '/'; retval = end; dentry = parent; } @@ -1818,12 +1823,12 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, return retval; global_root: - namelen = dentry->d_name.len; - buflen -= namelen; - if (buflen < 0) + retval += 1; /* hit the slash */ + if (prepend(&retval, &buflen, dentry->d_name.name, + dentry->d_name.len) != 0) goto Elong; - retval -= namelen-1; /* hit the slash */ - memcpy(retval, dentry->d_name.name, namelen); + root->mnt = vfsmnt; + root->dentry = dentry; return retval; Elong: return ERR_PTR(-ENAMETOOLONG); @@ -1846,6 +1851,7 @@ char *d_path(struct path *path, char *buf, int buflen) { char *res; struct path root; + struct path tmp; /* * We have various synthetic filesystems that never get mounted. On @@ -1859,10 +1865,11 @@ char *d_path(struct path *path, char *buf, int buflen) read_lock(¤t->fs->lock); root = current->fs->root; - path_get(¤t->fs->root); + path_get(&root); read_unlock(¤t->fs->lock); spin_lock(&dcache_lock); - res = __d_path(path->dentry, path->mnt, &root, buf, buflen); + tmp = root; + res = __d_path(path, &tmp, buf, buflen); spin_unlock(&dcache_lock); path_put(&root); return res; @@ -1890,6 +1897,48 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, } /* + * Write full pathname from the root of the filesystem into the buffer. + */ +char *dentry_path(struct dentry *dentry, char *buf, int buflen) +{ + char *end = buf + buflen; + char *retval; + + spin_lock(&dcache_lock); + prepend(&end, &buflen, "\0", 1); + if (!IS_ROOT(dentry) && d_unhashed(dentry) && + (prepend(&end, &buflen, "//deleted", 9) != 0)) + goto Elong; + if (buflen < 1) + goto Elong; + /* Get '/' right */ + retval = end-1; + *retval = '/'; + + for (;;) { + struct dentry *parent; + if (IS_ROOT(dentry)) + break; + + parent = dentry->d_parent; + prefetch(parent); + + if ((prepend(&end, &buflen, dentry->d_name.name, + dentry->d_name.len) != 0) || + (prepend(&end, &buflen, "/", 1) != 0)) + goto Elong; + + retval = end; + dentry = parent; + } + spin_unlock(&dcache_lock); + return retval; +Elong: + spin_unlock(&dcache_lock); + return ERR_PTR(-ENAMETOOLONG); +} + +/* * NOTE! The user-level library version returns a * character pointer. The kernel system call just * returns the length of the buffer filled (which @@ -1918,9 +1967,9 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) read_lock(¤t->fs->lock); pwd = current->fs->pwd; - path_get(¤t->fs->pwd); + path_get(&pwd); root = current->fs->root; - path_get(¤t->fs->root); + path_get(&root); read_unlock(¤t->fs->lock); error = -ENOENT; @@ -1928,9 +1977,10 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) spin_lock(&dcache_lock); if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { unsigned long len; + struct path tmp = root; char * cwd; - cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE); + cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); spin_unlock(&dcache_lock); error = PTR_ERR(cwd); diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index d248e60951ba..ca1c9124c8ce 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -10,6 +10,7 @@ dlm-y := ast.o \ midcomms.o \ netlink.o \ lowcomms.o \ + plock.o \ rcom.o \ recover.o \ recoverd.o \ diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c3ad1dff3b25..eac23bd288b2 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -114,7 +114,7 @@ struct cluster_attribute { }; static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, - unsigned int *info_field, int check_zero, + int *info_field, int check_zero, const char *buf, size_t len) { unsigned int x; @@ -284,6 +284,7 @@ struct node { struct list_head list; /* space->members */ int nodeid; int weight; + int new; }; static struct configfs_group_operations clusters_ops = { @@ -565,6 +566,7 @@ static struct config_item *make_node(struct config_group *g, const char *name) config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; nd->weight = 1; /* default weight of 1 if none is set */ + nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() */ mutex_lock(&sp->members_lock); list_add(&nd->list, &sp->members); @@ -805,12 +807,13 @@ static void put_comm(struct comm *cm) } /* caller must free mem */ -int dlm_nodeid_list(char *lsname, int **ids_out) +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out) { struct space *sp; struct node *nd; - int i = 0, rv = 0; - int *ids; + int i = 0, rv = 0, ids_count = 0, new_count = 0; + int *ids, *new; sp = get_space(lsname); if (!sp) @@ -818,23 +821,50 @@ int dlm_nodeid_list(char *lsname, int **ids_out) mutex_lock(&sp->members_lock); if (!sp->members_count) { - rv = 0; + rv = -EINVAL; + printk(KERN_ERR "dlm: zero members_count\n"); goto out; } - ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); + ids_count = sp->members_count; + + ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); if (!ids) { rv = -ENOMEM; goto out; } - rv = sp->members_count; - list_for_each_entry(nd, &sp->members, list) + list_for_each_entry(nd, &sp->members, list) { ids[i++] = nd->nodeid; + if (nd->new) + new_count++; + } + + if (ids_count != i) + printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); + + if (!new_count) + goto out_ids; + + new = kcalloc(new_count, sizeof(int), GFP_KERNEL); + if (!new) { + kfree(ids); + rv = -ENOMEM; + goto out; + } - if (rv != i) - printk("bad nodeid count %d %d\n", rv, i); + i = 0; + list_for_each_entry(nd, &sp->members, list) { + if (nd->new) { + new[i++] = nd->nodeid; + nd->new = 0; + } + } + *new_count_out = new_count; + *new_out = new; + out_ids: + *ids_count_out = ids_count; *ids_out = ids; out: mutex_unlock(&sp->members_lock); diff --git a/fs/dlm/config.h b/fs/dlm/config.h index a3170fe22090..4f1d6fce58c5 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config; int dlm_config_init(void); void dlm_config_exit(void); int dlm_node_weight(char *lsname, int nodeid); -int dlm_nodeid_list(char *lsname, int **ids_out); +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out); int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index d30ea8b433a2..5a7ac33b629c 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -37,14 +37,11 @@ #include <linux/jhash.h> #include <linux/miscdevice.h> #include <linux/mutex.h> -#include <asm/semaphore.h> #include <asm/uaccess.h> #include <linux/dlm.h> #include "config.h" -#define DLM_LOCKSPACE_LEN 64 - /* Size of the temp buffer midcomms allocates on the stack. We try to make this large enough so most messages fit. FIXME: should sctp make this unnecessary? */ @@ -133,8 +130,10 @@ struct dlm_member { struct dlm_recover { struct list_head list; - int *nodeids; + int *nodeids; /* nodeids of all members */ int node_count; + int *new; /* nodeids of new members */ + int new_count; uint64_t seq; }; @@ -580,6 +579,8 @@ static inline int dlm_no_directory(struct dlm_ls *ls) int dlm_netlink_init(void); void dlm_netlink_exit(void); void dlm_timeout_warn(struct dlm_lkb *lkb); +int dlm_plock_init(void); +void dlm_plock_exit(void); #ifdef CONFIG_DLM_DEBUG int dlm_register_debugfs(void); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 8f250ac8b928..2d3d1027ce2b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -165,7 +165,7 @@ void dlm_print_lkb(struct dlm_lkb *lkb) lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); } -void dlm_print_rsb(struct dlm_rsb *r) +static void dlm_print_rsb(struct dlm_rsb *r) { printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", r->res_nodeid, r->res_flags, r->res_first_lkid, @@ -1956,8 +1956,7 @@ static void confirm_master(struct dlm_rsb *r, int error) list_del_init(&lkb->lkb_rsb_lookup); r->res_first_lkid = lkb->lkb_id; _request_lock(r, lkb); - } else - r->res_nodeid = -1; + } break; default: diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 05d9c82e646b..88e93c80cc22 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -13,7 +13,6 @@ #ifndef __LOCK_DOT_H__ #define __LOCK_DOT_H__ -void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 58487fb95a4c..b80e0aa3cfa5 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -46,10 +46,16 @@ static int __init init_dlm(void) if (error) goto out_user; + error = dlm_plock_init(); + if (error) + goto out_netlink; + printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; + out_netlink: + dlm_netlink_exit(); out_user: dlm_user_exit(); out_debug: @@ -66,6 +72,7 @@ static int __init init_dlm(void) static void __exit exit_dlm(void) { + dlm_plock_exit(); dlm_netlink_exit(); dlm_user_exit(); dlm_config_exit(); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index fa17f5a27883..26133f05ae3a 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) } } + /* Add an entry to ls_nodes_gone for members that were removed and + then added again, so that previous state for these nodes will be + cleared during recovery. */ + + for (i = 0; i < rv->new_count; i++) { + if (!dlm_is_member(ls, rv->new[i])) + continue; + log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); + + memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + if (!memb) + return -ENOMEM; + memb->nodeid = rv->new[i]; + list_add_tail(&memb->list, &ls->ls_nodes_gone); + neg++; + } + /* add new members to ls_nodes */ for (i = 0; i < rv->node_count; i++) { @@ -314,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv = NULL, *rv_old; - int *ids = NULL; - int error, count; + int *ids = NULL, *new = NULL; + int error, ids_count = 0, new_count = 0; rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); if (!rv) return -ENOMEM; - error = count = dlm_nodeid_list(ls->ls_name, &ids); - if (error <= 0) + error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, + &new, &new_count); + if (error < 0) goto fail; spin_lock(&ls->ls_recover_lock); @@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls) } rv->nodeids = ids; - rv->node_count = count; + rv->node_count = ids_count; + rv->new = new; + rv->new_count = new_count; rv->seq = ++ls->ls_recover_seq; rv_old = ls->ls_recover_args; ls->ls_recover_args = rv; spin_unlock(&ls->ls_recover_lock); if (rv_old) { + log_error(ls, "unused recovery %llx %d", + (unsigned long long)rv_old->seq, rv_old->node_count); kfree(rv_old->nodeids); + kfree(rv_old->new); kfree(rv_old); } @@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls) fail: kfree(rv); kfree(ids); + kfree(new); return error; } diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/dlm/plock.c index 2ebd374b3143..d6d6e370f89c 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/dlm/plock.c @@ -1,17 +1,19 @@ /* - * Copyright (C) 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU General Public License version 2. */ +#include <linux/fs.h> #include <linux/miscdevice.h> -#include <linux/lock_dlm_plock.h> #include <linux/poll.h> +#include <linux/dlm.h> +#include <linux/dlm_plock.h> -#include "lock_dlm.h" - +#include "dlm_internal.h" +#include "lockspace.h" static spinlock_t ops_lock; static struct list_head send_list; @@ -22,7 +24,7 @@ static wait_queue_head_t recv_wq; struct plock_op { struct list_head list; int done; - struct gdlm_plock_info info; + struct dlm_plock_info info; }; struct plock_xop { @@ -34,22 +36,22 @@ struct plock_xop { }; -static inline void set_version(struct gdlm_plock_info *info) +static inline void set_version(struct dlm_plock_info *info) { - info->version[0] = GDLM_PLOCK_VERSION_MAJOR; - info->version[1] = GDLM_PLOCK_VERSION_MINOR; - info->version[2] = GDLM_PLOCK_VERSION_PATCH; + info->version[0] = DLM_PLOCK_VERSION_MAJOR; + info->version[1] = DLM_PLOCK_VERSION_MINOR; + info->version[2] = DLM_PLOCK_VERSION_PATCH; } -static int check_version(struct gdlm_plock_info *info) +static int check_version(struct dlm_plock_info *info) { - if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) || - (GDLM_PLOCK_VERSION_MINOR < info->version[1])) { - log_error("plock device version mismatch: " + if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || + (DLM_PLOCK_VERSION_MINOR < info->version[1])) { + log_print("plock device version mismatch: " "kernel (%u.%u.%u), user (%u.%u.%u)", - GDLM_PLOCK_VERSION_MAJOR, - GDLM_PLOCK_VERSION_MINOR, - GDLM_PLOCK_VERSION_PATCH, + DLM_PLOCK_VERSION_MAJOR, + DLM_PLOCK_VERSION_MINOR, + DLM_PLOCK_VERSION_PATCH, info->version[0], info->version[1], info->version[2]); @@ -68,25 +70,31 @@ static void send_op(struct plock_op *op) wake_up(&send_wq); } -int gdlm_plock(void *lockspace, struct lm_lockname *name, - struct file *file, int cmd, struct file_lock *fl) +int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + int cmd, struct file_lock *fl) { - struct gdlm_ls *ls = lockspace; + struct dlm_ls *ls; struct plock_op *op; struct plock_xop *xop; int rv; + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + xop = kzalloc(sizeof(*xop), GFP_KERNEL); - if (!xop) - return -ENOMEM; + if (!xop) { + rv = -ENOMEM; + goto out; + } op = &xop->xop; - op->info.optype = GDLM_PLOCK_OP_LOCK; + op->info.optype = DLM_PLOCK_OP_LOCK; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); op->info.wait = IS_SETLKW(cmd); - op->info.fsid = ls->id; - op->info.number = name->ln_number; + op->info.fsid = ls->ls_global_id; + op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; if (fl->fl_lmops && fl->fl_lmops->fl_grant) { @@ -107,12 +115,15 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, if (xop->callback == NULL) wait_event(recv_wq, (op->done != 0)); - else - return -EINPROGRESS; + else { + rv = -EINPROGRESS; + goto out; + } spin_lock(&ops_lock); if (!list_empty(&op->list)) { - printk(KERN_INFO "plock op on list\n"); + log_error(ls, "dlm_posix_lock: op on list %llx", + (unsigned long long)number); list_del(&op->list); } spin_unlock(&ops_lock); @@ -121,17 +132,19 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, if (!rv) { if (posix_lock_file_wait(file, fl) < 0) - log_error("gdlm_plock: vfs lock error %x,%llx", - name->ln_type, - (unsigned long long)name->ln_number); + log_error(ls, "dlm_posix_lock: vfs lock error %llx", + (unsigned long long)number); } kfree(xop); +out: + dlm_put_lockspace(ls); return rv; } +EXPORT_SYMBOL_GPL(dlm_posix_lock); /* Returns failure iff a succesful lock operation should be canceled */ -static int gdlm_plock_callback(struct plock_op *op) +static int dlm_plock_callback(struct plock_op *op) { struct file *file; struct file_lock *fl; @@ -142,7 +155,8 @@ static int gdlm_plock_callback(struct plock_op *op) spin_lock(&ops_lock); if (!list_empty(&op->list)) { - printk(KERN_INFO "plock op on list\n"); + log_print("dlm_plock_callback: op on list %llx", + (unsigned long long)op->info.number); list_del(&op->list); } spin_unlock(&ops_lock); @@ -165,19 +179,19 @@ static int gdlm_plock_callback(struct plock_op *op) * This can only happen in the case of kmalloc() failure. * The filesystem's own lock is the authoritative lock, * so a failure to get the lock locally is not a disaster. - * As long as GFS cannot reliably cancel locks (especially + * As long as the fs cannot reliably cancel locks (especially * in a low-memory situation), we're better off ignoring * this failure than trying to recover. */ - log_error("gdlm_plock: vfs lock error file %p fl %p", - file, fl); + log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", + (unsigned long long)op->info.number, file, fl); } rv = notify(flc, NULL, 0); if (rv) { /* XXX: We need to cancel the fs lock here: */ - printk("gfs2 lock granted after lock request failed;" - " dangling lock!\n"); + log_print("dlm_plock_callback: lock granted after lock request " + "failed; dangling lock!\n"); goto out; } @@ -186,25 +200,31 @@ out: return rv; } -int gdlm_punlock(void *lockspace, struct lm_lockname *name, - struct file *file, struct file_lock *fl) +int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) { - struct gdlm_ls *ls = lockspace; + struct dlm_ls *ls; struct plock_op *op; int rv; + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + op = kzalloc(sizeof(*op), GFP_KERNEL); - if (!op) - return -ENOMEM; + if (!op) { + rv = -ENOMEM; + goto out; + } if (posix_lock_file_wait(file, fl) < 0) - log_error("gdlm_punlock: vfs unlock error %x,%llx", - name->ln_type, (unsigned long long)name->ln_number); + log_error(ls, "dlm_posix_unlock: vfs unlock error %llx", + (unsigned long long)number); - op->info.optype = GDLM_PLOCK_OP_UNLOCK; + op->info.optype = DLM_PLOCK_OP_UNLOCK; op->info.pid = fl->fl_pid; - op->info.fsid = ls->id; - op->info.number = name->ln_number; + op->info.fsid = ls->ls_global_id; + op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; if (fl->fl_lmops && fl->fl_lmops->fl_grant) @@ -217,7 +237,8 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, spin_lock(&ops_lock); if (!list_empty(&op->list)) { - printk(KERN_INFO "punlock op on list\n"); + log_error(ls, "dlm_posix_unlock: op on list %llx", + (unsigned long long)number); list_del(&op->list); } spin_unlock(&ops_lock); @@ -228,25 +249,34 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, rv = 0; kfree(op); +out: + dlm_put_lockspace(ls); return rv; } +EXPORT_SYMBOL_GPL(dlm_posix_unlock); -int gdlm_plock_get(void *lockspace, struct lm_lockname *name, - struct file *file, struct file_lock *fl) +int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) { - struct gdlm_ls *ls = lockspace; + struct dlm_ls *ls; struct plock_op *op; int rv; + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + op = kzalloc(sizeof(*op), GFP_KERNEL); - if (!op) - return -ENOMEM; + if (!op) { + rv = -ENOMEM; + goto out; + } - op->info.optype = GDLM_PLOCK_OP_GET; + op->info.optype = DLM_PLOCK_OP_GET; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); - op->info.fsid = ls->id; - op->info.number = name->ln_number; + op->info.fsid = ls->ls_global_id; + op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; if (fl->fl_lmops && fl->fl_lmops->fl_grant) @@ -259,7 +289,8 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, spin_lock(&ops_lock); if (!list_empty(&op->list)) { - printk(KERN_INFO "plock_get op on list\n"); + log_error(ls, "dlm_posix_get: op on list %llx", + (unsigned long long)number); list_del(&op->list); } spin_unlock(&ops_lock); @@ -281,14 +312,17 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, } kfree(op); +out: + dlm_put_lockspace(ls); return rv; } +EXPORT_SYMBOL_GPL(dlm_posix_get); /* a read copies out one plock request from the send list */ static ssize_t dev_read(struct file *file, char __user *u, size_t count, loff_t *ppos) { - struct gdlm_plock_info info; + struct dlm_plock_info info; struct plock_op *op = NULL; if (count < sizeof(info)) @@ -315,7 +349,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, static ssize_t dev_write(struct file *file, const char __user *u, size_t count, loff_t *ppos) { - struct gdlm_plock_info info; + struct dlm_plock_info info; struct plock_op *op; int found = 0; @@ -345,12 +379,12 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, struct plock_xop *xop; xop = (struct plock_xop *)op; if (xop->callback) - count = gdlm_plock_callback(op); + count = dlm_plock_callback(op); else wake_up(&recv_wq); } else - printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid, - (unsigned long long)info.number); + log_print("dev_write no op %x %llx", info.fsid, + (unsigned long long)info.number); return count; } @@ -377,11 +411,11 @@ static const struct file_operations dev_fops = { static struct miscdevice plock_dev_misc = { .minor = MISC_DYNAMIC_MINOR, - .name = GDLM_PLOCK_MISC_NAME, + .name = DLM_PLOCK_MISC_NAME, .fops = &dev_fops }; -int gdlm_plock_init(void) +int dlm_plock_init(void) { int rv; @@ -393,14 +427,13 @@ int gdlm_plock_init(void) rv = misc_register(&plock_dev_misc); if (rv) - printk(KERN_INFO "gdlm_plock_init: misc_register failed %d", - rv); + log_print("dlm_plock_init: misc_register failed %d", rv); return rv; } -void gdlm_plock_exit(void) +void dlm_plock_exit(void) { if (misc_deregister(&plock_dev_misc) < 0) - printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed"); + log_print("dlm_plock_exit: misc_deregister failed"); } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 997f9531d594..fd677c8c3d3b 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls *ls) if (rv) { ls_recover(ls, rv); kfree(rv->nodeids); + kfree(rv->new); kfree(rv); } } diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 5deb8b74e649..08f647d8188d 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -253,7 +253,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or * it's already running too large debt (max_debt). - * Parent's group is prefered, if it doesn't satisfy these + * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c62006805427..b8a2990bab83 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -239,7 +239,7 @@ no_block: * @inode: owner * @ind: descriptor of indirect block. * - * This function returns the prefered place for block allocation. + * This function returns the preferred place for block allocation. * It is used when heuristic for sequential allocation fails. * Rules are: * + if there is a block to the left of our position - allocate near it. @@ -283,7 +283,7 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) } /** - * ext2_find_goal - find a prefered place for allocation. + * ext2_find_goal - find a preferred place for allocation. * @inode: owner * @block: block we want * @partial: pointer to the last triple within a chain diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index b8ea11fee5c6..de876fa793e1 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -12,6 +12,7 @@ #include <linux/time.h> #include <linux/sched.h> #include <linux/compat.h> +#include <linux/mount.h> #include <linux/smp_lock.h> #include <asm/current.h> #include <asm/uaccess.h> @@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct ext2_inode_info *ei = EXT2_I(inode); unsigned int flags; unsigned short rsv_window_size; + int ret; ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); @@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case EXT2_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) - return -EROFS; + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; - if (!is_owner_or_cap(inode)) - return -EACCES; + if (!is_owner_or_cap(inode)) { + ret = -EACCES; + goto setflags_out; + } - if (get_user(flags, (int __user *) arg)) - return -EFAULT; + if (get_user(flags, (int __user *) arg)) { + ret = -EFAULT; + goto setflags_out; + } if (!S_ISDIR(inode->i_mode)) flags &= ~EXT2_DIRSYNC_FL; @@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) { mutex_unlock(&inode->i_mutex); - return -EPERM; + ret = -EPERM; + goto setflags_out; } oldflags = ei->i_flags; @@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); - return -EPERM; + ret = -EPERM; + goto setflags_out; } } @@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ext2_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - return 0; +setflags_out: + mnt_drop_write(filp->f_path.mnt); + return ret; } case EXT2_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *) arg); case EXT2_IOC_SETVERSION: if (!is_owner_or_cap(inode)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(inode->i_generation, (int __user *) arg)) - return -EFAULT; - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - return 0; + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; + if (get_user(inode->i_generation, (int __user *) arg)) { + ret = -EFAULT; + } else { + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + } + mnt_drop_write(filp->f_path.mnt); + return ret; case EXT2_IOC_GETRSVSZ: if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode) @@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) - return -EROFS; - - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + if (!is_owner_or_cap(inode)) return -EACCES; if (get_user(rsv_window_size, (int __user *)arg)) return -EFAULT; + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; + if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; @@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) rsv->rsv_goal_size = rsv_window_size; } mutex_unlock(&ei->truncate_mutex); + mnt_drop_write(filp->f_path.mnt); return 0; } default: diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 4f4020c54683..96dd5573e49b 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -239,7 +239,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or * it's already running too large debt (max_debt). - * Parent's group is prefered, if it doesn't satisfy these + * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index eb95670a27eb..c683609b0e3a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -392,7 +392,7 @@ no_block: * @inode: owner * @ind: descriptor of indirect block. * - * This function returns the prefered place for block allocation. + * This function returns the preferred place for block allocation. * It is used when heuristic for sequential allocation fails. * Rules are: * + if there is a block to the left of our position - allocate near it. @@ -436,12 +436,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) } /** - * ext3_find_goal - find a prefered place for allocation. + * ext3_find_goal - find a preferred place for allocation. * @inode: owner * @block: block we want * @partial: pointer to the last triple within a chain * - * Normally this function find the prefered place for block allocation, + * Normally this function find the preferred place for block allocation, * returns it. */ diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 023a070f55f1..0d0c70151642 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -12,6 +12,7 @@ #include <linux/capability.h> #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> +#include <linux/mount.h> #include <linux/time.h> #include <linux/compat.h> #include <linux/smp_lock.h> @@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned int oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) - return -EROFS; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; - if (!is_owner_or_cap(inode)) - return -EACCES; + if (!is_owner_or_cap(inode)) { + err = -EACCES; + goto flags_out; + } - if (get_user(flags, (int __user *) arg)) - return -EFAULT; + if (get_user(flags, (int __user *) arg)) { + err = -EFAULT; + goto flags_out; + } if (!S_ISDIR(inode->i_mode)) flags &= ~EXT3_DIRSYNC_FL; @@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) { mutex_unlock(&inode->i_mutex); - return -EPERM; + err = -EPERM; + goto flags_out; } oldflags = ei->i_flags; @@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); - return -EPERM; + err = -EPERM; + goto flags_out; } } @@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) { mutex_unlock(&inode->i_mutex); - return -EPERM; + err = -EPERM; + goto flags_out; } } @@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, handle = ext3_journal_start(inode, 1); if (IS_ERR(handle)) { mutex_unlock(&inode->i_mutex); - return PTR_ERR(handle); + err = PTR_ERR(handle); + goto flags_out; } if (IS_SYNC(inode)) handle->h_sync = 1; @@ -115,6 +125,8 @@ flags_err: if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) err = ext3_change_inode_journal_flag(inode, jflag); mutex_unlock(&inode->i_mutex); +flags_out: + mnt_drop_write(filp->f_path.mnt); return err; } case EXT3_IOC_GETVERSION: @@ -129,14 +141,18 @@ flags_err: if (!is_owner_or_cap(inode)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(generation, (int __user *) arg)) - return -EFAULT; - + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + if (get_user(generation, (int __user *) arg)) { + err = -EFAULT; + goto setversion_out; + } handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto setversion_out; + } err = ext3_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode->i_ctime = CURRENT_TIME_SEC; @@ -144,6 +160,8 @@ flags_err: err = ext3_mark_iloc_dirty(handle, inode, &iloc); } ext3_journal_stop(handle); +setversion_out: + mnt_drop_write(filp->f_path.mnt); return err; } #ifdef CONFIG_JBD_DEBUG @@ -179,18 +197,24 @@ flags_err: } return -ENOTTY; case EXT3_IOC_SETRSVSZ: { + int err; if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) - return -EROFS; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; - if (!is_owner_or_cap(inode)) - return -EACCES; + if (!is_owner_or_cap(inode)) { + err = -EACCES; + goto setrsvsz_out; + } - if (get_user(rsv_window_size, (int __user *)arg)) - return -EFAULT; + if (get_user(rsv_window_size, (int __user *)arg)) { + err = -EFAULT; + goto setrsvsz_out; + } if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; @@ -208,7 +232,9 @@ flags_err: rsv->rsv_goal_size = rsv_window_size; } mutex_unlock(&ei->truncate_mutex); - return 0; +setrsvsz_out: + mnt_drop_write(filp->f_path.mnt); + return err; } case EXT3_IOC_GROUP_EXTEND: { ext3_fsblk_t n_blocks_count; @@ -218,17 +244,20 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - - if (get_user(n_blocks_count, (__u32 __user *)arg)) - return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + if (get_user(n_blocks_count, (__u32 __user *)arg)) { + err = -EFAULT; + goto group_extend_out; + } err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); journal_lock_updates(EXT3_SB(sb)->s_journal); journal_flush(EXT3_SB(sb)->s_journal); journal_unlock_updates(EXT3_SB(sb)->s_journal); - +group_extend_out: + mnt_drop_write(filp->f_path.mnt); return err; } case EXT3_IOC_GROUP_ADD: { @@ -239,18 +268,22 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, - sizeof(input))) - return -EFAULT; + sizeof(input))) { + err = -EFAULT; + goto group_add_out; + } err = ext3_group_add(sb, &input); journal_lock_updates(EXT3_SB(sb)->s_journal); journal_flush(EXT3_SB(sb)->s_journal); journal_unlock_updates(EXT3_SB(sb)->s_journal); - +group_add_out: + mnt_drop_write(filp->f_path.mnt); return err; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8036b9b5376b..486e46a3918d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -305,7 +305,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or * it's already running too large debt (max_debt). - * Parent's group is prefered, if it doesn't satisfy these + * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 945cbf6cb1fc..8fab233cb05f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -382,7 +382,7 @@ no_block: * @inode: owner * @ind: descriptor of indirect block. * - * This function returns the prefered place for block allocation. + * This function returns the preferred place for block allocation. * It is used when heuristic for sequential allocation fails. * Rules are: * + if there is a block to the left of our position - allocate near it. @@ -432,12 +432,12 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) } /** - * ext4_find_goal - find a prefered place for allocation. + * ext4_find_goal - find a preferred place for allocation. * @inode: owner * @block: block we want * @partial: pointer to the last triple within a chain * - * Normally this function find the prefered place for block allocation, + * Normally this function find the preferred place for block allocation, * returns it. */ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 2ed7c37f897e..25b13ede8086 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -15,6 +15,7 @@ #include <linux/time.h> #include <linux/compat.h> #include <linux/smp_lock.h> +#include <linux/mount.h> #include <asm/uaccess.h> int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, @@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned int oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) - return -EROFS; - if (!is_owner_or_cap(inode)) return -EACCES; if (get_user(flags, (int __user *) arg)) return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + if (!S_ISDIR(inode->i_mode)) flags &= ~EXT4_DIRSYNC_FL; + err = -EPERM; mutex_lock(&inode->i_mutex); /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - mutex_unlock(&inode->i_mutex); - return -EPERM; - } + if (IS_NOQUOTA(inode)) + goto flags_out; + oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ @@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - return -EPERM; - } + if (!capable(CAP_LINUX_IMMUTABLE)) + goto flags_out; } /* @@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * the relevant capability. */ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) { - mutex_unlock(&inode->i_mutex); - return -EPERM; - } + if (!capable(CAP_SYS_RESOURCE)) + goto flags_out; } - handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { - mutex_unlock(&inode->i_mutex); - return PTR_ERR(handle); + err = PTR_ERR(handle); + goto flags_out; } if (IS_SYNC(inode)) handle->h_sync = 1; @@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: ext4_journal_stop(handle); - if (err) { - mutex_unlock(&inode->i_mutex); - return err; - } + if (err) + goto flags_out; if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) err = ext4_change_inode_journal_flag(inode, jflag); +flags_out: mutex_unlock(&inode->i_mutex); + mnt_drop_write(filp->f_path.mnt); return err; } case EXT4_IOC_GETVERSION: @@ -129,14 +126,20 @@ flags_err: if (!is_owner_or_cap(inode)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(generation, (int __user *) arg)) - return -EFAULT; + + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + if (get_user(generation, (int __user *) arg)) { + err = -EFAULT; + goto setversion_out; + } handle = ext4_journal_start(inode, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto setversion_out; + } err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode->i_ctime = ext4_current_time(inode); @@ -144,6 +147,8 @@ flags_err: err = ext4_mark_iloc_dirty(handle, inode, &iloc); } ext4_journal_stop(handle); +setversion_out: + mnt_drop_write(filp->f_path.mnt); return err; } #ifdef CONFIG_JBD2_DEBUG @@ -179,19 +184,21 @@ flags_err: } return -ENOTTY; case EXT4_IOC_SETRSVSZ: { + int err; if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) - return -EROFS; - if (!is_owner_or_cap(inode)) return -EACCES; if (get_user(rsv_window_size, (int __user *)arg)) return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; @@ -208,6 +215,7 @@ flags_err: rsv->rsv_goal_size = rsv_window_size; } up_write(&ei->i_data_sem); + mnt_drop_write(filp->f_path.mnt); return 0; } case EXT4_IOC_GROUP_EXTEND: { @@ -218,16 +226,18 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(n_blocks_count, (__u32 __user *)arg)) return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + mnt_drop_write(filp->f_path.mnt); return err; } @@ -239,17 +249,19 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + err = ext4_group_add(sb, &input); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + mnt_drop_write(filp->f_path.mnt); return err; } diff --git a/fs/fat/file.c b/fs/fat/file.c index c614175876e0..2a3bed967041 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -8,6 +8,7 @@ #include <linux/capability.h> #include <linux/module.h> +#include <linux/mount.h> #include <linux/time.h> #include <linux/msdos_fs.h> #include <linux/smp_lock.h> @@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, mutex_lock(&inode->i_mutex); - if (IS_RDONLY(inode)) { - err = -EROFS; - goto up; - } + err = mnt_want_write(filp->f_path.mnt); + if (err) + goto up_no_drop_write; /* * ATTR_VOLUME and ATTR_DIR cannot be changed; this also @@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; mark_inode_dirty(inode); - up: +up: + mnt_drop_write(filp->f_path.mnt); +up_no_drop_write: mutex_unlock(&inode->i_mutex); return err; } diff --git a/fs/file_table.c b/fs/file_table.c index 986ff4ed0a7c..7a0a9b872251 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { percpu_counter_dec(&nr_files); + file_check_state(f); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -199,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, file->f_mapping = dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; + + /* + * These mounts don't really matter in practice + * for r/o bind mounts. They aren't userspace- + * visible. We do this for consistency, and so + * that we can do debugging checks at __fput() + */ + if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { + file_take_write(file); + error = mnt_want_write(mnt); + WARN_ON(error); + } return error; } EXPORT_SYMBOL(init_file); @@ -211,6 +224,31 @@ void fput(struct file *file) EXPORT_SYMBOL(fput); +/** + * drop_file_write_access - give up ability to write to a file + * @file: the file to which we will stop writing + * + * This is a central place which will give up the ability + * to write to @file, along with access to write through + * its vfsmount. + */ +void drop_file_write_access(struct file *file) +{ + struct vfsmount *mnt = file->f_path.mnt; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + + put_write_access(inode); + + if (special_file(inode->i_mode)) + return; + if (file_check_writeable(file) != 0) + return; + mnt_drop_write(mnt); + file_release_write(file); +} +EXPORT_SYMBOL_GPL(drop_file_write_access); + /* __fput is called from task context when aio completion releases the last * last use of a struct file *. Do not use otherwise. */ @@ -236,10 +274,10 @@ void __fput(struct file *file) if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) cdev_put(inode->i_cdev); fops_put(file->f_op); - if (file->f_mode & FMODE_WRITE) - put_write_access(inode); put_pid(file->f_owner.pid); file_kill(file); + if (file->f_mode & FMODE_WRITE) + drop_file_write_access(file); file->f_path.dentry = NULL; file->f_path.mnt = NULL; file_free(file); diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile index 89b93b6b45cf..2609bb6cd013 100644 --- a/fs/gfs2/locking/dlm/Makefile +++ b/fs/gfs2/locking/dlm/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o -lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o +lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 58fcf8c5bf39..a243cf69c54e 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -25,6 +25,7 @@ #include <net/sock.h> #include <linux/dlm.h> +#include <linux/dlm_plock.h> #include <linux/lm_interface.h> /* @@ -173,17 +174,6 @@ void gdlm_cancel(void *); int gdlm_hold_lvb(void *, char **); void gdlm_unhold_lvb(void *, char *); -/* plock.c */ - -int gdlm_plock_init(void); -void gdlm_plock_exit(void); -int gdlm_plock(void *, struct lm_lockname *, struct file *, int, - struct file_lock *); -int gdlm_plock_get(void *, struct lm_lockname *, struct file *, - struct file_lock *); -int gdlm_punlock(void *, struct lm_lockname *, struct file *, - struct file_lock *); - /* mount.c */ extern const struct lm_lockops gdlm_ops; diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index 36a225850bd8..b9a03a7ff801 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c @@ -28,13 +28,6 @@ static int __init init_lock_dlm(void) return error; } - error = gdlm_plock_init(); - if (error) { - gdlm_sysfs_exit(); - gfs2_unregister_lockproto(&gdlm_ops); - return error; - } - printk(KERN_INFO "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; @@ -42,7 +35,6 @@ static int __init init_lock_dlm(void) static void __exit exit_lock_dlm(void) { - gdlm_plock_exit(); gdlm_sysfs_exit(); gfs2_unregister_lockproto(&gdlm_ops); } diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index f2efff424224..470bdf650b50 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -236,6 +236,27 @@ static void gdlm_withdraw(void *lockspace) gdlm_kobject_release(ls); } +static int gdlm_plock(void *lockspace, struct lm_lockname *name, + struct file *file, int cmd, struct file_lock *fl) +{ + struct gdlm_ls *ls = lockspace; + return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl); +} + +static int gdlm_punlock(void *lockspace, struct lm_lockname *name, + struct file *file, struct file_lock *fl) +{ + struct gdlm_ls *ls = lockspace; + return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl); +} + +static int gdlm_plock_get(void *lockspace, struct lm_lockname *name, + struct file *file, struct file_lock *fl) +{ + struct gdlm_ls *ls = lockspace; + return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl); +} + const struct lm_lockops gdlm_ops = { .lm_proto_name = "lock_dlm", .lm_mount = gdlm_mount, diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index b60c0affbec5..f457d2ca51ab 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -14,6 +14,7 @@ #include <linux/capability.h> #include <linux/fs.h> +#include <linux/mount.h> #include <linux/sched.h> #include <linux/xattr.h> #include <asm/uaccess.h> @@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { - if (IS_RDONLY(inode)) - return -EROFS; - - if (!is_owner_or_cap(inode)) - return -EACCES; - - if (get_user(flags, (int __user *)arg)) - return -EFAULT; - + int err = 0; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + + if (!is_owner_or_cap(inode)) { + err = -EACCES; + goto setflags_out; + } + if (get_user(flags, (int __user *)arg)) { + err = -EFAULT; + goto setflags_out; + } if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { - if (!capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; + if (!capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto setflags_out; + } } /* don't silently ignore unsupported ext2 flags */ - if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) - return -EOPNOTSUPP; - + if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { + err = -EOPNOTSUPP; + goto setflags_out; + } if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ inode->i_flags |= S_IMMUTABLE; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; @@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - return 0; +setflags_out: + mnt_drop_write(filp->f_path.mnt); + return err; } default: return -ENOTTY; diff --git a/fs/inode.c b/fs/inode.c index 53245ffcf93d..27ee1af50d02 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) struct inode *inode = dentry->d_inode; struct timespec now; - if (inode->i_flags & S_NOATIME) + if (mnt_want_write(mnt)) return; + if (inode->i_flags & S_NOATIME) + goto out; if (IS_NOATIME(inode)) - return; + goto out; if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) - return; + goto out; - /* - * We may have a NULL vfsmount when coming from NFSD - */ - if (mnt) { - if (mnt->mnt_flags & MNT_NOATIME) - return; - if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) - return; - - if (mnt->mnt_flags & MNT_RELATIME) { - /* - * With relative atime, only update atime if the - * previous atime is earlier than either the ctime or - * mtime. - */ - if (timespec_compare(&inode->i_mtime, - &inode->i_atime) < 0 && - timespec_compare(&inode->i_ctime, - &inode->i_atime) < 0) - return; - } + if (mnt->mnt_flags & MNT_NOATIME) + goto out; + if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) + goto out; + if (mnt->mnt_flags & MNT_RELATIME) { + /* + * With relative atime, only update atime if the previous + * atime is earlier than either the ctime or mtime. + */ + if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 && + timespec_compare(&inode->i_ctime, &inode->i_atime) < 0) + goto out; } now = current_fs_time(inode->i_sb); if (timespec_equal(&inode->i_atime, &now)) - return; + goto out; inode->i_atime = now; mark_inode_dirty_sync(inode); +out: + mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1255,10 +1250,13 @@ void file_update_time(struct file *file) struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; int sync_it = 0; + int err; if (IS_NOCMTIME(inode)) return; - if (IS_RDONLY(inode)) + + err = mnt_want_write(file->f_path.mnt); + if (err) return; now = current_fs_time(inode->i_sb); @@ -1279,6 +1277,7 @@ void file_update_time(struct file *file) if (sync_it) mark_inode_dirty_sync(inode); + mnt_drop_write(file->f_path.mnt); } EXPORT_SYMBOL(file_update_time); diff --git a/fs/internal.h b/fs/internal.h index 392e8ccd6fc4..80aa9a023372 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -43,3 +43,14 @@ extern void __init chrdev_init(void); * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); + +extern void free_vfsmnt(struct vfsmount *); +extern struct vfsmount *alloc_vfsmnt(const char *); +extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); +extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, + struct vfsmount *); +extern void release_mounts(struct list_head *); +extern void umount_tree(struct vfsmount *, int, struct list_head *); +extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); + +extern void __init mnt_init(void); diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 0b78fdc9773b..a841f4973a74 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -15,7 +15,7 @@ #include <linux/version.h> #include <linux/rbtree.h> #include <linux/posix_acl.h> -#include <asm/semaphore.h> +#include <linux/semaphore.h> struct jffs2_inode_info { /* We need an internal mutex similar to inode->i_mutex. diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 3a2197f3c812..18fca2b9e531 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -16,7 +16,7 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/completion.h> -#include <asm/semaphore.h> +#include <linux/semaphore.h> #include <linux/timer.h> #include <linux/wait.h> #include <linux/list.h> diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index a1f8e375ad21..afe222bf300f 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -8,6 +8,7 @@ #include <linux/fs.h> #include <linux/ctype.h> #include <linux/capability.h> +#include <linux/mount.h> #include <linux/time.h> #include <linux/sched.h> #include <asm/current.h> @@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return put_user(flags, (int __user *) arg); case JFS_IOC_SETFLAGS: { unsigned int oldflags; + int err; - if (IS_RDONLY(inode)) - return -EROFS; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; - if (!is_owner_or_cap(inode)) - return -EACCES; - - if (get_user(flags, (int __user *) arg)) - return -EFAULT; + if (!is_owner_or_cap(inode)) { + err = -EACCES; + goto setflags_out; + } + if (get_user(flags, (int __user *) arg)) { + err = -EFAULT; + goto setflags_out; + } flags = jfs_map_ext2(flags, 1); if (!S_ISDIR(inode->i_mode)) flags &= ~JFS_DIRSYNC_FL; /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) - return -EPERM; + if (IS_NOQUOTA(inode)) { + err = -EPERM; + goto setflags_out; + } /* Lock against other parallel changes of flags */ mutex_lock(&inode->i_mutex); @@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); - return -EPERM; + err = -EPERM; + goto setflags_out; } } @@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) mutex_unlock(&inode->i_mutex); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - return 0; +setflags_out: + mnt_drop_write(filp->f_path.mnt); + return err; } default: return -ENOTTY; diff --git a/fs/locks.c b/fs/locks.c index 43c0af21a0c5..592faadbcec1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -127,7 +127,6 @@ #include <linux/rcupdate.h> #include <linux/pid_namespace.h> -#include <asm/semaphore.h> #include <asm/uaccess.h> #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) diff --git a/fs/namei.c b/fs/namei.c index 8cf9bb9c2fc0..e179f71bfcb0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) return -EACCES; flag &= ~O_TRUNC; - } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) - return -EROFS; + } error = vfs_permission(nd, acc_mode); if (error) @@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) return 0; } -static int open_namei_create(struct nameidata *nd, struct path *path, +/* + * Be careful about ever adding any more callers of this + * function. Its flags must be in the namei format, not + * what get passed to sys_open(). + */ +static int __open_namei_create(struct nameidata *nd, struct path *path, int flag, int mode) { int error; @@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path, } /* - * open_namei() + * Note that while the flag value (low two bits) for sys_open means: + * 00 - read-only + * 01 - write-only + * 10 - read-write + * 11 - special + * it is changed into + * 00 - no permissions needed + * 01 - read-permission + * 10 - write-permission + * 11 - read-write + * for the internal routines (ie open_namei()/follow_link() etc) + * This is more logical, and also allows the 00 "no perm needed" + * to be used for symlinks (where the permissions are checked + * later). * - * namei for open - this is in fact almost the whole open-routine. - * - * Note that the low bits of "flag" aren't the same as in the open - * system call - they are 00 - no permissions needed - * 01 - read permission needed - * 10 - write permission needed - * 11 - read/write permissions needed - * which is a lot more logical, and also allows the "no perm" needed - * for symlinks (where the permissions are checked later). - * SMP-safe +*/ +static inline int open_to_namei_flags(int flag) +{ + if ((flag+1) & O_ACCMODE) + flag++; + return flag; +} + +static int open_will_write_to_fs(int flag, struct inode *inode) +{ + /* + * We'll never write to the fs underlying + * a device file. + */ + if (special_file(inode->i_mode)) + return 0; + return (flag & O_TRUNC); +} + +/* + * Note that the low bits of the passed in "open_flag" + * are not the same as in the local variable "flag". See + * open_to_namei_flags() for more details. */ -int open_namei(int dfd, const char *pathname, int flag, - int mode, struct nameidata *nd) +struct file *do_filp_open(int dfd, const char *pathname, + int open_flag, int mode) { + struct file *filp; + struct nameidata nd; int acc_mode, error; struct path path; struct dentry *dir; int count = 0; + int will_write; + int flag = open_to_namei_flags(open_flag); acc_mode = ACC_MODE(flag); @@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag, */ if (!(flag & O_CREAT)) { error = path_lookup_open(dfd, pathname, lookup_flags(flag), - nd, flag); + &nd, flag); if (error) - return error; + return ERR_PTR(error); goto ok; } /* * Create - we need to know the parent. */ - error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); + error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, + &nd, flag, mode); if (error) - return error; + return ERR_PTR(error); /* * We have the parent and last component. First of all, check @@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag, * will not do. */ error = -EISDIR; - if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) + if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) goto exit; - dir = nd->path.dentry; - nd->flags &= ~LOOKUP_PARENT; + dir = nd.path.dentry; + nd.flags &= ~LOOKUP_PARENT; mutex_lock(&dir->d_inode->i_mutex); - path.dentry = lookup_hash(nd); - path.mnt = nd->path.mnt; + path.dentry = lookup_hash(&nd); + path.mnt = nd.path.mnt; do_last: error = PTR_ERR(path.dentry); @@ -1768,18 +1803,31 @@ do_last: goto exit; } - if (IS_ERR(nd->intent.open.file)) { - mutex_unlock(&dir->d_inode->i_mutex); - error = PTR_ERR(nd->intent.open.file); - goto exit_dput; + if (IS_ERR(nd.intent.open.file)) { + error = PTR_ERR(nd.intent.open.file); + goto exit_mutex_unlock; } /* Negative dentry, just create the file */ if (!path.dentry->d_inode) { - error = open_namei_create(nd, &path, flag, mode); + /* + * This write is needed to ensure that a + * ro->rw transition does not occur between + * the time when the file is created and when + * a permanent write count is taken through + * the 'struct file' in nameidata_to_filp(). + */ + error = mnt_want_write(nd.path.mnt); if (error) + goto exit_mutex_unlock; + error = __open_namei_create(&nd, &path, flag, mode); + if (error) { + mnt_drop_write(nd.path.mnt); goto exit; - return 0; + } + filp = nameidata_to_filp(&nd, open_flag); + mnt_drop_write(nd.path.mnt); + return filp; } /* @@ -1804,23 +1852,52 @@ do_last: if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) goto do_link; - path_to_nameidata(&path, nd); + path_to_nameidata(&path, &nd); error = -EISDIR; if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) goto exit; ok: - error = may_open(nd, acc_mode, flag); - if (error) + /* + * Consider: + * 1. may_open() truncates a file + * 2. a rw->ro mount transition occurs + * 3. nameidata_to_filp() fails due to + * the ro mount. + * That would be inconsistent, and should + * be avoided. Taking this mnt write here + * ensures that (2) can not occur. + */ + will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); + if (will_write) { + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit; + } + error = may_open(&nd, acc_mode, flag); + if (error) { + if (will_write) + mnt_drop_write(nd.path.mnt); goto exit; - return 0; + } + filp = nameidata_to_filp(&nd, open_flag); + /* + * It is now safe to drop the mnt write + * because the filp has had a write taken + * on its behalf. + */ + if (will_write) + mnt_drop_write(nd.path.mnt); + return filp; +exit_mutex_unlock: + mutex_unlock(&dir->d_inode->i_mutex); exit_dput: - path_put_conditional(&path, nd); + path_put_conditional(&path, &nd); exit: - if (!IS_ERR(nd->intent.open.file)) - release_open_intent(nd); - path_put(&nd->path); - return error; + if (!IS_ERR(nd.intent.open.file)) + release_open_intent(&nd); + path_put(&nd.path); + return ERR_PTR(error); do_link: error = -ELOOP; @@ -1836,43 +1913,60 @@ do_link: * stored in nd->last.name and we will have to putname() it when we * are done. Procfs-like symlinks just set LAST_BIND. */ - nd->flags |= LOOKUP_PARENT; - error = security_inode_follow_link(path.dentry, nd); + nd.flags |= LOOKUP_PARENT; + error = security_inode_follow_link(path.dentry, &nd); if (error) goto exit_dput; - error = __do_follow_link(&path, nd); + error = __do_follow_link(&path, &nd); if (error) { /* Does someone understand code flow here? Or it is only * me so stupid? Anathema to whoever designed this non-sense * with "intent.open". */ - release_open_intent(nd); - return error; + release_open_intent(&nd); + return ERR_PTR(error); } - nd->flags &= ~LOOKUP_PARENT; - if (nd->last_type == LAST_BIND) + nd.flags &= ~LOOKUP_PARENT; + if (nd.last_type == LAST_BIND) goto ok; error = -EISDIR; - if (nd->last_type != LAST_NORM) + if (nd.last_type != LAST_NORM) goto exit; - if (nd->last.name[nd->last.len]) { - __putname(nd->last.name); + if (nd.last.name[nd.last.len]) { + __putname(nd.last.name); goto exit; } error = -ELOOP; if (count++==32) { - __putname(nd->last.name); + __putname(nd.last.name); goto exit; } - dir = nd->path.dentry; + dir = nd.path.dentry; mutex_lock(&dir->d_inode->i_mutex); - path.dentry = lookup_hash(nd); - path.mnt = nd->path.mnt; - __putname(nd->last.name); + path.dentry = lookup_hash(&nd); + path.mnt = nd.path.mnt; + __putname(nd.last.name); goto do_last; } /** + * filp_open - open file and return file pointer + * + * @filename: path to open + * @flags: open flags as per the open(2) second argument + * @mode: mode for the new file if O_CREAT is set, else ignored + * + * This is the helper to open a file from kernelspace if you really + * have to. But in generally you should not do this, so please move + * along, nothing to see here.. + */ +struct file *filp_open(const char *filename, int flags, int mode) +{ + return do_filp_open(AT_FDCWD, filename, flags, mode); +} +EXPORT_SYMBOL(filp_open); + +/** * lookup_create - lookup a dentry, creating it if it doesn't exist * @nd: nameidata info * @is_dir: directory flag @@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) return error; } +static int may_mknod(mode_t mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + case 0: /* zero mode translates to S_IFREG */ + return 0; + case S_IFDIR: + return -EPERM; + default: + return -EINVAL; + } +} + asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, unsigned dev) { @@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, if (error) goto out; dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto out_unlock; + } if (!IS_POSIXACL(nd.path.dentry->d_inode)) mode &= ~current->fs->umask; - if (!IS_ERR(dentry)) { - switch (mode & S_IFMT) { + error = may_mknod(mode); + if (error) + goto out_dput; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; + switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); break; @@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, case S_IFIFO: case S_IFSOCK: error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); break; - case S_IFDIR: - error = -EPERM; - break; - default: - error = -EINVAL; - } - dput(dentry); } + mnt_drop_write(nd.path.mnt); +out_dput: + dput(dentry); +out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); out: @@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) if (!IS_POSIXACL(nd.path.dentry->d_inode)) mode &= ~current->fs->umask; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); + mnt_drop_write(nd.path.mnt); +out_dput: dput(dentry); out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); @@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname) error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit3; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); + mnt_drop_write(nd.path.mnt); +exit3: dput(dentry); exit2: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); @@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname) inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit2; error = vfs_unlink(nd.path.dentry->d_inode, dentry); + mnt_drop_write(nd.path.mnt); exit2: dput(dentry); } @@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname, if (IS_ERR(dentry)) goto out_unlock; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); + mnt_drop_write(nd.path.mnt); +out_dput: dput(dentry); out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); @@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_unlock; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); + mnt_drop_write(nd.path.mnt); +out_dput: dput(new_dentry); out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); @@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname, if (new_dentry == trap) goto exit5; + error = mnt_want_write(oldnd.path.mnt); + if (error) + goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); exit4: diff --git a/fs/namespace.c b/fs/namespace.c index 94f026ec990a..0505fb61aa74 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -17,6 +17,7 @@ #include <linux/quotaops.h> #include <linux/acct.h> #include <linux/capability.h> +#include <linux/cpumask.h> #include <linux/module.h> #include <linux/sysfs.h> #include <linux/seq_file.h> @@ -26,6 +27,7 @@ #include <linux/mount.h> #include <linux/ramfs.h> #include <linux/log2.h> +#include <linux/idr.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include "pnode.h" @@ -38,6 +40,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; +static DEFINE_IDA(mnt_id_ida); +static DEFINE_IDA(mnt_group_ida); static struct list_head *mount_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; @@ -55,10 +59,65 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) return tmp & (HASH_SIZE - 1); } +#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) + +/* allocation is serialized by namespace_sem */ +static int mnt_alloc_id(struct vfsmount *mnt) +{ + int res; + +retry: + ida_pre_get(&mnt_id_ida, GFP_KERNEL); + spin_lock(&vfsmount_lock); + res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); + spin_unlock(&vfsmount_lock); + if (res == -EAGAIN) + goto retry; + + return res; +} + +static void mnt_free_id(struct vfsmount *mnt) +{ + spin_lock(&vfsmount_lock); + ida_remove(&mnt_id_ida, mnt->mnt_id); + spin_unlock(&vfsmount_lock); +} + +/* + * Allocate a new peer group ID + * + * mnt_group_ida is protected by namespace_sem + */ +static int mnt_alloc_group_id(struct vfsmount *mnt) +{ + if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) + return -ENOMEM; + + return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); +} + +/* + * Release a peer group ID + */ +void mnt_release_group_id(struct vfsmount *mnt) +{ + ida_remove(&mnt_group_ida, mnt->mnt_group_id); + mnt->mnt_group_id = 0; +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { + int err; + + err = mnt_alloc_id(mnt); + if (err) { + kmem_cache_free(mnt_cache, mnt); + return NULL; + } + atomic_set(&mnt->mnt_count, 1); INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); @@ -68,6 +127,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); + atomic_set(&mnt->__mnt_writers, 0); if (name) { int size = strlen(name) + 1; char *newname = kmalloc(size, GFP_KERNEL); @@ -80,6 +140,263 @@ struct vfsmount *alloc_vfsmnt(const char *name) return mnt; } +/* + * Most r/o checks on a fs are for operations that take + * discrete amounts of time, like a write() or unlink(). + * We must keep track of when those operations start + * (for permission checks) and when they end, so that + * we can determine when writes are able to occur to + * a filesystem. + */ +/* + * __mnt_is_readonly: check whether a mount is read-only + * @mnt: the mount to check for its write status + * + * This shouldn't be used directly ouside of the VFS. + * It does not guarantee that the filesystem will stay + * r/w, just that it is right *now*. This can not and + * should not be used in place of IS_RDONLY(inode). + * mnt_want/drop_write() will _keep_ the filesystem + * r/w. + */ +int __mnt_is_readonly(struct vfsmount *mnt) +{ + if (mnt->mnt_flags & MNT_READONLY) + return 1; + if (mnt->mnt_sb->s_flags & MS_RDONLY) + return 1; + return 0; +} +EXPORT_SYMBOL_GPL(__mnt_is_readonly); + +struct mnt_writer { + /* + * If holding multiple instances of this lock, they + * must be ordered by cpu number. + */ + spinlock_t lock; + struct lock_class_key lock_class; /* compiles out with !lockdep */ + unsigned long count; + struct vfsmount *mnt; +} ____cacheline_aligned_in_smp; +static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); + +static int __init init_mnt_writers(void) +{ + int cpu; + for_each_possible_cpu(cpu) { + struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); + spin_lock_init(&writer->lock); + lockdep_set_class(&writer->lock, &writer->lock_class); + writer->count = 0; + } + return 0; +} +fs_initcall(init_mnt_writers); + +static void unlock_mnt_writers(void) +{ + int cpu; + struct mnt_writer *cpu_writer; + + for_each_possible_cpu(cpu) { + cpu_writer = &per_cpu(mnt_writers, cpu); + spin_unlock(&cpu_writer->lock); + } +} + +static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) +{ + if (!cpu_writer->mnt) + return; + /* + * This is in case anyone ever leaves an invalid, + * old ->mnt and a count of 0. + */ + if (!cpu_writer->count) + return; + atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); + cpu_writer->count = 0; +} + /* + * must hold cpu_writer->lock + */ +static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, + struct vfsmount *mnt) +{ + if (cpu_writer->mnt == mnt) + return; + __clear_mnt_count(cpu_writer); + cpu_writer->mnt = mnt; +} + +/* + * Most r/o checks on a fs are for operations that take + * discrete amounts of time, like a write() or unlink(). + * We must keep track of when those operations start + * (for permission checks) and when they end, so that + * we can determine when writes are able to occur to + * a filesystem. + */ +/** + * mnt_want_write - get write access to a mount + * @mnt: the mount on which to take a write + * + * This tells the low-level filesystem that a write is + * about to be performed to it, and makes sure that + * writes are allowed before returning success. When + * the write operation is finished, mnt_drop_write() + * must be called. This is effectively a refcount. + */ +int mnt_want_write(struct vfsmount *mnt) +{ + int ret = 0; + struct mnt_writer *cpu_writer; + + cpu_writer = &get_cpu_var(mnt_writers); + spin_lock(&cpu_writer->lock); + if (__mnt_is_readonly(mnt)) { + ret = -EROFS; + goto out; + } + use_cpu_writer_for_mount(cpu_writer, mnt); + cpu_writer->count++; +out: + spin_unlock(&cpu_writer->lock); + put_cpu_var(mnt_writers); + return ret; +} +EXPORT_SYMBOL_GPL(mnt_want_write); + +static void lock_mnt_writers(void) +{ + int cpu; + struct mnt_writer *cpu_writer; + + for_each_possible_cpu(cpu) { + cpu_writer = &per_cpu(mnt_writers, cpu); + spin_lock(&cpu_writer->lock); + __clear_mnt_count(cpu_writer); + cpu_writer->mnt = NULL; + } +} + +/* + * These per-cpu write counts are not guaranteed to have + * matched increments and decrements on any given cpu. + * A file open()ed for write on one cpu and close()d on + * another cpu will imbalance this count. Make sure it + * does not get too far out of whack. + */ +static void handle_write_count_underflow(struct vfsmount *mnt) +{ + if (atomic_read(&mnt->__mnt_writers) >= + MNT_WRITER_UNDERFLOW_LIMIT) + return; + /* + * It isn't necessary to hold all of the locks + * at the same time, but doing it this way makes + * us share a lot more code. + */ + lock_mnt_writers(); + /* + * vfsmount_lock is for mnt_flags. + */ + spin_lock(&vfsmount_lock); + /* + * If coalescing the per-cpu writer counts did not + * get us back to a positive writer count, we have + * a bug. + */ + if ((atomic_read(&mnt->__mnt_writers) < 0) && + !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { + printk(KERN_DEBUG "leak detected on mount(%p) writers " + "count: %d\n", + mnt, atomic_read(&mnt->__mnt_writers)); + WARN_ON(1); + /* use the flag to keep the dmesg spam down */ + mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; + } + spin_unlock(&vfsmount_lock); + unlock_mnt_writers(); +} + +/** + * mnt_drop_write - give up write access to a mount + * @mnt: the mount on which to give up write access + * + * Tells the low-level filesystem that we are done + * performing writes to it. Must be matched with + * mnt_want_write() call above. + */ +void mnt_drop_write(struct vfsmount *mnt) +{ + int must_check_underflow = 0; + struct mnt_writer *cpu_writer; + + cpu_writer = &get_cpu_var(mnt_writers); + spin_lock(&cpu_writer->lock); + + use_cpu_writer_for_mount(cpu_writer, mnt); + if (cpu_writer->count > 0) { + cpu_writer->count--; + } else { + must_check_underflow = 1; + atomic_dec(&mnt->__mnt_writers); + } + + spin_unlock(&cpu_writer->lock); + /* + * Logically, we could call this each time, + * but the __mnt_writers cacheline tends to + * be cold, and makes this expensive. + */ + if (must_check_underflow) + handle_write_count_underflow(mnt); + /* + * This could be done right after the spinlock + * is taken because the spinlock keeps us on + * the cpu, and disables preemption. However, + * putting it here bounds the amount that + * __mnt_writers can underflow. Without it, + * we could theoretically wrap __mnt_writers. + */ + put_cpu_var(mnt_writers); +} +EXPORT_SYMBOL_GPL(mnt_drop_write); + +static int mnt_make_readonly(struct vfsmount *mnt) +{ + int ret = 0; + + lock_mnt_writers(); + /* + * With all the locks held, this value is stable + */ + if (atomic_read(&mnt->__mnt_writers) > 0) { + ret = -EBUSY; + goto out; + } + /* + * nobody can do a successful mnt_want_write() with all + * of the counts in MNT_DENIED_WRITE and the locks held. + */ + spin_lock(&vfsmount_lock); + if (!ret) + mnt->mnt_flags |= MNT_READONLY; + spin_unlock(&vfsmount_lock); +out: + unlock_mnt_writers(); + return ret; +} + +static void __mnt_unmake_readonly(struct vfsmount *mnt) +{ + spin_lock(&vfsmount_lock); + mnt->mnt_flags &= ~MNT_READONLY; + spin_unlock(&vfsmount_lock); +} + int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) { mnt->mnt_sb = sb; @@ -92,6 +409,7 @@ EXPORT_SYMBOL(simple_set_mnt); void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); + mnt_free_id(mnt); kmem_cache_free(mnt_cache, mnt); } @@ -238,6 +556,17 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); if (mnt) { + if (flag & (CL_SLAVE | CL_PRIVATE)) + mnt->mnt_group_id = 0; /* not a peer of original */ + else + mnt->mnt_group_id = old->mnt_group_id; + + if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { + int err = mnt_alloc_group_id(mnt); + if (err) + goto out_free; + } + mnt->mnt_flags = old->mnt_flags; atomic_inc(&sb->s_active); mnt->mnt_sb = sb; @@ -267,11 +596,44 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, } } return mnt; + + out_free: + free_vfsmnt(mnt); + return NULL; } static inline void __mntput(struct vfsmount *mnt) { + int cpu; struct super_block *sb = mnt->mnt_sb; + /* + * We don't have to hold all of the locks at the + * same time here because we know that we're the + * last reference to mnt and that no new writers + * can come in. + */ + for_each_possible_cpu(cpu) { + struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); + if (cpu_writer->mnt != mnt) + continue; + spin_lock(&cpu_writer->lock); + atomic_add(cpu_writer->count, &mnt->__mnt_writers); + cpu_writer->count = 0; + /* + * Might as well do this so that no one + * ever sees the pointer and expects + * it to be valid. + */ + cpu_writer->mnt = NULL; + spin_unlock(&cpu_writer->lock); + } + /* + * This probably indicates that somebody messed + * up a mnt_want/drop_write() pair. If this + * happens, the filesystem was probably unable + * to make r/w->r/o transitions. + */ + WARN_ON(atomic_read(&mnt->__mnt_writers)); dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); @@ -362,20 +724,21 @@ void save_mount_options(struct super_block *sb, char *options) } EXPORT_SYMBOL(save_mount_options); +#ifdef CONFIG_PROC_FS /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) { - struct mnt_namespace *n = m->private; + struct proc_mounts *p = m->private; down_read(&namespace_sem); - return seq_list_start(&n->list, *pos); + return seq_list_start(&p->ns->list, *pos); } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct mnt_namespace *n = m->private; + struct proc_mounts *p = m->private; - return seq_list_next(v, &n->list, pos); + return seq_list_next(v, &p->ns->list, pos); } static void m_stop(struct seq_file *m, void *v) @@ -383,20 +746,30 @@ static void m_stop(struct seq_file *m, void *v) up_read(&namespace_sem); } -static int show_vfsmnt(struct seq_file *m, void *v) +struct proc_fs_info { + int flag; + const char *str; +}; + +static void show_sb_opts(struct seq_file *m, struct super_block *sb) { - struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); - int err = 0; - static struct proc_fs_info { - int flag; - char *str; - } fs_info[] = { + static const struct proc_fs_info fs_info[] = { { MS_SYNCHRONOUS, ",sync" }, { MS_DIRSYNC, ",dirsync" }, { MS_MANDLOCK, ",mand" }, { 0, NULL } }; - static struct proc_fs_info mnt_info[] = { + const struct proc_fs_info *fs_infop; + + for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { + if (sb->s_flags & fs_infop->flag) + seq_puts(m, fs_infop->str); + } +} + +static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) +{ + static const struct proc_fs_info mnt_info[] = { { MNT_NOSUID, ",nosuid" }, { MNT_NODEV, ",nodev" }, { MNT_NOEXEC, ",noexec" }, @@ -405,40 +778,108 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MNT_RELATIME, ",relatime" }, { 0, NULL } }; - struct proc_fs_info *fs_infop; + const struct proc_fs_info *fs_infop; + + for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { + if (mnt->mnt_flags & fs_infop->flag) + seq_puts(m, fs_infop->str); + } +} + +static void show_type(struct seq_file *m, struct super_block *sb) +{ + mangle(m, sb->s_type->name); + if (sb->s_subtype && sb->s_subtype[0]) { + seq_putc(m, '.'); + mangle(m, sb->s_subtype); + } +} + +static int show_vfsmnt(struct seq_file *m, void *v) +{ + struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); + int err = 0; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); seq_putc(m, ' '); seq_path(m, &mnt_path, " \t\n\\"); seq_putc(m, ' '); - mangle(m, mnt->mnt_sb->s_type->name); - if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) { - seq_putc(m, '.'); - mangle(m, mnt->mnt_sb->s_subtype); - } - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); - for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_sb->s_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); - } - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); - } + show_type(m, mnt->mnt_sb); + seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); + show_sb_opts(m, mnt->mnt_sb); + show_mnt_opts(m, mnt); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); return err; } -struct seq_operations mounts_op = { +const struct seq_operations mounts_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_vfsmnt }; +static int show_mountinfo(struct seq_file *m, void *v) +{ + struct proc_mounts *p = m->private; + struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); + struct super_block *sb = mnt->mnt_sb; + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; + struct path root = p->root; + int err = 0; + + seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id, + MAJOR(sb->s_dev), MINOR(sb->s_dev)); + seq_dentry(m, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + seq_path_root(m, &mnt_path, &root, " \t\n\\"); + if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { + /* + * Mountpoint is outside root, discard that one. Ugly, + * but less so than trying to do that in iterator in a + * race-free way (due to renames). + */ + return SEQ_SKIP; + } + seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); + show_mnt_opts(m, mnt); + + /* Tagged fields ("foo:X" or "bar") */ + if (IS_MNT_SHARED(mnt)) + seq_printf(m, " shared:%i", mnt->mnt_group_id); + if (IS_MNT_SLAVE(mnt)) { + int master = mnt->mnt_master->mnt_group_id; + int dom = get_dominating_id(mnt, &p->root); + seq_printf(m, " master:%i", master); + if (dom && dom != master) + seq_printf(m, " propagate_from:%i", dom); + } + if (IS_MNT_UNBINDABLE(mnt)) + seq_puts(m, " unbindable"); + + /* Filesystem specific data */ + seq_puts(m, " - "); + show_type(m, sb); + seq_putc(m, ' '); + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); + show_sb_opts(m, sb); + if (sb->s_op->show_options) + err = sb->s_op->show_options(m, mnt); + seq_putc(m, '\n'); + return err; +} + +const struct seq_operations mountinfo_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_mountinfo, +}; + static int show_vfsstat(struct seq_file *m, void *v) { struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); @@ -459,7 +900,7 @@ static int show_vfsstat(struct seq_file *m, void *v) /* file system type */ seq_puts(m, "with fstype "); - mangle(m, mnt->mnt_sb->s_type->name); + show_type(m, mnt->mnt_sb); /* optional statistics */ if (mnt->mnt_sb->s_op->show_stats) { @@ -471,12 +912,13 @@ static int show_vfsstat(struct seq_file *m, void *v) return err; } -struct seq_operations mountstats_op = { +const struct seq_operations mountstats_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_vfsstat, }; +#endif /* CONFIG_PROC_FS */ /** * may_umount_tree - check if a mount tree is busy @@ -801,23 +1243,50 @@ Enomem: struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) { struct vfsmount *tree; - down_read(&namespace_sem); + down_write(&namespace_sem); tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); - up_read(&namespace_sem); + up_write(&namespace_sem); return tree; } void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); - down_read(&namespace_sem); + down_write(&namespace_sem); spin_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); spin_unlock(&vfsmount_lock); - up_read(&namespace_sem); + up_write(&namespace_sem); release_mounts(&umount_list); } +static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) +{ + struct vfsmount *p; + + for (p = mnt; p != end; p = next_mnt(p, mnt)) { + if (p->mnt_group_id && !IS_MNT_SHARED(p)) + mnt_release_group_id(p); + } +} + +static int invent_group_ids(struct vfsmount *mnt, bool recurse) +{ + struct vfsmount *p; + + for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) { + if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { + int err = mnt_alloc_group_id(p); + if (err) { + cleanup_group_ids(mnt, p); + return err; + } + } + } + + return 0; +} + /* * @source_mnt : mount tree to be attached * @nd : place the mount tree @source_mnt is attached @@ -888,9 +1357,16 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, struct vfsmount *dest_mnt = path->mnt; struct dentry *dest_dentry = path->dentry; struct vfsmount *child, *p; + int err; - if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) - return -EINVAL; + if (IS_MNT_SHARED(dest_mnt)) { + err = invent_group_ids(source_mnt, true); + if (err) + goto out; + } + err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); + if (err) + goto out_cleanup_ids; if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -913,34 +1389,40 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, } spin_unlock(&vfsmount_lock); return 0; + + out_cleanup_ids: + if (IS_MNT_SHARED(dest_mnt)) + cleanup_group_ids(source_mnt, NULL); + out: + return err; } -static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) +static int graft_tree(struct vfsmount *mnt, struct path *path) { int err; if (mnt->mnt_sb->s_flags & MS_NOUSER) return -EINVAL; - if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != + if (S_ISDIR(path->dentry->d_inode->i_mode) != S_ISDIR(mnt->mnt_root->d_inode->i_mode)) return -ENOTDIR; err = -ENOENT; - mutex_lock(&nd->path.dentry->d_inode->i_mutex); - if (IS_DEADDIR(nd->path.dentry->d_inode)) + mutex_lock(&path->dentry->d_inode->i_mutex); + if (IS_DEADDIR(path->dentry->d_inode)) goto out_unlock; - err = security_sb_check_sb(mnt, nd); + err = security_sb_check_sb(mnt, path); if (err) goto out_unlock; err = -ENOENT; - if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) - err = attach_recursive_mnt(mnt, &nd->path, NULL); + if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) + err = attach_recursive_mnt(mnt, path, NULL); out_unlock: - mutex_unlock(&nd->path.dentry->d_inode->i_mutex); + mutex_unlock(&path->dentry->d_inode->i_mutex); if (!err) - security_sb_post_addmount(mnt, nd); + security_sb_post_addmount(mnt, path); return err; } @@ -953,6 +1435,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag) struct vfsmount *m, *mnt = nd->path.mnt; int recurse = flag & MS_REC; int type = flag & ~MS_REC; + int err = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -961,12 +1444,20 @@ static noinline int do_change_type(struct nameidata *nd, int flag) return -EINVAL; down_write(&namespace_sem); + if (type == MS_SHARED) { + err = invent_group_ids(mnt, recurse); + if (err) + goto out_unlock; + } + spin_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); spin_unlock(&vfsmount_lock); + + out_unlock: up_write(&namespace_sem); - return 0; + return err; } /* @@ -1004,7 +1495,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name, if (!mnt) goto out; - err = graft_tree(mnt, nd); + err = graft_tree(mnt, &nd->path); if (err) { LIST_HEAD(umount_list); spin_lock(&vfsmount_lock); @@ -1019,6 +1510,23 @@ out: return err; } +static int change_mount_flags(struct vfsmount *mnt, int ms_flags) +{ + int error = 0; + int readonly_request = 0; + + if (ms_flags & MS_RDONLY) + readonly_request = 1; + if (readonly_request == __mnt_is_readonly(mnt)) + return 0; + + if (readonly_request) + error = mnt_make_readonly(mnt); + else + __mnt_unmake_readonly(mnt); + return error; +} + /* * change filesystem flags. dir should be a physical root of filesystem. * If you've mounted a non-root directory somewhere and want to do remount @@ -1041,7 +1549,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, return -EINVAL; down_write(&sb->s_umount); - err = do_remount_sb(sb, flags, data, 0); + if (flags & MS_BIND) + err = change_mount_flags(nd->path.mnt, flags); + else + err = do_remount_sb(sb, flags, data, 0); if (!err) nd->path.mnt->mnt_flags = mnt_flags; up_write(&sb->s_umount); @@ -1191,7 +1702,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, nd))) + if ((err = graft_tree(newmnt, &nd->path))) goto unlock; if (fslist) /* add to the specified expiration list */ @@ -1425,6 +1936,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, mnt_flags |= MNT_NODIRATIME; if (flags & MS_RELATIME) mnt_flags |= MNT_RELATIME; + if (flags & MS_RDONLY) + mnt_flags |= MNT_READONLY; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); @@ -1434,7 +1947,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (retval) return retval; - retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page); + retval = security_sb_mount(dev_name, &nd.path, + type_page, flags, data_page); if (retval) goto dput_out; @@ -1674,15 +2188,13 @@ asmlinkage long sys_pivot_root(const char __user * new_root, const char __user * put_old) { struct vfsmount *tmp; - struct nameidata new_nd, old_nd, user_nd; - struct path parent_path, root_parent; + struct nameidata new_nd, old_nd; + struct path parent_path, root_parent, root; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - lock_kernel(); - error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new_nd); if (error) @@ -1695,14 +2207,14 @@ asmlinkage long sys_pivot_root(const char __user * new_root, if (error) goto out1; - error = security_sb_pivotroot(&old_nd, &new_nd); + error = security_sb_pivotroot(&old_nd.path, &new_nd.path); if (error) { path_put(&old_nd.path); goto out1; } read_lock(¤t->fs->lock); - user_nd.path = current->fs->root; + root = current->fs->root; path_get(¤t->fs->root); read_unlock(¤t->fs->lock); down_write(&namespace_sem); @@ -1710,9 +2222,9 @@ asmlinkage long sys_pivot_root(const char __user * new_root, error = -EINVAL; if (IS_MNT_SHARED(old_nd.path.mnt) || IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || - IS_MNT_SHARED(user_nd.path.mnt->mnt_parent)) + IS_MNT_SHARED(root.mnt->mnt_parent)) goto out2; - if (!check_mnt(user_nd.path.mnt)) + if (!check_mnt(root.mnt)) goto out2; error = -ENOENT; if (IS_DEADDIR(new_nd.path.dentry->d_inode)) @@ -1722,13 +2234,13 @@ asmlinkage long sys_pivot_root(const char __user * new_root, if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) goto out2; error = -EBUSY; - if (new_nd.path.mnt == user_nd.path.mnt || - old_nd.path.mnt == user_nd.path.mnt) + if (new_nd.path.mnt == root.mnt || + old_nd.path.mnt == root.mnt) goto out2; /* loop, on the same file system */ error = -EINVAL; - if (user_nd.path.mnt->mnt_root != user_nd.path.dentry) + if (root.mnt->mnt_root != root.dentry) goto out2; /* not a mountpoint */ - if (user_nd.path.mnt->mnt_parent == user_nd.path.mnt) + if (root.mnt->mnt_parent == root.mnt) goto out2; /* not attached */ if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) goto out2; /* not a mountpoint */ @@ -1750,27 +2262,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root, } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) goto out3; detach_mnt(new_nd.path.mnt, &parent_path); - detach_mnt(user_nd.path.mnt, &root_parent); + detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ - attach_mnt(user_nd.path.mnt, &old_nd.path); + attach_mnt(root.mnt, &old_nd.path); /* mount new_root on / */ attach_mnt(new_nd.path.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); - chroot_fs_refs(&user_nd.path, &new_nd.path); - security_sb_post_pivotroot(&user_nd, &new_nd); + chroot_fs_refs(&root, &new_nd.path); + security_sb_post_pivotroot(&root, &new_nd.path); error = 0; path_put(&root_parent); path_put(&parent_path); out2: mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); up_write(&namespace_sem); - path_put(&user_nd.path); + path_put(&root); path_put(&old_nd.path); out1: path_put(&new_nd.path); out0: - unlock_kernel(); return error; out3: spin_unlock(&vfsmount_lock); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index c67b4bdcf719..ad8f167e54bc 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -14,6 +14,7 @@ #include <linux/ioctl.h> #include <linux/time.h> #include <linux/mm.h> +#include <linux/mount.h> #include <linux/highuid.h> #include <linux/smp_lock.h> #include <linux/vmalloc.h> @@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) } #endif /* CONFIG_NCPFS_NLS */ -int ncp_ioctl(struct inode *inode, struct file *filp, +static int __ncp_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { struct ncp_server *server = NCP_SERVER(inode); @@ -822,6 +823,57 @@ outrel: return -EINVAL; } +static int ncp_ioctl_need_write(unsigned int cmd) +{ + switch (cmd) { + case NCP_IOC_GET_FS_INFO: + case NCP_IOC_GET_FS_INFO_V2: + case NCP_IOC_NCPREQUEST: + case NCP_IOC_SETDENTRYTTL: + case NCP_IOC_SIGN_INIT: + case NCP_IOC_LOCKUNLOCK: + case NCP_IOC_SET_SIGN_WANTED: + return 1; + case NCP_IOC_GETOBJECTNAME: + case NCP_IOC_SETOBJECTNAME: + case NCP_IOC_GETPRIVATEDATA: + case NCP_IOC_SETPRIVATEDATA: + case NCP_IOC_SETCHARSETS: + case NCP_IOC_GETCHARSETS: + case NCP_IOC_CONN_LOGGED_IN: + case NCP_IOC_GETDENTRYTTL: + case NCP_IOC_GETMOUNTUID2: + case NCP_IOC_SIGN_WANTED: + case NCP_IOC_GETROOT: + case NCP_IOC_SETROOT: + return 0; + default: + /* unkown IOCTL command, assume write */ + return 1; + } +} + +int ncp_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int ret; + + if (ncp_ioctl_need_write(cmd)) { + /* + * inside the ioctl(), any failures which + * are because of file_permission() are + * -EACCESS, so it seems consistent to keep + * that here. + */ + if (mnt_want_write(filp->f_path.mnt)) + return -EACCES; + } + ret = __ncp_ioctl(inode, filp, cmd, arg); + if (ncp_ioctl_need_write(cmd)) + mnt_drop_write(filp->f_path.mnt); + return ret; +} + #ifdef CONFIG_COMPAT long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6cea7479c5b4..d9e30ac2798d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -967,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) if (nd->flags & LOOKUP_DIRECTORY) return 0; /* Are we trying to write to a read only partition? */ - if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + if (__mnt_is_readonly(nd->path.mnt) && + (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) return 0; return 1; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c593db047d8b..c309c881bd4e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } } + status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt); + if (status) + return status; status = nfs_ok; if (setattr->sa_acl != NULL) status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, setattr->sa_acl); if (status) - return status; + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); +out: + mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt); return status; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1ff90625860f..145b3c877a27 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -46,6 +46,7 @@ #include <linux/scatterlist.h> #include <linux/crypto.h> #include <linux/sched.h> +#include <linux/mount.h> #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } + status = mnt_want_write(rec_dir.path.mnt); + if (status) + goto out_put; status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); + mnt_drop_write(rec_dir.path.mnt); out_put: dput(dentry); out_unlock: @@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (!rec_dir_init || !clp->cl_firststate) return; + status = mnt_want_write(rec_dir.path.mnt); + if (status) + goto out; clp->cl_firststate = 0; nfs4_save_user(&uid, &gid); status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); nfs4_reset_user(uid, gid); if (status == 0) nfsd4_sync_rec_dir(); + mnt_drop_write(rec_dir.path.mnt); +out: if (status) printk("NFSD: Failed to remove expired client state directory" " %.*s\n", HEXDIR_LEN, clp->cl_recdir); @@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) { if (!rec_dir_init) return; + status = mnt_want_write(rec_dir.path.mnt); + if (status) + goto out; status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); if (status == 0) nfsd4_sync_rec_dir(); + mnt_drop_write(rec_dir.path.mnt); +out: if (status) printk("nfsd4: failed to purge old clients from recovery" " directory %s\n", rec_dir.path.dentry->d_name.name); - return; } static int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bcb97d8e8b8b..81a75f3081f4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -41,6 +41,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfsd/nfsd.h> #include <linux/nfsd/cache.h> +#include <linux/file.h> #include <linux/mount.h> #include <linux/workqueue.h> #include <linux/smp_lock.h> @@ -1239,7 +1240,7 @@ static inline void nfs4_file_downgrade(struct file *filp, unsigned int share_access) { if (share_access & NFS4_SHARE_ACCESS_WRITE) { - put_write_access(filp->f_path.dentry->d_inode); + drop_file_write_access(filp); filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; } } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 46f59d5365a0..304bf5f643c9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; default: printk("nfsd: bad file type %o in nfsd_create\n", type); host_err = -EINVAL; + goto out_nfserr; } - if (host_err < 0) + if (host_err < 0) { + mnt_drop_write(fhp->fh_export->ex_path.mnt); goto out_nfserr; + } if (EX_ISSYNC(fhp->fh_export)) { err = nfserrno(nfsd_sync_dir(dentry)); @@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = nfsd_create_setattr(rqstp, resfhp, iap); if (err2) err = err2; + mnt_drop_write(fhp->fh_export->ex_path.mnt); /* * Update the file handle to get the new inode info. */ @@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, v_atime = verifier[1]&0x7fffffff; } + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; if (dchild->d_inode) { err = 0; @@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_GUARDED: err = nfserr_exist; } + mnt_drop_write(fhp->fh_export->ex_path.mnt); goto out; } host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); - if (host_err < 0) + if (host_err < 0) { + mnt_drop_write(fhp->fh_export->ex_path.mnt); goto out_nfserr; + } if (created) *created = 1; @@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err2) err = err2; + mnt_drop_write(fhp->fh_export->ex_path.mnt); /* * Update the filehandle to get the new inode info. */ @@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (iap && (iap->ia_valid & ATTR_MODE)) mode = iap->ia_mode & S_IALLUGO; + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; + if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) @@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserrno(host_err); fh_unlock(fhp); + mnt_drop_write(fhp->fh_export->ex_path.mnt); + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; @@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, dold = tfhp->fh_dentry; dest = dold->d_inode; + host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); + if (host_err) { + err = nfserrno(host_err); + goto out_dput; + } host_err = vfs_link(dold, dirp, dnew); if (!host_err) { if (EX_ISSYNC(ffhp->fh_export)) { @@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } - + mnt_drop_write(tfhp->fh_export->ex_path.mnt); +out_dput: dput(dnew); out_unlock: fh_unlock(ffhp); @@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ndentry == trap) goto out_dput_new; -#ifdef MSNFS - if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && + if (svc_msnfs(ffhp) && ((atomic_read(&odentry->d_count) > 1) || (atomic_read(&ndentry->d_count) > 1))) { host_err = -EPERM; - } else -#endif + goto out_dput_new; + } + + host_err = -EXDEV; + if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) + goto out_dput_new; + host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt); + if (host_err) + goto out_dput_new; + host_err = vfs_rename(fdir, odentry, tdir, ndentry); if (!host_err && EX_ISSYNC(tfhp->fh_export)) { host_err = nfsd_sync_dir(tdentry); @@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = nfsd_sync_dir(fdentry); } + mnt_drop_write(ffhp->fh_export->ex_path.mnt); + out_dput_new: dput(ndentry); out_dput_old: @@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = rdentry->d_inode->i_mode & S_IFMT; + host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (host_err) + goto out_nfserr; + if (type != S_IFDIR) { /* It's UNLINK */ #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && @@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dput(rdentry); if (host_err) - goto out_nfserr; + goto out_drop; if (EX_ISSYNC(fhp->fh_export)) host_err = nfsd_sync_dir(dentry); +out_drop: + mnt_drop_write(fhp->fh_export->ex_path.mnt); out_nfserr: err = nfserrno(host_err); out: @@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, inode->i_mode, IS_IMMUTABLE(inode)? " immut" : "", IS_APPEND(inode)? " append" : "", - IS_RDONLY(inode)? " ro" : ""); + __mnt_is_readonly(exp->ex_path.mnt)? " ro" : ""); dprintk(" owner %d/%d user %d/%d\n", inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); #endif @@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, */ if (!(acc & MAY_LOCAL_ACCESS)) if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) + if (exp_rdonly(rqstp, exp) || + __mnt_is_readonly(exp->ex_path.mnt)) return nfserr_rofs; if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; @@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) } else size = 0; + error = mnt_want_write(fhp->fh_export->ex_path.mnt); + if (error) + goto getout; if (size) error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); else { @@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) error = 0; } } + mnt_drop_write(fhp->fh_export->ex_path.mnt); getout: kfree(value); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index b413166dd163..7b142f0ce995 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -60,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, goto bail; } - status = -EROFS; - if (IS_RDONLY(inode)) - goto bail_unlock; - status = -EACCES; if (!is_owner_or_cap(inode)) goto bail_unlock; @@ -134,8 +130,13 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (get_user(flags, (int __user *) arg)) return -EFAULT; - return ocfs2_set_inode_attr(inode, flags, + status = mnt_want_write(filp->f_path.mnt); + if (status) + return status; + status = ocfs2_set_inode_attr(inode, flags, OCFS2_FL_MODIFIABLE); + mnt_drop_write(filp->f_path.mnt); + return status; case OCFS2_IOC_RESVSP: case OCFS2_IOC_RESVSP64: case OCFS2_IOC_UNRESVSP: diff --git a/fs/open.c b/fs/open.c index 3fa4e4ffce4c..b70e7666bb2c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length) if (!S_ISREG(inode->i_mode)) goto dput_and_out; - error = vfs_permission(&nd, MAY_WRITE); + error = mnt_want_write(nd.path.mnt); if (error) goto dput_and_out; - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; + error = vfs_permission(&nd, MAY_WRITE); + if (error) + goto mnt_drop_write_and_out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; + goto mnt_drop_write_and_out; error = get_write_access(inode); if (error) - goto dput_and_out; + goto mnt_drop_write_and_out; /* * Make sure that there are no leases. get_write_access() protects @@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length) put_write_and_out: put_write_access(inode); +mnt_drop_write_and_out: + mnt_drop_write(nd.path.mnt); dput_and_out: path_put(&nd.path); out: @@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) if(res || !(mode & S_IWOTH) || special_file(nd.path.dentry->d_inode->i_mode)) goto out_path_release; - - if(IS_RDONLY(nd.path.dentry->d_inode)) + /* + * This is a rare case where using __mnt_is_readonly() + * is OK without a mnt_want/drop_write() pair. Since + * no actual write to the fs is performed here, we do + * not need to telegraph to that to anyone. + * + * By doing this, we accept that this access is + * inherently racy and know that the fs may change + * state before we even see this result. + */ + if (__mnt_is_readonly(nd.path.mnt)) res = -EROFS; out_path_release: @@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) audit_inode(NULL, dentry); - err = -EROFS; - if (IS_RDONLY(inode)) + err = mnt_want_write(file->f_path.mnt); + if (err) goto out_putf; err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out_putf; + goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; @@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) err = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); +out_drop_write: + mnt_drop_write(file->f_path.mnt); out_putf: fput(file); out: @@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, goto out; inode = nd.path.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = mnt_want_write(nd.path.mnt); + if (error) goto dput_and_out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; + goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) @@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, error = notify_change(nd.path.dentry, &newattrs); mutex_unlock(&inode->i_mutex); +out_drop_write: + mnt_drop_write(nd.path.mnt); dput_and_out: path_put(&nd.path); out: @@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) printk(KERN_ERR "chown_common: NULL inode\n"); goto out; } - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; @@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) error = user_path_walk(filename, &nd); if (error) goto out; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_release; error = chown_common(nd.path.dentry, user, group); + mnt_drop_write(nd.path.mnt); +out_release: path_put(&nd.path); out: return error; @@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, error = __user_walk_fd(dfd, filename, follow, &nd); if (error) goto out; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_release; error = chown_common(nd.path.dentry, user, group); + mnt_drop_write(nd.path.mnt); +out_release: path_put(&nd.path); out: return error; @@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group error = user_path_walk_link(filename, &nd); if (error) goto out; + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_release; error = chown_common(nd.path.dentry, user, group); + mnt_drop_write(nd.path.mnt); +out_release: path_put(&nd.path); out: return error; @@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) if (!file) goto out; + error = mnt_want_write(file->f_path.mnt); + if (error) + goto out_fput; dentry = file->f_path.dentry; audit_inode(NULL, dentry); error = chown_common(dentry, user, group); + mnt_drop_write(file->f_path.mnt); +out_fput: fput(file); out: return error; } +/* + * You have to be very careful that these write + * counts get cleaned up in error cases and + * upon __fput(). This should probably never + * be called outside of __dentry_open(). + */ +static inline int __get_file_write_access(struct inode *inode, + struct vfsmount *mnt) +{ + int error; + error = get_write_access(inode); + if (error) + return error; + /* + * Do not take mount writer counts on + * special files since no writes to + * the mount itself will occur. + */ + if (!special_file(inode->i_mode)) { + /* + * Balanced in __fput() + */ + error = mnt_want_write(mnt); + if (error) + put_write_access(inode); + } + return error; +} + static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, struct file *f, int (*open)(struct inode *, struct file *)) @@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { - error = get_write_access(inode); + error = __get_file_write_access(inode, mnt); if (error) goto cleanup_file; + if (!special_file(inode->i_mode)) + file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, cleanup_all: fops_put(f->f_op); - if (f->f_mode & FMODE_WRITE) + if (f->f_mode & FMODE_WRITE) { put_write_access(inode); + if (!special_file(inode->i_mode)) { + /* + * We don't consider this a real + * mnt_want/drop_write() pair + * because it all happenend right + * here, so just reset the state. + */ + file_reset_write(f); + mnt_drop_write(mnt); + } + } file_kill(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; @@ -796,43 +870,6 @@ cleanup_file: return ERR_PTR(error); } -/* - * Note that while the flag value (low two bits) for sys_open means: - * 00 - read-only - * 01 - write-only - * 10 - read-write - * 11 - special - * it is changed into - * 00 - no permissions needed - * 01 - read-permission - * 10 - write-permission - * 11 - read-write - * for the internal routines (ie open_namei()/follow_link() etc). 00 is - * used by symlinks. - */ -static struct file *do_filp_open(int dfd, const char *filename, int flags, - int mode) -{ - int namei_flags, error; - struct nameidata nd; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) - namei_flags++; - - error = open_namei(dfd, filename, namei_flags, mode, &nd); - if (!error) - return nameidata_to_filp(&nd, flags); - - return ERR_PTR(error); -} - -struct file *filp_open(const char *filename, int flags, int mode) -{ - return do_filp_open(AT_FDCWD, filename, flags, mode); -} -EXPORT_SYMBOL(filp_open); - /** * lookup_instantiate_filp - instantiates the open intent filp * @nd: pointer to nameidata diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 03f808c5b79d..6149e4b58c88 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -473,6 +473,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) return 0; if (IS_ERR(state)) /* I/O error reading the partition table */ return -EIO; + + /* tell userspace that the media / partition table may have changed */ + kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); + for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; diff --git a/fs/pipe.c b/fs/pipe.c index 8be381bbcb54..f73492b6817e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -988,7 +988,10 @@ struct file *create_write_pipe(void) return f; err_dentry: + free_pipe_info(inode); dput(dentry); + return ERR_PTR(err); + err_inode: free_pipe_info(inode); iput(inode); diff --git a/fs/pnode.c b/fs/pnode.c index 1d8f5447f3f7..8d5f392ec3d3 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -9,6 +9,7 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> +#include "internal.h" #include "pnode.h" /* return the next shared peer mount of @p */ @@ -27,6 +28,57 @@ static inline struct vfsmount *next_slave(struct vfsmount *p) return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); } +/* + * Return true if path is reachable from root + * + * namespace_sem is held, and mnt is attached + */ +static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry, + const struct path *root) +{ + while (mnt != root->mnt && mnt->mnt_parent != mnt) { + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + return mnt == root->mnt && is_subdir(dentry, root->dentry); +} + +static struct vfsmount *get_peer_under_root(struct vfsmount *mnt, + struct mnt_namespace *ns, + const struct path *root) +{ + struct vfsmount *m = mnt; + + do { + /* Check the namespace first for optimization */ + if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root)) + return m; + + m = next_peer(m); + } while (m != mnt); + + return NULL; +} + +/* + * Get ID of closest dominating peer group having a representative + * under the given root. + * + * Caller must hold namespace_sem + */ +int get_dominating_id(struct vfsmount *mnt, const struct path *root) +{ + struct vfsmount *m; + + for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { + struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root); + if (d) + return d->mnt_group_id; + } + + return 0; +} + static int do_make_slave(struct vfsmount *mnt) { struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; @@ -45,7 +97,11 @@ static int do_make_slave(struct vfsmount *mnt) if (peer_mnt == mnt) peer_mnt = NULL; } + if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share)) + mnt_release_group_id(mnt); + list_del_init(&mnt->mnt_share); + mnt->mnt_group_id = 0; if (peer_mnt) master = peer_mnt; @@ -67,7 +123,6 @@ static int do_make_slave(struct vfsmount *mnt) } mnt->mnt_master = master; CLEAR_MNT_SHARED(mnt); - INIT_LIST_HEAD(&mnt->mnt_slave_list); return 0; } @@ -211,8 +266,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, out: spin_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { - child = list_entry(tmp_list.next, struct vfsmount, mnt_hash); - list_del_init(&child->mnt_hash); + child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } spin_unlock(&vfsmount_lock); diff --git a/fs/pnode.h b/fs/pnode.h index f249be2fee7a..958665d662af 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -35,4 +35,6 @@ int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, struct list_head *); int propagate_umount(struct list_head *); int propagate_mount_busy(struct vfsmount *, int); +void mnt_release_group_id(struct vfsmount *); +int get_dominating_id(struct vfsmount *mnt, const struct path *root); #endif /* _LINUX_PNODE_H */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 81d7d145292a..c5e412a00b17 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -502,17 +502,14 @@ static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; -extern const struct seq_operations mounts_op; -struct proc_mounts { - struct seq_file m; - int event; -}; - -static int mounts_open(struct inode *inode, struct file *file) +static int mounts_open_common(struct inode *inode, struct file *file, + const struct seq_operations *op) { struct task_struct *task = get_proc_task(inode); struct nsproxy *nsp; struct mnt_namespace *ns = NULL; + struct fs_struct *fs = NULL; + struct path root; struct proc_mounts *p; int ret = -EINVAL; @@ -525,40 +522,61 @@ static int mounts_open(struct inode *inode, struct file *file) get_mnt_ns(ns); } rcu_read_unlock(); - + if (ns) + fs = get_fs_struct(task); put_task_struct(task); } - if (ns) { - ret = -ENOMEM; - p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); - if (p) { - file->private_data = &p->m; - ret = seq_open(file, &mounts_op); - if (!ret) { - p->m.private = ns; - p->event = ns->event; - return 0; - } - kfree(p); - } - put_mnt_ns(ns); - } + if (!ns) + goto err; + if (!fs) + goto err_put_ns; + + read_lock(&fs->lock); + root = fs->root; + path_get(&root); + read_unlock(&fs->lock); + put_fs_struct(fs); + + ret = -ENOMEM; + p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); + if (!p) + goto err_put_path; + + file->private_data = &p->m; + ret = seq_open(file, op); + if (ret) + goto err_free; + + p->m.private = p; + p->ns = ns; + p->root = root; + p->event = ns->event; + + return 0; + + err_free: + kfree(p); + err_put_path: + path_put(&root); + err_put_ns: + put_mnt_ns(ns); + err: return ret; } static int mounts_release(struct inode *inode, struct file *file) { - struct seq_file *m = file->private_data; - struct mnt_namespace *ns = m->private; - put_mnt_ns(ns); + struct proc_mounts *p = file->private_data; + path_put(&p->root); + put_mnt_ns(p->ns); return seq_release(inode, file); } static unsigned mounts_poll(struct file *file, poll_table *wait) { struct proc_mounts *p = file->private_data; - struct mnt_namespace *ns = p->m.private; + struct mnt_namespace *ns = p->ns; unsigned res = 0; poll_wait(file, &ns->poll, wait); @@ -573,6 +591,11 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) return res; } +static int mounts_open(struct inode *inode, struct file *file) +{ + return mounts_open_common(inode, file, &mounts_op); +} + static const struct file_operations proc_mounts_operations = { .open = mounts_open, .read = seq_read, @@ -581,38 +604,22 @@ static const struct file_operations proc_mounts_operations = { .poll = mounts_poll, }; -extern const struct seq_operations mountstats_op; -static int mountstats_open(struct inode *inode, struct file *file) +static int mountinfo_open(struct inode *inode, struct file *file) { - int ret = seq_open(file, &mountstats_op); - - if (!ret) { - struct seq_file *m = file->private_data; - struct nsproxy *nsp; - struct mnt_namespace *mnt_ns = NULL; - struct task_struct *task = get_proc_task(inode); - - if (task) { - rcu_read_lock(); - nsp = task_nsproxy(task); - if (nsp) { - mnt_ns = nsp->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - } - rcu_read_unlock(); + return mounts_open_common(inode, file, &mountinfo_op); +} - put_task_struct(task); - } +static const struct file_operations proc_mountinfo_operations = { + .open = mountinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, + .poll = mounts_poll, +}; - if (mnt_ns) - m->private = mnt_ns; - else { - seq_release(inode, file); - ret = -EINVAL; - } - } - return ret; +static int mountstats_open(struct inode *inode, struct file *file) +{ + return mounts_open_common(inode, file, &mountstats_op); } static const struct file_operations proc_mountstats_operations = { @@ -1626,7 +1633,6 @@ static int proc_readfd_common(struct file * filp, void * dirent, unsigned int fd, ino; int retval; struct files_struct * files; - struct fdtable *fdt; retval = -ENOENT; if (!p) @@ -1649,9 +1655,8 @@ static int proc_readfd_common(struct file * filp, void * dirent, if (!files) goto out; rcu_read_lock(); - fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < fdt->max_fds; + fd < files_fdtable(files)->max_fds; fd++, filp->f_pos++) { char name[PROC_NUMBUF]; int len; @@ -2311,6 +2316,7 @@ static const struct pid_entry tgid_base_stuff[] = { LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), + REG("mountinfo", S_IRUGO, mountinfo), REG("mountstats", S_IRUSR, mountstats), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), @@ -2643,6 +2649,7 @@ static const struct pid_entry tid_base_stuff[] = { LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), + REG("mountinfo", S_IRUGO, mountinfo), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), diff --git a/fs/read_write.c b/fs/read_write.c index 49a98718ecdf..f0d1240a5c69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(generic_ro_fops); loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) { - long long retval; + loff_t retval; struct inode *inode = file->f_mapping->host; mutex_lock(&inode->i_mutex); @@ -60,7 +60,7 @@ EXPORT_SYMBOL(generic_file_llseek); loff_t remote_llseek(struct file *file, loff_t offset, int origin) { - long long retval; + loff_t retval; lock_kernel(); switch (origin) { @@ -91,7 +91,7 @@ EXPORT_SYMBOL(no_llseek); loff_t default_llseek(struct file *file, loff_t offset, int origin) { - long long retval; + loff_t retval; lock_kernel(); switch (origin) { diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index e0f0f098a523..74363a7aacbc 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -4,6 +4,7 @@ #include <linux/capability.h> #include <linux/fs.h> +#include <linux/mount.h> #include <linux/reiserfs_fs.h> #include <linux/time.h> #include <asm/uaccess.h> @@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { unsigned int flags; + int err = 0; switch (cmd) { case REISERFS_IOC_UNPACK: @@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (!reiserfs_attrs(inode->i_sb)) return -ENOTTY; - if (IS_RDONLY(inode)) - return -EROFS; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; - if (!is_owner_or_cap(inode)) - return -EPERM; - - if (get_user(flags, (int __user *)arg)) - return -EFAULT; - - /* Is it quota file? Do not allow user to mess with it. */ - if (IS_NOQUOTA(inode)) - return -EPERM; + if (!is_owner_or_cap(inode)) { + err = -EPERM; + goto setflags_out; + } + if (get_user(flags, (int __user *)arg)) { + err = -EFAULT; + goto setflags_out; + } + /* + * Is it quota file? Do not allow user to mess with it + */ + if (IS_NOQUOTA(inode)) { + err = -EPERM; + goto setflags_out; + } if (((flags ^ REISERFS_I(inode)-> i_attrs) & (REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - + && !capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto setflags_out; + } if ((flags & REISERFS_NOTAIL_FL) && S_ISREG(inode->i_mode)) { int result; result = reiserfs_unpack(inode, filp); - if (result) - return result; + if (result) { + err = result; + goto setflags_out; + } } sd_attrs_to_i_attrs(flags, inode); REISERFS_I(inode)->i_attrs = flags; inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - return 0; +setflags_out: + mnt_drop_write(filp->f_path.mnt); + return err; } case REISERFS_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *)arg); case REISERFS_IOC_SETVERSION: if (!is_owner_or_cap(inode)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(inode->i_generation, (int __user *)arg)) - return -EFAULT; + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + if (get_user(inode->i_generation, (int __user *)arg)) { + err = -EFAULT; + goto setversion_out; + } inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - return 0; +setversion_out: + mnt_drop_write(filp->f_path.mnt); + return err; default: return -ENOTTY; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index bb05a3e51b93..060eb3f598e7 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -38,7 +38,7 @@ #include <asm/system.h> #include <linux/time.h> -#include <asm/semaphore.h> +#include <linux/semaphore.h> #include <linux/vmalloc.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 344b9b96cc56..d7c4935c1034 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -44,7 +44,6 @@ #include <net/checksum.h> #include <linux/smp_lock.h> #include <linux/stat.h> -#include <asm/semaphore.h> #define FL_READONLY 128 #define FL_DIR_SEM_HELD 256 diff --git a/fs/select.c b/fs/select.c index 5633fe980781..00f58c5c7e05 100644 --- a/fs/select.c +++ b/fs/select.c @@ -260,7 +260,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) wait = NULL; if (retval || !*timeout || signal_pending(current)) break; - if(table.error) { + if (table.error) { retval = table.error; break; } diff --git a/fs/seq_file.c b/fs/seq_file.c index 853770274f20..3f54dbd6c49b 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -25,6 +25,7 @@ * into the buffer. In case of error ->start() and ->next() return * ERR_PTR(error). In the end of sequence they return %NULL. ->show() * returns 0 in case of success and negative number in case of error. + * Returning SEQ_SKIP means "discard this element and move on". */ int seq_open(struct file *file, const struct seq_operations *op) { @@ -114,8 +115,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (!p || IS_ERR(p)) break; err = m->op->show(m, p); - if (err) + if (err < 0) break; + if (unlikely(err)) + m->count = 0; if (m->count < m->size) goto Fill; m->op->stop(m, p); @@ -140,9 +143,10 @@ Fill: break; } err = m->op->show(m, p); - if (err || m->count == m->size) { + if (m->count == m->size || err) { m->count = offs; - break; + if (likely(err <= 0)) + break; } pos = next; } @@ -199,8 +203,12 @@ static int traverse(struct seq_file *m, loff_t offset) if (IS_ERR(p)) break; error = m->op->show(m, p); - if (error) + if (error < 0) break; + if (unlikely(error)) { + error = 0; + m->count = 0; + } if (m->count == m->size) goto Eoverflow; if (pos + m->count > offset) { @@ -239,7 +247,7 @@ Eoverflow: loff_t seq_lseek(struct file *file, loff_t offset, int origin) { struct seq_file *m = (struct seq_file *)file->private_data; - long long retval = -EINVAL; + loff_t retval = -EINVAL; mutex_lock(&m->lock); m->version = file->f_version; @@ -342,28 +350,40 @@ int seq_printf(struct seq_file *m, const char *f, ...) } EXPORT_SYMBOL(seq_printf); +static char *mangle_path(char *s, char *p, char *esc) +{ + while (s <= p) { + char c = *p++; + if (!c) { + return s; + } else if (!strchr(esc, c)) { + *s++ = c; + } else if (s + 4 > p) { + break; + } else { + *s++ = '\\'; + *s++ = '0' + ((c & 0300) >> 6); + *s++ = '0' + ((c & 070) >> 3); + *s++ = '0' + (c & 07); + } + } + return NULL; +} + +/* + * return the absolute path of 'dentry' residing in mount 'mnt'. + */ int seq_path(struct seq_file *m, struct path *path, char *esc) { if (m->count < m->size) { char *s = m->buf + m->count; char *p = d_path(path, s, m->size - m->count); if (!IS_ERR(p)) { - while (s <= p) { - char c = *p++; - if (!c) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } else if (!strchr(esc, c)) { - *s++ = c; - } else if (s + 4 > p) { - break; - } else { - *s++ = '\\'; - *s++ = '0' + ((c & 0300) >> 6); - *s++ = '0' + ((c & 070) >> 3); - *s++ = '0' + (c & 07); - } + s = mangle_path(s, p, esc); + if (s) { + p = m->buf + m->count; + m->count = s - m->buf; + return s - p; } } } @@ -372,6 +392,57 @@ int seq_path(struct seq_file *m, struct path *path, char *esc) } EXPORT_SYMBOL(seq_path); +/* + * Same as seq_path, but relative to supplied root. + * + * root may be changed, see __d_path(). + */ +int seq_path_root(struct seq_file *m, struct path *path, struct path *root, + char *esc) +{ + int err = -ENAMETOOLONG; + if (m->count < m->size) { + char *s = m->buf + m->count; + char *p; + + spin_lock(&dcache_lock); + p = __d_path(path, root, s, m->size - m->count); + spin_unlock(&dcache_lock); + err = PTR_ERR(p); + if (!IS_ERR(p)) { + s = mangle_path(s, p, esc); + if (s) { + p = m->buf + m->count; + m->count = s - m->buf; + return 0; + } + } + } + m->count = m->size; + return err; +} + +/* + * returns the path of the 'dentry' from the root of its filesystem. + */ +int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) +{ + if (m->count < m->size) { + char *s = m->buf + m->count; + char *p = dentry_path(dentry, s, m->size - m->count); + if (!IS_ERR(p)) { + s = mangle_path(s, p, esc); + if (s) { + p = m->buf + m->count; + m->count = s - m->buf; + return s - p; + } + } + } + m->count = m->size; + return -1; +} + static void *single_start(struct seq_file *p, loff_t *pos) { return NULL + (*pos == 0); diff --git a/fs/super.c b/fs/super.c index 09008dbd264e..4798350b2bc9 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,7 +37,9 @@ #include <linux/idr.h> #include <linux/kobject.h> #include <linux/mutex.h> +#include <linux/file.h> #include <asm/uaccess.h> +#include "internal.h" LIST_HEAD(super_blocks); @@ -567,10 +569,29 @@ static void mark_files_ro(struct super_block *sb) { struct file *f; +retry: file_list_lock(); list_for_each_entry(f, &sb->s_files, f_u.fu_list) { - if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) - f->f_mode &= ~FMODE_WRITE; + struct vfsmount *mnt; + if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + continue; + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + f->f_mode &= ~FMODE_WRITE; + if (file_check_writeable(f) != 0) + continue; + file_release_write(f); + mnt = mntget(f->f_path.mnt); + file_list_unlock(); + /* + * This can sleep, so we can't hold + * the file_list_lock() spinlock. + */ + mnt_drop_write(mnt); + mntput(mnt); + goto retry; } file_list_unlock(); } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4948d9bc405d..a1c3a1fab7f0 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -20,6 +20,7 @@ #include <linux/idr.h> #include <linux/completion.h> #include <linux/mutex.h> +#include <linux/slab.h> #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index baa663e69388..ade9a7e6a757 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/kobject.h> #include <linux/kallsyms.h> +#include <linux/slab.h> #include <linux/namei.h> #include <linux/poll.h> #include <linux/list.h> @@ -128,7 +129,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) ssize_t retval = 0; mutex_lock(&buffer->mutex); - if (buffer->needs_read_fill) { + if (buffer->needs_read_fill || *ppos == 0) { retval = fill_read_buffer(file->f_path.dentry,buffer); if (retval) goto out; @@ -409,8 +410,7 @@ static int sysfs_release(struct inode *inode, struct file *filp) * return POLLERR|POLLPRI, and select will return the fd whether * it is waiting for read, write, or exceptions. * Once poll/select indicates that the value has changed, you - * need to close and re-open the file, as simply seeking and reading - * again will not get new data, or reset the state of 'poll'. + * need to close and re-open the file, or seek to 0 and read again. * Reminder: this only works for attributes which actively support * it, and it is not possible to test an attribute from userspace * to see if it supports poll (Neither 'poll' nor 'select' return diff --git a/fs/udf/Makefile b/fs/udf/Makefile index be845e7540ef..0d4503f7446d 100644 --- a/fs/udf/Makefile +++ b/fs/udf/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_UDF_FS) += udf.o udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ partition.o super.o truncate.o symlink.o fsync.o \ - crc.o directory.o misc.o udftime.o unicode.o + directory.o misc.o udftime.o unicode.o diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index f855dcbbdfb8..1b809bd494bd 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -149,8 +149,7 @@ static bool udf_add_free_space(struct udf_sb_info *sbi, return false; lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; - lvid->freeSpaceTable[partition] = cpu_to_le32(le32_to_cpu( - lvid->freeSpaceTable[partition]) + cnt); + le32_add_cpu(&lvid->freeSpaceTable[partition], cnt); return true; } @@ -589,10 +588,8 @@ static void udf_table_free_blocks(struct super_block *sb, sptr = oepos.bh->b_data + epos.offset; aed = (struct allocExtDesc *) oepos.bh->b_data; - aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu( - aed->lengthAllocDescs) + - adsize); + le32_add_cpu(&aed->lengthAllocDescs, + adsize); } else { sptr = iinfo->i_ext.i_data + epos.offset; @@ -645,9 +642,7 @@ static void udf_table_free_blocks(struct super_block *sb, mark_inode_dirty(table); } else { aed = (struct allocExtDesc *)epos.bh->b_data; - aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu( - aed->lengthAllocDescs) + adsize); + le32_add_cpu(&aed->lengthAllocDescs, adsize); udf_update_tag(epos.bh->b_data, epos.offset); mark_buffer_dirty(epos.bh); } diff --git a/fs/udf/crc.c b/fs/udf/crc.c deleted file mode 100644 index b1661296e786..000000000000 --- a/fs/udf/crc.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * crc.c - * - * PURPOSE - * Routines to generate, calculate, and test a 16-bit CRC. - * - * DESCRIPTION - * The CRC code was devised by Don P. Mitchell of AT&T Bell Laboratories - * and Ned W. Rhodes of Software Systems Group. It has been published in - * "Design and Validation of Computer Protocols", Prentice Hall, - * Englewood Cliffs, NJ, 1991, Chapter 3, ISBN 0-13-539925-4. - * - * Copyright is held by AT&T. - * - * AT&T gives permission for the free use of the CRC source code. - * - * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - */ - -#include "udfdecl.h" - -static uint16_t crc_table[256] = { - 0x0000U, 0x1021U, 0x2042U, 0x3063U, 0x4084U, 0x50a5U, 0x60c6U, 0x70e7U, - 0x8108U, 0x9129U, 0xa14aU, 0xb16bU, 0xc18cU, 0xd1adU, 0xe1ceU, 0xf1efU, - 0x1231U, 0x0210U, 0x3273U, 0x2252U, 0x52b5U, 0x4294U, 0x72f7U, 0x62d6U, - 0x9339U, 0x8318U, 0xb37bU, 0xa35aU, 0xd3bdU, 0xc39cU, 0xf3ffU, 0xe3deU, - 0x2462U, 0x3443U, 0x0420U, 0x1401U, 0x64e6U, 0x74c7U, 0x44a4U, 0x5485U, - 0xa56aU, 0xb54bU, 0x8528U, 0x9509U, 0xe5eeU, 0xf5cfU, 0xc5acU, 0xd58dU, - 0x3653U, 0x2672U, 0x1611U, 0x0630U, 0x76d7U, 0x66f6U, 0x5695U, 0x46b4U, - 0xb75bU, 0xa77aU, 0x9719U, 0x8738U, 0xf7dfU, 0xe7feU, 0xd79dU, 0xc7bcU, - 0x48c4U, 0x58e5U, 0x6886U, 0x78a7U, 0x0840U, 0x1861U, 0x2802U, 0x3823U, - 0xc9ccU, 0xd9edU, 0xe98eU, 0xf9afU, 0x8948U, 0x9969U, 0xa90aU, 0xb92bU, - 0x5af5U, 0x4ad4U, 0x7ab7U, 0x6a96U, 0x1a71U, 0x0a50U, 0x3a33U, 0x2a12U, - 0xdbfdU, 0xcbdcU, 0xfbbfU, 0xeb9eU, 0x9b79U, 0x8b58U, 0xbb3bU, 0xab1aU, - 0x6ca6U, 0x7c87U, 0x4ce4U, 0x5cc5U, 0x2c22U, 0x3c03U, 0x0c60U, 0x1c41U, - 0xedaeU, 0xfd8fU, 0xcdecU, 0xddcdU, 0xad2aU, 0xbd0bU, 0x8d68U, 0x9d49U, - 0x7e97U, 0x6eb6U, 0x5ed5U, 0x4ef4U, 0x3e13U, 0x2e32U, 0x1e51U, 0x0e70U, - 0xff9fU, 0xefbeU, 0xdfddU, 0xcffcU, 0xbf1bU, 0xaf3aU, 0x9f59U, 0x8f78U, - 0x9188U, 0x81a9U, 0xb1caU, 0xa1ebU, 0xd10cU, 0xc12dU, 0xf14eU, 0xe16fU, - 0x1080U, 0x00a1U, 0x30c2U, 0x20e3U, 0x5004U, 0x4025U, 0x7046U, 0x6067U, - 0x83b9U, 0x9398U, 0xa3fbU, 0xb3daU, 0xc33dU, 0xd31cU, 0xe37fU, 0xf35eU, - 0x02b1U, 0x1290U, 0x22f3U, 0x32d2U, 0x4235U, 0x5214U, 0x6277U, 0x7256U, - 0xb5eaU, 0xa5cbU, 0x95a8U, 0x8589U, 0xf56eU, 0xe54fU, 0xd52cU, 0xc50dU, - 0x34e2U, 0x24c3U, 0x14a0U, 0x0481U, 0x7466U, 0x6447U, 0x5424U, 0x4405U, - 0xa7dbU, 0xb7faU, 0x8799U, 0x97b8U, 0xe75fU, 0xf77eU, 0xc71dU, 0xd73cU, - 0x26d3U, 0x36f2U, 0x0691U, 0x16b0U, 0x6657U, 0x7676U, 0x4615U, 0x5634U, - 0xd94cU, 0xc96dU, 0xf90eU, 0xe92fU, 0x99c8U, 0x89e9U, 0xb98aU, 0xa9abU, - 0x5844U, 0x4865U, 0x7806U, 0x6827U, 0x18c0U, 0x08e1U, 0x3882U, 0x28a3U, - 0xcb7dU, 0xdb5cU, 0xeb3fU, 0xfb1eU, 0x8bf9U, 0x9bd8U, 0xabbbU, 0xbb9aU, - 0x4a75U, 0x5a54U, 0x6a37U, 0x7a16U, 0x0af1U, 0x1ad0U, 0x2ab3U, 0x3a92U, - 0xfd2eU, 0xed0fU, 0xdd6cU, 0xcd4dU, 0xbdaaU, 0xad8bU, 0x9de8U, 0x8dc9U, - 0x7c26U, 0x6c07U, 0x5c64U, 0x4c45U, 0x3ca2U, 0x2c83U, 0x1ce0U, 0x0cc1U, - 0xef1fU, 0xff3eU, 0xcf5dU, 0xdf7cU, 0xaf9bU, 0xbfbaU, 0x8fd9U, 0x9ff8U, - 0x6e17U, 0x7e36U, 0x4e55U, 0x5e74U, 0x2e93U, 0x3eb2U, 0x0ed1U, 0x1ef0U -}; - -/* - * udf_crc - * - * PURPOSE - * Calculate a 16-bit CRC checksum using ITU-T V.41 polynomial. - * - * DESCRIPTION - * The OSTA-UDF(tm) 1.50 standard states that using CRCs is mandatory. - * The polynomial used is: x^16 + x^12 + x^15 + 1 - * - * PRE-CONDITIONS - * data Pointer to the data block. - * size Size of the data block. - * - * POST-CONDITIONS - * <return> CRC of the data block. - * - * HISTORY - * July 21, 1997 - Andrew E. Mileski - * Adapted from OSTA-UDF(tm) 1.50 standard. - */ -uint16_t udf_crc(uint8_t *data, uint32_t size, uint16_t crc) -{ - while (size--) - crc = crc_table[(crc >> 8 ^ *(data++)) & 0xffU] ^ (crc << 8); - - return crc; -} - -/****************************************************************************/ -#if defined(TEST) - -/* - * PURPOSE - * Test udf_crc() - * - * HISTORY - * July 21, 1997 - Andrew E. Mileski - * Adapted from OSTA-UDF(tm) 1.50 standard. - */ - -unsigned char bytes[] = { 0x70U, 0x6AU, 0x77U }; - -int main(void) -{ - unsigned short x; - - x = udf_crc(bytes, sizeof bytes); - printf("udf_crc: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); - - return 0; -} - -#endif /* defined(TEST) */ - -/****************************************************************************/ -#if defined(GENERATE) - -/* - * PURPOSE - * Generate a table for fast 16-bit CRC calculations (any polynomial). - * - * DESCRIPTION - * The ITU-T V.41 polynomial is 010041. - * - * HISTORY - * July 21, 1997 - Andrew E. Mileski - * Adapted from OSTA-UDF(tm) 1.50 standard. - */ - -#include <stdio.h> - -int main(int argc, char **argv) -{ - unsigned long crc, poly; - int n, i; - - /* Get the polynomial */ - sscanf(argv[1], "%lo", &poly); - if (poly & 0xffff0000U) { - fprintf(stderr, "polynomial is too large\en"); - exit(1); - } - - printf("/* CRC 0%o */\n", poly); - - /* Create a table */ - printf("static unsigned short crc_table[256] = {\n"); - for (n = 0; n < 256; n++) { - if (n % 8 == 0) - printf("\t"); - crc = n << 8; - for (i = 0; i < 8; i++) { - if (crc & 0x8000U) - crc = (crc << 1) ^ poly; - else - crc <<= 1; - crc &= 0xFFFFU; - } - if (n == 255) - printf("0x%04xU ", crc); - else - printf("0x%04xU, ", crc); - if (n % 8 == 7) - printf("\n"); - } - printf("};\n"); - - return 0; -} - -#endif /* defined(GENERATE) */ diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 8d8643ada199..62dc270c69d1 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -39,13 +39,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, void *dirent) { - struct udf_fileident_bh fibh; + struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL}; struct fileIdentDesc *fi = NULL; struct fileIdentDesc cfi; int block, iblock; loff_t nf_pos = (filp->f_pos - 1) << 2; int flen; - char fname[UDF_NAME_LEN]; + char *fname = NULL; char *nameptr; uint16_t liu; uint8_t lfi; @@ -54,23 +54,32 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, kernel_lb_addr eloc; uint32_t elen; sector_t offset; - int i, num; + int i, num, ret = 0; unsigned int dt_type; struct extent_position epos = { NULL, 0, {0, 0} }; struct udf_inode_info *iinfo; if (nf_pos >= size) - return 0; + goto out; + + fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); + if (!fname) { + ret = -ENOMEM; + goto out; + } if (nf_pos == 0) nf_pos = udf_ext0_offset(dir); fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); iinfo = UDF_I(dir); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - fibh.sbh = fibh.ebh = NULL; - } else if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, + &epos, &eloc, &elen, &offset) + != (EXT_RECORDED_ALLOCATED >> 30)) { + ret = -ENOENT; + goto out; + } block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) @@ -83,8 +92,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, } if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { - brelse(epos.bh); - return -EIO; + ret = -EIO; + goto out; } if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) { @@ -105,9 +114,6 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, brelse(bha[i]); } } - } else { - brelse(epos.bh); - return -ENOENT; } while (nf_pos < size) { @@ -115,13 +121,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); - if (!fi) { - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); - brelse(epos.bh); - return 0; - } + if (!fi) + goto out; liu = le16_to_cpu(cfi.lengthOfImpUse); lfi = cfi.lengthFileIdent; @@ -167,53 +168,23 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, dt_type = DT_UNKNOWN; } - if (flen) { - if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0) { - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); - brelse(epos.bh); - return 0; - } - } + if (flen && filldir(dirent, fname, flen, filp->f_pos, + iblock, dt_type) < 0) + goto out; } /* end while */ filp->f_pos = (nf_pos >> 2) + 1; +out: if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); brelse(epos.bh); + kfree(fname); - return 0; + return ret; } -/* - * udf_readdir - * - * PURPOSE - * Read a directory entry. - * - * DESCRIPTION - * Optional - sys_getdents() will return -ENOTDIR if this routine is not - * available. - * - * Refer to sys_getdents() in fs/readdir.c - * sys_getdents() -> . - * - * PRE-CONDITIONS - * filp Pointer to directory file. - * buf Pointer to directory entry buffer. - * filldir Pointer to filldir function. - * - * POST-CONDITIONS - * <return> >=0 on success. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ - static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *dir = filp->f_path.dentry->d_inode; diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index 56387711589b..a0974df82b31 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h @@ -70,19 +70,6 @@ typedef struct { uint8_t microseconds; } __attribute__ ((packed)) timestamp; -typedef struct { - uint16_t typeAndTimezone; - int16_t year; - uint8_t month; - uint8_t day; - uint8_t hour; - uint8_t minute; - uint8_t second; - uint8_t centiseconds; - uint8_t hundredsOfMicroseconds; - uint8_t microseconds; -} __attribute__ ((packed)) kernel_timestamp; - /* Type and Time Zone (ECMA 167r3 1/7.3.1) */ #define TIMESTAMP_TYPE_MASK 0xF000 #define TIMESTAMP_TYPE_CUT 0x0000 diff --git a/fs/udf/file.c b/fs/udf/file.c index 97c71ae7c689..0ed6e146a0d9 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -27,7 +27,6 @@ #include "udfdecl.h" #include <linux/fs.h> -#include <linux/udf_fs.h> #include <asm/uaccess.h> #include <linux/kernel.h> #include <linux/string.h> /* memset */ @@ -144,40 +143,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return retval; } -/* - * udf_ioctl - * - * PURPOSE - * Issue an ioctl. - * - * DESCRIPTION - * Optional - sys_ioctl() will return -ENOTTY if this routine is not - * available, and the ioctl cannot be handled without filesystem help. - * - * sys_ioctl() handles these ioctls that apply only to regular files: - * FIBMAP [requires udf_block_map()], FIGETBSZ, FIONREAD - * These ioctls are also handled by sys_ioctl(): - * FIOCLEX, FIONCLEX, FIONBIO, FIOASYNC - * All other ioctls are passed to the filesystem. - * - * Refer to sys_ioctl() in fs/ioctl.c - * sys_ioctl() -> . - * - * PRE-CONDITIONS - * inode Pointer to inode that ioctl was issued on. - * filp Pointer to file that ioctl was issued on. - * cmd The ioctl command. - * arg The ioctl argument [can be interpreted as a - * user-space pointer if desired]. - * - * POST-CONDITIONS - * <return> Success (>=0) or an error code (<=0) that - * sys_ioctl() will return. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -225,18 +190,6 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, return result; } -/* - * udf_release_file - * - * PURPOSE - * Called when all references to the file are closed - * - * DESCRIPTION - * Discard prealloced blocks - * - * HISTORY - * - */ static int udf_release_file(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE) { diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 84360315aca2..eb9cfa23dc3d 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -21,7 +21,6 @@ #include "udfdecl.h" #include <linux/fs.h> #include <linux/quotaops.h> -#include <linux/udf_fs.h> #include <linux/sched.h> #include <linux/slab.h> @@ -47,11 +46,9 @@ void udf_free_inode(struct inode *inode) struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sbi); if (S_ISDIR(inode->i_mode)) - lvidiu->numDirs = - cpu_to_le32(le32_to_cpu(lvidiu->numDirs) - 1); + le32_add_cpu(&lvidiu->numDirs, -1); else - lvidiu->numFiles = - cpu_to_le32(le32_to_cpu(lvidiu->numFiles) - 1); + le32_add_cpu(&lvidiu->numFiles, -1); mark_buffer_dirty(sbi->s_lvid_bh); } @@ -105,11 +102,9 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) lvhd = (struct logicalVolHeaderDesc *) (lvid->logicalVolContentsUse); if (S_ISDIR(mode)) - lvidiu->numDirs = - cpu_to_le32(le32_to_cpu(lvidiu->numDirs) + 1); + le32_add_cpu(&lvidiu->numDirs, 1); else - lvidiu->numFiles = - cpu_to_le32(le32_to_cpu(lvidiu->numFiles) + 1); + le32_add_cpu(&lvidiu->numFiles, 1); iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID); if (!(++uniqueID & 0x00000000FFFFFFFFUL)) uniqueID += 16; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 24cfa55d0fdc..6e74b117aaf0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -37,6 +37,7 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/slab.h> +#include <linux/crc-itu-t.h> #include "udf_i.h" #include "udf_sb.h" @@ -66,22 +67,7 @@ static void udf_update_extents(struct inode *, struct extent_position *); static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); -/* - * udf_delete_inode - * - * PURPOSE - * Clean-up before the specified inode is destroyed. - * - * DESCRIPTION - * This routine is called when the kernel destroys an inode structure - * ie. when iput() finds i_count == 0. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - * - * Called at the last iput() if i_nlink is zero. - */ + void udf_delete_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); @@ -323,9 +309,6 @@ static int udf_get_block(struct inode *inode, sector_t block, lock_kernel(); - if (block < 0) - goto abort_negative; - iinfo = UDF_I(inode); if (block == iinfo->i_next_alloc_block + 1) { iinfo->i_next_alloc_block++; @@ -347,10 +330,6 @@ static int udf_get_block(struct inode *inode, sector_t block, abort: unlock_kernel(); return err; - -abort_negative: - udf_warning(inode->i_sb, "udf_get_block", "block < 0"); - goto abort; } static struct buffer_head *udf_getblk(struct inode *inode, long block, @@ -1116,42 +1095,36 @@ static void __udf_read_inode(struct inode *inode) fe = (struct fileEntry *)bh->b_data; if (fe->icbTag.strategyType == cpu_to_le16(4096)) { - struct buffer_head *ibh = NULL, *nbh = NULL; - struct indirectEntry *ie; + struct buffer_head *ibh; ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1, &ident); - if (ident == TAG_IDENT_IE) { - if (ibh) { - kernel_lb_addr loc; - ie = (struct indirectEntry *)ibh->b_data; - - loc = lelb_to_cpu(ie->indirectICB.extLocation); - - if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, loc, 0, - &ident))) { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - memcpy(&iinfo->i_location, - &loc, - sizeof(kernel_lb_addr)); - brelse(bh); - brelse(ibh); - brelse(nbh); - __udf_read_inode(inode); - return; - } else { - brelse(nbh); - brelse(ibh); - } - } else { + if (ident == TAG_IDENT_IE && ibh) { + struct buffer_head *nbh = NULL; + kernel_lb_addr loc; + struct indirectEntry *ie; + + ie = (struct indirectEntry *)ibh->b_data; + loc = lelb_to_cpu(ie->indirectICB.extLocation); + + if (ie->indirectICB.extLength && + (nbh = udf_read_ptagged(inode->i_sb, loc, 0, + &ident))) { + if (ident == TAG_IDENT_FE || + ident == TAG_IDENT_EFE) { + memcpy(&iinfo->i_location, + &loc, + sizeof(kernel_lb_addr)); + brelse(bh); brelse(ibh); + brelse(nbh); + __udf_read_inode(inode); + return; } + brelse(nbh); } - } else { - brelse(ibh); } + brelse(ibh); } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { printk(KERN_ERR "udf: unsupported strategy type: %d\n", le16_to_cpu(fe->icbTag.strategyType)); @@ -1168,8 +1141,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) { struct fileEntry *fe; struct extendedFileEntry *efe; - time_t convtime; - long convtime_usec; int offset; struct udf_sb_info *sbi = UDF_SB(inode->i_sb); struct udf_inode_info *iinfo = UDF_I(inode); @@ -1257,29 +1228,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(fe->accessTime))) { - inode->i_atime.tv_sec = convtime; - inode->i_atime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime)) inode->i_atime = sbi->s_record_time; - } - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(fe->modificationTime))) { - inode->i_mtime.tv_sec = convtime; - inode->i_mtime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&inode->i_mtime, + fe->modificationTime)) inode->i_mtime = sbi->s_record_time; - } - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(fe->attrTime))) { - inode->i_ctime.tv_sec = convtime; - inode->i_ctime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime)) inode->i_ctime = sbi->s_record_time; - } iinfo->i_unique = le64_to_cpu(fe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); @@ -1289,37 +1246,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->accessTime))) { - inode->i_atime.tv_sec = convtime; - inode->i_atime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime)) inode->i_atime = sbi->s_record_time; - } - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->modificationTime))) { - inode->i_mtime.tv_sec = convtime; - inode->i_mtime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&inode->i_mtime, + efe->modificationTime)) inode->i_mtime = sbi->s_record_time; - } - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->createTime))) { - iinfo->i_crtime.tv_sec = convtime; - iinfo->i_crtime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime)) iinfo->i_crtime = sbi->s_record_time; - } - if (udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->attrTime))) { - inode->i_ctime.tv_sec = convtime; - inode->i_ctime.tv_nsec = convtime_usec * 1000; - } else { + if (!udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime)) inode->i_ctime = sbi->s_record_time; - } iinfo->i_unique = le64_to_cpu(efe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); @@ -1338,6 +1276,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) case ICBTAG_FILE_TYPE_REALTIME: case ICBTAG_FILE_TYPE_REGULAR: case ICBTAG_FILE_TYPE_UNDEF: + case ICBTAG_FILE_TYPE_VAT20: if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) inode->i_data.a_ops = &udf_adinicb_aops; else @@ -1363,6 +1302,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_op = &page_symlink_inode_operations; inode->i_mode = S_IFLNK | S_IRWXUGO; break; + case ICBTAG_FILE_TYPE_MAIN: + udf_debug("METADATA FILE-----\n"); + break; + case ICBTAG_FILE_TYPE_MIRROR: + udf_debug("METADATA MIRROR FILE-----\n"); + break; + case ICBTAG_FILE_TYPE_BITMAP: + udf_debug("METADATA BITMAP FILE-----\n"); + break; default: printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " "file type=%d\n", inode->i_ino, @@ -1416,21 +1364,6 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) return mode; } -/* - * udf_write_inode - * - * PURPOSE - * Write out the specified inode. - * - * DESCRIPTION - * This routine is called whenever an inode is synced. - * Currently this routine is just a placeholder. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ - int udf_write_inode(struct inode *inode, int sync) { int ret; @@ -1455,7 +1388,6 @@ static int udf_update_inode(struct inode *inode, int do_sync) uint32_t udfperms; uint16_t icbflags; uint16_t crclen; - kernel_timestamp cpu_time; int err = 0; struct udf_sb_info *sbi = UDF_SB(inode->i_sb); unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; @@ -1488,9 +1420,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) iinfo->i_location. logicalBlockNum); use->descTag.descCRCLength = cpu_to_le16(crclen); - use->descTag.descCRC = cpu_to_le16(udf_crc((char *)use + - sizeof(tag), crclen, - 0)); + use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use + + sizeof(tag), + crclen)); use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); mark_buffer_dirty(bh); @@ -1558,12 +1490,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> (blocksize_bits - 9)); - if (udf_time_to_stamp(&cpu_time, inode->i_atime)) - fe->accessTime = cpu_to_lets(cpu_time); - if (udf_time_to_stamp(&cpu_time, inode->i_mtime)) - fe->modificationTime = cpu_to_lets(cpu_time); - if (udf_time_to_stamp(&cpu_time, inode->i_ctime)) - fe->attrTime = cpu_to_lets(cpu_time); + udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); + udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); + udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); memset(&(fe->impIdent), 0, sizeof(regid)); strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; @@ -1598,14 +1527,10 @@ static int udf_update_inode(struct inode *inode, int do_sync) iinfo->i_crtime.tv_nsec > inode->i_ctime.tv_nsec)) iinfo->i_crtime = inode->i_ctime; - if (udf_time_to_stamp(&cpu_time, inode->i_atime)) - efe->accessTime = cpu_to_lets(cpu_time); - if (udf_time_to_stamp(&cpu_time, inode->i_mtime)) - efe->modificationTime = cpu_to_lets(cpu_time); - if (udf_time_to_stamp(&cpu_time, iinfo->i_crtime)) - efe->createTime = cpu_to_lets(cpu_time); - if (udf_time_to_stamp(&cpu_time, inode->i_ctime)) - efe->attrTime = cpu_to_lets(cpu_time); + udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime); + udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime); + udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); + udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); memset(&(efe->impIdent), 0, sizeof(regid)); strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); @@ -1660,8 +1585,8 @@ static int udf_update_inode(struct inode *inode, int do_sync) crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - sizeof(tag); fe->descTag.descCRCLength = cpu_to_le16(crclen); - fe->descTag.descCRC = cpu_to_le16(udf_crc((char *)fe + sizeof(tag), - crclen, 0)); + fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(tag), + crclen)); fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); /* write the data blocks */ @@ -1778,9 +1703,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, if (epos->bh) { aed = (struct allocExtDesc *)epos->bh->b_data; - aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu( - aed->lengthAllocDescs) + adsize); + le32_add_cpu(&aed->lengthAllocDescs, adsize); } else { iinfo->i_lenAlloc += adsize; mark_inode_dirty(inode); @@ -1830,9 +1753,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, mark_inode_dirty(inode); } else { aed = (struct allocExtDesc *)epos->bh->b_data; - aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + - adsize); + le32_add_cpu(&aed->lengthAllocDescs, adsize); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) udf_update_tag(epos->bh->b_data, @@ -2046,9 +1967,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, mark_inode_dirty(inode); } else { aed = (struct allocExtDesc *)oepos.bh->b_data; - aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - - (2 * adsize)); + le32_add_cpu(&aed->lengthAllocDescs, -(2 * adsize)); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) udf_update_tag(oepos.bh->b_data, @@ -2065,9 +1984,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, mark_inode_dirty(inode); } else { aed = (struct allocExtDesc *)oepos.bh->b_data; - aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - - adsize); + le32_add_cpu(&aed->lengthAllocDescs, -adsize); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) udf_update_tag(oepos.bh->b_data, @@ -2095,11 +2012,6 @@ int8_t inode_bmap(struct inode *inode, sector_t block, int8_t etype; struct udf_inode_info *iinfo; - if (block < 0) { - printk(KERN_ERR "udf: inode_bmap: block < 0\n"); - return -1; - } - iinfo = UDF_I(inode); pos->offset = 0; pos->block = iinfo->i_location; diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 579bae71e67e..703843f30ffd 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -23,7 +23,6 @@ #include <linux/cdrom.h> #include <asm/uaccess.h> -#include <linux/udf_fs.h> #include "udf_sb.h" unsigned int udf_get_last_session(struct super_block *sb) diff --git a/fs/udf/misc.c b/fs/udf/misc.c index a1d6da0caf71..84bf0fd4a4f1 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -23,8 +23,8 @@ #include <linux/fs.h> #include <linux/string.h> -#include <linux/udf_fs.h> #include <linux/buffer_head.h> +#include <linux/crc-itu-t.h> #include "udf_i.h" #include "udf_sb.h" @@ -136,8 +136,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size, /* rewrite CRC + checksum of eahd */ crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); eahd->descTag.descCRCLength = cpu_to_le16(crclen); - eahd->descTag.descCRC = cpu_to_le16(udf_crc((char *)eahd + - sizeof(tag), crclen, 0)); + eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd + + sizeof(tag), crclen)); eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); iinfo->i_lenEAttr += size; return (struct genericFormat *)&ea[offset]; @@ -204,16 +204,15 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, { tag *tag_p; struct buffer_head *bh = NULL; - struct udf_sb_info *sbi = UDF_SB(sb); /* Read the block */ if (block == 0xFFFFFFFF) return NULL; - bh = udf_tread(sb, block + sbi->s_session); + bh = udf_tread(sb, block); if (!bh) { udf_debug("block=%d, location=%d: read failed\n", - block + sbi->s_session, location); + block, location); return NULL; } @@ -223,8 +222,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, if (location != le32_to_cpu(tag_p->tagLocation)) { udf_debug("location mismatch block %u, tag %u != %u\n", - block + sbi->s_session, - le32_to_cpu(tag_p->tagLocation), location); + block, le32_to_cpu(tag_p->tagLocation), location); goto error_out; } @@ -244,13 +242,13 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, /* Verify the descriptor CRC */ if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || - le16_to_cpu(tag_p->descCRC) == udf_crc(bh->b_data + sizeof(tag), - le16_to_cpu(tag_p->descCRCLength), 0)) + le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, + bh->b_data + sizeof(tag), + le16_to_cpu(tag_p->descCRCLength))) return bh; - udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", - block + sbi->s_session, le16_to_cpu(tag_p->descCRC), - le16_to_cpu(tag_p->descCRCLength)); + udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block, + le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); error_out: brelse(bh); @@ -270,7 +268,7 @@ void udf_update_tag(char *data, int length) length -= sizeof(tag); tptr->descCRCLength = cpu_to_le16(length); - tptr->descCRC = cpu_to_le16(udf_crc(data + sizeof(tag), length, 0)); + tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(tag), length)); tptr->tagChecksum = udf_tag_checksum(tptr); } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 112a5fb0b27b..ba5537d4bc15 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -31,6 +31,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/sched.h> +#include <linux/crc-itu-t.h> static inline int udf_match(int len1, const char *name1, int len2, const char *name2) @@ -97,25 +98,23 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, memset(fibh->ebh->b_data, 0x00, padlen + offset); } - crc = udf_crc((uint8_t *)cfi + sizeof(tag), - sizeof(struct fileIdentDesc) - sizeof(tag), 0); + crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(tag), + sizeof(struct fileIdentDesc) - sizeof(tag)); if (fibh->sbh == fibh->ebh) { - crc = udf_crc((uint8_t *)sfi->impUse, + crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, crclen + sizeof(tag) - - sizeof(struct fileIdentDesc), crc); + sizeof(struct fileIdentDesc)); } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { - crc = udf_crc(fibh->ebh->b_data + + crc = crc_itu_t(crc, fibh->ebh->b_data + sizeof(struct fileIdentDesc) + fibh->soffset, crclen + sizeof(tag) - - sizeof(struct fileIdentDesc), - crc); + sizeof(struct fileIdentDesc)); } else { - crc = udf_crc((uint8_t *)sfi->impUse, - -fibh->soffset - sizeof(struct fileIdentDesc), - crc); - crc = udf_crc(fibh->ebh->b_data, fibh->eoffset, crc); + crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, + -fibh->soffset - sizeof(struct fileIdentDesc)); + crc = crc_itu_t(crc, fibh->ebh->b_data, fibh->eoffset); } cfi->descTag.descCRC = cpu_to_le16(crc); @@ -149,7 +148,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, struct fileIdentDesc *fi = NULL; loff_t f_pos; int block, flen; - char fname[UDF_NAME_LEN]; + char *fname = NULL; char *nameptr; uint8_t lfi; uint16_t liu; @@ -163,12 +162,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, size = udf_ext0_offset(dir) + dir->i_size; f_pos = udf_ext0_offset(dir); + fibh->sbh = fibh->ebh = NULL; fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); - if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - fibh->sbh = fibh->ebh = NULL; - else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, - &epos, &eloc, &elen, &offset) == - (EXT_RECORDED_ALLOCATED >> 30)) { + if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, + &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) + goto out_err; block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) @@ -179,25 +178,19 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, offset = 0; fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); - if (!fibh->sbh) { - brelse(epos.bh); - return NULL; - } - } else { - brelse(epos.bh); - return NULL; + if (!fibh->sbh) + goto out_err; } + fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); + if (!fname) + goto out_err; + while (f_pos < size) { fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset); - if (!fi) { - if (fibh->sbh != fibh->ebh) - brelse(fibh->ebh); - brelse(fibh->sbh); - brelse(epos.bh); - return NULL; - } + if (!fi) + goto out_err; liu = le16_to_cpu(cfi->lengthOfImpUse); lfi = cfi->lengthFileIdent; @@ -237,53 +230,22 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); if (flen && udf_match(flen, fname, dentry->d_name.len, - dentry->d_name.name)) { - brelse(epos.bh); - return fi; - } + dentry->d_name.name)) + goto out_ok; } +out_err: + fi = NULL; if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); +out_ok: brelse(epos.bh); + kfree(fname); - return NULL; + return fi; } -/* - * udf_lookup - * - * PURPOSE - * Look-up the inode for a given name. - * - * DESCRIPTION - * Required - lookup_dentry() will return -ENOTDIR if this routine is not - * available for a directory. The filesystem is useless if this routine is - * not available for at least the filesystem's root directory. - * - * This routine is passed an incomplete dentry - it must be completed by - * calling d_add(dentry, inode). If the name does not exist, then the - * specified inode must be set to null. An error should only be returned - * when the lookup fails for a reason other than the name not existing. - * Note that the directory inode semaphore is held during the call. - * - * Refer to lookup_dentry() in fs/namei.c - * lookup_dentry() -> lookup() -> real_lookup() -> . - * - * PRE-CONDITIONS - * dir Pointer to inode of parent directory. - * dentry Pointer to dentry to complete. - * nd Pointer to lookup nameidata - * - * POST-CONDITIONS - * <return> Zero on success. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ - static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -336,11 +298,9 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, { struct super_block *sb = dir->i_sb; struct fileIdentDesc *fi = NULL; - char name[UDF_NAME_LEN], fname[UDF_NAME_LEN]; + char *name = NULL; int namelen; loff_t f_pos; - int flen; - char *nameptr; loff_t size = udf_ext0_offset(dir) + dir->i_size; int nfidlen; uint8_t lfi; @@ -352,16 +312,23 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, struct extent_position epos = {}; struct udf_inode_info *dinfo; + fibh->sbh = fibh->ebh = NULL; + name = kmalloc(UDF_NAME_LEN, GFP_NOFS); + if (!name) { + *err = -ENOMEM; + goto out_err; + } + if (dentry) { if (!dentry->d_name.len) { *err = -EINVAL; - return NULL; + goto out_err; } namelen = udf_put_filename(sb, dentry->d_name.name, name, dentry->d_name.len); if (!namelen) { *err = -ENAMETOOLONG; - return NULL; + goto out_err; } } else { namelen = 0; @@ -373,11 +340,14 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); dinfo = UDF_I(dir); - if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - fibh->sbh = fibh->ebh = NULL; - else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, - &epos, &eloc, &elen, &offset) == - (EXT_RECORDED_ALLOCATED >> 30)) { + if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, + &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) { + block = udf_get_lb_pblock(dir->i_sb, + dinfo->i_location, 0); + fibh->soffset = fibh->eoffset = sb->s_blocksize; + goto add; + } block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) @@ -389,17 +359,11 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); if (!fibh->sbh) { - brelse(epos.bh); *err = -EIO; - return NULL; + goto out_err; } block = dinfo->i_location.logicalBlockNum; - } else { - block = udf_get_lb_pblock(dir->i_sb, dinfo->i_location, 0); - fibh->sbh = fibh->ebh = NULL; - fibh->soffset = fibh->eoffset = sb->s_blocksize; - goto add; } while (f_pos < size) { @@ -407,41 +371,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, &elen, &offset); if (!fi) { - if (fibh->sbh != fibh->ebh) - brelse(fibh->ebh); - brelse(fibh->sbh); - brelse(epos.bh); *err = -EIO; - return NULL; + goto out_err; } liu = le16_to_cpu(cfi->lengthOfImpUse); lfi = cfi->lengthFileIdent; - if (fibh->sbh == fibh->ebh) - nameptr = fi->fileIdent + liu; - else { - int poffset; /* Unpaded ending offset */ - - poffset = fibh->soffset + sizeof(struct fileIdentDesc) + - liu + lfi; - - if (poffset >= lfi) - nameptr = (char *)(fibh->ebh->b_data + - poffset - lfi); - else { - nameptr = fname; - memcpy(nameptr, fi->fileIdent + liu, - lfi - poffset); - memcpy(nameptr + lfi - poffset, - fibh->ebh->b_data, poffset); - } - } - if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { if (((sizeof(struct fileIdentDesc) + liu + lfi + 3) & ~3) == nfidlen) { - brelse(epos.bh); cfi->descTag.tagSerialNum = cpu_to_le16(1); cfi->fileVersionNum = cpu_to_le16(1); cfi->fileCharacteristics = 0; @@ -449,27 +388,13 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, cfi->lengthOfImpUse = cpu_to_le16(0); if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) - return fi; + goto out_ok; else { *err = -EIO; - return NULL; + goto out_err; } } } - - if (!lfi || !dentry) - continue; - - flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); - if (flen && udf_match(flen, fname, dentry->d_name.len, - dentry->d_name.name)) { - if (fibh->sbh != fibh->ebh) - brelse(fibh->ebh); - brelse(fibh->sbh); - brelse(epos.bh); - *err = -EEXIST; - return NULL; - } } add: @@ -496,7 +421,7 @@ add: fibh->sbh = fibh->ebh = udf_expand_dir_adinicb(dir, &block, err); if (!fibh->sbh) - return NULL; + goto out_err; epos.block = dinfo->i_location; epos.offset = udf_file_entry_alloc_offset(dir); /* Load extent udf_expand_dir_adinicb() has created */ @@ -537,11 +462,8 @@ add: dir->i_sb->s_blocksize_bits); fibh->ebh = udf_bread(dir, f_pos >> dir->i_sb->s_blocksize_bits, 1, err); - if (!fibh->ebh) { - brelse(epos.bh); - brelse(fibh->sbh); - return NULL; - } + if (!fibh->ebh) + goto out_err; if (!fibh->soffset) { if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == @@ -572,20 +494,25 @@ add: cfi->lengthFileIdent = namelen; cfi->lengthOfImpUse = cpu_to_le16(0); if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { - brelse(epos.bh); dir->i_size += nfidlen; if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) dinfo->i_lenAlloc += nfidlen; mark_inode_dirty(dir); - return fi; + goto out_ok; } else { - brelse(epos.bh); - if (fibh->sbh != fibh->ebh) - brelse(fibh->ebh); - brelse(fibh->sbh); *err = -EIO; - return NULL; + goto out_err; } + +out_err: + fi = NULL; + if (fibh->sbh != fibh->ebh) + brelse(fibh->ebh); + brelse(fibh->sbh); +out_ok: + brelse(epos.bh); + kfree(name); + return fi; } static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, @@ -940,7 +867,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, char *ea; int err; int block; - char name[UDF_NAME_LEN]; + char *name = NULL; int namelen; struct buffer_head *bh; struct udf_inode_info *iinfo; @@ -950,6 +877,12 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, if (!inode) goto out; + name = kmalloc(UDF_NAME_LEN, GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto out_no_entry; + } + iinfo = UDF_I(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_data.a_ops = &udf_symlink_aops; @@ -1089,6 +1022,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, err = 0; out: + kfree(name); unlock_kernel(); return err; diff --git a/fs/udf/partition.c b/fs/udf/partition.c index fc533345ab89..63610f026ae1 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -24,7 +24,6 @@ #include <linux/fs.h> #include <linux/string.h> -#include <linux/udf_fs.h> #include <linux/slab.h> #include <linux/buffer_head.h> @@ -55,11 +54,10 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; struct udf_virtual_data *vdata; - struct udf_inode_info *iinfo; + struct udf_inode_info *iinfo = UDF_I(sbi->s_vat_inode); map = &sbi->s_partmaps[partition]; vdata = &map->s_type_specific.s_virtual; - index = (sb->s_blocksize - vdata->s_start_offset) / sizeof(uint32_t); if (block > vdata->s_num_entries) { udf_debug("Trying to access block beyond end of VAT " @@ -67,6 +65,12 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, return 0xFFFFFFFF; } + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + loc = le32_to_cpu(((__le32 *)(iinfo->i_ext.i_data + + vdata->s_start_offset))[block]); + goto translate; + } + index = (sb->s_blocksize - vdata->s_start_offset) / sizeof(uint32_t); if (block >= index) { block -= index; newblock = 1 + (block / (sb->s_blocksize / sizeof(uint32_t))); @@ -89,7 +93,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, brelse(bh); - iinfo = UDF_I(sbi->s_vat_inode); +translate: if (iinfo->i_location.partitionReferenceNum == partition) { udf_debug("recursive call to udf_get_pblock!\n"); return 0xFFFFFFFF; @@ -263,3 +267,58 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) return 0; } + +static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, + uint16_t partition, uint32_t offset) +{ + struct super_block *sb = inode->i_sb; + struct udf_part_map *map; + kernel_lb_addr eloc; + uint32_t elen; + sector_t ext_offset; + struct extent_position epos = {}; + uint32_t phyblock; + + if (inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset) != + (EXT_RECORDED_ALLOCATED >> 30)) + phyblock = 0xFFFFFFFF; + else { + map = &UDF_SB(sb)->s_partmaps[partition]; + /* map to sparable/physical partition desc */ + phyblock = udf_get_pblock(sb, eloc.logicalBlockNum, + map->s_partition_num, ext_offset + offset); + } + + brelse(epos.bh); + return phyblock; +} + +uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, + uint16_t partition, uint32_t offset) +{ + struct udf_sb_info *sbi = UDF_SB(sb); + struct udf_part_map *map; + struct udf_meta_data *mdata; + uint32_t retblk; + struct inode *inode; + + udf_debug("READING from METADATA\n"); + + map = &sbi->s_partmaps[partition]; + mdata = &map->s_type_specific.s_metadata; + inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe; + + /* We shouldn't mount such media... */ + BUG_ON(!inode); + retblk = udf_try_read_meta(inode, block, partition, offset); + if (retblk == 0xFFFFFFFF) { + udf_warning(sb, __func__, "error reading from METADATA, " + "trying to read from MIRROR"); + inode = mdata->s_mirror_fe; + if (!inode) + return 0xFFFFFFFF; + retblk = udf_try_read_meta(inode, block, partition, offset); + } + + return retblk; +} diff --git a/fs/udf/super.c b/fs/udf/super.c index f3ac4abfc946..b564fc140fe4 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -55,9 +55,10 @@ #include <linux/errno.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/bitmap.h> +#include <linux/crc-itu-t.h> #include <asm/byteorder.h> -#include <linux/udf_fs.h> #include "udf_sb.h" #include "udf_i.h" @@ -84,22 +85,19 @@ static void udf_write_super(struct super_block *); static int udf_remount_fs(struct super_block *, int *, char *); static int udf_check_valid(struct super_block *, int, int); static int udf_vrs(struct super_block *sb, int silent); -static int udf_load_partition(struct super_block *, kernel_lb_addr *); -static int udf_load_logicalvol(struct super_block *, struct buffer_head *, - kernel_lb_addr *); static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad); static void udf_find_anchor(struct super_block *); static int udf_find_fileset(struct super_block *, kernel_lb_addr *, kernel_lb_addr *); -static void udf_load_pvoldesc(struct super_block *, struct buffer_head *); static void udf_load_fileset(struct super_block *, struct buffer_head *, kernel_lb_addr *); -static int udf_load_partdesc(struct super_block *, struct buffer_head *); static void udf_open_lvid(struct super_block *); static void udf_close_lvid(struct super_block *); static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct dentry *, struct kstatfs *); static int udf_show_options(struct seq_file *, struct vfsmount *); +static void udf_error(struct super_block *sb, const char *function, + const char *fmt, ...); struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) { @@ -587,48 +585,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) return 0; } -/* - * udf_set_blocksize - * - * PURPOSE - * Set the block size to be used in all transfers. - * - * DESCRIPTION - * To allow room for a DMA transfer, it is best to guess big when unsure. - * This routine picks 2048 bytes as the blocksize when guessing. This - * should be adequate until devices with larger block sizes become common. - * - * Note that the Linux kernel can currently only deal with blocksizes of - * 512, 1024, 2048, 4096, and 8192 bytes. - * - * PRE-CONDITIONS - * sb Pointer to _locked_ superblock. - * - * POST-CONDITIONS - * sb->s_blocksize Blocksize. - * sb->s_blocksize_bits log2 of blocksize. - * <return> 0 Blocksize is valid. - * <return> 1 Blocksize is invalid. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ -static int udf_set_blocksize(struct super_block *sb, int bsize) -{ - if (!sb_min_blocksize(sb, bsize)) { - udf_debug("Bad block size (%d)\n", bsize); - printk(KERN_ERR "udf: bad block size (%d)\n", bsize); - return 0; - } - - return sb->s_blocksize; -} - static int udf_vrs(struct super_block *sb, int silent) { struct volStructDesc *vsd = NULL; - int sector = 32768; + loff_t sector = 32768; int sectorsize; struct buffer_head *bh = NULL; int iso9660 = 0; @@ -649,7 +609,8 @@ static int udf_vrs(struct super_block *sb, int silent) sector += (sbi->s_session << sb->s_blocksize_bits); udf_debug("Starting at sector %u (%ld byte sectors)\n", - (sector >> sb->s_blocksize_bits), sb->s_blocksize); + (unsigned int)(sector >> sb->s_blocksize_bits), + sb->s_blocksize); /* Process the sequence (if applicable) */ for (; !nsr02 && !nsr03; sector += sectorsize) { /* Read a block */ @@ -719,162 +680,140 @@ static int udf_vrs(struct super_block *sb, int silent) } /* - * udf_find_anchor - * - * PURPOSE - * Find an anchor volume descriptor. - * - * PRE-CONDITIONS - * sb Pointer to _locked_ superblock. - * lastblock Last block on media. - * - * POST-CONDITIONS - * <return> 1 if not found, 0 if ok - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. + * Check whether there is an anchor block in the given block */ -static void udf_find_anchor(struct super_block *sb) +static int udf_check_anchor_block(struct super_block *sb, sector_t block, + bool varconv) { - int lastblock; struct buffer_head *bh = NULL; + tag *t; uint16_t ident; uint32_t location; - int i; - struct udf_sb_info *sbi; - sbi = UDF_SB(sb); - lastblock = sbi->s_last_block; + if (varconv) { + if (udf_fixed_to_variable(block) >= + sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) + return 0; + bh = sb_bread(sb, udf_fixed_to_variable(block)); + } + else + bh = sb_bread(sb, block); - if (lastblock) { - int varlastblock = udf_variable_to_fixed(lastblock); - int last[] = { lastblock, lastblock - 2, - lastblock - 150, lastblock - 152, - varlastblock, varlastblock - 2, - varlastblock - 150, varlastblock - 152 }; - - lastblock = 0; - - /* Search for an anchor volume descriptor pointer */ - - /* according to spec, anchor is in either: - * block 256 - * lastblock-256 - * lastblock - * however, if the disc isn't closed, it could be 512 */ - - for (i = 0; !lastblock && i < ARRAY_SIZE(last); i++) { - ident = location = 0; - if (last[i] >= 0) { - bh = sb_bread(sb, last[i]); - if (bh) { - tag *t = (tag *)bh->b_data; - ident = le16_to_cpu(t->tagIdent); - location = le32_to_cpu(t->tagLocation); - brelse(bh); - } - } + if (!bh) + return 0; - if (ident == TAG_IDENT_AVDP) { - if (location == last[i] - sbi->s_session) { - lastblock = last[i] - sbi->s_session; - sbi->s_anchor[0] = lastblock; - sbi->s_anchor[1] = lastblock - 256; - } else if (location == - udf_variable_to_fixed(last[i]) - - sbi->s_session) { - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); - lastblock = - udf_variable_to_fixed(last[i]) - - sbi->s_session; - sbi->s_anchor[0] = lastblock; - sbi->s_anchor[1] = lastblock - 256 - - sbi->s_session; - } else { - udf_debug("Anchor found at block %d, " - "location mismatch %d.\n", - last[i], location); - } - } else if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - lastblock = last[i]; - sbi->s_anchor[3] = 512; - } else { - ident = location = 0; - if (last[i] >= 256) { - bh = sb_bread(sb, last[i] - 256); - if (bh) { - tag *t = (tag *)bh->b_data; - ident = le16_to_cpu( - t->tagIdent); - location = le32_to_cpu( - t->tagLocation); - brelse(bh); - } - } + t = (tag *)bh->b_data; + ident = le16_to_cpu(t->tagIdent); + location = le32_to_cpu(t->tagLocation); + brelse(bh); + if (ident != TAG_IDENT_AVDP) + return 0; + return location == block; +} - if (ident == TAG_IDENT_AVDP && - location == last[i] - 256 - - sbi->s_session) { - lastblock = last[i]; - sbi->s_anchor[1] = last[i] - 256; - } else { - ident = location = 0; - if (last[i] >= 312 + sbi->s_session) { - bh = sb_bread(sb, - last[i] - 312 - - sbi->s_session); - if (bh) { - tag *t = (tag *) - bh->b_data; - ident = le16_to_cpu( - t->tagIdent); - location = le32_to_cpu( - t->tagLocation); - brelse(bh); - } - } +/* Search for an anchor volume descriptor pointer */ +static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, + sector_t lastblock) +{ + sector_t last[6]; + int i; + struct udf_sb_info *sbi = UDF_SB(sb); - if (ident == TAG_IDENT_AVDP && - location == udf_variable_to_fixed(last[i]) - 256) { - UDF_SET_FLAG(sb, - UDF_FLAG_VARCONV); - lastblock = udf_variable_to_fixed(last[i]); - sbi->s_anchor[1] = lastblock - 256; - } - } - } + last[0] = lastblock; + last[1] = last[0] - 1; + last[2] = last[0] + 1; + last[3] = last[0] - 2; + last[4] = last[0] - 150; + last[5] = last[0] - 152; + + /* according to spec, anchor is in either: + * block 256 + * lastblock-256 + * lastblock + * however, if the disc isn't closed, it could be 512 */ + + for (i = 0; i < ARRAY_SIZE(last); i++) { + if (last[i] < 0) + continue; + if (last[i] >= sb->s_bdev->bd_inode->i_size >> + sb->s_blocksize_bits) + continue; + + if (udf_check_anchor_block(sb, last[i], varconv)) { + sbi->s_anchor[0] = last[i]; + sbi->s_anchor[1] = last[i] - 256; + return last[i]; } - } - if (!lastblock) { - /* We haven't found the lastblock. check 312 */ - bh = sb_bread(sb, 312 + sbi->s_session); - if (bh) { - tag *t = (tag *)bh->b_data; - ident = le16_to_cpu(t->tagIdent); - location = le32_to_cpu(t->tagLocation); - brelse(bh); + if (last[i] < 256) + continue; - if (ident == TAG_IDENT_AVDP && location == 256) - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { + sbi->s_anchor[1] = last[i] - 256; + return last[i]; } } + if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { + sbi->s_anchor[0] = sbi->s_session + 256; + return last[0]; + } + if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { + sbi->s_anchor[0] = sbi->s_session + 512; + return last[0]; + } + return 0; +} + +/* + * Find an anchor volume descriptor. The function expects sbi->s_lastblock to + * be the last block on the media. + * + * Return 1 if not found, 0 if ok + * + */ +static void udf_find_anchor(struct super_block *sb) +{ + sector_t lastblock; + struct buffer_head *bh = NULL; + uint16_t ident; + int i; + struct udf_sb_info *sbi = UDF_SB(sb); + + lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); + if (lastblock) + goto check_anchor; + + /* No anchor found? Try VARCONV conversion of block numbers */ + /* Firstly, we try to not convert number of the last block */ + lastblock = udf_scan_anchors(sb, 1, + udf_variable_to_fixed(sbi->s_last_block)); + if (lastblock) { + UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + goto check_anchor; + } + + /* Secondly, we try with converted number of the last block */ + lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); + if (lastblock) + UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + +check_anchor: + /* + * Check located anchors and the anchor block supplied via + * mount options + */ for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { - if (sbi->s_anchor[i]) { - bh = udf_read_tagged(sb, sbi->s_anchor[i], - sbi->s_anchor[i], &ident); - if (!bh) + if (!sbi->s_anchor[i]) + continue; + bh = udf_read_tagged(sb, sbi->s_anchor[i], + sbi->s_anchor[i], &ident); + if (!bh) + sbi->s_anchor[i] = 0; + else { + brelse(bh); + if (ident != TAG_IDENT_AVDP) sbi->s_anchor[i] = 0; - else { - brelse(bh); - if ((ident != TAG_IDENT_AVDP) && - (i || (ident != TAG_IDENT_FE && - ident != TAG_IDENT_EFE))) - sbi->s_anchor[i] = 0; - } } } @@ -971,27 +910,30 @@ static int udf_find_fileset(struct super_block *sb, return 1; } -static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) +static int udf_load_pvoldesc(struct super_block *sb, sector_t block) { struct primaryVolDesc *pvoldesc; - time_t recording; - long recording_usec; struct ustr instr; struct ustr outstr; + struct buffer_head *bh; + uint16_t ident; + + bh = udf_read_tagged(sb, block, block, &ident); + if (!bh) + return 1; + BUG_ON(ident != TAG_IDENT_PVD); pvoldesc = (struct primaryVolDesc *)bh->b_data; - if (udf_stamp_to_time(&recording, &recording_usec, - lets_to_cpu(pvoldesc->recordingDateAndTime))) { - kernel_timestamp ts; - ts = lets_to_cpu(pvoldesc->recordingDateAndTime); - udf_debug("recording time %ld/%ld, %04u/%02u/%02u" + if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, + pvoldesc->recordingDateAndTime)) { +#ifdef UDFFS_DEBUG + timestamp *ts = &pvoldesc->recordingDateAndTime; + udf_debug("recording time %04u/%02u/%02u" " %02u:%02u (%x)\n", - recording, recording_usec, - ts.year, ts.month, ts.day, ts.hour, - ts.minute, ts.typeAndTimezone); - UDF_SB(sb)->s_record_time.tv_sec = recording; - UDF_SB(sb)->s_record_time.tv_nsec = recording_usec * 1000; + le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, + ts->minute, le16_to_cpu(ts->typeAndTimezone)); +#endif } if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) @@ -1005,6 +947,104 @@ static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) if (udf_CS0toUTF8(&outstr, &instr)) udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); + + brelse(bh); + return 0; +} + +static int udf_load_metadata_files(struct super_block *sb, int partition) +{ + struct udf_sb_info *sbi = UDF_SB(sb); + struct udf_part_map *map; + struct udf_meta_data *mdata; + kernel_lb_addr addr; + int fe_error = 0; + + map = &sbi->s_partmaps[partition]; + mdata = &map->s_type_specific.s_metadata; + + /* metadata address */ + addr.logicalBlockNum = mdata->s_meta_file_loc; + addr.partitionReferenceNum = map->s_partition_num; + + udf_debug("Metadata file location: block = %d part = %d\n", + addr.logicalBlockNum, addr.partitionReferenceNum); + + mdata->s_metadata_fe = udf_iget(sb, addr); + + if (mdata->s_metadata_fe == NULL) { + udf_warning(sb, __func__, "metadata inode efe not found, " + "will try mirror inode."); + fe_error = 1; + } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type != + ICBTAG_FLAG_AD_SHORT) { + udf_warning(sb, __func__, "metadata inode efe does not have " + "short allocation descriptors!"); + fe_error = 1; + iput(mdata->s_metadata_fe); + mdata->s_metadata_fe = NULL; + } + + /* mirror file entry */ + addr.logicalBlockNum = mdata->s_mirror_file_loc; + addr.partitionReferenceNum = map->s_partition_num; + + udf_debug("Mirror metadata file location: block = %d part = %d\n", + addr.logicalBlockNum, addr.partitionReferenceNum); + + mdata->s_mirror_fe = udf_iget(sb, addr); + + if (mdata->s_mirror_fe == NULL) { + if (fe_error) { + udf_error(sb, __func__, "mirror inode efe not found " + "and metadata inode is missing too, exiting..."); + goto error_exit; + } else + udf_warning(sb, __func__, "mirror inode efe not found," + " but metadata inode is OK"); + } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type != + ICBTAG_FLAG_AD_SHORT) { + udf_warning(sb, __func__, "mirror inode efe does not have " + "short allocation descriptors!"); + iput(mdata->s_mirror_fe); + mdata->s_mirror_fe = NULL; + if (fe_error) + goto error_exit; + } + + /* + * bitmap file entry + * Note: + * Load only if bitmap file location differs from 0xFFFFFFFF (DCN-5102) + */ + if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) { + addr.logicalBlockNum = mdata->s_bitmap_file_loc; + addr.partitionReferenceNum = map->s_partition_num; + + udf_debug("Bitmap file location: block = %d part = %d\n", + addr.logicalBlockNum, addr.partitionReferenceNum); + + mdata->s_bitmap_fe = udf_iget(sb, addr); + + if (mdata->s_bitmap_fe == NULL) { + if (sb->s_flags & MS_RDONLY) + udf_warning(sb, __func__, "bitmap inode efe " + "not found but it's ok since the disc" + " is mounted read-only"); + else { + udf_error(sb, __func__, "bitmap inode efe not " + "found and attempted read-write mount"); + goto error_exit; + } + } + } + + udf_debug("udf_load_metadata_files Ok\n"); + + return 0; + +error_exit: + return 1; } static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, @@ -1025,10 +1065,9 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, int udf_compute_nr_groups(struct super_block *sb, u32 partition) { struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; - return (map->s_partition_len + - (sizeof(struct spaceBitmapDesc) << 3) + - (sb->s_blocksize * 8) - 1) / - (sb->s_blocksize * 8); + return DIV_ROUND_UP(map->s_partition_len + + (sizeof(struct spaceBitmapDesc) << 3), + sb->s_blocksize * 8); } static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) @@ -1059,134 +1098,241 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) return bitmap; } -static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) +static int udf_fill_partdesc_info(struct super_block *sb, + struct partitionDesc *p, int p_index) +{ + struct udf_part_map *map; + struct udf_sb_info *sbi = UDF_SB(sb); + struct partitionHeaderDesc *phd; + + map = &sbi->s_partmaps[p_index]; + + map->s_partition_len = le32_to_cpu(p->partitionLength); /* blocks */ + map->s_partition_root = le32_to_cpu(p->partitionStartingLocation); + + if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) + map->s_partition_flags |= UDF_PART_FLAG_READ_ONLY; + if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE)) + map->s_partition_flags |= UDF_PART_FLAG_WRITE_ONCE; + if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE)) + map->s_partition_flags |= UDF_PART_FLAG_REWRITABLE; + if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) + map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; + + udf_debug("Partition (%d type %x) starts at physical %d, " + "block length %d\n", p_index, + map->s_partition_type, map->s_partition_root, + map->s_partition_len); + + if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && + strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) + return 0; + + phd = (struct partitionHeaderDesc *)p->partitionContentsUse; + if (phd->unallocSpaceTable.extLength) { + kernel_lb_addr loc = { + .logicalBlockNum = le32_to_cpu( + phd->unallocSpaceTable.extPosition), + .partitionReferenceNum = p_index, + }; + + map->s_uspace.s_table = udf_iget(sb, loc); + if (!map->s_uspace.s_table) { + udf_debug("cannot load unallocSpaceTable (part %d)\n", + p_index); + return 1; + } + map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; + udf_debug("unallocSpaceTable (part %d) @ %ld\n", + p_index, map->s_uspace.s_table->i_ino); + } + + if (phd->unallocSpaceBitmap.extLength) { + struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); + if (!bitmap) + return 1; + map->s_uspace.s_bitmap = bitmap; + bitmap->s_extLength = le32_to_cpu( + phd->unallocSpaceBitmap.extLength); + bitmap->s_extPosition = le32_to_cpu( + phd->unallocSpaceBitmap.extPosition); + map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; + udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index, + bitmap->s_extPosition); + } + + if (phd->partitionIntegrityTable.extLength) + udf_debug("partitionIntegrityTable (part %d)\n", p_index); + + if (phd->freedSpaceTable.extLength) { + kernel_lb_addr loc = { + .logicalBlockNum = le32_to_cpu( + phd->freedSpaceTable.extPosition), + .partitionReferenceNum = p_index, + }; + + map->s_fspace.s_table = udf_iget(sb, loc); + if (!map->s_fspace.s_table) { + udf_debug("cannot load freedSpaceTable (part %d)\n", + p_index); + return 1; + } + + map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; + udf_debug("freedSpaceTable (part %d) @ %ld\n", + p_index, map->s_fspace.s_table->i_ino); + } + + if (phd->freedSpaceBitmap.extLength) { + struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); + if (!bitmap) + return 1; + map->s_fspace.s_bitmap = bitmap; + bitmap->s_extLength = le32_to_cpu( + phd->freedSpaceBitmap.extLength); + bitmap->s_extPosition = le32_to_cpu( + phd->freedSpaceBitmap.extPosition); + map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; + udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index, + bitmap->s_extPosition); + } + return 0; +} + +static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) +{ + struct udf_sb_info *sbi = UDF_SB(sb); + struct udf_part_map *map = &sbi->s_partmaps[p_index]; + kernel_lb_addr ino; + struct buffer_head *bh = NULL; + struct udf_inode_info *vati; + uint32_t pos; + struct virtualAllocationTable20 *vat20; + + /* VAT file entry is in the last recorded block */ + ino.partitionReferenceNum = type1_index; + ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; + sbi->s_vat_inode = udf_iget(sb, ino); + if (!sbi->s_vat_inode) + return 1; + + if (map->s_partition_type == UDF_VIRTUAL_MAP15) { + map->s_type_specific.s_virtual.s_start_offset = 0; + map->s_type_specific.s_virtual.s_num_entries = + (sbi->s_vat_inode->i_size - 36) >> 2; + } else if (map->s_partition_type == UDF_VIRTUAL_MAP20) { + vati = UDF_I(sbi->s_vat_inode); + if (vati->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + pos = udf_block_map(sbi->s_vat_inode, 0); + bh = sb_bread(sb, pos); + if (!bh) + return 1; + vat20 = (struct virtualAllocationTable20 *)bh->b_data; + } else { + vat20 = (struct virtualAllocationTable20 *) + vati->i_ext.i_data; + } + + map->s_type_specific.s_virtual.s_start_offset = + le16_to_cpu(vat20->lengthHeader); + map->s_type_specific.s_virtual.s_num_entries = + (sbi->s_vat_inode->i_size - + map->s_type_specific.s_virtual. + s_start_offset) >> 2; + brelse(bh); + } + return 0; +} + +static int udf_load_partdesc(struct super_block *sb, sector_t block) { + struct buffer_head *bh; struct partitionDesc *p; - int i; struct udf_part_map *map; - struct udf_sb_info *sbi; + struct udf_sb_info *sbi = UDF_SB(sb); + int i, type1_idx; + uint16_t partitionNumber; + uint16_t ident; + int ret = 0; + + bh = udf_read_tagged(sb, block, block, &ident); + if (!bh) + return 1; + if (ident != TAG_IDENT_PD) + goto out_bh; p = (struct partitionDesc *)bh->b_data; - sbi = UDF_SB(sb); + partitionNumber = le16_to_cpu(p->partitionNumber); + /* First scan for TYPE1, SPARABLE and METADATA partitions */ for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; udf_debug("Searching map: (%d == %d)\n", - map->s_partition_num, - le16_to_cpu(p->partitionNumber)); - if (map->s_partition_num == - le16_to_cpu(p->partitionNumber)) { - map->s_partition_len = - le32_to_cpu(p->partitionLength); /* blocks */ - map->s_partition_root = - le32_to_cpu(p->partitionStartingLocation); - if (p->accessType == - cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) - map->s_partition_flags |= - UDF_PART_FLAG_READ_ONLY; - if (p->accessType == - cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE)) - map->s_partition_flags |= - UDF_PART_FLAG_WRITE_ONCE; - if (p->accessType == - cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE)) - map->s_partition_flags |= - UDF_PART_FLAG_REWRITABLE; - if (p->accessType == - cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) - map->s_partition_flags |= - UDF_PART_FLAG_OVERWRITABLE; - - if (!strcmp(p->partitionContents.ident, - PD_PARTITION_CONTENTS_NSR02) || - !strcmp(p->partitionContents.ident, - PD_PARTITION_CONTENTS_NSR03)) { - struct partitionHeaderDesc *phd; - - phd = (struct partitionHeaderDesc *) - (p->partitionContentsUse); - if (phd->unallocSpaceTable.extLength) { - kernel_lb_addr loc = { - .logicalBlockNum = le32_to_cpu(phd->unallocSpaceTable.extPosition), - .partitionReferenceNum = i, - }; - - map->s_uspace.s_table = - udf_iget(sb, loc); - if (!map->s_uspace.s_table) { - udf_debug("cannot load unallocSpaceTable (part %d)\n", i); - return 1; - } - map->s_partition_flags |= - UDF_PART_FLAG_UNALLOC_TABLE; - udf_debug("unallocSpaceTable (part %d) @ %ld\n", - i, map->s_uspace.s_table->i_ino); - } - if (phd->unallocSpaceBitmap.extLength) { - struct udf_bitmap *bitmap = - udf_sb_alloc_bitmap(sb, i); - map->s_uspace.s_bitmap = bitmap; - if (bitmap != NULL) { - bitmap->s_extLength = - le32_to_cpu(phd->unallocSpaceBitmap.extLength); - bitmap->s_extPosition = - le32_to_cpu(phd->unallocSpaceBitmap.extPosition); - map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; - udf_debug("unallocSpaceBitmap (part %d) @ %d\n", - i, bitmap->s_extPosition); - } - } - if (phd->partitionIntegrityTable.extLength) - udf_debug("partitionIntegrityTable (part %d)\n", i); - if (phd->freedSpaceTable.extLength) { - kernel_lb_addr loc = { - .logicalBlockNum = le32_to_cpu(phd->freedSpaceTable.extPosition), - .partitionReferenceNum = i, - }; - - map->s_fspace.s_table = - udf_iget(sb, loc); - if (!map->s_fspace.s_table) { - udf_debug("cannot load freedSpaceTable (part %d)\n", i); - return 1; - } - map->s_partition_flags |= - UDF_PART_FLAG_FREED_TABLE; - udf_debug("freedSpaceTable (part %d) @ %ld\n", - i, map->s_fspace.s_table->i_ino); - } - if (phd->freedSpaceBitmap.extLength) { - struct udf_bitmap *bitmap = - udf_sb_alloc_bitmap(sb, i); - map->s_fspace.s_bitmap = bitmap; - if (bitmap != NULL) { - bitmap->s_extLength = - le32_to_cpu(phd->freedSpaceBitmap.extLength); - bitmap->s_extPosition = - le32_to_cpu(phd->freedSpaceBitmap.extPosition); - map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; - udf_debug("freedSpaceBitmap (part %d) @ %d\n", - i, bitmap->s_extPosition); - } - } - } + map->s_partition_num, partitionNumber); + if (map->s_partition_num == partitionNumber && + (map->s_partition_type == UDF_TYPE1_MAP15 || + map->s_partition_type == UDF_SPARABLE_MAP15)) break; - } } - if (i == sbi->s_partitions) + + if (i >= sbi->s_partitions) { udf_debug("Partition (%d) not found in partition map\n", - le16_to_cpu(p->partitionNumber)); - else - udf_debug("Partition (%d:%d type %x) starts at physical %d, " - "block length %d\n", - le16_to_cpu(p->partitionNumber), i, - map->s_partition_type, - map->s_partition_root, - map->s_partition_len); - return 0; + partitionNumber); + goto out_bh; + } + + ret = udf_fill_partdesc_info(sb, p, i); + + /* + * Now rescan for VIRTUAL or METADATA partitions when SPARABLE and + * PHYSICAL partitions are already set up + */ + type1_idx = i; + for (i = 0; i < sbi->s_partitions; i++) { + map = &sbi->s_partmaps[i]; + + if (map->s_partition_num == partitionNumber && + (map->s_partition_type == UDF_VIRTUAL_MAP15 || + map->s_partition_type == UDF_VIRTUAL_MAP20 || + map->s_partition_type == UDF_METADATA_MAP25)) + break; + } + + if (i >= sbi->s_partitions) + goto out_bh; + + ret = udf_fill_partdesc_info(sb, p, i); + if (ret) + goto out_bh; + + if (map->s_partition_type == UDF_METADATA_MAP25) { + ret = udf_load_metadata_files(sb, i); + if (ret) { + printk(KERN_ERR "UDF-fs: error loading MetaData " + "partition map %d\n", i); + goto out_bh; + } + } else { + ret = udf_load_vat(sb, i, type1_idx); + if (ret) + goto out_bh; + /* + * Mark filesystem read-only if we have a partition with + * virtual map since we don't handle writing to it (we + * overwrite blocks instead of relocating them). + */ + sb->s_flags |= MS_RDONLY; + printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only " + "because writing to pseudooverwrite partition is " + "not implemented.\n"); + } +out_bh: + /* In case loading failed, we handle cleanup in udf_fill_super */ + brelse(bh); + return ret; } -static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, +static int udf_load_logicalvol(struct super_block *sb, sector_t block, kernel_lb_addr *fileset) { struct logicalVolDesc *lvd; @@ -1194,12 +1340,21 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, uint8_t type; struct udf_sb_info *sbi = UDF_SB(sb); struct genericPartitionMap *gpm; + uint16_t ident; + struct buffer_head *bh; + int ret = 0; + bh = udf_read_tagged(sb, block, block, &ident); + if (!bh) + return 1; + BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); - if (i != 0) - return i; + if (i != 0) { + ret = i; + goto out_bh; + } for (i = 0, offset = 0; i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); @@ -1223,12 +1378,12 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, u16 suf = le16_to_cpu(((__le16 *)upm2->partIdent. identSuffix)[0]); - if (suf == 0x0150) { + if (suf < 0x0200) { map->s_partition_type = UDF_VIRTUAL_MAP15; map->s_partition_func = udf_get_pblock_virt15; - } else if (suf == 0x0200) { + } else { map->s_partition_type = UDF_VIRTUAL_MAP20; map->s_partition_func = @@ -1238,7 +1393,6 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { uint32_t loc; - uint16_t ident; struct sparingTable *st; struct sparablePartitionMap *spm = (struct sparablePartitionMap *)gpm; @@ -1256,22 +1410,64 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, map->s_type_specific.s_sparing. s_spar_map[j] = bh2; - if (bh2 != NULL) { - st = (struct sparingTable *) - bh2->b_data; - if (ident != 0 || strncmp( - st->sparingIdent.ident, - UDF_ID_SPARING, - strlen(UDF_ID_SPARING))) { - brelse(bh2); - map->s_type_specific. - s_sparing. - s_spar_map[j] = - NULL; - } + if (bh2 == NULL) + continue; + + st = (struct sparingTable *)bh2->b_data; + if (ident != 0 || strncmp( + st->sparingIdent.ident, + UDF_ID_SPARING, + strlen(UDF_ID_SPARING))) { + brelse(bh2); + map->s_type_specific.s_sparing. + s_spar_map[j] = NULL; } } map->s_partition_func = udf_get_pblock_spar15; + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_METADATA, + strlen(UDF_ID_METADATA))) { + struct udf_meta_data *mdata = + &map->s_type_specific.s_metadata; + struct metadataPartitionMap *mdm = + (struct metadataPartitionMap *) + &(lvd->partitionMaps[offset]); + udf_debug("Parsing Logical vol part %d " + "type %d id=%s\n", i, type, + UDF_ID_METADATA); + + map->s_partition_type = UDF_METADATA_MAP25; + map->s_partition_func = udf_get_pblock_meta25; + + mdata->s_meta_file_loc = + le32_to_cpu(mdm->metadataFileLoc); + mdata->s_mirror_file_loc = + le32_to_cpu(mdm->metadataMirrorFileLoc); + mdata->s_bitmap_file_loc = + le32_to_cpu(mdm->metadataBitmapFileLoc); + mdata->s_alloc_unit_size = + le32_to_cpu(mdm->allocUnitSize); + mdata->s_align_unit_size = + le16_to_cpu(mdm->alignUnitSize); + mdata->s_dup_md_flag = + mdm->flags & 0x01; + + udf_debug("Metadata Ident suffix=0x%x\n", + (le16_to_cpu( + ((__le16 *) + mdm->partIdent.identSuffix)[0]))); + udf_debug("Metadata part num=%d\n", + le16_to_cpu(mdm->partitionNum)); + udf_debug("Metadata part alloc unit size=%d\n", + le32_to_cpu(mdm->allocUnitSize)); + udf_debug("Metadata file loc=%d\n", + le32_to_cpu(mdm->metadataFileLoc)); + udf_debug("Mirror file loc=%d\n", + le32_to_cpu(mdm->metadataMirrorFileLoc)); + udf_debug("Bitmap file loc=%d\n", + le32_to_cpu(mdm->metadataBitmapFileLoc)); + udf_debug("Duplicate Flag: %d %d\n", + mdata->s_dup_md_flag, mdm->flags); } else { udf_debug("Unknown ident: %s\n", upm2->partIdent.ident); @@ -1296,7 +1492,9 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, if (lvd->integritySeqExt.extLength) udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt)); - return 0; +out_bh: + brelse(bh); + return ret; } /* @@ -1345,7 +1543,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static int udf_process_sequence(struct super_block *sb, long block, +static noinline int udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_lb_addr *fileset) { struct buffer_head *bh = NULL; @@ -1354,19 +1552,25 @@ static int udf_process_sequence(struct super_block *sb, long block, struct generic_desc *gd; struct volDescPtr *vdp; int done = 0; - int i, j; uint32_t vdsn; uint16_t ident; long next_s = 0, next_e = 0; memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); - /* Read the main descriptor sequence */ + /* + * Read the main descriptor sequence and find which descriptors + * are in it. + */ for (; (!done && block <= lastblock); block++) { bh = udf_read_tagged(sb, block, block, &ident); - if (!bh) - break; + if (!bh) { + printk(KERN_ERR "udf: Block %Lu of volume descriptor " + "sequence is corrupted or we could not read " + "it.\n", (unsigned long long)block); + return 1; + } /* Process each descriptor (ISO 13346 3/8.3-8.4) */ gd = (struct generic_desc *)bh->b_data; @@ -1432,41 +1636,31 @@ static int udf_process_sequence(struct super_block *sb, long block, } brelse(bh); } - for (i = 0; i < VDS_POS_LENGTH; i++) { - if (vds[i].block) { - bh = udf_read_tagged(sb, vds[i].block, vds[i].block, - &ident); - - if (i == VDS_POS_PRIMARY_VOL_DESC) { - udf_load_pvoldesc(sb, bh); - } else if (i == VDS_POS_LOGICAL_VOL_DESC) { - if (udf_load_logicalvol(sb, bh, fileset)) { - brelse(bh); - return 1; - } - } else if (i == VDS_POS_PARTITION_DESC) { - struct buffer_head *bh2 = NULL; - if (udf_load_partdesc(sb, bh)) { - brelse(bh); - return 1; - } - for (j = vds[i].block + 1; - j < vds[VDS_POS_TERMINATING_DESC].block; - j++) { - bh2 = udf_read_tagged(sb, j, j, &ident); - gd = (struct generic_desc *)bh2->b_data; - if (ident == TAG_IDENT_PD) - if (udf_load_partdesc(sb, - bh2)) { - brelse(bh); - brelse(bh2); - return 1; - } - brelse(bh2); - } - } - brelse(bh); - } + /* + * Now read interesting descriptors again and process them + * in a suitable order + */ + if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { + printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n"); + return 1; + } + if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) + return 1; + + if (vds[VDS_POS_LOGICAL_VOL_DESC].block && udf_load_logicalvol(sb, + vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset)) + return 1; + + if (vds[VDS_POS_PARTITION_DESC].block) { + /* + * We rescan the whole descriptor sequence to find + * partition descriptor blocks and process them. + */ + for (block = vds[VDS_POS_PARTITION_DESC].block; + block < vds[VDS_POS_TERMINATING_DESC].block; + block++) + if (udf_load_partdesc(sb, block)) + return 1; } return 0; @@ -1478,6 +1672,7 @@ static int udf_process_sequence(struct super_block *sb, long block, static int udf_check_valid(struct super_block *sb, int novrs, int silent) { long block; + struct udf_sb_info *sbi = UDF_SB(sb); if (novrs) { udf_debug("Validity check skipped because of novrs option\n"); @@ -1485,27 +1680,22 @@ static int udf_check_valid(struct super_block *sb, int novrs, int silent) } /* Check that it is NSR02 compliant */ /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ - else { - block = udf_vrs(sb, silent); - if (block == -1) { - struct udf_sb_info *sbi = UDF_SB(sb); - udf_debug("Failed to read byte 32768. Assuming open " - "disc. Skipping validity check\n"); - if (!sbi->s_last_block) - sbi->s_last_block = udf_get_last_block(sb); - return 0; - } else - return !block; - } + block = udf_vrs(sb, silent); + if (block == -1) + udf_debug("Failed to read byte 32768. Assuming open " + "disc. Skipping validity check\n"); + if (block && !sbi->s_last_block) + sbi->s_last_block = udf_get_last_block(sb); + return !block; } -static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) +static int udf_load_sequence(struct super_block *sb, kernel_lb_addr *fileset) { struct anchorVolDescPtr *anchor; uint16_t ident; struct buffer_head *bh; long main_s, main_e, reserve_s, reserve_e; - int i, j; + int i; struct udf_sb_info *sbi; if (!sb) @@ -1515,6 +1705,7 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { if (!sbi->s_anchor[i]) continue; + bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i], &ident); if (!bh) @@ -1553,76 +1744,6 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) } udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]); - for (i = 0; i < sbi->s_partitions; i++) { - kernel_lb_addr uninitialized_var(ino); - struct udf_part_map *map = &sbi->s_partmaps[i]; - switch (map->s_partition_type) { - case UDF_VIRTUAL_MAP15: - case UDF_VIRTUAL_MAP20: - if (!sbi->s_last_block) { - sbi->s_last_block = udf_get_last_block(sb); - udf_find_anchor(sb); - } - - if (!sbi->s_last_block) { - udf_debug("Unable to determine Lastblock (For " - "Virtual Partition)\n"); - return 1; - } - - for (j = 0; j < sbi->s_partitions; j++) { - struct udf_part_map *map2 = &sbi->s_partmaps[j]; - if (j != i && - map->s_volumeseqnum == - map2->s_volumeseqnum && - map->s_partition_num == - map2->s_partition_num) { - ino.partitionReferenceNum = j; - ino.logicalBlockNum = - sbi->s_last_block - - map2->s_partition_root; - break; - } - } - - if (j == sbi->s_partitions) - return 1; - - sbi->s_vat_inode = udf_iget(sb, ino); - if (!sbi->s_vat_inode) - return 1; - - if (map->s_partition_type == UDF_VIRTUAL_MAP15) { - map->s_type_specific.s_virtual.s_start_offset = - udf_ext0_offset(sbi->s_vat_inode); - map->s_type_specific.s_virtual.s_num_entries = - (sbi->s_vat_inode->i_size - 36) >> 2; - } else if (map->s_partition_type == UDF_VIRTUAL_MAP20) { - uint32_t pos; - struct virtualAllocationTable20 *vat20; - - pos = udf_block_map(sbi->s_vat_inode, 0); - bh = sb_bread(sb, pos); - if (!bh) - return 1; - vat20 = (struct virtualAllocationTable20 *) - bh->b_data + - udf_ext0_offset(sbi->s_vat_inode); - map->s_type_specific.s_virtual.s_start_offset = - le16_to_cpu(vat20->lengthHeader) + - udf_ext0_offset(sbi->s_vat_inode); - map->s_type_specific.s_virtual.s_num_entries = - (sbi->s_vat_inode->i_size - - map->s_type_specific.s_virtual. - s_start_offset) >> 2; - brelse(bh); - } - map->s_partition_root = udf_get_pblock(sb, 0, i, 0); - map->s_partition_len = - sbi->s_partmaps[ino.partitionReferenceNum]. - s_partition_len; - } - } return 0; } @@ -1630,65 +1751,61 @@ static void udf_open_lvid(struct super_block *sb) { struct udf_sb_info *sbi = UDF_SB(sb); struct buffer_head *bh = sbi->s_lvid_bh; - if (bh) { - kernel_timestamp cpu_time; - struct logicalVolIntegrityDesc *lvid = - (struct logicalVolIntegrityDesc *)bh->b_data; - struct logicalVolIntegrityDescImpUse *lvidiu = - udf_sb_lvidiu(sbi); + struct logicalVolIntegrityDesc *lvid; + struct logicalVolIntegrityDescImpUse *lvidiu; + if (!bh) + return; - lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; - lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; - if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) - lvid->recordingDateAndTime = cpu_to_lets(cpu_time); - lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; + lvid = (struct logicalVolIntegrityDesc *)bh->b_data; + lvidiu = udf_sb_lvidiu(sbi); - lvid->descTag.descCRC = cpu_to_le16( - udf_crc((char *)lvid + sizeof(tag), - le16_to_cpu(lvid->descTag.descCRCLength), - 0)); + lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; + lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; + udf_time_to_disk_stamp(&lvid->recordingDateAndTime, + CURRENT_TIME); + lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; - lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); - mark_buffer_dirty(bh); - } + lvid->descTag.descCRC = cpu_to_le16( + crc_itu_t(0, (char *)lvid + sizeof(tag), + le16_to_cpu(lvid->descTag.descCRCLength))); + + lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); + mark_buffer_dirty(bh); } static void udf_close_lvid(struct super_block *sb) { - kernel_timestamp cpu_time; struct udf_sb_info *sbi = UDF_SB(sb); struct buffer_head *bh = sbi->s_lvid_bh; struct logicalVolIntegrityDesc *lvid; + struct logicalVolIntegrityDescImpUse *lvidiu; if (!bh) return; lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - if (lvid->integrityType == LVID_INTEGRITY_TYPE_OPEN) { - struct logicalVolIntegrityDescImpUse *lvidiu = - udf_sb_lvidiu(sbi); - lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; - lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; - if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) - lvid->recordingDateAndTime = cpu_to_lets(cpu_time); - if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) - lvidiu->maxUDFWriteRev = - cpu_to_le16(UDF_MAX_WRITE_VERSION); - if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev)) - lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); - if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) - lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); - lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); - - lvid->descTag.descCRC = cpu_to_le16( - udf_crc((char *)lvid + sizeof(tag), - le16_to_cpu(lvid->descTag.descCRCLength), - 0)); - - lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); - mark_buffer_dirty(bh); - } + if (lvid->integrityType != LVID_INTEGRITY_TYPE_OPEN) + return; + + lvidiu = udf_sb_lvidiu(sbi); + lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; + lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; + udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME); + if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) + lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION); + if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev)) + lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); + if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) + lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); + lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); + + lvid->descTag.descCRC = cpu_to_le16( + crc_itu_t(0, (char *)lvid + sizeof(tag), + le16_to_cpu(lvid->descTag.descCRCLength))); + + lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); + mark_buffer_dirty(bh); } static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) @@ -1708,22 +1825,35 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) vfree(bitmap); } -/* - * udf_read_super - * - * PURPOSE - * Complete the specified super block. - * - * PRE-CONDITIONS - * sb Pointer to superblock to complete - never NULL. - * sb->s_dev Device to read suberblock from. - * options Pointer to mount options. - * silent Silent flag. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ +static void udf_free_partition(struct udf_part_map *map) +{ + int i; + struct udf_meta_data *mdata; + + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) + iput(map->s_uspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) + iput(map->s_fspace.s_table); + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) + udf_sb_free_bitmap(map->s_uspace.s_bitmap); + if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) + udf_sb_free_bitmap(map->s_fspace.s_bitmap); + if (map->s_partition_type == UDF_SPARABLE_MAP15) + for (i = 0; i < 4; i++) + brelse(map->s_type_specific.s_sparing.s_spar_map[i]); + else if (map->s_partition_type == UDF_METADATA_MAP25) { + mdata = &map->s_type_specific.s_metadata; + iput(mdata->s_metadata_fe); + mdata->s_metadata_fe = NULL; + + iput(mdata->s_mirror_fe); + mdata->s_mirror_fe = NULL; + + iput(mdata->s_bitmap_fe); + mdata->s_bitmap_fe = NULL; + } +} + static int udf_fill_super(struct super_block *sb, void *options, int silent) { int i; @@ -1776,8 +1906,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sbi->s_nls_map = uopt.nls_map; /* Set the block size for all transfers */ - if (!udf_set_blocksize(sb, uopt.blocksize)) + if (!sb_min_blocksize(sb, uopt.blocksize)) { + udf_debug("Bad block size (%d)\n", uopt.blocksize); + printk(KERN_ERR "udf: bad block size (%d)\n", uopt.blocksize); goto error_out; + } if (uopt.session == 0xFFFFFFFF) sbi->s_session = udf_get_last_session(sb); @@ -1789,7 +1922,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sbi->s_last_block = uopt.lastblock; sbi->s_anchor[0] = sbi->s_anchor[1] = 0; sbi->s_anchor[2] = uopt.anchor; - sbi->s_anchor[3] = 256; if (udf_check_valid(sb, uopt.novrs, silent)) { /* read volume recognition sequences */ @@ -1806,7 +1938,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; - if (udf_load_partition(sb, &fileset)) { + if (udf_load_sequence(sb, &fileset)) { printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); goto error_out; } @@ -1856,12 +1988,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) } if (!silent) { - kernel_timestamp ts; - udf_time_to_stamp(&ts, sbi->s_record_time); + timestamp ts; + udf_time_to_disk_stamp(&ts, sbi->s_record_time); udf_info("UDF: Mounting volume '%s', " "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", - sbi->s_volume_ident, ts.year, ts.month, ts.day, - ts.hour, ts.minute, ts.typeAndTimezone); + sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, + ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); } if (!(sb->s_flags & MS_RDONLY)) udf_open_lvid(sb); @@ -1890,21 +2022,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) error_out: if (sbi->s_vat_inode) iput(sbi->s_vat_inode); - if (sbi->s_partitions) { - struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition]; - if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) - iput(map->s_uspace.s_table); - if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) - iput(map->s_fspace.s_table); - if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) - udf_sb_free_bitmap(map->s_uspace.s_bitmap); - if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) - udf_sb_free_bitmap(map->s_fspace.s_bitmap); - if (map->s_partition_type == UDF_SPARABLE_MAP15) - for (i = 0; i < 4; i++) - brelse(map->s_type_specific.s_sparing. - s_spar_map[i]); - } + if (sbi->s_partitions) + for (i = 0; i < sbi->s_partitions; i++) + udf_free_partition(&sbi->s_partmaps[i]); #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); @@ -1920,8 +2040,8 @@ error_out: return -EINVAL; } -void udf_error(struct super_block *sb, const char *function, - const char *fmt, ...) +static void udf_error(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; @@ -1948,19 +2068,6 @@ void udf_warning(struct super_block *sb, const char *function, sb->s_id, function, error_buf); } -/* - * udf_put_super - * - * PURPOSE - * Prepare for destruction of the superblock. - * - * DESCRIPTION - * Called before the filesystem is unmounted. - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ static void udf_put_super(struct super_block *sb) { int i; @@ -1969,21 +2076,9 @@ static void udf_put_super(struct super_block *sb) sbi = UDF_SB(sb); if (sbi->s_vat_inode) iput(sbi->s_vat_inode); - if (sbi->s_partitions) { - struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition]; - if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) - iput(map->s_uspace.s_table); - if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) - iput(map->s_fspace.s_table); - if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) - udf_sb_free_bitmap(map->s_uspace.s_bitmap); - if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) - udf_sb_free_bitmap(map->s_fspace.s_bitmap); - if (map->s_partition_type == UDF_SPARABLE_MAP15) - for (i = 0; i < 4; i++) - brelse(map->s_type_specific.s_sparing. - s_spar_map[i]); - } + if (sbi->s_partitions) + for (i = 0; i < sbi->s_partitions; i++) + udf_free_partition(&sbi->s_partmaps[i]); #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); @@ -1996,19 +2091,6 @@ static void udf_put_super(struct super_block *sb) sb->s_fs_info = NULL; } -/* - * udf_stat_fs - * - * PURPOSE - * Return info about the filesystem. - * - * DESCRIPTION - * Called by sys_statfs() - * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -2035,10 +2117,6 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static unsigned char udf_bitmap_lookup[16] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 -}; - static unsigned int udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) { @@ -2048,7 +2126,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, int block = 0, newblock; kernel_lb_addr loc; uint32_t bytes; - uint8_t value; uint8_t *ptr; uint16_t ident; struct spaceBitmapDesc *bm; @@ -2074,13 +2151,10 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, ptr = (uint8_t *)bh->b_data; while (bytes > 0) { - while ((bytes > 0) && (index < sb->s_blocksize)) { - value = ptr[index]; - accum += udf_bitmap_lookup[value & 0x0f]; - accum += udf_bitmap_lookup[value >> 4]; - index++; - bytes--; - } + u32 cur_bytes = min_t(u32, bytes, sb->s_blocksize - index); + accum += bitmap_weight((const unsigned long *)(ptr + index), + cur_bytes * 8); + bytes -= cur_bytes; if (bytes) { brelse(bh); newblock = udf_get_lb_pblock(sb, loc, ++block); diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 6ec99221e50c..c3265e1385d4 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -23,7 +23,6 @@ #include <asm/uaccess.h> #include <linux/errno.h> #include <linux/fs.h> -#include <linux/udf_fs.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/stat.h> diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index fe61be17cdab..65e19b4f9424 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -22,7 +22,6 @@ #include "udfdecl.h" #include <linux/fs.h> #include <linux/mm.h> -#include <linux/udf_fs.h> #include <linux/buffer_head.h> #include "udf_i.h" @@ -180,6 +179,24 @@ void udf_discard_prealloc(struct inode *inode) brelse(epos.bh); } +static void udf_update_alloc_ext_desc(struct inode *inode, + struct extent_position *epos, + u32 lenalloc) +{ + struct super_block *sb = inode->i_sb; + struct udf_sb_info *sbi = UDF_SB(sb); + + struct allocExtDesc *aed = (struct allocExtDesc *) (epos->bh->b_data); + int len = sizeof(struct allocExtDesc); + + aed->lengthAllocDescs = cpu_to_le32(lenalloc); + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || sbi->s_udfrev >= 0x0201) + len += lenalloc; + + udf_update_tag(epos->bh->b_data, len); + mark_buffer_dirty_inode(epos->bh, inode); +} + void udf_truncate_extents(struct inode *inode) { struct extent_position epos; @@ -187,7 +204,6 @@ void udf_truncate_extents(struct inode *inode) uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; int8_t etype; struct super_block *sb = inode->i_sb; - struct udf_sb_info *sbi = UDF_SB(sb); sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset; loff_t byte_offset; int adsize; @@ -224,35 +240,15 @@ void udf_truncate_extents(struct inode *inode) if (indirect_ext_len) { /* We managed to free all extents in the * indirect extent - free it too */ - if (!epos.bh) - BUG(); + BUG_ON(!epos.bh); udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len); - } else { - if (!epos.bh) { - iinfo->i_lenAlloc = - lenalloc; - mark_inode_dirty(inode); - } else { - struct allocExtDesc *aed = - (struct allocExtDesc *) - (epos.bh->b_data); - int len = - sizeof(struct allocExtDesc); - - aed->lengthAllocDescs = - cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(sb, - UDF_FLAG_STRICT) || - sbi->s_udfrev >= 0x0201) - len += lenalloc; - - udf_update_tag(epos.bh->b_data, - len); - mark_buffer_dirty_inode( - epos.bh, inode); - } - } + } else if (!epos.bh) { + iinfo->i_lenAlloc = lenalloc; + mark_inode_dirty(inode); + } else + udf_update_alloc_ext_desc(inode, + &epos, lenalloc); brelse(epos.bh); epos.offset = sizeof(struct allocExtDesc); epos.block = eloc; @@ -272,29 +268,14 @@ void udf_truncate_extents(struct inode *inode) } if (indirect_ext_len) { - if (!epos.bh) - BUG(); + BUG_ON(!epos.bh); udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len); - } else { - if (!epos.bh) { - iinfo->i_lenAlloc = lenalloc; - mark_inode_dirty(inode); - } else { - struct allocExtDesc *aed = - (struct allocExtDesc *)(epos.bh->b_data); - aed->lengthAllocDescs = cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || - sbi->s_udfrev >= 0x0201) - udf_update_tag(epos.bh->b_data, - lenalloc + - sizeof(struct allocExtDesc)); - else - udf_update_tag(epos.bh->b_data, - sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(epos.bh, inode); - } - } + } else if (!epos.bh) { + iinfo->i_lenAlloc = lenalloc; + mark_inode_dirty(inode); + } else + udf_update_alloc_ext_desc(inode, &epos, lenalloc); } else if (inode->i_size) { if (byte_offset) { kernel_long_ad extent; diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index ccc52f16bf7d..4f86b1d98a5d 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -1,10 +1,32 @@ -#ifndef __LINUX_UDF_I_H -#define __LINUX_UDF_I_H +#ifndef _UDF_I_H +#define _UDF_I_H + +struct udf_inode_info { + struct timespec i_crtime; + /* Physical address of inode */ + kernel_lb_addr i_location; + __u64 i_unique; + __u32 i_lenEAttr; + __u32 i_lenAlloc; + __u64 i_lenExtents; + __u32 i_next_alloc_block; + __u32 i_next_alloc_goal; + unsigned i_alloc_type : 3; + unsigned i_efe : 1; /* extendedFileEntry */ + unsigned i_use : 1; /* unallocSpaceEntry */ + unsigned i_strat4096 : 1; + unsigned reserved : 26; + union { + short_ad *i_sad; + long_ad *i_lad; + __u8 *i_data; + } i_ext; + struct inode vfs_inode; +}; -#include <linux/udf_fs_i.h> static inline struct udf_inode_info *UDF_I(struct inode *inode) { return list_entry(inode, struct udf_inode_info, vfs_inode); } -#endif /* !defined(_LINUX_UDF_I_H) */ +#endif /* _UDF_I_H) */ diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 737d1c604eea..1c1c514a9725 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -1,10 +1,12 @@ #ifndef __LINUX_UDF_SB_H #define __LINUX_UDF_SB_H +#include <linux/mutex.h> + /* Since UDF 2.01 is ISO 13346 based... */ #define UDF_SUPER_MAGIC 0x15013346 -#define UDF_MAX_READ_VERSION 0x0201 +#define UDF_MAX_READ_VERSION 0x0250 #define UDF_MAX_WRITE_VERSION 0x0201 #define UDF_FLAG_USE_EXTENDED_FE 0 @@ -38,6 +40,111 @@ #define UDF_PART_FLAG_REWRITABLE 0x0040 #define UDF_PART_FLAG_OVERWRITABLE 0x0080 +#define UDF_MAX_BLOCK_LOADED 8 + +#define UDF_TYPE1_MAP15 0x1511U +#define UDF_VIRTUAL_MAP15 0x1512U +#define UDF_VIRTUAL_MAP20 0x2012U +#define UDF_SPARABLE_MAP15 0x1522U +#define UDF_METADATA_MAP25 0x2511U + +#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ + +struct udf_meta_data { + __u32 s_meta_file_loc; + __u32 s_mirror_file_loc; + __u32 s_bitmap_file_loc; + __u32 s_alloc_unit_size; + __u16 s_align_unit_size; + __u8 s_dup_md_flag; + struct inode *s_metadata_fe; + struct inode *s_mirror_fe; + struct inode *s_bitmap_fe; +}; + +struct udf_sparing_data { + __u16 s_packet_len; + struct buffer_head *s_spar_map[4]; +}; + +struct udf_virtual_data { + __u32 s_num_entries; + __u16 s_start_offset; +}; + +struct udf_bitmap { + __u32 s_extLength; + __u32 s_extPosition; + __u16 s_nr_groups; + struct buffer_head **s_block_bitmap; +}; + +struct udf_part_map { + union { + struct udf_bitmap *s_bitmap; + struct inode *s_table; + } s_uspace; + union { + struct udf_bitmap *s_bitmap; + struct inode *s_table; + } s_fspace; + __u32 s_partition_root; + __u32 s_partition_len; + __u16 s_partition_type; + __u16 s_partition_num; + union { + struct udf_sparing_data s_sparing; + struct udf_virtual_data s_virtual; + struct udf_meta_data s_metadata; + } s_type_specific; + __u32 (*s_partition_func)(struct super_block *, __u32, __u16, __u32); + __u16 s_volumeseqnum; + __u16 s_partition_flags; +}; + +#pragma pack() + +struct udf_sb_info { + struct udf_part_map *s_partmaps; + __u8 s_volume_ident[32]; + + /* Overall info */ + __u16 s_partitions; + __u16 s_partition; + + /* Sector headers */ + __s32 s_session; + __u32 s_anchor[3]; + __u32 s_last_block; + + struct buffer_head *s_lvid_bh; + + /* Default permissions */ + mode_t s_umask; + gid_t s_gid; + uid_t s_uid; + + /* Root Info */ + struct timespec s_record_time; + + /* Fileset Info */ + __u16 s_serial_number; + + /* highest UDF revision we have recorded to this media */ + __u16 s_udfrev; + + /* Miscellaneous flags */ + __u32 s_flags; + + /* Encoding info */ + struct nls_table *s_nls_map; + + /* VAT inode */ + struct inode *s_vat_inode; + + struct mutex s_alloc_mutex; +}; + static inline struct udf_sb_info *UDF_SB(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 681dc2b66cdb..f3f45d029277 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -1,17 +1,37 @@ #ifndef __UDF_DECL_H #define __UDF_DECL_H -#include <linux/udf_fs.h> #include "ecma_167.h" #include "osta_udf.h" #include <linux/fs.h> #include <linux/types.h> -#include <linux/udf_fs_i.h> -#include <linux/udf_fs_sb.h> #include <linux/buffer_head.h> +#include <linux/udf_fs_i.h> +#include "udf_sb.h" #include "udfend.h" +#include "udf_i.h" + +#define UDF_PREALLOCATE +#define UDF_DEFAULT_PREALLOC_BLOCKS 8 + +#define UDFFS_DEBUG + +#ifdef UDFFS_DEBUG +#define udf_debug(f, a...) \ +do { \ + printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \ + __FILE__, __LINE__, __func__); \ + printk(f, ##a); \ +} while (0) +#else +#define udf_debug(f, a...) /**/ +#endif + +#define udf_info(f, a...) \ + printk(KERN_INFO "UDF-fs INFO " f, ##a); + #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) @@ -23,16 +43,24 @@ #define UDF_NAME_LEN 256 #define UDF_PATH_LEN 1023 -#define udf_file_entry_alloc_offset(inode)\ - (UDF_I(inode)->i_use ?\ - sizeof(struct unallocSpaceEntry) :\ - ((UDF_I(inode)->i_efe ?\ - sizeof(struct extendedFileEntry) :\ - sizeof(struct fileEntry)) + UDF_I(inode)->i_lenEAttr)) - -#define udf_ext0_offset(inode)\ - (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ?\ - udf_file_entry_alloc_offset(inode) : 0) +static inline size_t udf_file_entry_alloc_offset(struct inode *inode) +{ + struct udf_inode_info *iinfo = UDF_I(inode); + if (iinfo->i_use) + return sizeof(struct unallocSpaceEntry); + else if (iinfo->i_efe) + return sizeof(struct extendedFileEntry) + iinfo->i_lenEAttr; + else + return sizeof(struct fileEntry) + iinfo->i_lenEAttr; +} + +static inline size_t udf_ext0_offset(struct inode *inode) +{ + if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + return udf_file_entry_alloc_offset(inode); + else + return 0; +} #define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset)) @@ -83,7 +111,6 @@ struct extent_position { }; /* super.c */ -extern void udf_error(struct super_block *, const char *, const char *, ...); extern void udf_warning(struct super_block *, const char *, const char *, ...); /* namei.c */ @@ -150,6 +177,8 @@ extern uint32_t udf_get_pblock_virt20(struct super_block *, uint32_t, uint16_t, uint32_t); extern uint32_t udf_get_pblock_spar15(struct super_block *, uint32_t, uint16_t, uint32_t); +extern uint32_t udf_get_pblock_meta25(struct super_block *, uint32_t, uint16_t, + uint32_t); extern int udf_relocate_blocks(struct super_block *, long, long *); /* unicode.c */ @@ -157,7 +186,7 @@ extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, int); extern int udf_build_ustr(struct ustr *, dstring *, int); -extern int udf_CS0toUTF8(struct ustr *, struct ustr *); +extern int udf_CS0toUTF8(struct ustr *, const struct ustr *); /* ialloc.c */ extern void udf_free_inode(struct inode *); @@ -191,11 +220,9 @@ extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); -/* crc.c */ -extern uint16_t udf_crc(uint8_t *, uint32_t, uint16_t); - /* udftime.c */ -extern time_t *udf_stamp_to_time(time_t *, long *, kernel_timestamp); -extern kernel_timestamp *udf_time_to_stamp(kernel_timestamp *, struct timespec); +extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest, + timestamp src); +extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src); #endif /* __UDF_DECL_H */ diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h index c4bd1203f857..489f52fb428c 100644 --- a/fs/udf/udfend.h +++ b/fs/udf/udfend.h @@ -24,17 +24,6 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in) return out; } -static inline kernel_timestamp lets_to_cpu(timestamp in) -{ - kernel_timestamp out; - - memcpy(&out, &in, sizeof(timestamp)); - out.typeAndTimezone = le16_to_cpu(in.typeAndTimezone); - out.year = le16_to_cpu(in.year); - - return out; -} - static inline short_ad lesa_to_cpu(short_ad in) { short_ad out; @@ -85,15 +74,4 @@ static inline kernel_extent_ad leea_to_cpu(extent_ad in) return out; } -static inline timestamp cpu_to_lets(kernel_timestamp in) -{ - timestamp out; - - memcpy(&out, &in, sizeof(timestamp)); - out.typeAndTimezone = cpu_to_le16(in.typeAndTimezone); - out.year = cpu_to_le16(in.year); - - return out; -} - #endif /* __UDF_ENDIAN_H */ diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index ce595732ba6f..5f811655c9b5 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -85,39 +85,38 @@ extern struct timezone sys_tz; #define SECS_PER_HOUR (60 * 60) #define SECS_PER_DAY (SECS_PER_HOUR * 24) -time_t *udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src) +struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src) { int yday; - uint8_t type = src.typeAndTimezone >> 12; + u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); + u16 year = le16_to_cpu(src.year); + uint8_t type = typeAndTimezone >> 12; int16_t offset; if (type == 1) { - offset = src.typeAndTimezone << 4; + offset = typeAndTimezone << 4; /* sign extent offset */ offset = (offset >> 4); if (offset == -2047) /* unspecified offset */ offset = 0; - } else { + } else offset = 0; - } - if ((src.year < EPOCH_YEAR) || - (src.year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) { - *dest = -1; - *dest_usec = -1; + if ((year < EPOCH_YEAR) || + (year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) { return NULL; } - *dest = year_seconds[src.year - EPOCH_YEAR]; - *dest -= offset * 60; + dest->tv_sec = year_seconds[year - EPOCH_YEAR]; + dest->tv_sec -= offset * 60; - yday = ((__mon_yday[__isleap(src.year)][src.month - 1]) + src.day - 1); - *dest += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second; - *dest_usec = src.centiseconds * 10000 + - src.hundredsOfMicroseconds * 100 + src.microseconds; + yday = ((__mon_yday[__isleap(year)][src.month - 1]) + src.day - 1); + dest->tv_sec += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second; + dest->tv_nsec = 1000 * (src.centiseconds * 10000 + + src.hundredsOfMicroseconds * 100 + src.microseconds); return dest; } -kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) +timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts) { long int days, rem, y; const unsigned short int *ip; @@ -128,7 +127,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) if (!dest) return NULL; - dest->typeAndTimezone = 0x1000 | (offset & 0x0FFF); + dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF)); ts.tv_sec += offset * 60; days = ts.tv_sec / SECS_PER_DAY; @@ -151,7 +150,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) - LEAPS_THRU_END_OF(y - 1)); y = yg; } - dest->year = y; + dest->year = cpu_to_le16(y); ip = __mon_yday[__isleap(y)]; for (y = 11; days < (long int)ip[y]; --y) continue; diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index e533b11703bf..9fdf8c93c58e 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -23,7 +23,7 @@ #include <linux/kernel.h> #include <linux/string.h> /* for memset */ #include <linux/nls.h> -#include <linux/udf_fs.h> +#include <linux/crc-itu-t.h> #include "udf_sb.h" @@ -49,14 +49,16 @@ int udf_build_ustr(struct ustr *dest, dstring *ptr, int size) { int usesize; - if ((!dest) || (!ptr) || (!size)) + if (!dest || !ptr || !size) return -1; + BUG_ON(size < 2); - memset(dest, 0, sizeof(struct ustr)); - usesize = (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size; + usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name)); + usesize = min(usesize, size - 2); dest->u_cmpID = ptr[0]; - dest->u_len = ptr[size - 1]; - memcpy(dest->u_name, ptr + 1, usesize - 1); + dest->u_len = usesize; + memcpy(dest->u_name, ptr + 1, usesize); + memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize); return 0; } @@ -83,9 +85,6 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) * PURPOSE * Convert OSTA Compressed Unicode to the UTF-8 equivalent. * - * DESCRIPTION - * This routine is only called by udf_filldir(). - * * PRE-CONDITIONS * utf Pointer to UTF-8 output buffer. * ocu Pointer to OSTA Compressed Unicode input buffer @@ -99,43 +98,39 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) * November 12, 1997 - Andrew E. Mileski * Written, tested, and released. */ -int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) +int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) { - uint8_t *ocu; - uint32_t c; + const uint8_t *ocu; uint8_t cmp_id, ocu_len; int i; - ocu = ocu_i->u_name; - ocu_len = ocu_i->u_len; - cmp_id = ocu_i->u_cmpID; - utf_o->u_len = 0; - if (ocu_len == 0) { memset(utf_o, 0, sizeof(struct ustr)); - utf_o->u_cmpID = 0; - utf_o->u_len = 0; return 0; } - if ((cmp_id != 8) && (cmp_id != 16)) { + cmp_id = ocu_i->u_cmpID; + if (cmp_id != 8 && cmp_id != 16) { + memset(utf_o, 0, sizeof(struct ustr)); printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); return 0; } + ocu = ocu_i->u_name; + utf_o->u_len = 0; for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { /* Expand OSTA compressed Unicode to Unicode */ - c = ocu[i++]; + uint32_t c = ocu[i++]; if (cmp_id == 16) c = (c << 8) | ocu[i++]; /* Compress Unicode to UTF-8 */ - if (c < 0x80U) { + if (c < 0x80U) utf_o->u_name[utf_o->u_len++] = (uint8_t)c; - } else if (c < 0x800U) { + else if (c < 0x800U) { utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); utf_o->u_name[utf_o->u_len++] = @@ -255,35 +250,32 @@ error_out: } static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, - struct ustr *ocu_i) + const struct ustr *ocu_i) { - uint8_t *ocu; - uint32_t c; + const uint8_t *ocu; uint8_t cmp_id, ocu_len; int i; - ocu = ocu_i->u_name; ocu_len = ocu_i->u_len; - cmp_id = ocu_i->u_cmpID; - utf_o->u_len = 0; - if (ocu_len == 0) { memset(utf_o, 0, sizeof(struct ustr)); - utf_o->u_cmpID = 0; - utf_o->u_len = 0; return 0; } - if ((cmp_id != 8) && (cmp_id != 16)) { + cmp_id = ocu_i->u_cmpID; + if (cmp_id != 8 && cmp_id != 16) { + memset(utf_o, 0, sizeof(struct ustr)); printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); return 0; } + ocu = ocu_i->u_name; + utf_o->u_len = 0; for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { /* Expand OSTA compressed Unicode to Unicode */ - c = ocu[i++]; + uint32_t c = ocu[i++]; if (cmp_id == 16) c = (c << 8) | ocu[i++]; @@ -463,7 +455,7 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, } else if (newIndex > 250) newIndex = 250; newName[newIndex++] = CRC_MARK; - valueCRC = udf_crc(fidName, fidNameLen, 0); + valueCRC = crc_itu_t(0, fidName, fidNameLen); newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; diff --git a/fs/utimes.c b/fs/utimes.c index b18da9c0b97f..a2bef77dc9c9 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -2,6 +2,7 @@ #include <linux/file.h> #include <linux/fs.h> #include <linux/linkage.h> +#include <linux/mount.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/stat.h> @@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags struct inode *inode; struct iattr newattrs; struct file *f = NULL; + struct vfsmount *mnt; error = -EINVAL; if (times && (!nsec_valid(times[0].tv_nsec) || @@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (!f) goto out; dentry = f->f_path.dentry; + mnt = f->f_path.mnt; } else { error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); if (error) goto out; dentry = nd.path.dentry; + mnt = nd.path.mnt; } inode = dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = mnt_want_write(mnt); + if (error) goto dput_and_out; /* Don't worry, the checks are done in inode_change_ok() */ @@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (times) { error = -EPERM; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - goto dput_and_out; + goto mnt_drop_write_and_out; if (times[0].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_ATIME; @@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags } else { error = -EACCES; if (IS_IMMUTABLE(inode)) - goto dput_and_out; + goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { if (f) { if (!(f->f_mode & FMODE_WRITE)) - goto dput_and_out; + goto mnt_drop_write_and_out; } else { error = vfs_permission(&nd, MAY_WRITE); if (error) - goto dput_and_out; + goto mnt_drop_write_and_out; } } } mutex_lock(&inode->i_mutex); error = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); +mnt_drop_write_and_out: + mnt_drop_write(mnt); dput_and_out: if (f) fput(f); diff --git a/fs/xattr.c b/fs/xattr.c index 3acab1615460..89a942f07e1b 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/xattr.h> +#include <linux/mount.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask) * filesystem or on an immutable / append-only inode. */ if (mask & MAY_WRITE) { - if (IS_RDONLY(inode)) - return -EROFS; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; } @@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, error = user_path_walk(path, &nd); if (error) return error; - error = setxattr(nd.path.dentry, name, value, size, flags); + error = mnt_want_write(nd.path.mnt); + if (!error) { + error = setxattr(nd.path.dentry, name, value, size, flags); + mnt_drop_write(nd.path.mnt); + } path_put(&nd.path); return error; } @@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, error = user_path_walk_link(path, &nd); if (error) return error; - error = setxattr(nd.path.dentry, name, value, size, flags); + error = mnt_want_write(nd.path.mnt); + if (!error) { + error = setxattr(nd.path.dentry, name, value, size, flags); + mnt_drop_write(nd.path.mnt); + } path_put(&nd.path); return error; } @@ -295,7 +302,11 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = setxattr(dentry, name, value, size, flags); + error = mnt_want_write(f->f_path.mnt); + if (!error) { + error = setxattr(dentry, name, value, size, flags); + mnt_drop_write(f->f_path.mnt); + } fput(f); return error; } @@ -482,7 +493,11 @@ sys_removexattr(char __user *path, char __user *name) error = user_path_walk(path, &nd); if (error) return error; - error = removexattr(nd.path.dentry, name); + error = mnt_want_write(nd.path.mnt); + if (!error) { + error = removexattr(nd.path.dentry, name); + mnt_drop_write(nd.path.mnt); + } path_put(&nd.path); return error; } @@ -496,7 +511,11 @@ sys_lremovexattr(char __user *path, char __user *name) error = user_path_walk_link(path, &nd); if (error) return error; - error = removexattr(nd.path.dentry, name); + error = mnt_want_write(nd.path.mnt); + if (!error) { + error = removexattr(nd.path.dentry, name); + mnt_drop_write(nd.path.mnt); + } path_put(&nd.path); return error; } @@ -513,7 +532,11 @@ sys_fremovexattr(int fd, char __user *name) return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = removexattr(dentry, name); + error = mnt_want_write(f->f_path.mnt); + if (!error) { + error = removexattr(dentry, name); + mnt_drop_write(f->f_path.mnt); + } fput(f); return error; } diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h index 2009e6d922ce..3abe7e9ceb33 100644 --- a/fs/xfs/linux-2.6/sema.h +++ b/fs/xfs/linux-2.6/sema.h @@ -20,8 +20,8 @@ #include <linux/time.h> #include <linux/wait.h> +#include <linux/semaphore.h> #include <asm/atomic.h> -#include <asm/semaphore.h> /* * sema_t structure just maps to struct semaphore in Linux kernel. diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index bf7759793856..4ddb86b73c6b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -535,8 +535,6 @@ xfs_attrmulti_attr_set( char *kbuf; int error = EFAULT; - if (IS_RDONLY(inode)) - return -EROFS; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return EPERM; if (len > XATTR_SIZE_MAX) @@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove( char *name, __uint32_t flags) { - if (IS_RDONLY(inode)) - return -EROFS; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return EPERM; return xfs_attr_remove(XFS_I(inode), name, flags); @@ -573,6 +569,7 @@ STATIC int xfs_attrmulti_by_handle( xfs_mount_t *mp, void __user *arg, + struct file *parfilp, struct inode *parinode) { int error; @@ -626,13 +623,21 @@ xfs_attrmulti_by_handle( &ops[i].am_length, ops[i].am_flags); break; case ATTR_OP_SET: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; ops[i].am_error = xfs_attrmulti_attr_set(inode, attr_name, ops[i].am_attrvalue, ops[i].am_length, ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); break; case ATTR_OP_REMOVE: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; ops[i].am_error = xfs_attrmulti_attr_remove(inode, attr_name, ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); break; default: ops[i].am_error = EINVAL; @@ -1133,7 +1138,7 @@ xfs_ioctl( return xfs_attrlist_by_handle(mp, arg, inode); case XFS_IOC_ATTRMULTI_BY_HANDLE: - return xfs_attrmulti_by_handle(mp, arg, inode); + return xfs_attrmulti_by_handle(mp, arg, filp, inode); case XFS_IOC_SWAPEXT: { error = xfs_swapext((struct xfs_swapext __user *)arg); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 0c958cf77758..a1237dad6430 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -155,13 +155,6 @@ xfs_ichgtime_fast( */ ASSERT((flags & XFS_ICHGTIME_ACC) == 0); - /* - * We're not supposed to change timestamps in readonly-mounted - * filesystems. Throw it away if anyone asks us. - */ - if (unlikely(IS_RDONLY(inode))) - return; - if (flags & XFS_ICHGTIME_MOD) { tvp = &inode->i_mtime; ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 21c0dbc74093..1ebd8004469c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -51,6 +51,7 @@ #include "xfs_vnodeops.h" #include <linux/capability.h> +#include <linux/mount.h> #include <linux/writeback.h> @@ -670,10 +671,16 @@ start: if (new_size > xip->i_size) xip->i_new_size = new_size; - if (likely(!(ioflags & IO_INVIS))) { + /* + * We're not supposed to change timestamps in readonly-mounted + * filesystems. Throw it away if anyone asks us. + */ + if (likely(!(ioflags & IO_INVIS) && + !mnt_want_write(file->f_path.mnt))) { file_update_time(file); xfs_ichgtime_fast(xip, inode, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + mnt_drop_write(file->f_path.mnt); } /* |