diff options
Diffstat (limited to 'fs')
62 files changed, 1301 insertions, 1292 deletions
@@ -167,10 +167,25 @@ static int __init aio_setup(void) } __initcall(aio_setup); +static void put_aio_ring_file(struct kioctx *ctx) +{ + struct file *aio_ring_file = ctx->aio_ring_file; + if (aio_ring_file) { + truncate_setsize(aio_ring_file->f_inode, 0); + + /* Prevent further access to the kioctx from migratepages */ + spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock); + aio_ring_file->f_inode->i_mapping->private_data = NULL; + ctx->aio_ring_file = NULL; + spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock); + + fput(aio_ring_file); + } +} + static void aio_free_ring(struct kioctx *ctx) { int i; - struct file *aio_ring_file = ctx->aio_ring_file; for (i = 0; i < ctx->nr_pages; i++) { pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, @@ -178,14 +193,10 @@ static void aio_free_ring(struct kioctx *ctx) put_page(ctx->ring_pages[i]); } + put_aio_ring_file(ctx); + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); - - if (aio_ring_file) { - truncate_setsize(aio_ring_file->f_inode, 0); - fput(aio_ring_file); - ctx->aio_ring_file = NULL; - } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -207,9 +218,8 @@ static int aio_set_page_dirty(struct page *page) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) { - struct kioctx *ctx = mapping->private_data; + struct kioctx *ctx; unsigned long flags; - unsigned idx = old->index; int rc; /* Writeback must be complete */ @@ -224,10 +234,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, get_page(new); - spin_lock_irqsave(&ctx->completion_lock, flags); - migrate_page_copy(new, old); - ctx->ring_pages[idx] = new; - spin_unlock_irqrestore(&ctx->completion_lock, flags); + /* We can potentially race against kioctx teardown here. Use the + * address_space's private data lock to protect the mapping's + * private_data. + */ + spin_lock(&mapping->private_lock); + ctx = mapping->private_data; + if (ctx) { + pgoff_t idx; + spin_lock_irqsave(&ctx->completion_lock, flags); + migrate_page_copy(new, old); + idx = old->index; + if (idx < (pgoff_t)ctx->nr_pages) + ctx->ring_pages[idx] = new; + spin_unlock_irqrestore(&ctx->completion_lock, flags); + } else + rc = -EBUSY; + spin_unlock(&mapping->private_lock); return rc; } @@ -617,8 +640,7 @@ out_freepcpu: out_freeref: free_percpu(ctx->users.pcpu_count); out_freectx: - if (ctx->aio_ring_file) - fput(ctx->aio_ring_file); + put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 58b7d14b08ee..08cc08f037a6 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) worker->idle = 1; /* the list may be empty if the worker is just starting */ - if (!list_empty(&worker->worker_list)) { + if (!list_empty(&worker->worker_list) && + !worker->workers->stopping) { list_move(&worker->worker_list, &worker->workers->idle_list); } @@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) spin_lock_irqsave(&worker->workers->lock, flags); worker->idle = 0; - if (!list_empty(&worker->worker_list)) { + if (!list_empty(&worker->worker_list) && + !worker->workers->stopping) { list_move_tail(&worker->worker_list, &worker->workers->worker_list); } @@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers) int can_stop; spin_lock_irq(&workers->lock); + workers->stopping = 1; list_splice_init(&workers->idle_list, &workers->worker_list); while (!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; @@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, workers->ordered = 0; workers->atomic_start_pending = 0; workers->atomic_worker_start = async_helper; + workers->stopping = 0; } /* @@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) atomic_set(&worker->num_pending, 0); atomic_set(&worker->refs, 1); worker->workers = workers; - worker->task = kthread_run(worker_loop, worker, - "btrfs-%s-%d", workers->name, - workers->num_workers + 1); + worker->task = kthread_create(worker_loop, worker, + "btrfs-%s-%d", workers->name, + workers->num_workers + 1); if (IS_ERR(worker->task)) { ret = PTR_ERR(worker->task); - kfree(worker); goto fail; } + spin_lock_irq(&workers->lock); + if (workers->stopping) { + spin_unlock_irq(&workers->lock); + goto fail_kthread; + } list_add_tail(&worker->worker_list, &workers->idle_list); worker->idle = 1; workers->num_workers++; @@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) WARN_ON(workers->num_workers_starting < 0); spin_unlock_irq(&workers->lock); + wake_up_process(worker->task); return 0; + +fail_kthread: + kthread_stop(worker->task); fail: + kfree(worker); spin_lock_irq(&workers->lock); workers->num_workers_starting--; spin_unlock_irq(&workers->lock); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 063698b90ce2..1f26792683ed 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -107,6 +107,8 @@ struct btrfs_workers { /* extra name for this worker, used for current->name */ char *name; + + int stopping; }; void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 70681686e8dc..9efb94e95858 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -535,10 +535,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); btrfs_rm_dev_replace_srcdev(fs_info, src_device); - if (src_device->bdev) { - /* zero out the old super */ - btrfs_scratch_superblock(src_device); - } + /* * this is again a consistent state where no dev_replace procedure * is running, the target device is part of the filesystem, the diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ae17ed13b32..62176ad89846 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1561,8 +1561,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, return ret; } -struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + bool check_ref) { struct btrfs_root *root; int ret; @@ -1586,7 +1587,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) { - if (btrfs_root_refs(&root->root_item) == 0) + if (check_ref && btrfs_root_refs(&root->root_item) == 0) return ERR_PTR(-ENOENT); return root; } @@ -1595,7 +1596,7 @@ again: if (IS_ERR(root)) return root; - if (btrfs_root_refs(&root->root_item) == 0) { + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { ret = -ENOENT; goto fail; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b71acd6e1e5b..5ce2a7da8b11 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, int btrfs_init_fs_root(struct btrfs_root *root); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *key, + bool check_ref); +static inline struct btrfs_root * +btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + return btrfs_get_fs_root(fs_info, location, true); +} + int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty(struct btrfs_root *root); void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c09a40db53db..51731b76900d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -145,8 +145,16 @@ int __init extent_io_init(void) offsetof(struct btrfs_io_bio, bio)); if (!btrfs_bioset) goto free_buffer_cache; + + if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE)) + goto free_bioset; + return 0; +free_bioset: + bioset_free(btrfs_bioset); + btrfs_bioset = NULL; + free_buffer_cache: kmem_cache_destroy(extent_buffer_cache); extent_buffer_cache = NULL; @@ -1482,10 +1490,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, cur_start = state->end + 1; node = rb_next(node); total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) { - *end = *start + max_bytes - 1; + if (total_bytes >= max_bytes) break; - } if (!node) break; } @@ -1614,7 +1620,7 @@ again: *start = delalloc_start; *end = delalloc_end; free_extent_state(cached_state); - return found; + return 0; } /* @@ -1627,10 +1633,9 @@ again: /* * make sure to limit the number of pages we try to lock down - * if we're looping. */ - if (delalloc_end + 1 - delalloc_start > max_bytes && loops) - delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; + if (delalloc_end + 1 - delalloc_start > max_bytes) + delalloc_end = delalloc_start + max_bytes - 1; /* step two, lock all the pages after the page that has start */ ret = lock_delalloc_pages(inode, locked_page, @@ -1641,8 +1646,7 @@ again: */ free_extent_state(cached_state); if (!loops) { - unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); - max_bytes = PAGE_CACHE_SIZE - offset; + max_bytes = PAGE_CACHE_SIZE; loops = 1; goto again; } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 22ebc13b6c99..51e3afa78354 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6437,6 +6437,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, if (btrfs_extent_readonly(root, disk_bytenr)) goto out; + btrfs_release_path(path); /* * look for other files referencing this extent, if we @@ -7986,7 +7987,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, new_dentry->d_name.name, new_dentry->d_name.len); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a5a26320503f..4a355726151e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, else key.offset = (u64)-1; - return btrfs_read_fs_root_no_name(fs_info, &key); + return btrfs_get_fs_root(fs_info, &key, false); } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0b1f4ef8db98..ec71ea44d2b4 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) continue; } - if (btrfs_root_refs(&root->root_item) == 0) { - btrfs_add_dead_root(root); - continue; - } - err = btrfs_init_fs_root(root); if (err) { btrfs_free_fs_root(root); @@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) btrfs_free_fs_root(root); break; } + + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); } btrfs_free_path(path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e7a95356df83..8c81bdc1ef9b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1838,11 +1838,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, assert_qgroups_uptodate(trans); update_super_roots(root); - if (!root->fs_info->log_root_recovering) { - btrfs_set_super_log_root(root->fs_info->super_copy, 0); - btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); - } - + btrfs_set_super_log_root(root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, sizeof(*root->fs_info->super_copy)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a10645830223..043b215769c2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1716,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev) { WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); + list_del_rcu(&srcdev->dev_list); list_del_rcu(&srcdev->dev_alloc_list); fs_info->fs_devices->num_devices--; @@ -1725,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, } if (srcdev->can_discard) fs_info->fs_devices->num_can_discard--; - if (srcdev->bdev) + if (srcdev->bdev) { fs_info->fs_devices->open_devices--; + /* zero out the old super */ + btrfs_scratch_superblock(srcdev); + } + call_rcu(&srcdev->rcu, free_device); } diff --git a/fs/buffer.c b/fs/buffer.c index 4d7433534f5c..6024877335ca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct buffer_head *bh; sector_t end_block; int ret = 0; /* Will call free_more_memory() */ + gfp_t gfp_mask; - page = find_or_create_page(inode->i_mapping, index, - (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); + gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; + gfp_mask |= __GFP_MOVABLE; + /* + * XXX: __getblk_slow() can not really deal with failure and + * will endlessly loop on improvised global reclaim. Prefer + * looping in the allocator rather than here, at least that + * code knows what it's doing. + */ + gfp_mask |= __GFP_NOFAIL; + + page = find_or_create_page(inode->i_mapping, index, gfp_mask); if (!page) return ret; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a16b4e58bcc6..77fc5e181077 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -120,14 +120,16 @@ cifs_read_super(struct super_block *sb) { struct inode *inode; struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; int rc = 0; cifs_sb = CIFS_SB(sb); + tcon = cifs_sb_master_tcon(cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) sb->s_flags |= MS_POSIXACL; - if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES) + if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files) sb->s_maxbytes = MAX_LFS_FILESIZE; else sb->s_maxbytes = MAX_NON_LFS; @@ -147,7 +149,7 @@ cifs_read_super(struct super_block *sb) goto out_no_root; } - if (cifs_sb_master_tcon(cifs_sb)->nocase) + if (tcon->nocase) sb->s_d_op = &cifs_ci_dentry_ops; else sb->s_d_op = &cifs_dentry_ops; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index ea723a5e8226..6d0b07217ac9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.01" +#define CIFS_VERSION "2.02" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cfa14c80ef3b..52b6f6c26bfc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -547,9 +547,6 @@ struct TCP_Server_Info { unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ - unsigned int max_vcs; /* maximum number of smb sessions, at least - those that can be specified uniquely with - vcnumbers */ unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter */ @@ -715,7 +712,6 @@ struct cifs_ses { enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ __u16 ipc_tid; /* special tid for connection to IPC share */ - __u16 vcnum; char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ @@ -1272,6 +1268,7 @@ struct dfs_info3_param { #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 #define CIFS_FATTR_INO_COLLISION 0x8 +#define CIFS_FATTR_UNKNOWN_NLINK 0x10 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 948676db8e2e..08f9dfb1a894 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1491,15 +1491,30 @@ struct file_notify_information { __u8 FileName[0]; } __attribute__((packed)); -struct reparse_data { - __u32 ReparseTag; - __u16 ReparseDataLength; +/* For IO_REPARSE_TAG_SYMLINK */ +struct reparse_symlink_data { + __le32 ReparseTag; + __le16 ReparseDataLength; __u16 Reserved; - __u16 SubstituteNameOffset; - __u16 SubstituteNameLength; - __u16 PrintNameOffset; - __u16 PrintNameLength; - __u32 Flags; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + char PathBuffer[0]; +} __attribute__((packed)); + +/* For IO_REPARSE_TAG_NFS */ +#define NFS_SPECFILE_LNK 0x00000000014B4E4C +#define NFS_SPECFILE_CHR 0x0000000000524843 +#define NFS_SPECFILE_BLK 0x00000000004B4C42 +#define NFS_SPECFILE_FIFO 0x000000004F464946 +#define NFS_SPECFILE_SOCK 0x000000004B434F53 +struct reparse_posix_data { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le64 InodeType; /* LNK, FIFO, CHR etc. */ char PathBuffer[0]; } __attribute__((packed)); @@ -2652,26 +2667,7 @@ typedef struct file_xattr_info { } __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info level 0x205 */ - -/* flags for chattr command */ -#define EXT_SECURE_DELETE 0x00000001 /* EXT3_SECRM_FL */ -#define EXT_ENABLE_UNDELETE 0x00000002 /* EXT3_UNRM_FL */ -/* Reserved for compress file 0x4 */ -#define EXT_SYNCHRONOUS 0x00000008 /* EXT3_SYNC_FL */ -#define EXT_IMMUTABLE_FL 0x00000010 /* EXT3_IMMUTABLE_FL */ -#define EXT_OPEN_APPEND_ONLY 0x00000020 /* EXT3_APPEND_FL */ -#define EXT_DO_NOT_BACKUP 0x00000040 /* EXT3_NODUMP_FL */ -#define EXT_NO_UPDATE_ATIME 0x00000080 /* EXT3_NOATIME_FL */ -/* 0x100 through 0x800 reserved for compression flags and are GET-ONLY */ -#define EXT_HASH_TREE_INDEXED_DIR 0x00001000 /* GET-ONLY EXT3_INDEX_FL */ -/* 0x2000 reserved for IMAGIC_FL */ -#define EXT_JOURNAL_THIS_FILE 0x00004000 /* GET-ONLY EXT3_JOURNAL_DATA_FL */ -/* 0x8000 reserved for EXT3_NOTAIL_FL */ -#define EXT_SYNCHRONOUS_DIR 0x00010000 /* EXT3_DIRSYNC_FL */ -#define EXT_TOPDIR 0x00020000 /* EXT3_TOPDIR_FL */ - -#define EXT_SET_MASK 0x000300FF -#define EXT_GET_MASK 0x0003DFFF +/* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */ typedef struct file_chattr_info { __le64 mask; /* list of all possible attribute bits */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a3d74fea1623..ccd31ab815d4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -463,7 +463,6 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) cifs_max_pending); set_credits(server, server->maxReq); server->maxBuf = le16_to_cpu(rsp->MaxBufSize); - server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { @@ -3089,7 +3088,8 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, bool is_unicode; unsigned int sub_len; char *sub_start; - struct reparse_data *reparse_buf; + struct reparse_symlink_data *reparse_buf; + struct reparse_posix_data *posix_buf; __u32 data_offset, data_count; char *end_of_smb; @@ -3138,20 +3138,47 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, goto qreparse_out; } end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - reparse_buf = (struct reparse_data *) + reparse_buf = (struct reparse_symlink_data *) ((char *)&pSMBr->hdr.Protocol + data_offset); if ((char *)reparse_buf >= end_of_smb) { rc = -EIO; goto qreparse_out; } - if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset + - reparse_buf->PrintNameLength) > end_of_smb) { + if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) { + cifs_dbg(FYI, "NFS style reparse tag\n"); + posix_buf = (struct reparse_posix_data *)reparse_buf; + + if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) { + cifs_dbg(FYI, "unsupported file type 0x%llx\n", + le64_to_cpu(posix_buf->InodeType)); + rc = -EOPNOTSUPP; + goto qreparse_out; + } + is_unicode = true; + sub_len = le16_to_cpu(reparse_buf->ReparseDataLength); + if (posix_buf->PathBuffer + sub_len > end_of_smb) { + cifs_dbg(FYI, "reparse buf beyond SMB\n"); + rc = -EIO; + goto qreparse_out; + } + *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer, + sub_len, is_unicode, nls_codepage); + goto qreparse_out; + } else if (reparse_buf->ReparseTag != + cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) { + rc = -EOPNOTSUPP; + goto qreparse_out; + } + + /* Reparse tag is NTFS symlink */ + sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) + + reparse_buf->PathBuffer; + sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength); + if (sub_start + sub_len > end_of_smb) { cifs_dbg(FYI, "reparse buf beyond SMB\n"); rc = -EIO; goto qreparse_out; } - sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer; - sub_len = reparse_buf->SubstituteNameLength; if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) is_unicode = true; else diff --git a/fs/cifs/file.c b/fs/cifs/file.c index eb955b525e55..7ddddf2e2504 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3254,6 +3254,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, /* * Reads as many pages as possible from fscache. Returns -ENOBUFS * immediately if the cookie is negative + * + * After this point, every page in the list might have PG_fscache set, + * so we will need to clean that up off of every page we don't use. */ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, &num_pages); @@ -3376,6 +3379,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, kref_put(&rdata->refcount, cifs_readdata_release); } + /* Any pages that have been shown to fscache but didn't get added to + * the pagecache must be uncached before they get returned to the + * allocator. + */ + cifs_fscache_readpages_cancel(mapping->host, page_list); return rc; } diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index fe2492d2a8fc..8d4b7bc8ae91 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -223,6 +223,13 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) fscache_uncache_page(CIFS_I(inode)->fscache, page); } +void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages) +{ + cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n", + __func__, CIFS_I(inode)->fscache, inode); + fscache_readpages_cancel(CIFS_I(inode)->fscache, pages); +} + void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) { struct cifsInodeInfo *cifsi = CIFS_I(inode); diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 63539323e0b9..24794b6cd8ec 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -54,6 +54,7 @@ extern int __cifs_readpages_from_fscache(struct inode *, struct address_space *, struct list_head *, unsigned *); +extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *); extern void __cifs_readpage_to_fscache(struct inode *, struct page *); @@ -91,6 +92,13 @@ static inline void cifs_readpage_to_fscache(struct inode *inode, __cifs_readpage_to_fscache(inode, page); } +static inline void cifs_fscache_readpages_cancel(struct inode *inode, + struct list_head *pages) +{ + if (CIFS_I(inode)->fscache) + return __cifs_fscache_readpages_cancel(inode, pages); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline int cifs_fscache_register(void) { return 0; } static inline void cifs_fscache_unregister(void) {} @@ -131,6 +139,11 @@ static inline int cifs_readpages_from_fscache(struct inode *inode, static inline void cifs_readpage_to_fscache(struct inode *inode, struct page *page) {} +static inline void cifs_fscache_readpages_cancel(struct inode *inode, + struct list_head *pages) +{ +} + #endif /* CONFIG_CIFS_FSCACHE */ #endif /* _CIFS_FSCACHE_H */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f9ff9c173f78..867b7cdc794a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -120,6 +120,33 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) cifs_i->invalid_mapping = true; } +/* + * copy nlink to the inode, unless it wasn't provided. Provide + * sane values if we don't have an existing one and none was provided + */ +static void +cifs_nlink_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) +{ + /* + * if we're in a situation where we can't trust what we + * got from the server (readdir, some non-unix cases) + * fake reasonable values + */ + if (fattr->cf_flags & CIFS_FATTR_UNKNOWN_NLINK) { + /* only provide fake values on a new inode */ + if (inode->i_state & I_NEW) { + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) + set_nlink(inode, 2); + else + set_nlink(inode, 1); + } + return; + } + + /* we trust the server, so update it */ + set_nlink(inode, fattr->cf_nlink); +} + /* populate an inode with info from a cifs_fattr struct */ void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) @@ -134,7 +161,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) inode->i_mtime = fattr->cf_mtime; inode->i_ctime = fattr->cf_ctime; inode->i_rdev = fattr->cf_rdev; - set_nlink(inode, fattr->cf_nlink); + cifs_nlink_fattr_to_inode(inode, fattr); inode->i_uid = fattr->cf_uid; inode->i_gid = fattr->cf_gid; @@ -541,6 +568,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_bytes = le64_to_cpu(info->AllocationSize); fattr->cf_createtime = le64_to_cpu(info->CreationTime); + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; @@ -548,7 +576,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, * Server can return wrong NumberOfLinks value for directories * when Unix extensions are disabled - fake it. */ - fattr->cf_nlink = 2; + if (!tcon->unix_ext) + fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { fattr->cf_mode = S_IFLNK; fattr->cf_dtype = DT_LNK; @@ -561,11 +590,15 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (fattr->cf_cifsattrs & ATTR_READONLY) fattr->cf_mode &= ~(S_IWUGO); - fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); - if (fattr->cf_nlink < 1) { - cifs_dbg(1, "replacing bogus file nlink value %u\n", + /* + * Don't accept zero nlink from non-unix servers unless + * delete is pending. Instead mark it as unknown. + */ + if ((fattr->cf_nlink < 1) && !tcon->unix_ext && + !info->DeletePending) { + cifs_dbg(1, "bogus file nlink value %u\n", fattr->cf_nlink); - fattr->cf_nlink = 1; + fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; } } diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index af847e1cf1c1..651a5279607b 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -780,7 +780,9 @@ static const struct { ERRDOS, ERRnoaccess, 0xc0000290}, { ERRDOS, ERRbadfunc, 0xc000029c}, { ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, { - ERRDOS, ERRinvlevel, 0x007c0001}, }; + ERRDOS, ERRinvlevel, 0x007c0001}, { + 0, 0, 0 } +}; /***************************************************************************** Print an error message from the status code diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 42ef03be089f..53a75f3d0179 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -180,6 +180,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) fattr->cf_dtype = DT_REG; } + /* non-unix readdir doesn't provide nlink */ + fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; + if (fattr->cf_cifsattrs & ATTR_READONLY) fattr->cf_mode &= ~S_IWUGO; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5f99b7f19e78..e87387dbf39f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -32,88 +32,6 @@ #include <linux/slab.h> #include "cifs_spnego.h" -/* - * Checks if this is the first smb session to be reconnected after - * the socket has been reestablished (so we know whether to use vc 0). - * Called while holding the cifs_tcp_ses_lock, so do not block - */ -static bool is_first_ses_reconnect(struct cifs_ses *ses) -{ - struct list_head *tmp; - struct cifs_ses *tmp_ses; - - list_for_each(tmp, &ses->server->smb_ses_list) { - tmp_ses = list_entry(tmp, struct cifs_ses, - smb_ses_list); - if (tmp_ses->need_reconnect == false) - return false; - } - /* could not find a session that was already connected, - this must be the first one we are reconnecting */ - return true; -} - -/* - * vc number 0 is treated specially by some servers, and should be the - * first one we request. After that we can use vcnumbers up to maxvcs, - * one for each smb session (some Windows versions set maxvcs incorrectly - * so maxvc=1 can be ignored). If we have too many vcs, we can reuse - * any vc but zero (some servers reset the connection on vcnum zero) - * - */ -static __le16 get_next_vcnum(struct cifs_ses *ses) -{ - __u16 vcnum = 0; - struct list_head *tmp; - struct cifs_ses *tmp_ses; - __u16 max_vcs = ses->server->max_vcs; - __u16 i; - int free_vc_found = 0; - - /* Quoting the MS-SMB specification: "Windows-based SMB servers set this - field to one but do not enforce this limit, which allows an SMB client - to establish more virtual circuits than allowed by this value ... but - other server implementations can enforce this limit." */ - if (max_vcs < 2) - max_vcs = 0xFFFF; - - spin_lock(&cifs_tcp_ses_lock); - if ((ses->need_reconnect) && is_first_ses_reconnect(ses)) - goto get_vc_num_exit; /* vcnum will be zero */ - for (i = ses->server->srv_count - 1; i < max_vcs; i++) { - if (i == 0) /* this is the only connection, use vc 0 */ - break; - - free_vc_found = 1; - - list_for_each(tmp, &ses->server->smb_ses_list) { - tmp_ses = list_entry(tmp, struct cifs_ses, - smb_ses_list); - if (tmp_ses->vcnum == i) { - free_vc_found = 0; - break; /* found duplicate, try next vcnum */ - } - } - if (free_vc_found) - break; /* we found a vcnumber that will work - use it */ - } - - if (i == 0) - vcnum = 0; /* for most common case, ie if one smb session, use - vc zero. Also for case when no free vcnum, zero - is safest to send (some clients only send zero) */ - else if (free_vc_found == 0) - vcnum = 1; /* we can not reuse vc=0 safely, since some servers - reset all uids on that, but 1 is ok. */ - else - vcnum = i; - ses->vcnum = vcnum; -get_vc_num_exit: - spin_unlock(&cifs_tcp_ses_lock); - - return cpu_to_le16(vcnum); -} - static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) { __u32 capabilities = 0; @@ -128,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, USHRT_MAX)); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - pSMB->req.VcNumber = get_next_vcnum(ses); + pSMB->req.VcNumber = __constant_cpu_to_le16(1); /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ @@ -582,9 +500,9 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) return NTLMv2; if (global_secflags & CIFSSEC_MAY_NTLM) return NTLM; - /* Fallthrough */ default: - return Unspecified; + /* Fallthrough to attempt LANMAN authentication next */ + break; } case CIFS_NEGFLAVOR_LANMAN: switch (requested) { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index eba0efde66d7..edccb5252462 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -687,6 +687,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) else return -EIO; + /* no need to send SMB logoff if uid already closed due to reconnect */ + if (ses->need_reconnect) + goto smb2_session_already_dead; + rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req); if (rc) return rc; @@ -701,6 +705,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) * No tcon so can't do * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); */ + +smb2_session_already_dead: return rc; } diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index d952ee48f4dc..a4b2391fe66e 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -97,9 +97,23 @@ #define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */ #define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */ +/* See FSCC 2.1.2.5 */ #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 #define IO_REPARSE_TAG_HSM 0xC0000004 #define IO_REPARSE_TAG_SIS 0x80000007 +#define IO_REPARSE_TAG_HSM2 0x80000006 +#define IO_REPARSE_TAG_DRIVER_EXTENDER 0x80000005 +/* Used by the DFS filter. See MS-DFSC */ +#define IO_REPARSE_TAG_DFS 0x8000000A +/* Used by the DFS filter See MS-DFSC */ +#define IO_REPARSE_TAG_DFSR 0x80000012 +#define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B +/* See section MS-FSCC 2.1.2.4 */ +#define IO_REPARSE_TAG_SYMLINK 0xA000000C +#define IO_REPARSE_TAG_DEDUP 0x80000013 +#define IO_REPARSE_APPXSTREAM 0xC0000014 +/* NFS symlinks, Win 8/SMB3 and later */ +#define IO_REPARSE_TAG_NFS 0x80000014 /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 6fdcb1b4a106..800b938e4061 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -410,8 +410,13 @@ static int wait_for_free_request(struct TCP_Server_Info *server, const int timeout, const int optype) { - return wait_for_free_credits(server, timeout, - server->ops->get_credits_field(server, optype)); + int *val; + + val = server->ops->get_credits_field(server, optype); + /* Since an echo is already inflight, no need to wait to send another */ + if (*val <= 0 && optype == CIFS_ECHO_OP) + return -EAGAIN; + return wait_for_free_credits(server, timeout, val); } static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, diff --git a/fs/dcache.c b/fs/dcache.c index 41000305d716..ae6ebb88ceff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static inline struct dentry * +static struct dentry * dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { @@ -630,7 +630,8 @@ repeat: goto kill_it; } - dentry->d_flags |= DCACHE_REFERENCED; + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; @@ -1331,14 +1332,6 @@ rename_retry: * list is non-empty and continue searching. */ -/** - * have_submounts - check for mounts over a dentry - * @parent: dentry to check. - * - * Return true if the parent or its subdirectories contain - * a mount point - */ - static enum d_walk_ret check_mount(void *data, struct dentry *dentry) { int *ret = data; @@ -1349,6 +1342,13 @@ static enum d_walk_ret check_mount(void *data, struct dentry *dentry) return D_WALK_CONTINUE; } +/** + * have_submounts - check for mounts over a dentry + * @parent: dentry to check. + * + * Return true if the parent or its subdirectories contain + * a mount point + */ int have_submounts(struct dentry *parent) { int ret = 0; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c88e355f7635..000eae2782b6 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -408,7 +408,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, struct page *page) { return ecryptfs_lower_header_size(crypt_stat) + - (page->index << PAGE_CACHE_SHIFT); + ((loff_t)page->index << PAGE_CACHE_SHIFT); } /** diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 7d52806c2119..4725a07f003c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; - char *payload; + char *payload = NULL; size_t payload_len = 0; int rc; @@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, } out: kfree(msg); + kfree(payload); return rc; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 473e09da7d02..810c28fb8c3c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,7 +34,6 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> -#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -1605,8 +1604,7 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!freezable_schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 1194b1f0f839..f8cde46de9cd 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1783,7 +1783,7 @@ retry: d_tmpfile(dentry, inode); err = ext3_orphan_add(handle, inode); if (err) - goto err_drop_inode; + goto err_unlock_inode; mark_inode_dirty(inode); unlock_new_inode(inode); } @@ -1791,10 +1791,9 @@ retry: if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; -err_drop_inode: +err_unlock_inode: ext3_journal_stop(handle); unlock_new_inode(inode); - iput(inode); return err; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0d424d7ac02b..e274e9c1171f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2563,7 +2563,7 @@ retry: break; } blk_finish_plug(&plug); - if (!ret && !cycled) { + if (!ret && !cycled && wbc->nr_to_write > 0) { cycled = 1; mpd.last_page = writeback_index - 1; mpd.first_page = 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1bec5a5c1e45..5a0408d7b114 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2319,7 +2319,7 @@ retry: d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); if (err) - goto err_drop_inode; + goto err_unlock_inode; mark_inode_dirty(inode); unlock_new_inode(inode); } @@ -2328,10 +2328,9 @@ retry: if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; -err_drop_inode: +err_unlock_inode: ext4_journal_stop(handle); unlock_new_inode(inode); - iput(inode); return err; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c081e34f717f..03e9bebba198 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1350,6 +1350,8 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; goto retry; } error = -1; diff --git a/fs/file_table.c b/fs/file_table.c index abdd15ad13c9..e900ca518635 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -297,7 +297,7 @@ void flush_delayed_fput(void) delayed_fput(NULL); } -static DECLARE_WORK(delayed_fput_work, delayed_fput); +static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); void fput(struct file *file) { @@ -317,7 +317,7 @@ void fput(struct file *file) } if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) - schedule_work(&delayed_fput_work); + schedule_delayed_work(&delayed_fput_work, 1); } } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 62b43b577bfc..b7989f2ab4c4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -182,6 +182,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) struct inode *inode; struct dentry *parent; struct fuse_conn *fc; + struct fuse_inode *fi; int ret; inode = ACCESS_ONCE(entry->d_inode); @@ -228,7 +229,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (!err && !outarg.nodeid) err = -ENOENT; if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); + fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_queue_forget(fc, forget, outarg.nodeid, 1); goto invalid; @@ -246,8 +247,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { - fc = get_fuse_conn(inode); - if (fc->readdirplus_auto) { + fi = get_fuse_inode(inode); + if (flags & LOOKUP_RCU) { + if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) + return -ECHILD; + } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { parent = dget_parent(entry); fuse_advise_use_readdirplus(parent->d_inode); dput(parent); @@ -259,7 +263,8 @@ out: invalid: ret = 0; - if (check_submounts_and_drop(entry) != 0) + + if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0) ret = 1; goto out; } @@ -1063,6 +1068,8 @@ static int fuse_access(struct inode *inode, int mask) struct fuse_access_in inarg; int err; + BUG_ON(mask & MAY_NOT_BLOCK); + if (fc->no_access) return 0; @@ -1150,9 +1157,6 @@ static int fuse_permission(struct inode *inode, int mask) noticed immediately, only after the attribute timeout has expired */ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { @@ -1291,6 +1295,8 @@ static int fuse_direntplus_link(struct file *file, } found: + if (fc->readdirplus_auto) + set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); fuse_change_entry_timeout(dentry, o); err = 0; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d409deafc67b..4598345ab87d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2467,6 +2467,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, { struct fuse_file *ff = file->private_data; struct inode *inode = file->f_inode; + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = ff->fc; struct fuse_req *req; struct fuse_fallocate_in inarg = { @@ -2484,10 +2485,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (lock_inode) { mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_set_nowrite(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { + loff_t endbyte = offset + length - 1; + err = filemap_write_and_wait_range(inode->i_mapping, + offset, endbyte); + if (err) + goto out; + + fuse_sync_writes(inode); + } } + if (!(mode & FALLOC_FL_KEEP_SIZE)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -2520,11 +2531,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, fuse_invalidate_attr(inode); out: - if (lock_inode) { - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_release_nowrite(inode); + if (!(mode & FALLOC_FL_KEEP_SIZE)) + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + + if (lock_inode) mutex_unlock(&inode->i_mutex); - } return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5ced199b50bb..5b9e6f3b6aef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -115,6 +115,8 @@ struct fuse_inode { enum { /** Advise readdirplus */ FUSE_I_ADVISE_RDPLUS, + /** Initialized with readdirplus */ + FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, }; diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index c1a3e603279c..7f464c513ba0 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) if (insert_inode_locked(inode) < 0) { rc = -EINVAL; - goto fail_unlock; + goto fail_put; } inode_init_owner(inode, parent, mode); @@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; -fail_unlock: clear_nlink(inode); unlock_new_inode(inode); fail_put: diff --git a/fs/namei.c b/fs/namei.c index 645268f23eb6..caa28051e197 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2294,10 +2294,11 @@ out: * path_mountpoint - look up a path to be umounted * @dfd: directory file descriptor to start walk from * @name: full pathname to walk + * @path: pointer to container for result * @flags: lookup flags * * Look up the given name, but don't attempt to revalidate the last component. - * Returns 0 and "path" will be valid on success; Retuns error otherwise. + * Returns 0 and "path" will be valid on success; Returns error otherwise. */ static int path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 9f8ef9b7674d..8eaa1ba793fc 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -288,10 +288,14 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); - int rv = -EIO; - unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + unsigned long rv = -EIO; + unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL; if (use_pde(pde)) { - get_unmapped_area = pde->proc_fops->get_unmapped_area; +#ifdef CONFIG_MMU + get_unmapped_area = current->mm->get_unmapped_area; +#endif + if (pde->proc_fops->get_unmapped_area) + get_unmapped_area = pde->proc_fops->get_unmapped_area; if (get_unmapped_area) rv = get_unmapped_area(file, orig_addr, len, pgoff, flags); unuse_pde(pde); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7366e9d63cee..390bdab01c3c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, frame = pte_pfn(pte); flags = PM_PRESENT; page = vm_normal_page(vma, addr, pte); + if (pte_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { swp_entry_t entry; if (pte_swp_soft_dirty(pte)) @@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, if (page && !PageAnon(page)) flags |= PM_FILE; - if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte)) + if ((vma->vm_flags & VM_SOFTDIRTY)) flags2 |= __PM_SOFT_DIRTY; *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); diff --git a/fs/select.c b/fs/select.c index 35d4adc749d9..dfd5cb18c012 100644 --- a/fs/select.c +++ b/fs/select.c @@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = freezable_schedule_hrtimeout_range(expires, slack, - HRTIMER_MODE_ABS); + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/seq_file.c b/fs/seq_file.c index 3135c2525c76..a290157265ef 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) m->read_pos = offset; retval = file->f_pos = offset; } + } else { + file->f_pos = offset; } } file->f_version = m->version; diff --git a/fs/statfs.c b/fs/statfs.c index c219e733f553..083dc0ac9140 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -94,7 +94,7 @@ retry: int fd_statfs(int fd, struct kstatfs *st) { - struct fd f = fdget(fd); + struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { error = vfs_statfs(&f.file->f_path, st); diff --git a/fs/super.c b/fs/super.c index 3a96c9783a8b..0225c20f8770 100644 --- a/fs/super.c +++ b/fs/super.c @@ -264,6 +264,8 @@ out_free_sb: */ static inline void destroy_super(struct super_block *s) { + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); #ifdef CONFIG_SMP free_percpu(s->s_files); #endif @@ -323,8 +325,6 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); - list_lru_destroy(&s->s_dentry_lru); - list_lru_destroy(&s->s_inode_lru); put_filesystem(fs); put_super(s); diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile index 7a1ceb946b80..8876ac183373 100644 --- a/fs/sysfs/Makefile +++ b/fs/sysfs/Makefile @@ -2,5 +2,4 @@ # Makefile for the sysfs virtual filesystem # -obj-y := inode.o file.o dir.o symlink.o mount.o bin.o \ - group.o +obj-y := inode.o file.o dir.o symlink.o mount.o group.o diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c deleted file mode 100644 index c590cabd57bb..000000000000 --- a/fs/sysfs/bin.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * fs/sysfs/bin.c - sysfs binary file implementation - * - * Copyright (c) 2003 Patrick Mochel - * Copyright (c) 2003 Matthew Wilcox - * Copyright (c) 2004 Silicon Graphics, Inc. - * Copyright (c) 2007 SUSE Linux Products GmbH - * Copyright (c) 2007 Tejun Heo <teheo@suse.de> - * - * This file is released under the GPLv2. - * - * Please see Documentation/filesystems/sysfs.txt for more information. - */ - -#undef DEBUG - -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/kobject.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/mutex.h> -#include <linux/mm.h> -#include <linux/uaccess.h> - -#include "sysfs.h" - -/* - * There's one bin_buffer for each open file. - * - * filp->private_data points to bin_buffer and - * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s - * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock - */ -static DEFINE_MUTEX(sysfs_bin_lock); - -struct bin_buffer { - struct mutex mutex; - void *buffer; - int mmapped; - const struct vm_operations_struct *vm_ops; - struct file *file; - struct hlist_node list; -}; - -static int -fill_read(struct file *file, char *buffer, loff_t off, size_t count) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - int rc; - - /* need attr_sd for attr, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; - - rc = -EIO; - if (attr->read) - rc = attr->read(file, kobj, attr, buffer, off, count); - - sysfs_put_active(attr_sd); - - return rc; -} - -static ssize_t -read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) -{ - struct bin_buffer *bb = file->private_data; - int size = file_inode(file)->i_size; - loff_t offs = *off; - int count = min_t(size_t, bytes, PAGE_SIZE); - char *temp; - - if (!bytes) - return 0; - - if (size) { - if (offs > size) - return 0; - if (offs + count > size) - count = size - offs; - } - - temp = kmalloc(count, GFP_KERNEL); - if (!temp) - return -ENOMEM; - - mutex_lock(&bb->mutex); - - count = fill_read(file, bb->buffer, offs, count); - if (count < 0) { - mutex_unlock(&bb->mutex); - goto out_free; - } - - memcpy(temp, bb->buffer, count); - - mutex_unlock(&bb->mutex); - - if (copy_to_user(userbuf, temp, count)) { - count = -EFAULT; - goto out_free; - } - - pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); - - *off = offs + count; - - out_free: - kfree(temp); - return count; -} - -static int -flush_write(struct file *file, char *buffer, loff_t offset, size_t count) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - int rc; - - /* need attr_sd for attr, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; - - rc = -EIO; - if (attr->write) - rc = attr->write(file, kobj, attr, buffer, offset, count); - - sysfs_put_active(attr_sd); - - return rc; -} - -static ssize_t write(struct file *file, const char __user *userbuf, - size_t bytes, loff_t *off) -{ - struct bin_buffer *bb = file->private_data; - int size = file_inode(file)->i_size; - loff_t offs = *off; - int count = min_t(size_t, bytes, PAGE_SIZE); - char *temp; - - if (!bytes) - return 0; - - if (size) { - if (offs > size) - return 0; - if (offs + count > size) - count = size - offs; - } - - temp = memdup_user(userbuf, count); - if (IS_ERR(temp)) - return PTR_ERR(temp); - - mutex_lock(&bb->mutex); - - memcpy(bb->buffer, temp, count); - - count = flush_write(file, bb->buffer, offs, count); - mutex_unlock(&bb->mutex); - - if (count > 0) - *off = offs + count; - - kfree(temp); - return count; -} - -static void bin_vma_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - - if (!bb->vm_ops) - return; - - if (!sysfs_get_active(attr_sd)) - return; - - if (bb->vm_ops->open) - bb->vm_ops->open(vma); - - sysfs_put_active(attr_sd); -} - -static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(attr_sd)) - return VM_FAULT_SIGBUS; - - ret = VM_FAULT_SIGBUS; - if (bb->vm_ops->fault) - ret = bb->vm_ops->fault(vma, vmf); - - sysfs_put_active(attr_sd); - return ret; -} - -static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(attr_sd)) - return VM_FAULT_SIGBUS; - - ret = 0; - if (bb->vm_ops->page_mkwrite) - ret = bb->vm_ops->page_mkwrite(vma, vmf); - else - file_update_time(file); - - sysfs_put_active(attr_sd); - return ret; -} - -static int bin_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return -EINVAL; - - if (!sysfs_get_active(attr_sd)) - return -EINVAL; - - ret = -EINVAL; - if (bb->vm_ops->access) - ret = bb->vm_ops->access(vma, addr, buf, len, write); - - sysfs_put_active(attr_sd); - return ret; -} - -#ifdef CONFIG_NUMA -static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return 0; - - if (!sysfs_get_active(attr_sd)) - return -EINVAL; - - ret = 0; - if (bb->vm_ops->set_policy) - ret = bb->vm_ops->set_policy(vma, new); - - sysfs_put_active(attr_sd); - return ret; -} - -static struct mempolicy *bin_get_policy(struct vm_area_struct *vma, - unsigned long addr) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct mempolicy *pol; - - if (!bb->vm_ops) - return vma->vm_policy; - - if (!sysfs_get_active(attr_sd)) - return vma->vm_policy; - - pol = vma->vm_policy; - if (bb->vm_ops->get_policy) - pol = bb->vm_ops->get_policy(vma, addr); - - sysfs_put_active(attr_sd); - return pol; -} - -static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, - const nodemask_t *to, unsigned long flags) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return 0; - - if (!sysfs_get_active(attr_sd)) - return 0; - - ret = 0; - if (bb->vm_ops->migrate) - ret = bb->vm_ops->migrate(vma, from, to, flags); - - sysfs_put_active(attr_sd); - return ret; -} -#endif - -static const struct vm_operations_struct bin_vm_ops = { - .open = bin_vma_open, - .fault = bin_fault, - .page_mkwrite = bin_page_mkwrite, - .access = bin_access, -#ifdef CONFIG_NUMA - .set_policy = bin_set_policy, - .get_policy = bin_get_policy, - .migrate = bin_migrate, -#endif -}; - -static int mmap(struct file *file, struct vm_area_struct *vma) -{ - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - int rc; - - mutex_lock(&bb->mutex); - - /* need attr_sd for attr, its parent for kobj */ - rc = -ENODEV; - if (!sysfs_get_active(attr_sd)) - goto out_unlock; - - rc = -EINVAL; - if (!attr->mmap) - goto out_put; - - rc = attr->mmap(file, kobj, attr, vma); - if (rc) - goto out_put; - - /* - * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() - * to satisfy versions of X which crash if the mmap fails: that - * substitutes a new vm_file, and we don't then want bin_vm_ops. - */ - if (vma->vm_file != file) - goto out_put; - - rc = -EINVAL; - if (bb->mmapped && bb->vm_ops != vma->vm_ops) - goto out_put; - - /* - * It is not possible to successfully wrap close. - * So error if someone is trying to use close. - */ - rc = -EINVAL; - if (vma->vm_ops && vma->vm_ops->close) - goto out_put; - - rc = 0; - bb->mmapped = 1; - bb->vm_ops = vma->vm_ops; - vma->vm_ops = &bin_vm_ops; -out_put: - sysfs_put_active(attr_sd); -out_unlock: - mutex_unlock(&bb->mutex); - - return rc; -} - -static int open(struct inode *inode, struct file *file) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct bin_buffer *bb = NULL; - int error; - - /* binary file operations requires both @sd and its parent */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; - - error = -EACCES; - if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) - goto err_out; - if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) - goto err_out; - - error = -ENOMEM; - bb = kzalloc(sizeof(*bb), GFP_KERNEL); - if (!bb) - goto err_out; - - bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!bb->buffer) - goto err_out; - - mutex_init(&bb->mutex); - bb->file = file; - file->private_data = bb; - - mutex_lock(&sysfs_bin_lock); - hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers); - mutex_unlock(&sysfs_bin_lock); - - /* open succeeded, put active references */ - sysfs_put_active(attr_sd); - return 0; - - err_out: - sysfs_put_active(attr_sd); - kfree(bb); - return error; -} - -static int release(struct inode *inode, struct file *file) -{ - struct bin_buffer *bb = file->private_data; - - mutex_lock(&sysfs_bin_lock); - hlist_del(&bb->list); - mutex_unlock(&sysfs_bin_lock); - - kfree(bb->buffer); - kfree(bb); - return 0; -} - -const struct file_operations bin_fops = { - .read = read, - .write = write, - .mmap = mmap, - .llseek = generic_file_llseek, - .open = open, - .release = release, -}; - - -void unmap_bin_file(struct sysfs_dirent *attr_sd) -{ - struct bin_buffer *bb; - - if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) - return; - - mutex_lock(&sysfs_bin_lock); - - hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) { - struct inode *inode = file_inode(bb->file); - - unmap_mapping_range(inode->i_mapping, 0, 0, 1); - } - - mutex_unlock(&sysfs_bin_lock); -} - -/** - * sysfs_create_bin_file - create binary file for object. - * @kobj: object. - * @attr: attribute descriptor. - */ -int sysfs_create_bin_file(struct kobject *kobj, - const struct bin_attribute *attr) -{ - BUG_ON(!kobj || !kobj->sd || !attr); - - return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); -} -EXPORT_SYMBOL_GPL(sysfs_create_bin_file); - -/** - * sysfs_remove_bin_file - remove binary file for object. - * @kobj: object. - * @attr: attribute descriptor. - */ -void sysfs_remove_bin_file(struct kobject *kobj, - const struct bin_attribute *attr) -{ - sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name); -} -EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4d83cedb9fcb..5e73d6626e50 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -26,21 +26,21 @@ #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); -DEFINE_SPINLOCK(sysfs_assoc_lock); +DEFINE_SPINLOCK(sysfs_symlink_target_lock); -#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb); +#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) static DEFINE_SPINLOCK(sysfs_ino_lock); static DEFINE_IDA(sysfs_ino_ida); /** * sysfs_name_hash - * @ns: Namespace tag to hash * @name: Null terminated string to hash + * @ns: Namespace tag to hash * * Returns 31 bit hash of ns + name (so it fits in an off_t ) */ -static unsigned int sysfs_name_hash(const void *ns, const char *name) +static unsigned int sysfs_name_hash(const char *name, const void *ns) { unsigned long hash = init_name_hash(); unsigned int len = strlen(name); @@ -56,8 +56,8 @@ static unsigned int sysfs_name_hash(const void *ns, const char *name) return hash; } -static int sysfs_name_compare(unsigned int hash, const void *ns, - const char *name, const struct sysfs_dirent *sd) +static int sysfs_name_compare(unsigned int hash, const char *name, + const void *ns, const struct sysfs_dirent *sd) { if (hash != sd->s_hash) return hash - sd->s_hash; @@ -69,7 +69,7 @@ static int sysfs_name_compare(unsigned int hash, const void *ns, static int sysfs_sd_compare(const struct sysfs_dirent *left, const struct sysfs_dirent *right) { - return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name, + return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, right); } @@ -132,24 +132,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); } -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -/* Test for attributes that want to ignore lockdep for read-locking */ -static bool ignore_lockdep(struct sysfs_dirent *sd) -{ - return sysfs_type(sd) == SYSFS_KOBJ_ATTR && - sd->s_attr.attr->ignore_lockdep; -} - -#else - -static inline bool ignore_lockdep(struct sysfs_dirent *sd) -{ - return true; -} - -#endif - /** * sysfs_get_active - get an active reference to sysfs_dirent * @sd: sysfs_dirent to get an active reference to @@ -168,7 +150,7 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) if (!atomic_inc_unless_negative(&sd->s_active)) return NULL; - if (likely(!ignore_lockdep(sd))) + if (likely(!sysfs_ignore_lockdep(sd))) rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); return sd; } @@ -187,7 +169,7 @@ void sysfs_put_active(struct sysfs_dirent *sd) if (unlikely(!sd)) return; - if (likely(!ignore_lockdep(sd))) + if (likely(!sysfs_ignore_lockdep(sd))) rwsem_release(&sd->dep_map, 1, _RET_IP_); v = atomic_dec_return(&sd->s_active); if (likely(v != SD_DEACTIVATED_BIAS)) @@ -400,22 +382,19 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) /** * sysfs_addrm_start - prepare for sysfs_dirent add/remove * @acxt: pointer to sysfs_addrm_cxt to be used - * @parent_sd: parent sysfs_dirent * - * This function is called when the caller is about to add or - * remove sysfs_dirent under @parent_sd. This function acquires - * sysfs_mutex. @acxt is used to keep and pass context to - * other addrm functions. + * This function is called when the caller is about to add or remove + * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used + * to keep and pass context to other addrm functions. * * LOCKING: * Kernel thread context (may sleep). sysfs_mutex is locked on * return. */ -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *parent_sd) +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) + __acquires(sysfs_mutex) { memset(acxt, 0, sizeof(*acxt)); - acxt->parent_sd = parent_sd; mutex_lock(&sysfs_mutex); } @@ -424,10 +403,11 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, * __sysfs_add_one - add sysfs_dirent to parent without warning * @acxt: addrm context to use * @sd: sysfs_dirent to be added + * @parent_sd: the parent sysfs_dirent to add @sd to * - * Get @acxt->parent_sd and set sd->s_parent to it and increment - * nlink of parent inode if @sd is a directory and link into the - * children list of the parent. + * Get @parent_sd and set @sd->s_parent to it and increment nlink of + * the parent inode if @sd is a directory and link into the children + * list of the parent. * * This function should be called between calls to * sysfs_addrm_start() and sysfs_addrm_finish() and should be @@ -440,27 +420,28 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, * 0 on success, -EEXIST if entry with the given name already * exists. */ -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd) { struct sysfs_inode_attrs *ps_iattr; int ret; - if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { + if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", - sysfs_ns_type(acxt->parent_sd) ? "required" : "invalid", - acxt->parent_sd->s_name, sd->s_name); + sysfs_ns_type(parent_sd) ? "required" : "invalid", + parent_sd->s_name, sd->s_name); return -EINVAL; } - sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); - sd->s_parent = sysfs_get(acxt->parent_sd); + sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); + sd->s_parent = sysfs_get(parent_sd); ret = sysfs_link_sibling(sd); if (ret) return ret; /* Update timestamps on the parent */ - ps_iattr = acxt->parent_sd->s_iattr; + ps_iattr = parent_sd->s_iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; @@ -490,14 +471,32 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) return path; } +void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) +{ + char *path; + + path = kzalloc(PATH_MAX, GFP_KERNEL); + if (path) { + sysfs_pathname(parent, path); + strlcat(path, "/", PATH_MAX); + strlcat(path, name, PATH_MAX); + } + + WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", + path ? path : name); + + kfree(path); +} + /** * sysfs_add_one - add sysfs_dirent to parent * @acxt: addrm context to use * @sd: sysfs_dirent to be added + * @parent_sd: the parent sysfs_dirent to add @sd to * - * Get @acxt->parent_sd and set sd->s_parent to it and increment - * nlink of parent inode if @sd is a directory and link into the - * children list of the parent. + * Get @parent_sd and set @sd->s_parent to it and increment nlink of + * the parent inode if @sd is a directory and link into the children + * list of the parent. * * This function should be called between calls to * sysfs_addrm_start() and sysfs_addrm_finish() and should be @@ -510,23 +509,15 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) * 0 on success, -EEXIST if entry with the given name already * exists. */ -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd) { int ret; - ret = __sysfs_add_one(acxt, sd); - if (ret == -EEXIST) { - char *path = kzalloc(PATH_MAX, GFP_KERNEL); - WARN(1, KERN_WARNING - "sysfs: cannot create duplicate filename '%s'\n", - (path == NULL) ? sd->s_name - : (sysfs_pathname(acxt->parent_sd, path), - strlcat(path, "/", PATH_MAX), - strlcat(path, sd->s_name, PATH_MAX), - path)); - kfree(path); - } + ret = __sysfs_add_one(acxt, sd, parent_sd); + if (ret == -EEXIST) + sysfs_warn_dup(parent_sd, sd->s_name); return ret; } @@ -545,16 +536,22 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) * LOCKING: * Determined by sysfs_addrm_start(). */ -void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; - BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED); + /* + * Removal can be called multiple times on the same node. Only the + * first invocation is effective and puts the base ref. + */ + if (sd->s_flags & SYSFS_FLAG_REMOVED) + return; sysfs_unlink_sibling(sd); /* Update timestamps on the parent */ - ps_iattr = acxt->parent_sd->s_iattr; + ps_iattr = sd->s_parent->s_iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; @@ -577,6 +574,7 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) * sysfs_mutex is released. */ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) + __releases(sysfs_mutex) { /* release resources acquired by sysfs_addrm_start() */ mutex_unlock(&sysfs_mutex); @@ -588,7 +586,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) acxt->removed = sd->u.removed_list; sysfs_deactivate(sd); - unmap_bin_file(sd); + sysfs_unmap_bin_file(sd); sysfs_put(sd); } } @@ -597,6 +595,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * sysfs_find_dirent - find sysfs_dirent with the given name * @parent_sd: sysfs_dirent to search under * @name: name to look for + * @ns: the namespace tag to use * * Look for sysfs_dirent with name @name under @parent_sd. * @@ -607,8 +606,8 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name) + const unsigned char *name, + const void *ns) { struct rb_node *node = parent_sd->s_dir.children.rb_node; unsigned int hash; @@ -620,13 +619,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, return NULL; } - hash = sysfs_name_hash(ns, name); + hash = sysfs_name_hash(name, ns); while (node) { struct sysfs_dirent *sd; int result; sd = to_sysfs_dirent(node); - result = sysfs_name_compare(hash, ns, name, sd); + result = sysfs_name_compare(hash, name, ns, sd); if (result < 0) node = node->rb_left; else if (result > 0) @@ -638,9 +637,10 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, } /** - * sysfs_get_dirent - find and get sysfs_dirent with the given name + * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name * @parent_sd: sysfs_dirent to search under * @name: name to look for + * @ns: the namespace tag to use * * Look for sysfs_dirent with name @name under @parent_sd and get * it if found. @@ -651,24 +651,25 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, * RETURNS: * Pointer to sysfs_dirent if found, NULL if not. */ -struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name) +struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, + const unsigned char *name, + const void *ns) { struct sysfs_dirent *sd; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, ns, name); + sd = sysfs_find_dirent(parent_sd, name, ns); sysfs_get(sd); mutex_unlock(&sysfs_mutex); return sd; } -EXPORT_SYMBOL_GPL(sysfs_get_dirent); +EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, - enum kobj_ns_type type, const void *ns, const char *name, - struct sysfs_dirent **p_sd) + enum kobj_ns_type type, + const char *name, const void *ns, + struct sysfs_dirent **p_sd) { umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; struct sysfs_addrm_cxt acxt; @@ -685,8 +686,8 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, sd->s_dir.kobj = kobj; /* link in */ - sysfs_addrm_start(&acxt, parent_sd); - rc = sysfs_add_one(&acxt, sd); + sysfs_addrm_start(&acxt); + rc = sysfs_add_one(&acxt, sd, parent_sd); sysfs_addrm_finish(&acxt); if (rc == 0) @@ -701,7 +702,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd) { return create_dir(kobj, kobj->sd, - KOBJ_NS_TYPE_NONE, NULL, name, p_sd); + KOBJ_NS_TYPE_NONE, name, NULL, p_sd); } /** @@ -730,14 +731,14 @@ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) } /** - * sysfs_create_dir - create a directory for an object. - * @kobj: object we're creating directory for. + * sysfs_create_dir_ns - create a directory for an object with a namespace tag + * @kobj: object we're creating directory for + * @ns: the namespace tag to use */ -int sysfs_create_dir(struct kobject *kobj) +int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { enum kobj_ns_type type; struct sysfs_dirent *parent_sd, *sd; - const void *ns = NULL; int error = 0; BUG_ON(!kobj); @@ -750,11 +751,9 @@ int sysfs_create_dir(struct kobject *kobj) if (!parent_sd) return -ENOENT; - if (sysfs_ns_type(parent_sd)) - ns = kobj->ktype->namespace(kobj); type = sysfs_read_ns_type(kobj); - error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd); + error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); if (!error) kobj->sd = sd; return error; @@ -776,7 +775,7 @@ static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, type = sysfs_ns_type(parent_sd); ns = sysfs_info(dir->i_sb)->ns[type]; - sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name); + sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns); /* no such entry */ if (!sd) { @@ -807,41 +806,128 @@ const struct inode_operations sysfs_dir_inode_operations = { .setxattr = sysfs_setxattr, }; -static void remove_dir(struct sysfs_dirent *sd) +static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) { - struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *last; - sysfs_addrm_start(&acxt, sd->s_parent); - sysfs_remove_one(&acxt, sd); - sysfs_addrm_finish(&acxt); + while (true) { + struct rb_node *rbn; + + last = pos; + + if (sysfs_type(pos) != SYSFS_DIR) + break; + + rbn = rb_first(&pos->s_dir.children); + if (!rbn) + break; + + pos = to_sysfs_dirent(rbn); + } + + return last; } -void sysfs_remove_subdir(struct sysfs_dirent *sd) +/** + * sysfs_next_descendant_post - find the next descendant for post-order walk + * @pos: the current position (%NULL to initiate traversal) + * @root: sysfs_dirent whose descendants to walk + * + * Find the next descendant to visit for post-order traversal of @root's + * descendants. @root is included in the iteration and the last node to be + * visited. + */ +static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, + struct sysfs_dirent *root) { - remove_dir(sd); + struct rb_node *rbn; + + lockdep_assert_held(&sysfs_mutex); + + /* if first iteration, visit leftmost descendant which may be root */ + if (!pos) + return sysfs_leftmost_descendant(root); + + /* if we visited @root, we're done */ + if (pos == root) + return NULL; + + /* if there's an unvisited sibling, visit its leftmost descendant */ + rbn = rb_next(&pos->s_rb); + if (rbn) + return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); + + /* no sibling left, visit parent */ + return pos->s_parent; } +static void __sysfs_remove(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd) +{ + struct sysfs_dirent *pos, *next; + + if (!sd) + return; -static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) + pr_debug("sysfs %s: removing\n", sd->s_name); + + next = NULL; + do { + pos = next; + next = sysfs_next_descendant_post(pos, sd); + if (pos) + sysfs_remove_one(acxt, pos); + } while (next); +} + +/** + * sysfs_remove - remove a sysfs_dirent recursively + * @sd: the sysfs_dirent to remove + * + * Remove @sd along with all its subdirectories and files. + */ +void sysfs_remove(struct sysfs_dirent *sd) { struct sysfs_addrm_cxt acxt; - struct rb_node *pos; - if (!dir_sd) - return; + sysfs_addrm_start(&acxt); + __sysfs_remove(&acxt, sd); + sysfs_addrm_finish(&acxt); +} - pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); - sysfs_addrm_start(&acxt, dir_sd); - pos = rb_first(&dir_sd->s_dir.children); - while (pos) { - struct sysfs_dirent *sd = to_sysfs_dirent(pos); - pos = rb_next(pos); - if (sysfs_type(sd) != SYSFS_DIR) - sysfs_remove_one(&acxt, sd); +/** + * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it + * @dir_sd: parent of the target + * @name: name of the sysfs_dirent to remove + * @ns: namespace tag of the sysfs_dirent to remove + * + * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove + * it. Returns 0 on success, -ENOENT if such entry doesn't exist. + */ +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, + const void *ns) +{ + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + + if (!dir_sd) { + WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", + name); + return -ENOENT; } + + sysfs_addrm_start(&acxt); + + sd = sysfs_find_dirent(dir_sd, name, ns); + if (sd) + __sysfs_remove(&acxt, sd); + sysfs_addrm_finish(&acxt); - remove_dir(dir_sd); + if (sd) + return 0; + else + return -ENOENT; } /** @@ -852,21 +938,34 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) * the directory before we remove the directory, and we've inlined * what used to be sysfs_rmdir() below, instead of calling separately. */ - void sysfs_remove_dir(struct kobject *kobj) { struct sysfs_dirent *sd = kobj->sd; - spin_lock(&sysfs_assoc_lock); + /* + * In general, kboject owner is responsible for ensuring removal + * doesn't race with other operations and sysfs doesn't provide any + * protection; however, when @kobj is used as a symlink target, the + * symlinking entity usually doesn't own @kobj and thus has no + * control over removal. @kobj->sd may be removed anytime and + * symlink code may end up dereferencing an already freed sd. + * + * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation + * against symlink operations so that symlink code can safely + * dereference @kobj->sd. + */ + spin_lock(&sysfs_symlink_target_lock); kobj->sd = NULL; - spin_unlock(&sysfs_assoc_lock); + spin_unlock(&sysfs_symlink_target_lock); - __sysfs_remove_dir(sd); + if (sd) { + WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); + sysfs_remove(sd); + } } -int sysfs_rename(struct sysfs_dirent *sd, - struct sysfs_dirent *new_parent_sd, const void *new_ns, - const char *new_name) +int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, + const char *new_name, const void *new_ns) { int error; @@ -878,7 +977,7 @@ int sysfs_rename(struct sysfs_dirent *sd, goto out; /* nothing to rename */ error = -EEXIST; - if (sysfs_find_dirent(new_parent_sd, new_ns, new_name)) + if (sysfs_find_dirent(new_parent_sd, new_name, new_ns)) goto out; /* rename sysfs_dirent */ @@ -899,7 +998,7 @@ int sysfs_rename(struct sysfs_dirent *sd, sysfs_get(new_parent_sd); sysfs_put(sd->s_parent); sd->s_ns = new_ns; - sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); + sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); sd->s_parent = new_parent_sd; sysfs_link_sibling(sd); @@ -909,30 +1008,25 @@ int sysfs_rename(struct sysfs_dirent *sd, return error; } -int sysfs_rename_dir(struct kobject *kobj, const char *new_name) +int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, + const void *new_ns) { struct sysfs_dirent *parent_sd = kobj->sd->s_parent; - const void *new_ns = NULL; - - if (sysfs_ns_type(parent_sd)) - new_ns = kobj->ktype->namespace(kobj); - return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name); + return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); } -int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) +int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, + const void *new_ns) { struct sysfs_dirent *sd = kobj->sd; struct sysfs_dirent *new_parent_sd; - const void *new_ns = NULL; BUG_ON(!sd->s_parent); - if (sysfs_ns_type(sd->s_parent)) - new_ns = kobj->ktype->namespace(kobj); new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; - return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name); + return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); } /* Relationship between s_mode and the DT_xxx types */ diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 15ef5eb13663..79b5da2acbe1 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -21,70 +21,114 @@ #include <linux/mutex.h> #include <linux/limits.h> #include <linux/uaccess.h> +#include <linux/seq_file.h> +#include <linux/mm.h> #include "sysfs.h" /* - * There's one sysfs_buffer for each open file and one - * sysfs_open_dirent for each sysfs_dirent with one or more open - * files. + * There's one sysfs_open_file for each open file and one sysfs_open_dirent + * for each sysfs_dirent with one or more open files. * - * filp->private_data points to sysfs_buffer and - * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open - * is protected by sysfs_open_dirent_lock. + * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is + * protected by sysfs_open_dirent_lock. + * + * filp->private_data points to seq_file whose ->private points to + * sysfs_open_file. sysfs_open_files are chained at + * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex. */ static DEFINE_SPINLOCK(sysfs_open_dirent_lock); +static DEFINE_MUTEX(sysfs_open_file_mutex); struct sysfs_open_dirent { atomic_t refcnt; atomic_t event; wait_queue_head_t poll; - struct list_head buffers; /* goes through sysfs_buffer.list */ + struct list_head files; /* goes through sysfs_open_file.list */ }; -struct sysfs_buffer { - size_t count; - loff_t pos; - char *page; - const struct sysfs_ops *ops; +struct sysfs_open_file { + struct sysfs_dirent *sd; + struct file *file; struct mutex mutex; - int needs_read_fill; int event; struct list_head list; + + bool mmapped; + const struct vm_operations_struct *vm_ops; }; -/** - * fill_read_buffer - allocate and fill buffer from object. - * @dentry: dentry pointer. - * @buffer: data buffer for file. - * - * Allocate @buffer->page, if it hasn't been already, then call the - * kobject's show() method to fill the buffer with this attribute's - * data. - * This is called only once, on the file's first read unless an error - * is returned. +static bool sysfs_is_bin(struct sysfs_dirent *sd) +{ + return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR; +} + +static struct sysfs_open_file *sysfs_of(struct file *file) +{ + return ((struct seq_file *)file->private_data)->private; +} + +/* + * Determine ktype->sysfs_ops for the given sysfs_dirent. This function + * must be called while holding an active reference. */ -static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer) +static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) { - struct sysfs_dirent *attr_sd = dentry->d_fsdata; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - const struct sysfs_ops *ops = buffer->ops; - int ret = 0; + struct kobject *kobj = sd->s_parent->s_dir.kobj; + + if (!sysfs_ignore_lockdep(sd)) + lockdep_assert_held(sd); + return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; +} + +/* + * Reads on sysfs are handled through seq_file, which takes care of hairy + * details like buffering and seeking. The following function pipes + * sysfs_ops->show() result through seq_file. + */ +static int sysfs_seq_show(struct seq_file *sf, void *v) +{ + struct sysfs_open_file *of = sf->private; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + const struct sysfs_ops *ops; + char *buf; ssize_t count; - if (!buffer->page) - buffer->page = (char *) get_zeroed_page(GFP_KERNEL); - if (!buffer->page) - return -ENOMEM; + /* acquire buffer and ensure that it's >= PAGE_SIZE */ + count = seq_get_buf(sf, &buf); + if (count < PAGE_SIZE) { + seq_commit(sf, -1); + return 0; + } - /* need attr_sd for attr and ops, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) + /* + * Need @of->sd for attr and ops, its parent for kobj. @of->mutex + * nests outside active ref and is just to ensure that the ops + * aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + mutex_unlock(&of->mutex); return -ENODEV; + } - buffer->event = atomic_read(&attr_sd->s_attr.open->event); - count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page); + of->event = atomic_read(&of->sd->s_attr.open->event); - sysfs_put_active(attr_sd); + /* + * Lookup @ops and invoke show(). Control may reach here via seq + * file lseek even if @ops->show() isn't implemented. + */ + ops = sysfs_file_ops(of->sd); + if (ops->show) + count = ops->show(kobj, of->sd->s_attr.attr, buf); + else + count = 0; + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); + + if (count < 0) + return count; /* * The code works fine with PAGE_SIZE return but it's likely to @@ -96,155 +140,389 @@ static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer) /* Try to struggle along */ count = PAGE_SIZE - 1; } - if (count >= 0) { - buffer->needs_read_fill = 0; - buffer->count = count; - } else { - ret = count; - } - return ret; + seq_commit(sf, count); + return 0; } -/** - * sysfs_read_file - read an attribute. - * @file: file pointer. - * @buf: buffer to fill. - * @count: number of bytes to read. - * @ppos: starting offset in file. - * - * Userspace wants to read an attribute file. The attribute descriptor - * is in the file's ->d_fsdata. The target object is in the directory's - * ->d_fsdata. - * - * We call fill_read_buffer() to allocate and fill the buffer from the - * object's show() method exactly once (if the read is happening from - * the beginning of the file). That should fill the entire buffer with - * all the data the object has to offer for that attribute. - * We then call flush_read_buffer() to copy the buffer to userspace - * in the increments specified. +/* + * Read method for bin files. As reading a bin file can have side-effects, + * the exact offset and bytes specified in read(2) call should be passed to + * the read callback making it difficult to use seq_file. Implement + * simplistic custom buffering for bin files. */ - -static ssize_t -sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) +static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf, + size_t bytes, loff_t *off) { - struct sysfs_buffer *buffer = file->private_data; - ssize_t retval = 0; + struct sysfs_open_file *of = sysfs_of(file); + struct bin_attribute *battr = of->sd->s_attr.bin_attr; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + loff_t size = file_inode(file)->i_size; + int count = min_t(size_t, bytes, PAGE_SIZE); + loff_t offs = *off; + char *buf; + + if (!bytes) + return 0; - mutex_lock(&buffer->mutex); - if (buffer->needs_read_fill || *ppos == 0) { - retval = fill_read_buffer(file->f_path.dentry, buffer); - if (retval) - goto out; + if (size) { + if (offs > size) + return 0; + if (offs + count > size) + count = size - offs; } - pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", - __func__, count, *ppos, buffer->page); - retval = simple_read_from_buffer(buf, count, ppos, buffer->page, - buffer->count); -out: - mutex_unlock(&buffer->mutex); - return retval; -} -/** - * fill_write_buffer - copy buffer from userspace. - * @buffer: data buffer for file. - * @buf: data from user. - * @count: number of bytes in @userbuf. - * - * Allocate @buffer->page if it hasn't been already, then - * copy the user-supplied buffer into it. - */ -static int fill_write_buffer(struct sysfs_buffer *buffer, - const char __user *buf, size_t count) -{ - int error; - - if (!buffer->page) - buffer->page = (char *)get_zeroed_page(GFP_KERNEL); - if (!buffer->page) + buf = kmalloc(count, GFP_KERNEL); + if (!buf) return -ENOMEM; - if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; - error = copy_from_user(buffer->page, buf, count); - buffer->needs_read_fill = 1; - /* if buf is assumed to contain a string, terminate it by \0, - so e.g. sscanf() can scan the string easily */ - buffer->page[count] = 0; - return error ? -EFAULT : count; -} + /* need of->sd for battr, its parent for kobj */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + count = -ENODEV; + mutex_unlock(&of->mutex); + goto out_free; + } + + if (battr->read) + count = battr->read(file, kobj, battr, buf, offs, count); + else + count = -EIO; + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); + + if (count < 0) + goto out_free; + + if (copy_to_user(userbuf, buf, count)) { + count = -EFAULT; + goto out_free; + } + + pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); + + *off = offs + count; + + out_free: + kfree(buf); + return count; +} /** - * flush_write_buffer - push buffer to kobject. - * @dentry: dentry to the attribute - * @buffer: data buffer for file. - * @count: number of bytes + * flush_write_buffer - push buffer to kobject + * @of: open file + * @buf: data buffer for file + * @off: file offset to write to + * @count: number of bytes * - * Get the correct pointers for the kobject and the attribute we're - * dealing with, then call the store() method for the attribute, - * passing the buffer that we acquired in fill_write_buffer(). + * Get the correct pointers for the kobject and the attribute we're dealing + * with, then call the store() method for it with @buf. */ -static int flush_write_buffer(struct dentry *dentry, - struct sysfs_buffer *buffer, size_t count) +static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off, + size_t count) { - struct sysfs_dirent *attr_sd = dentry->d_fsdata; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - const struct sysfs_ops *ops = buffer->ops; - int rc; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + int rc = 0; - /* need attr_sd for attr and ops, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) + /* + * Need @of->sd for attr and ops, its parent for kobj. @of->mutex + * nests outside active ref and is just to ensure that the ops + * aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + mutex_unlock(&of->mutex); return -ENODEV; + } - rc = ops->store(kobj, attr_sd->s_attr.attr, buffer->page, count); + if (sysfs_is_bin(of->sd)) { + struct bin_attribute *battr = of->sd->s_attr.bin_attr; - sysfs_put_active(attr_sd); + rc = -EIO; + if (battr->write) + rc = battr->write(of->file, kobj, battr, buf, off, + count); + } else { + const struct sysfs_ops *ops = sysfs_file_ops(of->sd); + + rc = ops->store(kobj, of->sd->s_attr.attr, buf, count); + } + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); return rc; } - /** - * sysfs_write_file - write an attribute. - * @file: file pointer - * @buf: data to write - * @count: number of bytes - * @ppos: starting offset + * sysfs_write_file - write an attribute + * @file: file pointer + * @user_buf: data to write + * @count: number of bytes + * @ppos: starting offset + * + * Copy data in from userland and pass it to the matching + * sysfs_ops->store() by invoking flush_write_buffer(). * - * Similar to sysfs_read_file(), though working in the opposite direction. - * We allocate and fill the data from the user in fill_write_buffer(), - * then push it to the kobject in flush_write_buffer(). - * There is no easy way for us to know if userspace is only doing a partial - * write, so we don't support them. We expect the entire buffer to come - * on the first write. - * Hint: if you're writing a value, first read the file, modify only the - * the value you're changing, then write entire buffer back. + * There is no easy way for us to know if userspace is only doing a partial + * write, so we don't support them. We expect the entire buffer to come on + * the first write. Hint: if you're writing a value, first read the file, + * modify only the the value you're changing, then write entire buffer + * back. */ -static ssize_t sysfs_write_file(struct file *file, const char __user *buf, +static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct sysfs_buffer *buffer = file->private_data; - ssize_t len; + struct sysfs_open_file *of = sysfs_of(file); + ssize_t len = min_t(size_t, count, PAGE_SIZE); + loff_t size = file_inode(file)->i_size; + char *buf; + + if (sysfs_is_bin(of->sd) && size) { + if (size <= *ppos) + return 0; + len = min_t(ssize_t, len, size - *ppos); + } - mutex_lock(&buffer->mutex); - len = fill_write_buffer(buffer, buf, count); - if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, len); + if (!len) + return 0; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + len = -EFAULT; + goto out_free; + } + buf[len] = '\0'; /* guarantee string termination */ + + len = flush_write_buffer(of, buf, *ppos, len); if (len > 0) *ppos += len; - mutex_unlock(&buffer->mutex); +out_free: + kfree(buf); return len; } +static void sysfs_bin_vma_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + + if (!of->vm_ops) + return; + + if (!sysfs_get_active(of->sd)) + return; + + if (of->vm_ops->open) + of->vm_ops->open(vma); + + sysfs_put_active(of->sd); +} + +static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(of->sd)) + return VM_FAULT_SIGBUS; + + ret = VM_FAULT_SIGBUS; + if (of->vm_ops->fault) + ret = of->vm_ops->fault(vma, vmf); + + sysfs_put_active(of->sd); + return ret; +} + +static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(of->sd)) + return VM_FAULT_SIGBUS; + + ret = 0; + if (of->vm_ops->page_mkwrite) + ret = of->vm_ops->page_mkwrite(vma, vmf); + else + file_update_time(file); + + sysfs_put_active(of->sd); + return ret; +} + +static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return -EINVAL; + + if (!sysfs_get_active(of->sd)) + return -EINVAL; + + ret = -EINVAL; + if (of->vm_ops->access) + ret = of->vm_ops->access(vma, addr, buf, len, write); + + sysfs_put_active(of->sd); + return ret; +} + +#ifdef CONFIG_NUMA +static int sysfs_bin_set_policy(struct vm_area_struct *vma, + struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return 0; + + if (!sysfs_get_active(of->sd)) + return -EINVAL; + + ret = 0; + if (of->vm_ops->set_policy) + ret = of->vm_ops->set_policy(vma, new); + + sysfs_put_active(of->sd); + return ret; +} + +static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + struct mempolicy *pol; + + if (!of->vm_ops) + return vma->vm_policy; + + if (!sysfs_get_active(of->sd)) + return vma->vm_policy; + + pol = vma->vm_policy; + if (of->vm_ops->get_policy) + pol = of->vm_ops->get_policy(vma, addr); + + sysfs_put_active(of->sd); + return pol; +} + +static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, + const nodemask_t *to, unsigned long flags) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return 0; + + if (!sysfs_get_active(of->sd)) + return 0; + + ret = 0; + if (of->vm_ops->migrate) + ret = of->vm_ops->migrate(vma, from, to, flags); + + sysfs_put_active(of->sd); + return ret; +} +#endif + +static const struct vm_operations_struct sysfs_bin_vm_ops = { + .open = sysfs_bin_vma_open, + .fault = sysfs_bin_fault, + .page_mkwrite = sysfs_bin_page_mkwrite, + .access = sysfs_bin_access, +#ifdef CONFIG_NUMA + .set_policy = sysfs_bin_set_policy, + .get_policy = sysfs_bin_get_policy, + .migrate = sysfs_bin_migrate, +#endif +}; + +static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct sysfs_open_file *of = sysfs_of(file); + struct bin_attribute *battr = of->sd->s_attr.bin_attr; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + int rc; + + mutex_lock(&of->mutex); + + /* need of->sd for battr, its parent for kobj */ + rc = -ENODEV; + if (!sysfs_get_active(of->sd)) + goto out_unlock; + + if (!battr->mmap) + goto out_put; + + rc = battr->mmap(file, kobj, battr, vma); + if (rc) + goto out_put; + + /* + * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() + * to satisfy versions of X which crash if the mmap fails: that + * substitutes a new vm_file, and we don't then want bin_vm_ops. + */ + if (vma->vm_file != file) + goto out_put; + + rc = -EINVAL; + if (of->mmapped && of->vm_ops != vma->vm_ops) + goto out_put; + + /* + * It is not possible to successfully wrap close. + * So error if someone is trying to use close. + */ + rc = -EINVAL; + if (vma->vm_ops && vma->vm_ops->close) + goto out_put; + + rc = 0; + of->mmapped = 1; + of->vm_ops = vma->vm_ops; + vma->vm_ops = &sysfs_bin_vm_ops; +out_put: + sysfs_put_active(of->sd); +out_unlock: + mutex_unlock(&of->mutex); + + return rc; +} + /** * sysfs_get_open_dirent - get or create sysfs_open_dirent * @sd: target sysfs_dirent - * @buffer: sysfs_buffer for this instance of open + * @of: sysfs_open_file for this instance of open * * If @sd->s_attr.open exists, increment its reference count; - * otherwise, create one. @buffer is chained to the buffers - * list. + * otherwise, create one. @of is chained to the files list. * * LOCKING: * Kernel thread context (may sleep). @@ -253,11 +531,12 @@ static ssize_t sysfs_write_file(struct file *file, const char __user *buf, * 0 on success, -errno on failure. */ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, - struct sysfs_buffer *buffer) + struct sysfs_open_file *of) { struct sysfs_open_dirent *od, *new_od = NULL; retry: + mutex_lock(&sysfs_open_file_mutex); spin_lock_irq(&sysfs_open_dirent_lock); if (!sd->s_attr.open && new_od) { @@ -268,10 +547,11 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, od = sd->s_attr.open; if (od) { atomic_inc(&od->refcnt); - list_add_tail(&buffer->list, &od->buffers); + list_add_tail(&of->list, &od->files); } spin_unlock_irq(&sysfs_open_dirent_lock); + mutex_unlock(&sysfs_open_file_mutex); if (od) { kfree(new_od); @@ -286,36 +566,40 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, atomic_set(&new_od->refcnt, 0); atomic_set(&new_od->event, 1); init_waitqueue_head(&new_od->poll); - INIT_LIST_HEAD(&new_od->buffers); + INIT_LIST_HEAD(&new_od->files); goto retry; } /** * sysfs_put_open_dirent - put sysfs_open_dirent * @sd: target sysfs_dirent - * @buffer: associated sysfs_buffer + * @of: associated sysfs_open_file * - * Put @sd->s_attr.open and unlink @buffer from the buffers list. - * If reference count reaches zero, disassociate and free it. + * Put @sd->s_attr.open and unlink @of from the files list. If + * reference count reaches zero, disassociate and free it. * * LOCKING: * None. */ static void sysfs_put_open_dirent(struct sysfs_dirent *sd, - struct sysfs_buffer *buffer) + struct sysfs_open_file *of) { struct sysfs_open_dirent *od = sd->s_attr.open; unsigned long flags; + mutex_lock(&sysfs_open_file_mutex); spin_lock_irqsave(&sysfs_open_dirent_lock, flags); - list_del(&buffer->list); + if (of) + list_del(&of->list); + if (atomic_dec_and_test(&od->refcnt)) sd->s_attr.open = NULL; else od = NULL; spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); + mutex_unlock(&sysfs_open_file_mutex); kfree(od); } @@ -324,67 +608,81 @@ static int sysfs_open_file(struct inode *inode, struct file *file) { struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct sysfs_buffer *buffer; - const struct sysfs_ops *ops; + struct sysfs_open_file *of; + bool has_read, has_write; int error = -EACCES; /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active(attr_sd)) return -ENODEV; - /* every kobject with an attribute needs a ktype assigned */ - if (kobj->ktype && kobj->ktype->sysfs_ops) - ops = kobj->ktype->sysfs_ops; - else { - WARN(1, KERN_ERR - "missing sysfs attribute operations for kobject: %s\n", - kobject_name(kobj)); - goto err_out; - } + if (sysfs_is_bin(attr_sd)) { + struct bin_attribute *battr = attr_sd->s_attr.bin_attr; - /* File needs write support. - * The inode's perms must say it's ok, - * and we must have a store method. - */ - if (file->f_mode & FMODE_WRITE) { - if (!(inode->i_mode & S_IWUGO) || !ops->store) - goto err_out; - } + has_read = battr->read || battr->mmap; + has_write = battr->write || battr->mmap; + } else { + const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); - /* File needs read support. - * The inode's perms must say it's ok, and we there - * must be a show method for it. - */ - if (file->f_mode & FMODE_READ) { - if (!(inode->i_mode & S_IRUGO) || !ops->show) + /* every kobject with an attribute needs a ktype assigned */ + if (WARN(!ops, KERN_ERR + "missing sysfs attribute operations for kobject: %s\n", + kobject_name(kobj))) goto err_out; + + has_read = ops->show; + has_write = ops->store; } - /* No error? Great, allocate a buffer for the file, and store it - * it in file->private_data for easy access. - */ + /* check perms and supported operations */ + if ((file->f_mode & FMODE_WRITE) && + (!(inode->i_mode & S_IWUGO) || !has_write)) + goto err_out; + + if ((file->f_mode & FMODE_READ) && + (!(inode->i_mode & S_IRUGO) || !has_read)) + goto err_out; + + /* allocate a sysfs_open_file for the file */ error = -ENOMEM; - buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); - if (!buffer) + of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); + if (!of) goto err_out; - mutex_init(&buffer->mutex); - buffer->needs_read_fill = 1; - buffer->ops = ops; - file->private_data = buffer; + mutex_init(&of->mutex); + of->sd = attr_sd; + of->file = file; - /* make sure we have open dirent struct */ - error = sysfs_get_open_dirent(attr_sd, buffer); + /* + * Always instantiate seq_file even if read access doesn't use + * seq_file or is not requested. This unifies private data access + * and readable regular files are the vast majority anyway. + */ + if (sysfs_is_bin(attr_sd)) + error = single_open(file, NULL, of); + else + error = single_open(file, sysfs_seq_show, of); if (error) goto err_free; + /* seq_file clears PWRITE unconditionally, restore it if WRITE */ + if (file->f_mode & FMODE_WRITE) + file->f_mode |= FMODE_PWRITE; + + /* make sure we have open dirent struct */ + error = sysfs_get_open_dirent(attr_sd, of); + if (error) + goto err_close; + /* open succeeded, put active references */ sysfs_put_active(attr_sd); return 0; - err_free: - kfree(buffer); - err_out: +err_close: + single_release(inode, file); +err_free: + kfree(of); +err_out: sysfs_put_active(attr_sd); return error; } @@ -392,17 +690,41 @@ static int sysfs_open_file(struct inode *inode, struct file *file) static int sysfs_release(struct inode *inode, struct file *filp) { struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata; - struct sysfs_buffer *buffer = filp->private_data; + struct sysfs_open_file *of = sysfs_of(filp); - sysfs_put_open_dirent(sd, buffer); - - if (buffer->page) - free_page((unsigned long)buffer->page); - kfree(buffer); + sysfs_put_open_dirent(sd, of); + single_release(inode, filp); + kfree(of); return 0; } +void sysfs_unmap_bin_file(struct sysfs_dirent *sd) +{ + struct sysfs_open_dirent *od; + struct sysfs_open_file *of; + + if (!sysfs_is_bin(sd)) + return; + + spin_lock_irq(&sysfs_open_dirent_lock); + od = sd->s_attr.open; + if (od) + atomic_inc(&od->refcnt); + spin_unlock_irq(&sysfs_open_dirent_lock); + if (!od) + return; + + mutex_lock(&sysfs_open_file_mutex); + list_for_each_entry(of, &od->files, list) { + struct inode *inode = file_inode(of->file); + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + } + mutex_unlock(&sysfs_open_file_mutex); + + sysfs_put_open_dirent(sd, NULL); +} + /* Sysfs attribute files are pollable. The idea is that you read * the content and then you use 'poll' or 'select' to wait for * the content to change. When the content changes (assuming the @@ -418,7 +740,7 @@ static int sysfs_release(struct inode *inode, struct file *filp) */ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) { - struct sysfs_buffer *buffer = filp->private_data; + struct sysfs_open_file *of = sysfs_of(filp); struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; struct sysfs_open_dirent *od = attr_sd->s_attr.open; @@ -430,13 +752,12 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) sysfs_put_active(attr_sd); - if (buffer->event != atomic_read(&od->event)) + if (of->event != atomic_read(&od->event)) goto trigger; return DEFAULT_POLLMASK; trigger: - buffer->needs_read_fill = 1; return DEFAULT_POLLMASK|POLLERR|POLLPRI; } @@ -466,9 +787,9 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) mutex_lock(&sysfs_mutex); if (sd && dir) - sd = sysfs_find_dirent(sd, NULL, dir); + sd = sysfs_find_dirent(sd, dir, NULL); if (sd && attr) - sd = sysfs_find_dirent(sd, NULL, attr); + sd = sysfs_find_dirent(sd, attr, NULL); if (sd) sysfs_notify_dirent(sd); @@ -477,7 +798,7 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) EXPORT_SYMBOL_GPL(sysfs_notify); const struct file_operations sysfs_file_operations = { - .read = sysfs_read_file, + .read = seq_read, .write = sysfs_write_file, .llseek = generic_file_llseek, .open = sysfs_open_file, @@ -485,58 +806,25 @@ const struct file_operations sysfs_file_operations = { .poll = sysfs_poll, }; -static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, - const void **pns) -{ - struct sysfs_dirent *dir_sd = kobj->sd; - const struct sysfs_ops *ops; - const void *ns = NULL; - int err; - - if (!dir_sd) { - WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n", - kobject_name(kobj)); - return -ENOENT; - } - - err = 0; - if (!sysfs_ns_type(dir_sd)) - goto out; - - err = -EINVAL; - if (!kobj->ktype) - goto out; - ops = kobj->ktype->sysfs_ops; - if (!ops) - goto out; - if (!ops->namespace) - goto out; - - err = 0; - ns = ops->namespace(kobj, attr); -out: - if (err) { - WARN(1, KERN_ERR - "missing sysfs namespace attribute operation for kobject: %s\n", - kobject_name(kobj)); - } - *pns = ns; - return err; -} +const struct file_operations sysfs_bin_operations = { + .read = sysfs_bin_read, + .write = sysfs_write_file, + .llseek = generic_file_llseek, + .mmap = sysfs_bin_mmap, + .open = sysfs_open_file, + .release = sysfs_release, + .poll = sysfs_poll, +}; -int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, umode_t amode) +int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type, + umode_t amode, const void *ns) { umode_t mode = (amode & S_IALLUGO) | S_IFREG; struct sysfs_addrm_cxt acxt; struct sysfs_dirent *sd; - const void *ns; int rc; - rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns); - if (rc) - return rc; - sd = sysfs_new_dirent(attr->name, mode, type); if (!sd) return -ENOMEM; @@ -545,8 +833,8 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, sd->s_attr.attr = (void *)attr; sysfs_dirent_init_lockdep(sd); - sysfs_addrm_start(&acxt, dir_sd); - rc = sysfs_add_one(&acxt, sd); + sysfs_addrm_start(&acxt); + rc = sysfs_add_one(&acxt, sd, dir_sd); sysfs_addrm_finish(&acxt); if (rc) @@ -559,23 +847,25 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type) { - return sysfs_add_file_mode(dir_sd, attr, type, attr->mode); + return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); } - /** - * sysfs_create_file - create an attribute file for an object. - * @kobj: object we're creating for. - * @attr: attribute descriptor. + * sysfs_create_file_ns - create an attribute file for an object with custom ns + * @kobj: object we're creating for + * @attr: attribute descriptor + * @ns: namespace the new file should belong to */ -int sysfs_create_file(struct kobject *kobj, const struct attribute *attr) +int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, + const void *ns) { BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR); + return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, + attr->mode, ns); } -EXPORT_SYMBOL_GPL(sysfs_create_file); +EXPORT_SYMBOL_GPL(sysfs_create_file_ns); int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr) { @@ -604,7 +894,7 @@ int sysfs_add_file_to_group(struct kobject *kobj, int error; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); + dir_sd = sysfs_get_dirent(kobj->sd, group); else dir_sd = sysfs_get(kobj->sd); @@ -630,17 +920,12 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, { struct sysfs_dirent *sd; struct iattr newattrs; - const void *ns; int rc; - rc = sysfs_attr_ns(kobj, attr, &ns); - if (rc) - return rc; - mutex_lock(&sysfs_mutex); rc = -ENOENT; - sd = sysfs_find_dirent(kobj->sd, ns, attr->name); + sd = sysfs_find_dirent(kobj->sd, attr->name, NULL); if (!sd) goto out; @@ -655,22 +940,21 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, EXPORT_SYMBOL_GPL(sysfs_chmod_file); /** - * sysfs_remove_file - remove an object attribute. - * @kobj: object we're acting for. - * @attr: attribute descriptor. + * sysfs_remove_file_ns - remove an object attribute with a custom ns tag + * @kobj: object we're acting for + * @attr: attribute descriptor + * @ns: namespace tag of the file to remove * - * Hash the attribute name and kill the victim. + * Hash the attribute name and namespace tag and kill the victim. */ -void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr) +void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, + const void *ns) { - const void *ns; - - if (sysfs_attr_ns(kobj, attr, &ns)) - return; + struct sysfs_dirent *dir_sd = kobj->sd; - sysfs_hash_and_remove(kobj->sd, ns, attr->name); + sysfs_hash_and_remove(dir_sd, attr->name, ns); } -EXPORT_SYMBOL_GPL(sysfs_remove_file); +EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) { @@ -692,16 +976,42 @@ void sysfs_remove_file_from_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); + dir_sd = sysfs_get_dirent(kobj->sd, group); else dir_sd = sysfs_get(kobj->sd); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, NULL, attr->name); + sysfs_hash_and_remove(dir_sd, attr->name, NULL); sysfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); +/** + * sysfs_create_bin_file - create binary file for object. + * @kobj: object. + * @attr: attribute descriptor. + */ +int sysfs_create_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) +{ + BUG_ON(!kobj || !kobj->sd || !attr); + + return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); +} +EXPORT_SYMBOL_GPL(sysfs_create_bin_file); + +/** + * sysfs_remove_bin_file - remove binary file for object. + * @kobj: object. + * @attr: attribute descriptor. + */ +void sysfs_remove_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) +{ + sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); + struct sysfs_schedule_callback_struct { struct list_head workq_list; struct kobject *kobj; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 5f92cd2f61c1..1898a10e38ce 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, if (grp->attrs) for (attr = grp->attrs; *attr; attr++) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) sysfs_remove_bin_file(kobj, *bin_attr); @@ -49,16 +49,17 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, * re-adding (if required) the file. */ if (update) - sysfs_hash_and_remove(dir_sd, NULL, - (*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name, + NULL); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); if (!mode) continue; } - error = sysfs_add_file_mode(dir_sd, *attr, - SYSFS_KOBJ_ATTR, - (*attr)->mode | mode); + error = sysfs_add_file_mode_ns(dir_sd, *attr, + SYSFS_KOBJ_ATTR, + (*attr)->mode | mode, + NULL); if (unlikely(error)) break; } @@ -110,7 +111,7 @@ static int internal_create_group(struct kobject *kobj, int update, error = create_files(sd, kobj, grp, update); if (error) { if (grp->name) - sysfs_remove_subdir(sd); + sysfs_remove(sd); } sysfs_put(sd); return error; @@ -206,7 +207,7 @@ void sysfs_remove_group(struct kobject *kobj, struct sysfs_dirent *sd; if (grp->name) { - sd = sysfs_get_dirent(dir_sd, NULL, grp->name); + sd = sysfs_get_dirent(dir_sd, grp->name); if (!sd) { WARN(!sd, KERN_WARNING "sysfs group %p not found for kobject '%s'\n", @@ -218,7 +219,7 @@ void sysfs_remove_group(struct kobject *kobj, remove_files(sd, kobj, grp); if (grp->name) - sysfs_remove_subdir(sd); + sysfs_remove(sd); sysfs_put(sd); } @@ -261,7 +262,7 @@ int sysfs_merge_group(struct kobject *kobj, struct attribute *const *attr; int i; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); + dir_sd = sysfs_get_dirent(kobj->sd, grp->name); if (!dir_sd) return -ENOENT; @@ -269,7 +270,7 @@ int sysfs_merge_group(struct kobject *kobj, error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); if (error) { while (--i >= 0) - sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name); + sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); } sysfs_put(dir_sd); @@ -288,10 +289,10 @@ void sysfs_unmerge_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; struct attribute *const *attr; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); + dir_sd = sysfs_get_dirent(kobj->sd, grp->name); if (dir_sd) { for (attr = grp->attrs; *attr; ++attr) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); sysfs_put(dir_sd); } } @@ -310,7 +311,7 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct sysfs_dirent *dir_sd; int error = 0; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); + dir_sd = sysfs_get_dirent(kobj->sd, group_name); if (!dir_sd) return -ENOENT; @@ -332,9 +333,9 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, { struct sysfs_dirent *dir_sd; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); + dir_sd = sysfs_get_dirent(kobj->sd, group_name); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, NULL, link_name); + sysfs_hash_and_remove(dir_sd, link_name, NULL); sysfs_put(dir_sd); } } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 963f910c8034..1750f790af3b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -258,9 +258,9 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) inode->i_fop = &sysfs_file_operations; break; case SYSFS_KOBJ_BIN_ATTR: - bin_attr = sd->s_bin_attr.bin_attr; + bin_attr = sd->s_attr.bin_attr; inode->i_size = bin_attr->size; - inode->i_fop = &bin_fops; + inode->i_fop = &sysfs_bin_operations; break; case SYSFS_KOBJ_LINK: inode->i_op = &sysfs_symlink_inode_operations; @@ -314,32 +314,6 @@ void sysfs_evict_inode(struct inode *inode) sysfs_put(sd); } -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, - const char *name) -{ - struct sysfs_addrm_cxt acxt; - struct sysfs_dirent *sd; - - if (!dir_sd) { - WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", - name); - return -ENOENT; - } - - sysfs_addrm_start(&acxt, dir_sd); - - sd = sysfs_find_dirent(dir_sd, ns, name); - if (sd) - sysfs_remove_one(&acxt, sd); - - sysfs_addrm_finish(&acxt); - - if (sd) - return 0; - else - return -ENOENT; -} - int sysfs_permission(struct inode *inode, int mask) { struct sysfs_dirent *sd; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 2dd4507d9edd..3ae3f1bf1a09 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -33,13 +33,15 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, BUG_ON(!name || !parent_sd); - /* target->sd can go away beneath us but is protected with - * sysfs_assoc_lock. Fetch target_sd from it. + /* + * We don't own @target and it may be removed at any time. + * Synchronize using sysfs_symlink_target_lock. See + * sysfs_remove_dir() for details. */ - spin_lock(&sysfs_assoc_lock); + spin_lock(&sysfs_symlink_target_lock); if (target->sd) target_sd = sysfs_get(target->sd); - spin_unlock(&sysfs_assoc_lock); + spin_unlock(&sysfs_symlink_target_lock); error = -ENOENT; if (!target_sd) @@ -52,18 +54,18 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, ns_type = sysfs_ns_type(parent_sd); if (ns_type) - sd->s_ns = target->ktype->namespace(target); + sd->s_ns = target_sd->s_ns; sd->s_symlink.target_sd = target_sd; target_sd = NULL; /* reference is now owned by the symlink */ - sysfs_addrm_start(&acxt, parent_sd); + sysfs_addrm_start(&acxt); /* Symlinks must be between directories with the same ns_type */ if (!ns_type || (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) { if (warn) - error = sysfs_add_one(&acxt, sd); + error = sysfs_add_one(&acxt, sd, parent_sd); else - error = __sysfs_add_one(&acxt, sd); + error = __sysfs_add_one(&acxt, sd, parent_sd); } else { error = -EINVAL; WARN(1, KERN_WARNING @@ -155,11 +157,17 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, const char *name) { const void *ns = NULL; - spin_lock(&sysfs_assoc_lock); + + /* + * We don't own @target and it may be removed at any time. + * Synchronize using sysfs_symlink_target_lock. See + * sysfs_remove_dir() for details. + */ + spin_lock(&sysfs_symlink_target_lock); if (targ->sd && sysfs_ns_type(kobj->sd)) ns = targ->sd->s_ns; - spin_unlock(&sysfs_assoc_lock); - sysfs_hash_and_remove(kobj->sd, ns, name); + spin_unlock(&sysfs_symlink_target_lock); + sysfs_hash_and_remove(kobj->sd, name, ns); } /** @@ -176,24 +184,25 @@ void sysfs_remove_link(struct kobject *kobj, const char *name) else parent_sd = kobj->sd; - sysfs_hash_and_remove(parent_sd, NULL, name); + sysfs_hash_and_remove(parent_sd, name, NULL); } EXPORT_SYMBOL_GPL(sysfs_remove_link); /** - * sysfs_rename_link - rename symlink in object's directory. + * sysfs_rename_link_ns - rename symlink in object's directory. * @kobj: object we're acting for. * @targ: object we're pointing to. * @old: previous name of the symlink. * @new: new name of the symlink. + * @new_ns: new namespace of the symlink. * * A helper function for the common rename symlink idiom. */ -int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, - const char *old, const char *new) +int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new, const void *new_ns) { struct sysfs_dirent *parent_sd, *sd = NULL; - const void *old_ns = NULL, *new_ns = NULL; + const void *old_ns = NULL; int result; if (!kobj) @@ -205,7 +214,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, old_ns = targ->sd->s_ns; result = -ENOENT; - sd = sysfs_get_dirent(parent_sd, old_ns, old); + sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); if (!sd) goto out; @@ -215,16 +224,13 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, if (sd->s_symlink.target_sd->s_dir.kobj != targ) goto out; - if (sysfs_ns_type(parent_sd)) - new_ns = targ->ktype->namespace(targ); - - result = sysfs_rename(sd, parent_sd, new_ns, new); + result = sysfs_rename(sd, parent_sd, new, new_ns); out: sysfs_put(sd); return result; } -EXPORT_SYMBOL_GPL(sysfs_rename_link); +EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, struct sysfs_dirent *target_sd, char *path) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index b6deca3e301d..0af09fbfb3f6 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -29,15 +29,13 @@ struct sysfs_elem_symlink { }; struct sysfs_elem_attr { - struct attribute *attr; + union { + struct attribute *attr; + struct bin_attribute *bin_attr; + }; struct sysfs_open_dirent *open; }; -struct sysfs_elem_bin_attr { - struct bin_attribute *bin_attr; - struct hlist_head buffers; -}; - struct sysfs_inode_attrs { struct iattr ia_iattr; void *ia_secdata; @@ -74,7 +72,6 @@ struct sysfs_dirent { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; struct sysfs_elem_attr s_attr; - struct sysfs_elem_bin_attr s_bin_attr; }; unsigned short s_flags; @@ -115,6 +112,7 @@ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) } #ifdef CONFIG_DEBUG_LOCK_ALLOC + #define sysfs_dirent_init_lockdep(sd) \ do { \ struct attribute *attr = sd->s_attr.attr; \ @@ -124,15 +122,31 @@ do { \ \ lockdep_init_map(&sd->dep_map, "s_active", key, 0); \ } while (0) + +/* Test for attributes that want to ignore lockdep for read-locking */ +static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) +{ + int type = sysfs_type(sd); + + return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) && + sd->s_attr.attr->ignore_lockdep; +} + #else + #define sysfs_dirent_init_lockdep(sd) do {} while (0) + +static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) +{ + return true; +} + #endif /* * Context structure to be used while adding/removing nodes. */ struct sysfs_addrm_cxt { - struct sysfs_dirent *parent_sd; struct sysfs_dirent *removed; }; @@ -156,38 +170,37 @@ extern struct kmem_cache *sysfs_dir_cachep; * dir.c */ extern struct mutex sysfs_mutex; -extern spinlock_t sysfs_assoc_lock; +extern spinlock_t sysfs_symlink_target_lock; extern const struct dentry_operations sysfs_dentry_ops; extern const struct file_operations sysfs_dir_operations; extern const struct inode_operations sysfs_dir_inode_operations; -struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); void sysfs_put_active(struct sysfs_dirent *sd); -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *parent_sd); -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); -void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt); +void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name); +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd); +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd); +void sysfs_remove(struct sysfs_dirent *sd); +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, + const void *ns); void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name); -struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name); + const unsigned char *name, + const void *ns); struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); void release_sysfs_dirent(struct sysfs_dirent *sd); int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd); -void sysfs_remove_subdir(struct sysfs_dirent *sd); int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, - const void *ns, const char *new_name); + const char *new_name, const void *new_ns); static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) { @@ -218,25 +231,21 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, - const char *name); int sysfs_inode_init(void); /* * file.c */ extern const struct file_operations sysfs_file_operations; +extern const struct file_operations sysfs_bin_operations; int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type); -int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, umode_t amode); -/* - * bin.c - */ -extern const struct file_operations bin_fops; -void unmap_bin_file(struct sysfs_dirent *attr_sd); +int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type, + umode_t amode, const void *ns); +void sysfs_unmap_bin_file(struct sysfs_dirent *sd); /* * symlink.c diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 0957aa98b6c0..12dad188939d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -1158,7 +1158,7 @@ xfs_dir2_sf_to_block( /* * Create entry for . */ - dep = xfs_dir3_data_dot_entry_p(hdr); + dep = xfs_dir3_data_dot_entry_p(mp, hdr); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; @@ -1172,7 +1172,7 @@ xfs_dir2_sf_to_block( /* * Create entry for .. */ - dep = xfs_dir3_data_dotdot_entry_p(hdr); + dep = xfs_dir3_data_dotdot_entry_p(mp, hdr); dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; @@ -1183,7 +1183,7 @@ xfs_dir2_sf_to_block( blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)hdr)); - offset = xfs_dir3_data_first_offset(hdr); + offset = xfs_dir3_data_first_offset(mp); /* * Loop over existing entries, stuff them in. */ diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h index a0961a61ac1a..9cf67381adf6 100644 --- a/fs/xfs/xfs_dir2_format.h +++ b/fs/xfs/xfs_dir2_format.h @@ -497,69 +497,58 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) /* * Offsets of . and .. in data space (always block 0) * - * The macros are used for shortform directories as they have no headers to read - * the magic number out of. Shortform directories need to know the size of the - * data block header because the sfe embeds the block offset of the entry into - * it so that it doesn't change when format conversion occurs. Bad Things Happen - * if we don't follow this rule. - * * XXX: there is scope for significant optimisation of the logic here. Right * now we are checking for "dir3 format" over and over again. Ideally we should * only do it once for each operation. */ -#define XFS_DIR3_DATA_DOT_OFFSET(mp) \ - xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb)) -#define XFS_DIR3_DATA_DOTDOT_OFFSET(mp) \ - (XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1)) -#define XFS_DIR3_DATA_FIRST_OFFSET(mp) \ - (XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2)) - static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dot_offset(struct xfs_mount *mp) { - return xfs_dir3_data_entry_offset(hdr); + return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); } static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dotdot_offset(struct xfs_mount *mp) { - bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - return xfs_dir3_data_dot_offset(hdr) + - __xfs_dir3_data_entsize(dir3, 1); + return xfs_dir3_data_dot_offset(mp) + + xfs_dir3_data_entsize(mp, 1); } static inline xfs_dir2_data_aoff_t -xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_first_offset(struct xfs_mount *mp) { - bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - return xfs_dir3_data_dotdot_offset(hdr) + - __xfs_dir3_data_entsize(dir3, 2); + return xfs_dir3_data_dotdot_offset(mp) + + xfs_dir3_data_entsize(mp, 2); } /* * location of . and .. in data space (always block 0) */ static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dot_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) { return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_dot_offset(hdr)); + ((char *)hdr + xfs_dir3_data_dot_offset(mp)); } static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dotdot_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) { return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_dotdot_offset(hdr)); + ((char *)hdr + xfs_dir3_data_dotdot_offset(mp)); } static inline struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_first_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) { return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_first_offset(hdr)); + ((char *)hdr + xfs_dir3_data_first_offset(mp)); } /* diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 8993ec17452c..8f84153e98a8 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -119,9 +119,9 @@ xfs_dir2_sf_getdents( * mp->m_dirdatablk. */ dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - XFS_DIR3_DATA_DOT_OFFSET(mp)); + xfs_dir3_data_dot_offset(mp)); dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - XFS_DIR3_DATA_DOTDOT_OFFSET(mp)); + xfs_dir3_data_dotdot_offset(mp)); /* * Put . entry unless we're starting past it. diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index bb6e2848f473..3ef6d402084c 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -557,7 +557,7 @@ xfs_dir2_sf_addname_hard( * to insert the new entry. * If it's going to end up at the end then oldsfep will point there. */ - for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp), + for (offset = xfs_dir3_data_first_offset(mp), oldsfep = xfs_dir2_sf_firstentry(oldsfp), add_datasize = xfs_dir3_data_entsize(mp, args->namelen), eof = (char *)oldsfep == &buf[old_isize]; @@ -640,7 +640,7 @@ xfs_dir2_sf_addname_pick( sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; size = xfs_dir3_data_entsize(mp, args->namelen); - offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); + offset = xfs_dir3_data_first_offset(mp); sfep = xfs_dir2_sf_firstentry(sfp); holefit = 0; /* @@ -713,7 +713,7 @@ xfs_dir2_sf_check( mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); + offset = xfs_dir3_data_first_offset(mp); ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 71520e6e5d65..1ee776d477c3 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -64,7 +64,8 @@ int xfs_dqerror_mod = 33; struct kmem_zone *xfs_qm_dqtrxzone; static struct kmem_zone *xfs_qm_dqzone; -static struct lock_class_key xfs_dquot_other_class; +static struct lock_class_key xfs_dquot_group_class; +static struct lock_class_key xfs_dquot_project_class; /* * This is called to free all the memory associated with a dquot @@ -703,8 +704,20 @@ xfs_qm_dqread( * Make sure group quotas have a different lock class than user * quotas. */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + switch (type) { + case XFS_DQ_USER: + /* uses the default lock class */ + break; + case XFS_DQ_GROUP: + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class); + break; + case XFS_DQ_PROJ: + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class); + break; + default: + ASSERT(0); + break; + } XFS_STATS_INC(xs_qm_dquot); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cc179878fe41..39797490a1f1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1585,6 +1585,7 @@ xlog_recover_add_to_trans( "bad number of regions (%d) in inode log format", in_f->ilf_size); ASSERT(0); + kmem_free(ptr); return XFS_ERROR(EIO); } |