diff options
author | Jan Kara <jack@suse.cz> | 2019-11-06 10:52:10 +0100 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2019-11-06 10:52:10 +0100 |
commit | dae82c7fd0926840c832151f3258ba751f73d348 (patch) | |
tree | 5cdef8359f6cfb964919ffb0bfa36d2de60140b1 /fs | |
parent | reiserfs: replace open-coded atomic_dec_and_mutex_lock() (diff) | |
parent | quota: Handle quotas without quota inodes in dquot_get_state() (diff) | |
download | linux-dae82c7fd0926840c832151f3258ba751f73d348.tar.xz linux-dae82c7fd0926840c832151f3258ba751f73d348.zip |
Pull series refactoring quota enabling and disabling code.
Diffstat (limited to 'fs')
43 files changed, 561 insertions, 517 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index ad4c6b1d5074..c5642bcb6b46 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -879,7 +879,7 @@ out_free_interp: the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE; + int elf_prot, elf_flags; unsigned long k, vaddr; unsigned long total_size = 0; @@ -911,13 +911,6 @@ out_free_interp: */ } } - - /* - * Some binaries have overlapping elf segments and then - * we have to forcefully map over an existing mapping - * e.g. over this newly established brk mapping. - */ - elf_fixed = MAP_FIXED; } elf_prot = make_prot(elf_ppnt->p_flags); @@ -930,7 +923,7 @@ out_free_interp: * the ET_DYN load_addr calculations, proceed normally. */ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { - elf_flags |= elf_fixed; + elf_flags |= MAP_FIXED; } else if (loc->elf_ex.e_type == ET_DYN) { /* * This logic is run once for the first LOAD Program @@ -966,7 +959,7 @@ out_free_interp: load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); - elf_flags |= elf_fixed; + elf_flags |= MAP_FIXED; } else load_bias = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8fe4eb7e5045..27e5b269e729 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1591,7 +1591,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; - struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; u64 release_bytes = 0; u64 lockstart; @@ -1611,6 +1610,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, return -ENOMEM; while (iov_iter_count(i) > 0) { + struct extent_state *cached_state = NULL; size_t offset = offset_in_page(pos); size_t sector_offset; size_t write_bytes = min(iov_iter_count(i), @@ -1758,9 +1758,20 @@ again: if (copied > 0) ret = btrfs_dirty_pages(inode, pages, dirty_pages, pos, copied, &cached_state); + + /* + * If we have not locked the extent range, because the range's + * start offset is >= i_size, we might still have a non-NULL + * cached extent state, acquired while marking the extent range + * as delalloc through btrfs_dirty_pages(). Therefore free any + * possible cached extent state to avoid a memory leak. + */ if (extents_locked) unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); + else + free_extent_state(cached_state); + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, true); if (ret) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0546401bc0a..0f2754eaa05b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6305,13 +6305,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, u32 sizes[2]; int nitems = name ? 2 : 1; unsigned long ptr; + unsigned int nofs_flag; int ret; path = btrfs_alloc_path(); if (!path) return ERR_PTR(-ENOMEM); + nofs_flag = memalloc_nofs_save(); inode = new_inode(fs_info->sb); + memalloc_nofs_restore(nofs_flag); if (!inode) { btrfs_free_path(path); return ERR_PTR(-ENOMEM); diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index e87cbdad02a3..b57f3618e58e 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -500,7 +500,7 @@ static int process_leaf(struct btrfs_root *root, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u32 count; - int i = 0, tree_block_level = 0, ret; + int i = 0, tree_block_level = 0, ret = 0; struct btrfs_key key; int nritems = btrfs_header_nritems(leaf); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f3215028235c..123ac54af071 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5085,7 +5085,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *path; struct btrfs_key key; int ret; - u64 clone_src_i_size; + u64 clone_src_i_size = 0; /* * Prevent cloning from a zero offset with a length matching the sector diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 29b82a795522..8a6cc600bf18 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2932,7 +2932,8 @@ out: * in the tree of log roots */ static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) + struct btrfs_root *log, + struct btrfs_root_item *root_item) { struct btrfs_fs_info *fs_info = log->fs_info; int ret; @@ -2940,10 +2941,10 @@ static int update_log_root(struct btrfs_trans_handle *trans, if (log->log_transid == 1) { /* insert root item on the first sync */ ret = btrfs_insert_root(trans, fs_info->log_root_tree, - &log->root_key, &log->root_item); + &log->root_key, root_item); } else { ret = btrfs_update_root(trans, fs_info->log_root_tree, - &log->root_key, &log->root_item); + &log->root_key, root_item); } return ret; } @@ -3041,6 +3042,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log = root->log_root; struct btrfs_root *log_root_tree = fs_info->log_root_tree; + struct btrfs_root_item new_root_item; int log_transid = 0; struct btrfs_log_ctx root_log_ctx; struct blk_plug plug; @@ -3104,18 +3106,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } + /* + * We _must_ update under the root->log_mutex in order to make sure we + * have a consistent view of the log root we are trying to commit at + * this moment. + * + * We _must_ copy this into a local copy, because we are not holding the + * log_root_tree->log_mutex yet. This is important because when we + * commit the log_root_tree we must have a consistent view of the + * log_root_tree when we update the super block to point at the + * log_root_tree bytenr. If we update the log_root_tree here we'll race + * with the commit and possibly point at the new block which we may not + * have written out. + */ btrfs_set_root_node(&log->root_item, log->node); + memcpy(&new_root_item, &log->root_item, sizeof(new_root_item)); root->log_transid++; log->log_transid = root->log_transid; root->log_start_pid = 0; /* - * Update or create log root item under the root's log_mutex to prevent - * races with concurrent log syncs that can lead to failure to update - * log root item because it was not created yet. - */ - ret = update_log_root(trans, log); - /* * IO has been started, blocks of the log tree have WRITTEN flag set * in their headers. new modifications of the log will be written to * new positions. so it's safe to allow log writers to go in. @@ -3135,6 +3145,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); mutex_lock(&log_root_tree->log_mutex); + + /* + * Now we are safe to update the log_root_tree because we're under the + * log_mutex, and we're a current writer so we're holding the commit + * open until we drop the log_mutex. + */ + ret = update_log_root(trans, log, &new_root_item); + if (atomic_dec_and_test(&log_root_tree->log_writers)) { /* atomic_dec_and_test implies a barrier */ cond_wake_up_nomb(&log_root_tree->log_writer_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cdd7af424033..bdfe4493e43a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3845,7 +3845,11 @@ static int alloc_profile_is_valid(u64 flags, int extended) return !extended; /* "0" is valid for usual profiles */ /* true if exactly one bit set */ - return is_power_of_2(flags); + /* + * Don't use is_power_of_2(unsigned long) because it won't work + * for the single profile (1ULL << 48) on 32-bit CPUs. + */ + return flags != 0 && (flags & (flags - 1)) == 0; } static inline int balance_need_close(struct btrfs_fs_info *fs_info) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2e9c7f493f99..c049c7b3aa87 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -169,18 +169,26 @@ cifs_read_super(struct super_block *sb) else sb->s_maxbytes = MAX_NON_LFS; - /* BB FIXME fix time_gran to be larger for LANMAN sessions */ - sb->s_time_gran = 100; - - if (tcon->unix_ext) { - ts = cifs_NTtimeToUnix(0); + /* Some very old servers like DOS and OS/2 used 2 second granularity */ + if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) && + ((tcon->ses->capabilities & + tcon->ses->server->vals->cap_nt_find) == 0) && + !tcon->unix_ext) { + sb->s_time_gran = 1000000000; /* 1 second is max allowed gran */ + ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0); sb->s_time_min = ts.tv_sec; - ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX)); + ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), + cpu_to_le16(SMB_TIME_MAX), 0); sb->s_time_max = ts.tv_sec; } else { - ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0); + /* + * Almost every server, including all SMB2+, uses DCE TIME + * ie 100 nanosecond units, since 1601. See MS-DTYP and MS-FSCC + */ + sb->s_time_gran = 100; + ts = cifs_NTtimeToUnix(0); sb->s_time_min = ts.tv_sec; - ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), cpu_to_le16(SMB_TIME_MAX), 0); + ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX)); sb->s_time_max = ts.tv_sec; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2e960e1049db..50dfd9049370 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1210,7 +1210,7 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; -#define ACL_NO_MODE -1 +#define ACL_NO_MODE ((umode_t)(-1)) struct cifs_open_parms { struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2850c3ce4391..a64dfa95a925 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4264,7 +4264,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, server->ops->qfs_tcon(*xid, tcon); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & - FILE_READ_ONLY_DEVICE) + cpu_to_le32(FILE_READ_ONLY_DEVICE)) cifs_dbg(VFS, "mounted to read only share\n"); else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) == 0) @@ -4445,7 +4445,7 @@ static int setup_dfs_tgt_conn(const char *path, int rc; struct dfs_info3_param ref = {0}; char *mdata = NULL, *fake_devname = NULL; - struct smb_vol fake_vol = {0}; + struct smb_vol fake_vol = {NULL}; cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index dd5ac841aefa..7ce689d31aa2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -738,10 +738,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, static int cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { + struct inode *inode; + if (flags & LOOKUP_RCU) return -ECHILD; if (d_really_is_positive(direntry)) { + inode = d_inode(direntry); + if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode))) + CIFS_I(inode)->time = 0; /* force reval */ + if (cifs_revalidate_dentry(direntry)) return 0; else { @@ -752,7 +758,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) * attributes will have been updated by * cifs_revalidate_dentry(). */ - if (IS_AUTOMOUNT(d_inode(direntry)) && + if (IS_AUTOMOUNT(inode) && !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) { spin_lock(&direntry->d_lock); direntry->d_flags |= DCACHE_NEED_AUTOMOUNT; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4b95700c507c..5ad15de2bb4f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -253,6 +253,12 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, xid, fid); + if (rc) { + server->ops->close(xid, tcon, fid); + if (rc == -ESTALE) + rc = -EOPENSTALE; + } + out: kfree(buf); return rc; @@ -1840,13 +1846,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, { struct cifsFileInfo *open_file = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); /* only filter by fsuid on multiuser mounts */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); /* we could simply get the first_list_entry since write-only entries are always at the end of the list but since the first entry might have a close pending, we go through the whole list */ @@ -1858,7 +1863,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, /* found a good file */ /* lock it so it will not be closed on us */ cifsFileInfo_get(open_file); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return open_file; } /* else might as well continue, and look for another, or simply have the caller reopen it @@ -1866,7 +1871,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, } else /* write only file */ break; /* write only files are last so must be done */ } - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return NULL; } @@ -1877,7 +1882,6 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, { struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; bool any_available = false; int rc = -EBADF; unsigned int refind = 0; @@ -1897,16 +1901,15 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, } cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); - tcon = cifs_sb_master_tcon(cifs_sb); /* only filter by fsuid on multiuser mounts */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); refind_writable: if (refind > MAX_REOPEN_ATT) { - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return rc; } list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { @@ -1918,7 +1921,7 @@ refind_writable: if (!open_file->invalidHandle) { /* found a good writable file */ cifsFileInfo_get(open_file); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); *ret_file = open_file; return 0; } else { @@ -1938,7 +1941,7 @@ refind_writable: cifsFileInfo_get(inv_file); } - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); if (inv_file) { rc = cifs_reopen_file(inv_file, false); @@ -1953,7 +1956,7 @@ refind_writable: cifsFileInfo_put(inv_file); ++refind; inv_file = NULL; - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); goto refind_writable; } @@ -4461,17 +4464,15 @@ static int cifs_readpage(struct file *file, struct page *page) static int is_inode_writable(struct cifsInodeInfo *cifs_inode) { struct cifsFileInfo *open_file; - struct cifs_tcon *tcon = - cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb)); - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return 1; } } - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); return 0; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3bae2e53f0b8..5dcc95b38310 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -414,6 +414,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* if uniqueid is different, return error */ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) { + CIFS_I(*pinode)->time = 0; /* force reval */ rc = -ESTALE; goto cgiiu_exit; } @@ -421,6 +422,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* if filetype is different, return error */ if (unlikely(((*pinode)->i_mode & S_IFMT) != (fattr.cf_mode & S_IFMT))) { + CIFS_I(*pinode)->time = 0; /* force reval */ rc = -ESTALE; goto cgiiu_exit; } @@ -933,6 +935,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, /* if uniqueid is different, return error */ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) { + CIFS_I(*inode)->time = 0; /* force reval */ rc = -ESTALE; goto cgii_exit; } @@ -940,6 +943,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, /* if filetype is different, return error */ if (unlikely(((*inode)->i_mode & S_IFMT) != (fattr.cf_mode & S_IFMT))) { + CIFS_I(*inode)->time = 0; /* force reval */ rc = -ESTALE; goto cgii_exit; } diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 49c17ee18254..9b41436fb8db 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -117,10 +117,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { {0, 0} }; -static const struct smb_to_posix_error mapping_table_ERRHRD[] = { - {0, 0} -}; - /* * Convert a string containing text IPv4 or IPv6 address to binary form. * diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 85f9d614d968..05149862aea4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -751,8 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) unsigned int num = *num_iovec; iov[num].iov_base = create_posix_buf(mode); - if (mode == -1) - cifs_dbg(VFS, "illegal mode\n"); /* BB REMOVEME */ + if (mode == ACL_NO_MODE) + cifs_dbg(FYI, "illegal mode\n"); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_posix); @@ -2521,11 +2521,8 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, return rc; } - /* TODO: add handling for the mode on create */ - if (oparms->disposition == FILE_CREATE) - cifs_dbg(VFS, "mode is 0x%x\n", oparms->mode); /* BB REMOVEME */ - - if ((oparms->disposition == FILE_CREATE) && (oparms->mode != -1)) { + if ((oparms->disposition == FILE_CREATE) && + (oparms->mode != ACL_NO_MODE)) { if (n_iov > 2) { struct create_context *ccontext = (struct create_context *)iov[n_iov-1].iov_base; @@ -3217,7 +3214,8 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; - req->OutputBufferLength = SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE; + req->OutputBufferLength = + cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE); req->CompletionFilter = cpu_to_le32(completion_filter); if (watch_tree) req->Flags = cpu_to_le16(SMB2_WATCH_TREE); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index da3a6d580808..71b2930b8e0b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -150,6 +150,10 @@ extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, bool is_fsctl, char *in_data, u32 indatalen, __u32 max_response_size); extern void SMB2_ioctl_free(struct smb_rqst *rqst); +extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, bool watch_tree, + u32 completion_filter); + extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/direct-io.c b/fs/direct-io.c index ae196784f487..9329ced91f1d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp) } } -/** +/* * dio_complete() - called when all DIO BIO I/O has been completed - * @offset: the byte offset in the file of the completed operation * * This drops i_dio_count, lets interested parties know that a DIO operation * has completed, and calculates the resulting return code for the operation. diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dd654e53ba3d..1b947c95eff2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5835,7 +5835,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); - err = dquot_enable(qf_inode, type, format_id, flags); + err = dquot_load_quota_inode(qf_inode, type, format_id, flags); if (err) lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); iput(qf_inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1443cee15863..91745d5b718d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1932,7 +1932,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; - err = dquot_enable(qf_inode, type, format_id, flags); + err = dquot_load_quota_inode(qf_inode, type, format_id, flags); iput(qf_inode); return err; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8aaa7eec7b74..8461a6322039 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -164,8 +164,13 @@ static void finish_writeback_work(struct bdi_writeback *wb, if (work->auto_free) kfree(work); - if (done && atomic_dec_and_test(&done->cnt)) - wake_up_all(done->waitq); + if (done) { + wait_queue_head_t *waitq = done->waitq; + + /* @done can't be accessed after the following dec */ + if (atomic_dec_and_test(&done->cnt)) + wake_up_all(waitq); + } } static void wb_queue_work(struct bdi_writeback *wb, @@ -900,7 +905,7 @@ restart: * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs * @bdi_id: target bdi id * @memcg_id: target memcg css id - * @nr_pages: number of pages to write, 0 for best-effort dirty flushing + * @nr: number of pages to write, 0 for best-effort dirty flushing * @reason: reason why some writeback work initiated * @done: target wb_completion * diff --git a/fs/io_uring.c b/fs/io_uring.c index aa8ac557493c..76fdbe84aff5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -415,27 +415,27 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) return ctx; } +static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; +} + static inline bool io_sequence_defer(struct io_ring_ctx *ctx, struct io_kiocb *req) { - /* timeout requests always honor sequence */ - if (!(req->flags & REQ_F_TIMEOUT) && - (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) + if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) return false; - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; + return __io_sequence_defer(ctx, req); } -static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx, - struct list_head *list) +static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) { struct io_kiocb *req; - if (list_empty(list)) - return NULL; - - req = list_first_entry(list, struct io_kiocb, list); - if (!io_sequence_defer(ctx, req)) { + req = list_first_entry_or_null(&ctx->defer_list, struct io_kiocb, list); + if (req && !io_sequence_defer(ctx, req)) { list_del_init(&req->list); return req; } @@ -443,14 +443,17 @@ static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx, return NULL; } -static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) -{ - return __io_get_deferred_req(ctx, &ctx->defer_list); -} - static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx) { - return __io_get_deferred_req(ctx, &ctx->timeout_list); + struct io_kiocb *req; + + req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list); + if (req && !__io_sequence_defer(ctx, req)) { + list_del_init(&req->list); + return req; + } + + return NULL; } static void __io_commit_cqring(struct io_ring_ctx *ctx) @@ -591,14 +594,6 @@ static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, io_cqring_ev_posted(ctx); } -static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) -{ - percpu_ref_put_many(&ctx->refs, refs); - - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); -} - static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, struct io_submit_state *state) { @@ -646,7 +641,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->result = 0; return req; out: - io_ring_drop_ctx_refs(ctx, 1); + percpu_ref_put(&ctx->refs); return NULL; } @@ -654,7 +649,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) { if (*nr) { kmem_cache_free_bulk(req_cachep, *nr, reqs); - io_ring_drop_ctx_refs(ctx, *nr); + percpu_ref_put_many(&ctx->refs, *nr); *nr = 0; } } @@ -663,7 +658,7 @@ static void __io_free_req(struct io_kiocb *req) { if (req->file && !(req->flags & REQ_F_FIXED_FILE)) fput(req->file); - io_ring_drop_ctx_refs(req->ctx, 1); + percpu_ref_put(&req->ctx->refs); kmem_cache_free(req_cachep, req); } @@ -1892,15 +1887,15 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) unsigned count, req_dist, tail_index; struct io_ring_ctx *ctx = req->ctx; struct list_head *entry; - struct timespec ts; + struct timespec64 ts; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags || sqe->len != 1) return -EINVAL; - if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr, - sizeof(ts))) + + if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr))) return -EFAULT; /* @@ -1934,7 +1929,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); req->timeout.timer.function = io_timeout_fn; - hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts), + hrtimer_start(&req->timeout.timer, timespec64_to_ktime(ts), HRTIMER_MODE_REL); return 0; } @@ -2761,7 +2756,7 @@ out: if (link) io_queue_link_head(ctx, link, &link->submit, shadow_req, - block_for_last); + !block_for_last); if (statep) io_submit_state_end(statep); @@ -2920,8 +2915,12 @@ static void io_finish_async(struct io_ring_ctx *ctx) static void io_destruct_skb(struct sk_buff *skb) { struct io_ring_ctx *ctx = skb->sk->sk_user_data; + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) + if (ctx->sqo_wq[i]) + flush_workqueue(ctx->sqo_wq[i]); - io_finish_async(ctx); unix_destruct_scm(skb); } @@ -3630,7 +3629,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } } - io_ring_drop_ctx_refs(ctx, 1); + percpu_ref_put(&ctx->refs); out_fput: fdput(f); return submitted ? submitted : ret; diff --git a/fs/libfs.c b/fs/libfs.c index c9b2850c0f7c..1463b038ffc4 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file) EXPORT_SYMBOL(dcache_dir_close); /* parent is locked at least shared */ -static struct dentry *next_positive(struct dentry *parent, - struct list_head *from, - int count) +/* + * Returns an element of siblings' list. + * We are looking for <count>th positive after <p>; if + * found, dentry is grabbed and returned to caller. + * If no such element exists, NULL is returned. + */ +static struct dentry *scan_positives(struct dentry *cursor, + struct list_head *p, + loff_t count, + struct dentry *last) { - unsigned *seq = &parent->d_inode->i_dir_seq, n; - struct dentry *res; - struct list_head *p; - bool skipped; - int i; + struct dentry *dentry = cursor->d_parent, *found = NULL; -retry: - i = count; - skipped = false; - n = smp_load_acquire(seq) & ~1; - res = NULL; - rcu_read_lock(); - for (p = from->next; p != &parent->d_subdirs; p = p->next) { + spin_lock(&dentry->d_lock); + while ((p = p->next) != &dentry->d_subdirs) { struct dentry *d = list_entry(p, struct dentry, d_child); - if (!simple_positive(d)) { - skipped = true; - } else if (!--i) { - res = d; - break; + // we must at least skip cursors, to avoid livelocks + if (d->d_flags & DCACHE_DENTRY_CURSOR) + continue; + if (simple_positive(d) && !--count) { + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(d)) + found = dget_dlock(d); + spin_unlock(&d->d_lock); + if (likely(found)) + break; + count = 1; + } + if (need_resched()) { + list_move(&cursor->d_child, p); + p = &cursor->d_child; + spin_unlock(&dentry->d_lock); + cond_resched(); + spin_lock(&dentry->d_lock); } } - rcu_read_unlock(); - if (skipped) { - smp_rmb(); - if (unlikely(*seq != n)) - goto retry; - } - return res; -} - -static void move_cursor(struct dentry *cursor, struct list_head *after) -{ - struct dentry *parent = cursor->d_parent; - unsigned n, *seq = &parent->d_inode->i_dir_seq; - spin_lock(&parent->d_lock); - for (;;) { - n = *seq; - if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) - break; - cpu_relax(); - } - __list_del(cursor->d_child.prev, cursor->d_child.next); - if (after) - list_add(&cursor->d_child, after); - else - list_add_tail(&cursor->d_child, &parent->d_subdirs); - smp_store_release(seq, n + 2); - spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + dput(last); + return found; } loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) @@ -158,17 +145,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) return -EINVAL; } if (offset != file->f_pos) { + struct dentry *cursor = file->private_data; + struct dentry *to = NULL; + + inode_lock_shared(dentry->d_inode); + + if (offset > 2) + to = scan_positives(cursor, &dentry->d_subdirs, + offset - 2, NULL); + spin_lock(&dentry->d_lock); + if (to) + list_move(&cursor->d_child, &to->d_child); + else + list_del_init(&cursor->d_child); + spin_unlock(&dentry->d_lock); + dput(to); + file->f_pos = offset; - if (file->f_pos >= 2) { - struct dentry *cursor = file->private_data; - struct dentry *to; - loff_t n = file->f_pos - 2; - - inode_lock_shared(dentry->d_inode); - to = next_positive(dentry, &dentry->d_subdirs, n); - move_cursor(cursor, to ? &to->d_child : NULL); - inode_unlock_shared(dentry->d_inode); - } + + inode_unlock_shared(dentry->d_inode); } return offset; } @@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p = &cursor->d_child; - struct dentry *next; - bool moved = false; + struct list_head *anchor = &dentry->d_subdirs; + struct dentry *next = NULL; + struct list_head *p; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) - p = &dentry->d_subdirs; - while ((next = next_positive(dentry, p, 1)) != NULL) { + p = anchor; + else if (!list_empty(&cursor->d_child)) + p = &cursor->d_child; + else + return 0; + + while ((next = scan_positives(cursor, p, 1, next)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) break; - moved = true; - p = &next->d_child; ctx->pos++; + p = &next->d_child; } - if (moved) - move_cursor(cursor, p); + spin_lock(&dentry->d_lock); + if (next) + list_move_tail(&cursor->d_child, &next->d_child); + else + list_del_init(&cursor->d_child); + spin_unlock(&dentry->d_lock); + dput(next); + return 0; } EXPORT_SYMBOL(dcache_readdir); @@ -468,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes - * @available: See .write_end of address_space_operations - * @file: " + * @file: See .write_end of address_space_operations * @mapping: " * @pos: " * @len: " diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 222d7115db71..040a50fd9bf3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -64,13 +64,6 @@ static struct kmem_cache *nfs_direct_cachep; -/* - * This represents a set of asynchronous requests that we're waiting on - */ -struct nfs_direct_mirror { - ssize_t count; -}; - struct nfs_direct_req { struct kref kref; /* release manager */ @@ -84,9 +77,6 @@ struct nfs_direct_req { atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ - struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; - int mirror_count; - loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ @@ -123,32 +113,42 @@ static inline int put_dreq(struct nfs_direct_req *dreq) } static void -nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) +nfs_direct_handle_truncated(struct nfs_direct_req *dreq, + const struct nfs_pgio_header *hdr, + ssize_t dreq_len) { - int i; - ssize_t count; + if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) || + test_bit(NFS_IOHDR_EOF, &hdr->flags))) + return; + if (dreq->max_count >= dreq_len) { + dreq->max_count = dreq_len; + if (dreq->count > dreq_len) + dreq->count = dreq_len; + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) + dreq->error = hdr->error; + else /* Clear outstanding error if this is EOF */ + dreq->error = 0; + } +} - WARN_ON_ONCE(dreq->count >= dreq->max_count); +static void +nfs_direct_count_bytes(struct nfs_direct_req *dreq, + const struct nfs_pgio_header *hdr) +{ + loff_t hdr_end = hdr->io_start + hdr->good_bytes; + ssize_t dreq_len = 0; - if (dreq->mirror_count == 1) { - dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; - dreq->count += hdr->good_bytes; - } else { - /* mirrored writes */ - count = dreq->mirrors[hdr->pgio_mirror_idx].count; - if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { - count = hdr->io_start + hdr->good_bytes - dreq->io_start; - dreq->mirrors[hdr->pgio_mirror_idx].count = count; - } - /* update the dreq->count by finding the minimum agreed count from all - * mirrors */ - count = dreq->mirrors[0].count; + if (hdr_end > dreq->io_start) + dreq_len = hdr_end - dreq->io_start; - for (i = 1; i < dreq->mirror_count; i++) - count = min(count, dreq->mirrors[i].count); + nfs_direct_handle_truncated(dreq, hdr, dreq_len); - dreq->count = count; - } + if (dreq_len > dreq->max_count) + dreq_len = dreq->max_count; + + if (dreq->count < dreq_len) + dreq->count = dreq_len; } /* @@ -293,18 +293,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, cinfo->completion_ops = &nfs_direct_commit_completion_ops; } -static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, - struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - int mirror_count = 1; - - if (pgio->pg_ops->pg_get_mirror_count) - mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); - - dreq->mirror_count = mirror_count; -} - static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -319,7 +307,6 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->mds_cinfo.list); dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); - dreq->mirror_count = 1; spin_lock_init(&dreq->lock); return dreq; @@ -402,20 +389,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_direct_req *dreq = hdr->dreq; spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { spin_unlock(&dreq->lock); goto out_put; } - if (hdr->good_bytes != 0) - nfs_direct_good_bytes(dreq, hdr); - - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) - dreq->error = 0; - + nfs_direct_count_bytes(dreq, hdr); spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -646,29 +625,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) LIST_HEAD(reqs); struct nfs_commit_info cinfo; LIST_HEAD(failed); - int i; nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); dreq->count = 0; + dreq->max_count = 0; + list_for_each_entry(req, &reqs, wb_list) + dreq->max_count += req->wb_bytes; dreq->verf.committed = NFS_INVALID_STABLE_HOW; nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); - for (i = 0; i < dreq->mirror_count; i++) - dreq->mirrors[i].count = 0; get_dreq(dreq); nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; - req = nfs_list_entry(reqs.next); - nfs_direct_setup_mirroring(dreq, &desc, req); - if (desc.pg_error < 0) { - list_splice_init(&reqs, &failed); - goto out_failed; - } - list_for_each_entry_safe(req, tmp, &reqs, wb_list) { /* Bump the transmission count */ req->wb_nio++; @@ -686,7 +658,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } nfs_pageio_complete(&desc); -out_failed: while (!list_empty(&failed)) { req = nfs_list_entry(failed.next); nfs_list_remove_request(req); @@ -791,17 +762,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); - - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { spin_unlock(&dreq->lock); goto out_put; } + nfs_direct_count_bytes(dreq, hdr); if (hdr->good_bytes != 0) { - nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; @@ -923,7 +890,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; } - nfs_direct_setup_mirroring(dreq, &desc, req); if (desc.pg_error < 0) { nfs_free_request(req); result = desc.pg_error; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 11eafcfc490b..ab8ca20fd579 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6106,6 +6106,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, status = nfs4_call_sync_custom(&task_setup_data); if (setclientid.sc_cred) { + kfree(clp->cl_acceptor); clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 85ca49549b39..52cab65f91cf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -786,7 +786,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; - atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; @@ -799,8 +798,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_unlock(&mapping->private_lock); } - if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { nfs_release_request(req); + atomic_long_dec(&nfsi->nrequests); + } } static void diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8de1c9d644f6..9cd0a6815933 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2049,7 +2049,8 @@ out_write_size: inode->i_mtime = inode->i_ctime = current_time(inode); di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ocfs2_update_inode_fsync_trans(handle, inode, 1); + if (handle) + ocfs2_update_inode_fsync_trans(handle, inode, 1); } if (handle) ocfs2_journal_dirty(handle, wc->w_di_bh); @@ -2146,13 +2147,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct ocfs2_dio_write_ctxt *dwc = NULL; struct buffer_head *di_bh = NULL; u64 p_blkno; - loff_t pos = iblock << inode->i_sb->s_blocksize_bits; + unsigned int i_blkbits = inode->i_sb->s_blocksize_bits; + loff_t pos = iblock << i_blkbits; + sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits; unsigned len, total_len = bh_result->b_size; int ret = 0, first_get_block = 0; len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); len = min(total_len, len); + /* + * bh_result->b_size is count in get_more_blocks according to write + * "pos" and "end", we need map twice to return different buffer state: + * 1. area in file size, not set NEW; + * 2. area out file size, set NEW. + * + * iblock endblk + * |--------|---------|---------|--------- + * |<-------area in file------->| + */ + + if ((iblock <= endblk) && + ((iblock + ((len - 1) >> i_blkbits)) > endblk)) + len = (endblk - iblock + 1) << i_blkbits; + mlog(0, "get block of %lu at %llu:%u req %u\n", inode->i_ino, pos, len, total_len); @@ -2236,6 +2254,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, if (desc->c_needs_zero) set_buffer_new(bh_result); + if (iblock > endblk) + set_buffer_new(bh_result); + /* May sleep in end_io. It should not happen in a irq context. So defer * it to dio work queue. */ set_buffer_defer_completion(bh_result); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index d6f7b299eb23..efeea208fdeb 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -283,7 +283,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, if (inode_alloc) inode_lock(inode_alloc); - if (o2info_coherent(&fi->ifi_req)) { + if (inode_alloc && o2info_coherent(&fi->ifi_req)) { status = ocfs2_inode_lock(inode_alloc, &bh, 0); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c81e86c62380..05dd68ade293 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -926,8 +926,8 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb) status = -ENOENT; goto out_quota_off; } - status = dquot_enable(inode[type], type, QFMT_OCFS2, - DQUOT_USAGE_ENABLED); + status = dquot_load_quota_inode(inode[type], type, QFMT_OCFS2, + DQUOT_USAGE_ENABLED); if (status < 0) goto out_quota_off; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 90c830e3758e..d8507972ee13 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1490,18 +1490,6 @@ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, return loc->xl_ops->xlo_check_space(loc, xi); } -static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) -{ - loc->xl_ops->xlo_add_entry(loc, name_hash); - loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); - /* - * We can't leave the new entry's xe_name_offset at zero or - * add_namevalue() will go nuts. We set it to the size of our - * storage so that it can never be less than any other entry. - */ - loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); -} - static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { @@ -2133,29 +2121,31 @@ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, if (rc) goto out; - if (loc->xl_entry) { - if (ocfs2_xa_can_reuse_entry(loc, xi)) { - orig_value_size = loc->xl_entry->xe_value_size; - rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); - if (rc) - goto out; - goto alloc_value; - } + if (!loc->xl_entry) { + rc = -EINVAL; + goto out; + } - if (!ocfs2_xattr_is_local(loc->xl_entry)) { - orig_clusters = ocfs2_xa_value_clusters(loc); - rc = ocfs2_xa_value_truncate(loc, 0, ctxt); - if (rc) { - mlog_errno(rc); - ocfs2_xa_cleanup_value_truncate(loc, - "overwriting", - orig_clusters); - goto out; - } + if (ocfs2_xa_can_reuse_entry(loc, xi)) { + orig_value_size = loc->xl_entry->xe_value_size; + rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); + if (rc) + goto out; + goto alloc_value; + } + + if (!ocfs2_xattr_is_local(loc->xl_entry)) { + orig_clusters = ocfs2_xa_value_clusters(loc); + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); + if (rc) { + mlog_errno(rc); + ocfs2_xa_cleanup_value_truncate(loc, + "overwriting", + orig_clusters); + goto out; } - ocfs2_xa_wipe_namevalue(loc); - } else - ocfs2_xa_add_entry(loc, name_hash); + } + ocfs2_xa_wipe_namevalue(loc); /* * If we get here, we have a blank entry. Fill it. We grow our diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 72d24a5451bb..54a6626c2e9b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2161,14 +2161,29 @@ int dquot_file_open(struct inode *inode, struct file *file) } EXPORT_SYMBOL(dquot_file_open); +static void vfs_cleanup_quota_inode(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + struct inode *inode = dqopt->files[type]; + + if (!inode) + return; + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { + inode_lock(inode); + inode->i_flags &= ~S_NOQUOTA; + inode_unlock(inode); + } + dqopt->files[type] = NULL; + iput(inode); +} + /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ int dquot_disable(struct super_block *sb, int type, unsigned int flags) { - int cnt, ret = 0; + int cnt; struct quota_info *dqopt = sb_dqopt(sb); - struct inode *toputinode[MAXQUOTAS]; /* s_umount should be held in exclusive mode */ if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) @@ -2190,7 +2205,6 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - toputinode[cnt] = NULL; if (type != -1 && cnt != type) continue; if (!sb_has_quota_loaded(sb, cnt)) @@ -2210,8 +2224,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) dqopt->flags &= ~dquot_state_flag( DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); - iput(dqopt->files[cnt]); - dqopt->files[cnt] = NULL; + vfs_cleanup_quota_inode(sb, cnt); continue; } spin_unlock(&dq_state_lock); @@ -2233,10 +2246,6 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); - - toputinode[cnt] = dqopt->files[cnt]; - if (!sb_has_quota_loaded(sb, cnt)) - dqopt->files[cnt] = NULL; dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; @@ -2258,32 +2267,22 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) - /* This can happen when suspending quotas on remount-ro... */ - if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) { - inode_lock(toputinode[cnt]); - toputinode[cnt]->i_flags &= ~S_NOQUOTA; - truncate_inode_pages(&toputinode[cnt]->i_data, 0); - inode_unlock(toputinode[cnt]); - mark_inode_dirty_sync(toputinode[cnt]); + if (!sb_has_quota_loaded(sb, cnt) && dqopt->files[cnt]) { + inode_lock(dqopt->files[cnt]); + truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); + inode_unlock(dqopt->files[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); put_inodes: + /* We are done when suspending quotas */ + if (flags & DQUOT_SUSPENDED) + return 0; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (toputinode[cnt]) { - /* On remount RO, we keep the inode pointer so that we - * can reenable quota on the subsequent remount RW. We - * have to check 'flags' variable and not use sb_has_ - * function because another quotaon / quotaoff could - * change global state before we got here. We refuse - * to suspend quotas when there is pending delete on - * the quota file... */ - if (!(flags & DQUOT_SUSPENDED)) - iput(toputinode[cnt]); - else if (!toputinode[cnt]->i_nlink) - ret = -EBUSY; - } - return ret; + if (!sb_has_quota_loaded(sb, cnt)) + vfs_cleanup_quota_inode(sb, cnt); + return 0; } EXPORT_SYMBOL(dquot_disable); @@ -2298,28 +2297,52 @@ EXPORT_SYMBOL(dquot_quota_off); * Turn quotas on on a device */ -/* - * Helper function to turn quotas on when we already have the inode of - * quota file and no quota information is loaded. - */ -static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, +static int vfs_setup_quota_inode(struct inode *inode, int type) +{ + struct super_block *sb = inode->i_sb; + struct quota_info *dqopt = sb_dqopt(sb); + + if (!S_ISREG(inode->i_mode)) + return -EACCES; + if (IS_RDONLY(inode)) + return -EROFS; + if (sb_has_quota_loaded(sb, type)) + return -EBUSY; + + dqopt->files[type] = igrab(inode); + if (!dqopt->files[type]) + return -EIO; + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { + /* We don't want quota and atime on quota files (deadlocks + * possible) Also nobody should write to the file - we use + * special IO operations which ignore the immutable bit. */ + inode_lock(inode); + inode->i_flags |= S_NOQUOTA; + inode_unlock(inode); + /* + * When S_NOQUOTA is set, remove dquot references as no more + * references can be added + */ + __dquot_drop(inode); + } + return 0; +} + +int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags) { struct quota_format_type *fmt = find_quota_format(format_id); - struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); int error; + /* Just unsuspend quotas? */ + BUG_ON(flags & DQUOT_SUSPENDED); + /* s_umount should be held in exclusive mode */ + if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) + up_read(&sb->s_umount); + if (!fmt) return -ESRCH; - if (!S_ISREG(inode->i_mode)) { - error = -EACCES; - goto out_fmt; - } - if (IS_RDONLY(inode)) { - error = -EROFS; - goto out_fmt; - } if (!sb->s_op->quota_write || !sb->s_op->quota_read || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; @@ -2351,27 +2374,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, invalidate_bdev(sb->s_bdev); } - if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { - /* We don't want quota and atime on quota files (deadlocks - * possible) Also nobody should write to the file - we use - * special IO operations which ignore the immutable bit. */ - inode_lock(inode); - inode->i_flags |= S_NOQUOTA; - inode_unlock(inode); - /* - * When S_NOQUOTA is set, remove dquot references as no more - * references can be added - */ - __dquot_drop(inode); - } - - error = -EIO; - dqopt->files[type] = igrab(inode); - if (!dqopt->files[type]) - goto out_file_flags; error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) - goto out_file_init; + goto out_fmt; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; @@ -2379,7 +2384,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); error = dqopt->ops[type]->read_file_info(sb, type); if (error < 0) - goto out_file_init; + goto out_fmt; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; @@ -2394,24 +2399,36 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dquot_disable(sb, type, flags); return error; -out_file_init: - dqopt->files[type] = NULL; - iput(inode); -out_file_flags: - inode_lock(inode); - inode->i_flags &= ~S_NOQUOTA; - inode_unlock(inode); out_fmt: put_quota_format(fmt); return error; } +EXPORT_SYMBOL(dquot_load_quota_sb); + +/* + * More powerful function for turning on quotas on given quota inode allowing + * setting of individual quota flags + */ +int dquot_load_quota_inode(struct inode *inode, int type, int format_id, + unsigned int flags) +{ + int err; + + err = vfs_setup_quota_inode(inode, type); + if (err < 0) + return err; + err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags); + if (err < 0) + vfs_cleanup_quota_inode(inode->i_sb, type); + return err; +} +EXPORT_SYMBOL(dquot_load_quota_inode); /* Reenable quotas on remount RW */ int dquot_resume(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); - struct inode *inode; int ret = 0, cnt; unsigned int flags; @@ -2425,8 +2442,6 @@ int dquot_resume(struct super_block *sb, int type) if (!sb_has_quota_suspended(sb, cnt)) continue; - inode = dqopt->files[cnt]; - dqopt->files[cnt] = NULL; spin_lock(&dq_state_lock); flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, @@ -2435,9 +2450,10 @@ int dquot_resume(struct super_block *sb, int type) spin_unlock(&dq_state_lock); flags = dquot_generic_flag(flags, cnt); - ret = vfs_load_quota_inode(inode, cnt, - dqopt->info[cnt].dqi_fmt_id, flags); - iput(inode); + ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id, + flags); + if (ret < 0) + vfs_cleanup_quota_inode(sb, type); } return ret; @@ -2454,7 +2470,7 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id, if (path->dentry->d_sb != sb) error = -EXDEV; else - error = vfs_load_quota_inode(d_inode(path->dentry), type, + error = dquot_load_quota_inode(d_inode(path->dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; @@ -2462,41 +2478,6 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id, EXPORT_SYMBOL(dquot_quota_on); /* - * More powerful function for turning on quotas allowing setting - * of individual quota flags - */ -int dquot_enable(struct inode *inode, int type, int format_id, - unsigned int flags) -{ - struct super_block *sb = inode->i_sb; - - /* Just unsuspend quotas? */ - BUG_ON(flags & DQUOT_SUSPENDED); - /* s_umount should be held in exclusive mode */ - if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) - up_read(&sb->s_umount); - - if (!flags) - return 0; - /* Just updating flags needed? */ - if (sb_has_quota_loaded(sb, type)) { - if (flags & DQUOT_USAGE_ENABLED && - sb_has_quota_usage_enabled(sb, type)) - return -EBUSY; - if (flags & DQUOT_LIMITS_ENABLED && - sb_has_quota_limits_enabled(sb, type)) - return -EBUSY; - spin_lock(&dq_state_lock); - sb_dqopt(sb)->flags |= dquot_state_flag(flags, type); - spin_unlock(&dq_state_lock); - return 0; - } - - return vfs_load_quota_inode(inode, type, format_id, flags); -} -EXPORT_SYMBOL(dquot_enable); - -/* * This function is used when filesystem needs to initialize quotas * during mount time. */ @@ -2517,7 +2498,7 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, error = security_quota_on(dentry); if (!error) - error = vfs_load_quota_inode(d_inode(dentry), type, format_id, + error = dquot_load_quota_inode(d_inode(dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); out: @@ -2542,13 +2523,17 @@ static int dquot_quota_enable(struct super_block *sb, unsigned int flags) if (!(flags & qtype_enforce_flag(type))) continue; /* Can't enforce without accounting */ - if (!sb_has_quota_usage_enabled(sb, type)) - return -EINVAL; - ret = dquot_enable(dqopt->files[type], type, - dqopt->info[type].dqi_fmt_id, - DQUOT_LIMITS_ENABLED); - if (ret < 0) + if (!sb_has_quota_usage_enabled(sb, type)) { + ret = -EINVAL; + goto out_err; + } + if (sb_has_quota_limits_enabled(sb, type)) { + ret = -EBUSY; goto out_err; + } + spin_lock(&dq_state_lock); + dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); + spin_unlock(&dq_state_lock); } return 0; out_err: @@ -2598,10 +2583,12 @@ static int dquot_quota_disable(struct super_block *sb, unsigned int flags) out_err: /* Backout enforcement disabling we already did */ for (type--; type >= 0; type--) { - if (flags & qtype_enforce_flag(type)) - dquot_enable(dqopt->files[type], type, - dqopt->info[type].dqi_fmt_id, - DQUOT_LIMITS_ENABLED); + if (flags & qtype_enforce_flag(type)) { + spin_lock(&dq_state_lock); + dqopt->flags |= + dquot_state_flag(DQUOT_LIMITS_ENABLED, type); + spin_unlock(&dq_state_lock); + } } return ret; } @@ -2799,8 +2786,10 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state) tstate->flags |= QCI_LIMITS_ENFORCED; tstate->spc_timelimit = mi->dqi_bgrace; tstate->ino_timelimit = mi->dqi_igrace; - tstate->ino = dqopt->files[type]->i_ino; - tstate->blocks = dqopt->files[type]->i_blocks; + if (dqopt->files[type]) { + tstate->ino = dqopt->files[type]->i_ino; + tstate->blocks = dqopt->files[type]->i_blocks; + } tstate->nextents = 1; /* We don't know... */ spin_unlock(&dq_data_lock); } diff --git a/fs/readdir.c b/fs/readdir.c index 2f6a4534e0df..6e2623e57b2e 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -20,9 +20,23 @@ #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/compat.h> - #include <linux/uaccess.h> +#include <asm/unaligned.h> + +/* + * Note the "unsafe_put_user() semantics: we goto a + * label for errors. + */ +#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ + char __user *dst = (_dst); \ + const char *src = (_src); \ + size_t len = (_len); \ + unsafe_put_user(0, dst+len, label); \ + unsafe_copy_to_user(dst, src, len, label); \ +} while (0) + + int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); @@ -65,6 +79,40 @@ out: EXPORT_SYMBOL(iterate_dir); /* + * POSIX says that a dirent name cannot contain NULL or a '/'. + * + * It's not 100% clear what we should really do in this case. + * The filesystem is clearly corrupted, but returning a hard + * error means that you now don't see any of the other names + * either, so that isn't a perfect alternative. + * + * And if you return an error, what error do you use? Several + * filesystems seem to have decided on EUCLEAN being the error + * code for EFSCORRUPTED, and that may be the error to use. Or + * just EIO, which is perhaps more obvious to users. + * + * In order to see the other file names in the directory, the + * caller might want to make this a "soft" error: skip the + * entry, and return the error at the end instead. + * + * Note that this should likely do a "memchr(name, 0, len)" + * check too, since that would be filesystem corruption as + * well. However, that case can't actually confuse user space, + * which has to do a strlen() on the name anyway to find the + * filename length, and the above "soft error" worry means + * that it's probably better left alone until we have that + * issue clarified. + */ +static int verify_dirent_name(const char *name, int len) +{ + if (WARN_ON_ONCE(!len)) + return -EIO; + if (WARN_ON_ONCE(memchr(name, '/', len))) + return -EIO; + return 0; +} + +/* * Traditional linux readdir() handling.. * * "count=1" is a special case, meaning that the buffer is one @@ -173,6 +221,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; @@ -182,28 +233,31 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, return -EOVERFLOW; } dirent = buf->previous; - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user(d_ino, &dirent->d_ino)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) - goto efault; - if (__put_user(d_type, (char __user *) dirent + reclen - 1)) + if (dirent && signal_pending(current)) + return -EINTR; + + /* + * Note! This range-checks 'previous' (which may be NULL). + * The real range was checked in getdents + */ + if (!user_access_begin(dirent, sizeof(*dirent))) goto efault; + if (dirent) + unsafe_put_user(offset, &dirent->d_off, efault_end); + dirent = buf->current_dir; + unsafe_put_user(d_ino, &dirent->d_ino, efault_end); + unsafe_put_user(reclen, &dirent->d_reclen, efault_end); + unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); + unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); + user_access_end(); + buf->previous = dirent; dirent = (void __user *)dirent + reclen; buf->current_dir = dirent; buf->count -= reclen; return 0; +efault_end: + user_access_end(); efault: buf->error = -EFAULT; return -EFAULT; @@ -259,34 +313,38 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; dirent = buf->previous; - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user(ino, &dirent->d_ino)) - goto efault; - if (__put_user(0, &dirent->d_off)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (__put_user(d_type, &dirent->d_type)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) + if (dirent && signal_pending(current)) + return -EINTR; + + /* + * Note! This range-checks 'previous' (which may be NULL). + * The real range was checked in getdents + */ + if (!user_access_begin(dirent, sizeof(*dirent))) goto efault; + if (dirent) + unsafe_put_user(offset, &dirent->d_off, efault_end); + dirent = buf->current_dir; + unsafe_put_user(ino, &dirent->d_ino, efault_end); + unsafe_put_user(reclen, &dirent->d_reclen, efault_end); + unsafe_put_user(d_type, &dirent->d_type, efault_end); + unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); + user_access_end(); + buf->previous = dirent; dirent = (void __user *)dirent + reclen; buf->current_dir = dirent; buf->count -= reclen; return 0; +efault_end: + user_access_end(); efault: buf->error = -EFAULT; return -EFAULT; diff --git a/fs/super.c b/fs/super.c index f627b7c53d2b..cfadab2cbf35 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1300,6 +1300,7 @@ int get_tree_bdev(struct fs_context *fc, mutex_lock(&bdev->bd_fsfreeze_mutex); if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); + blkdev_put(bdev, mode); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); return -EBUSY; } @@ -1308,8 +1309,10 @@ int get_tree_bdev(struct fs_context *fc, fc->sget_key = bdev; s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); mutex_unlock(&bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) + if (IS_ERR(s)) { + blkdev_put(bdev, mode); return PTR_ERR(s); + } if (s->s_root) { /* Don't summarily change the RO/RW state. */ diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 9fc14e38927f..0caa151cae4e 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -16,11 +16,11 @@ #include <linux/namei.h> #include <linux/tracefs.h> #include <linux/fsnotify.h> +#include <linux/security.h> #include <linux/seq_file.h> #include <linux/parser.h> #include <linux/magic.h> #include <linux/slab.h> -#include <linux/security.h> #define TRACEFS_DEFAULT_MODE 0700 @@ -28,25 +28,6 @@ static struct vfsmount *tracefs_mount; static int tracefs_mount_count; static bool tracefs_registered; -static int default_open_file(struct inode *inode, struct file *filp) -{ - struct dentry *dentry = filp->f_path.dentry; - struct file_operations *real_fops; - int ret; - - if (!dentry) - return -EINVAL; - - ret = security_locked_down(LOCKDOWN_TRACEFS); - if (ret) - return ret; - - real_fops = dentry->d_fsdata; - if (!real_fops->open) - return 0; - return real_fops->open(inode, filp); -} - static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -241,12 +222,6 @@ static int tracefs_apply_options(struct super_block *sb) return 0; } -static void tracefs_destroy_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode)) - kfree(inode->i_fop); -} - static int tracefs_remount(struct super_block *sb, int *flags, char *data) { int err; @@ -283,7 +258,6 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root) static const struct super_operations tracefs_super_operations = { .statfs = simple_statfs, .remount_fs = tracefs_remount, - .destroy_inode = tracefs_destroy_inode, .show_options = tracefs_show_options, }; @@ -414,10 +388,12 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { - struct file_operations *proxy_fops; struct dentry *dentry; struct inode *inode; + if (security_locked_down(LOCKDOWN_TRACEFS)) + return NULL; + if (!(mode & S_IFMT)) mode |= S_IFREG; BUG_ON(!S_ISREG(mode)); @@ -430,20 +406,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, if (unlikely(!inode)) return failed_creating(dentry); - proxy_fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL); - if (unlikely(!proxy_fops)) { - iput(inode); - return failed_creating(dentry); - } - - if (!fops) - fops = &tracefs_file_operations; - - dentry->d_fsdata = (void *)fops; - memcpy(proxy_fops, fops, sizeof(*proxy_fops)); - proxy_fops->open = default_open_file; inode->i_mode = mode; - inode->i_fop = proxy_fops; + inode->i_fop = fops ? fops : &tracefs_file_operations; inode->i_private = data; d_instantiate(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 5de296b34ab1..14fbdf22b7e7 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -28,12 +28,11 @@ xfs_get_aghdr_buf( struct xfs_mount *mp, xfs_daddr_t blkno, size_t numblks, - int flags, const struct xfs_buf_ops *ops) { struct xfs_buf *bp; - bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags); + bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0); if (!bp) return NULL; @@ -345,7 +344,7 @@ xfs_ag_init_hdr( { struct xfs_buf *bp; - bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops); + bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops); if (!bp) return -ENOMEM; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index b9f019603d0b..f0089e862216 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -826,32 +826,17 @@ xfs_attr_shortform_to_leaf( sf = (xfs_attr_shortform_t *)tmpbuffer; xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); - xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); + xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK); bp = NULL; error = xfs_da_grow_inode(args, &blkno); - if (error) { - /* - * If we hit an IO error middle of the transaction inside - * grow_inode(), we may have inconsistent data. Bail out. - */ - if (error == -EIO) - goto out; - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ - memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ + if (error) goto out; - } ASSERT(blkno == 0); error = xfs_attr3_leaf_create(args, blkno, &bp); - if (error) { - /* xfs_attr3_leaf_create may not have instantiated a block */ - if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0)) - goto out; - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ - memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ + if (error) goto out; - } memset((char *)&nargs, 0, sizeof(nargs)); nargs.dp = dp; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 4edc25a2ba80..02469d59c787 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -792,6 +792,7 @@ out_root_realloc: */ void xfs_bmap_local_to_extents_empty( + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork) { @@ -808,6 +809,7 @@ xfs_bmap_local_to_extents_empty( ifp->if_u1.if_root = NULL; ifp->if_height = 0; XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } @@ -840,7 +842,7 @@ xfs_bmap_local_to_extents( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); if (!ifp->if_bytes) { - xfs_bmap_local_to_extents_empty(ip, whichfork); + xfs_bmap_local_to_extents_empty(tp, ip, whichfork); flags = XFS_ILOG_CORE; goto done; } @@ -887,7 +889,7 @@ xfs_bmap_local_to_extents( /* account for the change in fork size */ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); - xfs_bmap_local_to_extents_empty(ip, whichfork); + xfs_bmap_local_to_extents_empty(tp, ip, whichfork); flags |= XFS_ILOG_CORE; ifp->if_u1.if_root = NULL; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 5bb446d80542..e2798c6f3a5f 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -182,7 +182,8 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version); -void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); +void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, + struct xfs_inode *ip, int whichfork); void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, bool skip_discard); diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 9595ced393dc..49e4bc39e7bb 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -1096,7 +1096,7 @@ xfs_dir2_sf_to_block( memcpy(sfp, oldsfp, ifp->if_bytes); xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); - xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); + xfs_bmap_local_to_extents_empty(tp, dp, XFS_DATA_FORK); dp->i_d.di_size = 0; /* diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 93b3793bc5b3..0cab11a5d390 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -341,7 +341,6 @@ xchk_refcountbt_rec( xfs_extlen_t len; xfs_nlink_t refcount; bool has_cowflag; - int error = 0; bno = be32_to_cpu(rec->refc.rc_startblock); len = be32_to_cpu(rec->refc.rc_blockcount); @@ -366,7 +365,7 @@ xchk_refcountbt_rec( xchk_refcountbt_xref(bs->sc, bno, len, refcount); - return error; + return 0; } /* Make sure we have as many refc blocks as the rmap says. */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 0910cb75b65d..4f443703065e 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -864,6 +864,7 @@ xfs_alloc_file_space( xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; + xfs_fileoff_t endoffset_fsb; int nimaps; int quota_flag; int rt; @@ -891,7 +892,8 @@ xfs_alloc_file_space( imapp = &imaps[0]; nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); - allocatesize_fsb = XFS_B_TO_FSB(mp, count); + endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); + allocatesize_fsb = endoffset_fsb - startoffset_fsb; /* * Allocate file space until done or until there is an error diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 21c243622a79..0abba171aa89 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -345,6 +345,15 @@ xfs_buf_allocate_memory( unsigned short page_count, i; xfs_off_t start, end; int error; + xfs_km_flags_t kmflag_mask = 0; + + /* + * assure zeroed buffer for non-read cases. + */ + if (!(flags & XBF_READ)) { + kmflag_mask |= KM_ZERO; + gfp_mask |= __GFP_ZERO; + } /* * for buffers that are contained within a single page, just allocate @@ -354,7 +363,8 @@ xfs_buf_allocate_memory( size = BBTOB(bp->b_length); if (size < PAGE_SIZE) { int align_mask = xfs_buftarg_dma_alignment(bp->b_target); - bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS); + bp->b_addr = kmem_alloc_io(size, align_mask, + KM_NOFS | kmflag_mask); if (!bp->b_addr) { /* low memory - use alloc_page loop instead */ goto use_alloc_page; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2beee9f74da..641d07f30a27 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1443,7 +1443,7 @@ xlog_alloc_log( prev_iclog = iclog; iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask, - KM_MAYFAIL); + KM_MAYFAIL | KM_ZERO); if (!iclog->ic_data) goto out_free_iclog; #ifdef DEBUG diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 508319039dce..c1a514ffff55 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -127,7 +127,7 @@ xlog_alloc_buffer( if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL); + return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO); } /* |