diff options
Diffstat (limited to 'fs')
40 files changed, 301 insertions, 181 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index f1046cf6ad85..bfb1c6095c7a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS config VALIDATE_FS_PARSER bool "Validate filesystem parameter description" - default y help Enable this to perform validation of the parameter description for a filesystem when it is registered. diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 86da532c192f..df415c05939e 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -246,8 +246,8 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", - &result, _expiry, true); + ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len, + "srv=1", &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d441bef72163..915010464572 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -275,9 +275,9 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_super_info *as = AFS_FS_S(cbi->sb); struct afs_volume *volume = as->volume; - write_lock(&volume->cb_break_lock); + write_lock(&volume->cb_v_break_lock); volume->cb_v_break++; - write_unlock(&volume->cb_break_lock); + write_unlock(&volume->cb_v_break_lock); } else { data.volume = NULL; data.fid = *fid; diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 9b3b2f1f1fc0..bcd1bafb0278 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -24,6 +24,7 @@ const struct file_operations afs_dynroot_file_operations = { static int afs_probe_cell_name(struct dentry *dentry) { struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; int ret; @@ -36,13 +37,14 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + cell = afs_lookup_cell_rcu(net, name, len); if (!IS_ERR(cell)) { - afs_put_cell(afs_d2net(dentry), cell); + afs_put_cell(net, cell); return 0; } - ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false); + ret = dns_query(net->net, "afsdb", name, len, "srv=1", + NULL, NULL, false); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b42d9d09669c..18a50d4febcf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren } /* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* * Initialise an inode from the vnode status. */ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, @@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } - /* - * Estimate 512 bytes blocks used, rounded up to nearest 1K - * for consistency with other AFS clients. - */ - inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); vnode->invalid_before = status->data_version; inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); @@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (expected_version && *expected_version != status->data_version) { - kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version, - fc->type ? fc->type->name : "???"); + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)*expected_version, + (unsigned long long)status->data_version, + fc->type ? fc->type->name : "???"); + vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8a67bf741880..7ee63526c6a2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -109,10 +109,8 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - union { - struct afs_server *server; - struct afs_vlserver *vlserver; - }; + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ struct afs_cb_interest *cbi; /* Callback interest for server used */ struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ @@ -616,7 +614,7 @@ struct afs_volume { unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned cb_v_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; + rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ short error; diff --git a/fs/afs/security.c b/fs/afs/security.c index 71e71c07568f..8866703b2e6c 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -28,7 +28,7 @@ struct key *afs_request_key(struct afs_cell *cell) _debug("key %s", cell->anonymous_key->description); key = request_key(&key_type_rxrpc, cell->anonymous_key->description, - NULL); + NULL, NULL); if (IS_ERR(key)) { if (PTR_ERR(key) != -ENOKEY) { _leave(" = %ld", PTR_ERR(key)); diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 08fdb3951c49..1a414300b654 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, atomic_set(&volume->usage, 1); INIT_LIST_HEAD(&volume->proc_link); rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); @@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_timespec64(&ts, timeout))) @@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..e4b59e76afb0 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -856,9 +856,14 @@ err: static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. + */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; - - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - res = prepare_binprm(&bprm); - - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6af2d0d4a87a..c8a9b89b922d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2121,9 +2121,10 @@ retry: if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", pos, temp); - } else if (stop_on_nosnap && inode && + } else if (stop_on_nosnap && inode && dentry != temp && ceph_snap(inode) == CEPH_NOSNAP) { spin_unlock(&temp->d_lock); + pos++; /* get rid of any prepended '/' */ break; } else { pos -= temp->d_name.len; diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index aae2b8b2adf5..523e9ea78a28 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -10,7 +10,7 @@ config CIFS select CRYPTO_SHA512 select CRYPTO_CMAC select CRYPTO_HMAC - select CRYPTO_ARC4 + select CRYPTO_LIB_ARC4 select CRYPTO_AEAD2 select CRYPTO_CCM select CRYPTO_ECB diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 7f01c6e60791..d1b439ad0f1a 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -32,6 +32,25 @@ #include "cifsproto.h" static const struct cred *spnego_cred; +static struct key_acl cifs_spnego_key_acl = { + .usage = REFCOUNT_INIT(1), + .nr_ace = 2, + .possessor_viewable = true, + .aces = { + KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ), + KEY_OWNER_ACE(KEY_ACE_VIEW), + } +}; + +static struct key_acl cifs_spnego_keyring_acl = { + .usage = REFCOUNT_INIT(1), + .nr_ace = 2, + .aces = { + KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE), + KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_CLEAR), + } +}; + /* create a new cifs key */ static int cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) @@ -170,7 +189,8 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) cifs_dbg(FYI, "key description = %s\n", description); saved_cred = override_creds(spnego_cred); - spnego_key = request_key(&cifs_spnego_key_type, description, ""); + spnego_key = request_key(&cifs_spnego_key_type, description, "", + &cifs_spnego_key_acl); revert_creds(saved_cred); #ifdef CONFIG_CIFS_DEBUG2 @@ -207,8 +227,7 @@ init_cifs_spnego(void) keyring = keyring_alloc(".cifs_spnego", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, + &cifs_spnego_keyring_acl, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1d377b7f2860..78eed72f3af0 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -33,6 +33,25 @@ #include "cifsproto.h" #include "cifs_debug.h" +static struct key_acl cifs_idmap_key_acl = { + .usage = REFCOUNT_INIT(1), + .nr_ace = 2, + .possessor_viewable = true, + .aces = { + KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ), + KEY_OWNER_ACE(KEY_ACE_VIEW), + } +}; + +static struct key_acl cifs_idmap_keyring_acl = { + .usage = REFCOUNT_INIT(1), + .nr_ace = 2, + .aces = { + KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE), + KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ), + } +}; + /* security id for everyone/world system group */ static const struct cifs_sid sid_everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; @@ -298,7 +317,8 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) rc = 0; saved_cred = override_creds(root_cred); - sidkey = request_key(&cifs_idmap_key_type, desc, ""); + sidkey = request_key(&cifs_idmap_key_type, desc, "", + &cifs_idmap_key_acl); if (IS_ERR(sidkey)) { rc = -EINVAL; cifs_dbg(FYI, "%s: Can't map %cid %u to a SID\n", @@ -403,7 +423,8 @@ try_upcall_to_get_id: return -ENOMEM; saved_cred = override_creds(root_cred); - sidkey = request_key(&cifs_idmap_key_type, sidstr, ""); + sidkey = request_key(&cifs_idmap_key_type, sidstr, "", + &cifs_idmap_key_acl); if (IS_ERR(sidkey)) { rc = -EINVAL; cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n", @@ -481,8 +502,7 @@ init_cifs_idmap(void) keyring = keyring_alloc(".cifs_idmap", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, + &cifs_idmap_keyring_acl, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index d2a05e46d6f5..97b7497c13ef 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -33,7 +33,8 @@ #include <linux/ctype.h> #include <linux/random.h> #include <linux/highmem.h> -#include <crypto/skcipher.h> +#include <linux/fips.h> +#include <crypto/arc4.h> #include <crypto/aead.h> int __cifs_calc_signature(struct smb_rqst *rqst, @@ -772,63 +773,32 @@ setup_ntlmv2_rsp_ret: int calc_seckey(struct cifs_ses *ses) { - int rc; - struct crypto_skcipher *tfm_arc4; - struct scatterlist sgin, sgout; - struct skcipher_request *req; - unsigned char *sec_key; + unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */ + struct arc4_ctx *ctx_arc4; - sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL); - if (sec_key == NULL) - return -ENOMEM; + if (fips_enabled) + return -ENODEV; get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); - tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm_arc4)) { - rc = PTR_ERR(tfm_arc4); - cifs_dbg(VFS, "could not allocate crypto API arc4\n"); - goto out; - } - - rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response, - CIFS_SESS_KEY_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set response as a key\n", - __func__); - goto out_free_cipher; - } - - req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; - cifs_dbg(VFS, "could not allocate crypto API arc4 request\n"); - goto out_free_cipher; + ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL); + if (!ctx_arc4) { + cifs_dbg(VFS, "could not allocate arc4 context\n"); + return -ENOMEM; } - sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); - sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL); - - rc = crypto_skcipher_encrypt(req); - skcipher_request_free(req); - if (rc) { - cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc); - goto out_free_cipher; - } + arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); + arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, + CIFS_CPHTXT_SIZE); /* make secondary_key/nonce as session key */ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE); /* and make len as that of session key only */ ses->auth_key.len = CIFS_SESS_KEY_SIZE; -out_free_cipher: - crypto_free_skcipher(tfm_arc4); -out: - kfree(sec_key); - return rc; + memzero_explicit(sec_key, CIFS_SESS_KEY_SIZE); + kzfree(ctx_arc4); + return 0; } void diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 65d9771e49f9..72db1c89bf5a 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1591,7 +1591,6 @@ MODULE_DESCRIPTION ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and " "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); -MODULE_SOFTDEP("pre: arc4"); MODULE_SOFTDEP("pre: des"); MODULE_SOFTDEP("pre: ecb"); MODULE_SOFTDEP("pre: hmac"); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8dd6637a3cbb..ae6bae2ecb5d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2631,7 +2631,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) task = xchg(&server->tsk, NULL); if (task) - force_sig(SIGKILL, task); + send_sig(SIGKILL, task, 1); } static struct TCP_Server_Info * @@ -2992,7 +2992,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc); - key = request_key(&key_type_logon, desc, ""); + key = request_key(&key_type_logon, desc, "", NULL); if (IS_ERR(key)) { if (!ses->domainName) { cifs_dbg(FYI, "domainName is NULL\n"); @@ -3003,7 +3003,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) /* didn't work, try to find a domain key */ sprintf(desc, "cifs:d:%s", ses->domainName); cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc); - key = request_key(&key_type_logon, desc, ""); + key = request_key(&key_type_logon, desc, "", NULL); if (IS_ERR(key)) { rc = PTR_ERR(key); goto out_err; diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 1e21b2528cfb..534cbba72789 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -77,7 +77,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) goto name_is_IP_address; /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false); + rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, + NULL, ip_addr, NULL, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3fdc6a41b304..9fd56b0acd7e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2372,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, kfree(dfs_rsp); return rc; } + +static int +parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + unsigned int sub_len; + unsigned int sub_offset; + + /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */ + if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) { + cifs_dbg(VFS, "srv returned invalid symlink buffer\n"); + return -EIO; + } + + sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); + sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + if (sub_offset + 20 > plen || + sub_offset + sub_len + 20 > plen) { + cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); + return -EIO; + } + + *target_path = cifs_strndup_from_utf16( + symlink_buf->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) + return -ENOMEM; + + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + + return 0; +} + #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2401,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct kvec close_iov[1]; struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; - char *ioctl_buf; + struct reparse_data_buffer *reparse_buf; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + *target_path = NULL; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -2483,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == 0) && (is_reparse_point)) { /* See MS-FSCC 2.3.23 */ - ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset); + reparse_buf = (struct reparse_data_buffer *) + ((char *)ioctl_rsp + + le32_to_cpu(ioctl_rsp->OutputOffset)); plen = le32_to_cpu(ioctl_rsp->OutputCount); if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > rsp_iov[1].iov_len) { - cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen); + cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n", + plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < 8) { + cifs_dbg(VFS, "reparse buffer is too small. Must be " + "at least 8 bytes but was %d\n", plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) { + cifs_dbg(VFS, "srv returned invalid reparse buf " + "length: %d\n", plen); rc = -EIO; goto querty_exit; } - /* Do stuff with ioctl_buf/plen */ + rc = parse_reparse_symlink( + (struct reparse_symlink_data_buffer *)reparse_buf, + plen, target_path, cifs_sb); goto querty_exit; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c7d5813bebd8..858353d20c39 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer { __u8 PathBuffer[0]; /* Variable Length */ } __packed; -/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ +#define SYMLINK_FLAG_RELATIVE 0x00000001 + +struct reparse_symlink_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; /* Variable Length */ +} __packed; /* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index dcd91a3fbe49..4f85af8ab239 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -92,7 +92,7 @@ find_and_lock_process_key(const char *prefix, if (!description) return ERR_PTR(-ENOMEM); - key = request_key(&key_type_logon, description, NULL); + key = request_key(&key_type_logon, description, NULL, NULL); kfree(description); if (IS_ERR(key)) return key; @@ -720,12 +720,11 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + void *old; + dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); - } - - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -734,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas, * existing entry is a PMD, we will just leave the PMD in the * tree and dirty it if necessary. */ - void *old = dax_lock_entry(xas, new_entry); + old = dax_lock_entry(xas, new_entry); WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) | DAX_LOCKED)); entry = new_entry; @@ -1188,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, unsigned flags = 0; if (iov_iter_rw(iter) == WRITE) { - lockdep_assert_held_exclusive(&inode->i_rwsem); + lockdep_assert_held_write(&inode->i_rwsem); flags |= IOMAP_WRITE; } else { lockdep_assert_held(&inode->i_rwsem); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 1c1a56be7ea2..67844fe41a61 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -91,7 +91,7 @@ ecryptfs_get_encrypted_key_payload_data(struct key *key) static inline struct key *ecryptfs_get_encrypted_key(char *sig) { - return request_key(&key_type_encrypted, sig, NULL); + return request_key(&key_type_encrypted, sig, NULL, NULL); } #else diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 9536e592e25a..ba382f135918 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1610,7 +1610,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, { int rc = 0; - (*auth_tok_key) = request_key(&key_type_user, sig, NULL); + (*auth_tok_key) = request_key(&key_type_user, sig, NULL, NULL); if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { (*auth_tok_key) = ecryptfs_get_encrypted_key(sig); if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c6f513100cc9..4c74c768ae43 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, error = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, error == -EINTR); return error; } @@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, err = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, err == -EINTR); return err; } diff --git a/fs/exec.c b/fs/exec.c index 89a500bb897a..c71cbfe6826a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1663,7 +1663,7 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval < 0 && !bprm->mm) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return retval; } if (retval != -ENOEXEC || !bprm->file) { diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 72ebfe578f40..67b7bda5647a 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -317,7 +317,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) const char *buf; int len; - key = request_key(&key_type_user, "fscache:objlist", NULL); + key = request_key(&key_type_user, "fscache:objlist", NULL, NULL); if (IS_ERR(key)) goto no_config; diff --git a/fs/inode.c b/fs/inode.c index df6542ec3b88..2bf21e2c90fc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); diff --git a/fs/io_uring.c b/fs/io_uring.c index eb6ab1507913..4ed4b110a154 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ @@ -1798,10 +1799,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, req->sequence = ctx->cached_sq_head - 1; } - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || @@ -2198,11 +2197,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); - if (ret == -ERESTARTSYS) - ret = -EINTR; if (sig) - restore_user_sigmask(sig, &sigsaved); + restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS); + + if (ret == -ERESTARTSYS) + ret = -EINTR; return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } diff --git a/fs/namespace.c b/fs/namespace.c index 7660c2749c96..6fbc9126367a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2596,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path) if (!check_mnt(p)) goto out; - /* The thing moved should be either ours or completely unattached. */ - if (attached && !check_mnt(old)) + /* The thing moved must be mounted... */ + if (!is_mounted(&old->mnt)) goto out; - if (!attached && !(ns && is_anon_ns(ns))) + /* ... and either ours or the root of anon namespace */ + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; if (old->mnt.mnt_flags & MNT_LOCKED) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index e6a700f01452..aec769a500a1 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -22,7 +22,8 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, char *ip_addr = NULL; int ip_len; - ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false); + ip_len = dns_query(net, NULL, name, namelen, NULL, &ip_addr, NULL, + false); if (ip_len > 0) ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..19f856f45689 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 4884fdae28fb..69679f4f2e6c 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -72,6 +72,25 @@ struct idmap { const struct cred *cred; }; +static struct key_acl nfs_idmap_key_acl = { + .usage = REFCOUNT_INIT(1), + .nr_ace = 2, + .possessor_viewable = true, + .aces = { + KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ), + KEY_OWNER_ACE(KEY_ACE_VIEW), + } +}; + +static struct key_acl nfs_idmap_keyring_acl = { + .usage = REFCOUNT_INIT(1), + .nr_ace = 2, + .aces = { + KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE), + KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ), + } +}; + static struct user_namespace *idmap_userns(const struct idmap *idmap) { if (idmap && idmap->cred) @@ -208,8 +227,7 @@ int nfs_idmap_init(void) keyring = keyring_alloc(".id_resolver", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, + &nfs_idmap_keyring_acl, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -287,11 +305,13 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, return ERR_PTR(ret); if (!idmap->cred || idmap->cred->user_ns == &init_user_ns) - rkey = request_key(&key_type_id_resolver, desc, ""); + rkey = request_key(&key_type_id_resolver, desc, "", + &nfs_idmap_key_acl); if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, - desc, "", 0, idmap); + desc, NULL, "", 0, idmap, + &nfs_idmap_key_acl); mutex_unlock(&idmap->idmap_mutex); } if (!IS_ERR(rkey)) @@ -320,8 +340,6 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, } rcu_read_lock(); - rkey->perm |= KEY_USR_VIEW; - ret = key_validate(rkey); if (ret < 0) goto out_up; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 618e66078ee5..1a0cdeb3b875 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1563,7 +1563,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) * Never use more than a third of the remaining memory, * unless it's the only way to give this client a slot: */ - avail = clamp_t(int, avail, slotsize, total_avail/3); + avail = clamp_t(unsigned long, avail, slotsize, total_avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 62ee41b4bbd0..4c3dcb718961 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -98,3 +98,7 @@ config PROC_CHILDREN Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_PID_ARCH_STATUS + def_bool n + depends on PROC_FS diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..46dcb6f0eccf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); } static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. */ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..c40fca98f2b7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3077,8 +3080,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); @@ -3449,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/select.c b/fs/select.c index 6cbc9ff56ba0..a4d8f6e8b63c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index 60f43b93d06e..38718026ad0b 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -227,7 +227,7 @@ int ubifs_init_authentication(struct ubifs_info *c) snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", c->auth_hash_name); - keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL); + keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL, NULL); if (IS_ERR(keyring_key)) { ubifs_err(c, "Failed to request key: %ld", diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ae0b8b5f69e6..ccbdbd62f0d8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -40,6 +40,16 @@ enum userfaultfd_state { /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. */ struct userfaultfd_ctx { /* waitqueue head for the pending (i.e. not read) userfaults */ @@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : TASK_KILLABLE; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * __add_wait_queue. */ set_current_state(blocking_state); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (!is_vm_hugetlb_page(vmf->vma)) must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * kernel stack can be released after the list_del_init. */ if (!list_empty_careful(&uwq.wq.entry)) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ list_del(&uwq.wq.entry); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } /* @@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, init_waitqueue_entry(&ewq->wq, current); release_new_ctx = NULL; - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); } __set_current_state(TASK_RUNNING); - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); if (release_new_ctx) { struct vm_area_struct *vma; @@ -918,10 +928,10 @@ wakeup: * the last page faults that may have been already waiting on * the fault_*wqh. */ - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* Flush pending events that may still wait on event_wqh */ wake_up_all(&ctx->event_wqh); @@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can @@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (ret) userfaultfd_ctx_put(fork_nctx); } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); } return ret; @@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, @@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; total++; @@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown |