diff options
Diffstat (limited to 'fs')
69 files changed, 1437 insertions, 884 deletions
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd201716..a7528b913936 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char *iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..396a9884591f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. 
Note @@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_unwritten(bh)); /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - - /* * Only clear out a write error when rewriting */ if (test_set_buffer_req(bh) && (rw & WRITE)) @@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). 
* * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. 
*/ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 917b7d449bb2..0da1debd499d 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -2,6 +2,8 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS + select CRYPTO_MD5 + select CRYPTO_ARC4 help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce34e0bc..21f0fbd86989 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) server->sec_mskerberos = true; - else if (compare_oid(oid, oidlen, KRB5U2U_OID, + if (compare_oid(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN)) server->sec_kerberosu2u = true; - else if (compare_oid(oid, oidlen, KRB5_OID, + if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN)) server->sec_kerberos = true; - else if (compare_oid(oid, oidlen, NTLMSSP_OID, + if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) server->sec_ntlmssp = true; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 650638275a6f..7fe6b52df507 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -30,6 +30,8 
@@ * This is a compressed table of upper and lower case conversion. * */ +#ifndef _CIFS_UNICODE_H +#define _CIFS_UNICODE_H #include <asm/byteorder.h> #include <linux/types.h> @@ -67,8 +69,8 @@ extern const struct UniCaseRange CifsUniUpperRange[]; #endif /* UNIUPR_NOUPPER */ #ifndef UNIUPR_NOLOWER -extern signed char UniLowerTable[512]; -extern struct UniCaseRange UniLowerRange[]; +extern signed char CifsUniLowerTable[512]; +extern const struct UniCaseRange CifsUniLowerRange[]; #endif /* UNIUPR_NOLOWER */ #ifdef __KERNEL__ @@ -337,15 +339,15 @@ UniStrupr(register wchar_t *upin) * UniTolower: Convert a unicode character to lower case */ static inline wchar_t -UniTolower(wchar_t uc) +UniTolower(register wchar_t uc) { - register struct UniCaseRange *rp; + register const struct UniCaseRange *rp; - if (uc < sizeof(UniLowerTable)) { + if (uc < sizeof(CifsUniLowerTable)) { /* Latin characters */ - return uc + UniLowerTable[uc]; /* Use base tables */ + return uc + CifsUniLowerTable[uc]; /* Use base tables */ } else { - rp = UniLowerRange; /* Use range tables */ + rp = CifsUniLowerRange; /* Use range tables */ while (rp->start) { if (uc < rp->start) /* Before start of range */ return uc; /* Uppercase = input */ @@ -374,3 +376,5 @@ UniStrlwr(register wchar_t *upin) } #endif + +#endif /* _CIFS_UNICODE_H */ diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h index 18a9d978e519..0ac7c5a8633a 100644 --- a/fs/cifs/cifs_uniupr.h +++ b/fs/cifs/cifs_uniupr.h @@ -140,7 +140,7 @@ const struct UniCaseRange CifsUniUpperRange[] = { /* * Latin lower case */ -static signed char CifsUniLowerTable[512] = { +signed char CifsUniLowerTable[512] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ @@ -242,12 +242,12 @@ static signed char UniCaseRangeLff20[27] = { /* * Lower Case Range */ -static const struct UniCaseRange CifsUniLowerRange[] = { - 0x0380, 
0x03ab, UniCaseRangeL0380, - 0x0400, 0x042f, UniCaseRangeL0400, - 0x0490, 0x04cb, UniCaseRangeL0490, - 0x1e00, 0x1ff7, UniCaseRangeL1e00, - 0xff20, 0xff3a, UniCaseRangeLff20, - 0, 0, 0 +const struct UniCaseRange CifsUniLowerRange[] = { + {0x0380, 0x03ab, UniCaseRangeL0380}, + {0x0400, 0x042f, UniCaseRangeL0400}, + {0x0490, 0x04cb, UniCaseRangeL0490}, + {0x1e00, 0x1ff7, UniCaseRangeL1e00}, + {0xff20, 0xff3a, UniCaseRangeLff20}, + {0} }; #endif diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 847628dfdc44..709f2296bdb4 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -27,6 +27,7 @@ #include "md5.h" #include "cifs_unicode.h" #include "cifsproto.h" +#include "ntlmssp.h" #include <linux/ctype.h> #include <linux/random.h> @@ -42,21 +43,43 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, - const struct mac_key *key, char *signature) + struct TCP_Server_Info *server, char *signature) { - struct MD5Context context; + int rc; - if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) + if (cifs_pdu == NULL || server == NULL || signature == NULL) return -EINVAL; - cifs_MD5_init(&context); - cifs_MD5_update(&context, (char *)&key->data, key->len); - cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); + if (!server->ntlmssp.sdescmd5) { + cERROR(1, + "cifs_calculate_signature: can't generate signature\n"); + return -1; + } - cifs_MD5_final(signature, &context); - return 0; + rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); + if (rc) { + cERROR(1, "cifs_calculate_signature: oould not init md5\n"); + return rc; + } + + if (server->secType == RawNTLMSSP) + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + server->session_key.data.ntlmv2.key, + CIFS_NTLMV2_SESSKEY_SIZE); + else + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + (char *)&server->session_key.data, + server->session_key.len); 
+ + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + cifs_pdu->Protocol, cifs_pdu->smb_buf_length); + + rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); + + return rc; } + int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -78,8 +101,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key, - smb_signature); + rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -89,21 +111,39 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, } static int cifs_calc_signature2(const struct kvec *iov, int n_vec, - const struct mac_key *key, char *signature) + struct TCP_Server_Info *server, char *signature) { - struct MD5Context context; int i; + int rc; - if ((iov == NULL) || (signature == NULL) || (key == NULL)) + if (iov == NULL || server == NULL || signature == NULL) return -EINVAL; - cifs_MD5_init(&context); - cifs_MD5_update(&context, (char *)&key->data, key->len); + if (!server->ntlmssp.sdescmd5) { + cERROR(1, "cifs_calc_signature2: can't generate signature\n"); + return -1; + } + + rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); + if (rc) { + cERROR(1, "cifs_calc_signature2: oould not init md5\n"); + return rc; + } + + if (server->secType == RawNTLMSSP) + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + server->session_key.data.ntlmv2.key, + CIFS_NTLMV2_SESSKEY_SIZE); + else + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + (char *)&server->session_key.data, + server->session_key.len); + for (i = 0; i < n_vec; i++) { if (iov[i].iov_len == 0) continue; if (iov[i].iov_base == NULL) { - cERROR(1, "null iovec entry"); + cERROR(1, "cifs_calc_signature2: null iovec entry"); return -EIO; } /* The first 
entry includes a length field (which does not get @@ -111,18 +151,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, if (i == 0) { if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ break; /* nothing to sign or corrupt header */ - cifs_MD5_update(&context, iov[0].iov_base+4, - iov[0].iov_len-4); + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + iov[i].iov_base + 4, iov[i].iov_len - 4); } else - cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); + crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + iov[i].iov_base, iov[i].iov_len); } - cifs_MD5_final(signature, &context); + rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); - return 0; + return rc; } - int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -145,8 +185,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key, - smb_signature); + rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -156,14 +195,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, } int cifs_verify_signature(struct smb_hdr *cifs_pdu, - const struct mac_key *mac_key, + struct TCP_Server_Info *server, __u32 expected_sequence_number) { - unsigned int rc; + int rc; char server_response_sig[8]; char what_we_think_sig_should_be[20]; - if ((cifs_pdu == NULL) || (mac_key == NULL)) + if (cifs_pdu == NULL || server == NULL) return -EINVAL; if (cifs_pdu->Command == SMB_COM_NEGOTIATE) @@ -192,7 +231,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, cpu_to_le32(expected_sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; - rc = cifs_calculate_signature(cifs_pdu, mac_key, + rc = cifs_calculate_signature(cifs_pdu, server, 
what_we_think_sig_should_be); if (rc) @@ -209,7 +248,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, } /* We fill in key by putting in 40 byte array which was allocated by caller */ -int cifs_calculate_mac_key(struct mac_key *key, const char *rn, +int cifs_calculate_session_key(struct session_key *key, const char *rn, const char *password) { char temp_key[16]; @@ -223,63 +262,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn, return 0; } -int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses, - const struct nls_table *nls_info) -{ - char temp_hash[16]; - struct HMACMD5Context ctx; - char *ucase_buf; - __le16 *unicode_buf; - unsigned int i, user_name_len, dom_name_len; - - if (ses == NULL) - return -EINVAL; - - E_md4hash(ses->password, temp_hash); - - hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); - user_name_len = strlen(ses->userName); - if (user_name_len > MAX_USERNAME_SIZE) - return -EINVAL; - if (ses->domainName == NULL) - return -EINVAL; /* BB should we use CIFS_LINUX_DOM */ - dom_name_len = strlen(ses->domainName); - if (dom_name_len > MAX_USERNAME_SIZE) - return -EINVAL; - - ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); - if (ucase_buf == NULL) - return -ENOMEM; - unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); - if (unicode_buf == NULL) { - kfree(ucase_buf); - return -ENOMEM; - } - - for (i = 0; i < user_name_len; i++) - ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]]; - ucase_buf[i] = 0; - user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, - MAX_USERNAME_SIZE*2, nls_info); - unicode_buf[user_name_len] = 0; - user_name_len++; - - for (i = 0; i < dom_name_len; i++) - ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]]; - ucase_buf[i] = 0; - dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, - MAX_USERNAME_SIZE*2, nls_info); - - unicode_buf[user_name_len + dom_name_len] = 0; - hmac_md5_update((const unsigned char *) unicode_buf, - (user_name_len+dom_name_len)*2, 
&ctx); - - hmac_md5_final(ses->server->ntlmv2_hash, &ctx); - kfree(ucase_buf); - kfree(unicode_buf); - return 0; -} - #ifdef CONFIG_CIFS_WEAK_PW_HASH void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key) @@ -324,38 +306,52 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, { int rc = 0; int len; - char nt_hash[16]; - struct HMACMD5Context *pctxt; + char nt_hash[CIFS_NTHASH_SIZE]; wchar_t *user; wchar_t *domain; + wchar_t *server; - pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); - - if (pctxt == NULL) - return -ENOMEM; + if (!ses->server->ntlmssp.sdeschmacmd5) { + cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); + return -1; + } /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash); - /* convert Domainname to unicode and uppercase */ - hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); + crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash, + CIFS_NTHASH_SIZE); + + rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); + if (rc) { + cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n"); + return rc; + } /* convert ses->userName to unicode and uppercase */ len = strlen(ses->userName); user = kmalloc(2 + (len * 2), GFP_KERNEL); - if (user == NULL) + if (user == NULL) { + cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); + rc = -ENOMEM; goto calc_exit_2; + } len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); UniStrupr(user); - hmac_md5_update((char *)user, 2*len, pctxt); + + crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, + (char *)user, 2 * len); /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { len = strlen(ses->domainName); domain = kmalloc(2 + (len * 2), GFP_KERNEL); - if (domain == NULL) + if (domain == NULL) { + cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); + rc = -ENOMEM; goto calc_exit_1; + } len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, nls_cp); /* the following 
line was removed since it didn't work well @@ -363,65 +359,292 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, Maybe converting the domain name earlier makes sense */ /* UniStrupr(domain); */ - hmac_md5_update((char *)domain, 2*len, pctxt); + crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, + (char *)domain, 2 * len); kfree(domain); + } else if (ses->serverName) { + len = strlen(ses->serverName); + + server = kmalloc(2 + (len * 2), GFP_KERNEL); + if (server == NULL) { + cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); + rc = -ENOMEM; + goto calc_exit_1; + } + len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, + nls_cp); + /* the following line was removed since it didn't work well + with lower cased domain name that passed as an option. + Maybe converting the domain name earlier makes sense */ + /* UniStrupr(domain); */ + + crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, + (char *)server, 2 * len); + + kfree(server); } + + rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, + ses->server->ntlmv2_hash); + calc_exit_1: kfree(user); calc_exit_2: /* BB FIXME what about bytes 24 through 40 of the signing key? 
compare with the NTLM example */ - hmac_md5_final(ses->server->ntlmv2_hash, pctxt); - kfree(pctxt); return rc; } -void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, - const struct nls_table *nls_cp) +static int +find_domain_name(struct cifsSesInfo *ses) +{ + int rc = 0; + unsigned int attrsize; + unsigned int type; + unsigned char *blobptr; + struct ntlmssp2_name *attrptr; + + if (ses->server->tiblob) { + blobptr = ses->server->tiblob; + attrptr = (struct ntlmssp2_name *) blobptr; + + while ((type = attrptr->type) != 0) { + blobptr += 2; /* advance attr type */ + attrsize = attrptr->length; + blobptr += 2; /* advance attr size */ + if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { + if (!ses->domainName) { + ses->domainName = + kmalloc(attrptr->length + 1, + GFP_KERNEL); + if (!ses->domainName) + return -ENOMEM; + cifs_from_ucs2(ses->domainName, + (__le16 *)blobptr, + attrptr->length, + attrptr->length, + load_nls_default(), false); + } + } + blobptr += attrsize; /* advance attr value */ + attrptr = (struct ntlmssp2_name *) blobptr; + } + } else { + ses->server->tilen = 2 * sizeof(struct ntlmssp2_name); + ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL); + if (!ses->server->tiblob) { + ses->server->tilen = 0; + cERROR(1, "Challenge target info allocation failure"); + return -ENOMEM; + } + memset(ses->server->tiblob, 0x0, ses->server->tilen); + attrptr = (struct ntlmssp2_name *) ses->server->tiblob; + attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); + } + + return rc; +} + +static int +CalcNTLMv2_response(const struct TCP_Server_Info *server, + char *v2_session_response) { int rc; + + if (!server->ntlmssp.sdeschmacmd5) { + cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); + return -1; + } + + crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash, + CIFS_HMAC_MD5_HASH_SIZE); + + rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash); + if (rc) { + cERROR(1, "CalcNTLMv2_response: could not init hmacmd5"); + return rc; + } + 
+ memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, + server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE); + crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, + v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, + sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE); + + if (server->tilen) + crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, + server->tiblob, server->tilen); + + rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash, + v2_session_response); + + return rc; +} + +int +setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, + const struct nls_table *nls_cp) +{ + int rc = 0; struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; - struct HMACMD5Context context; buf->blob_signature = cpu_to_le32(0x00000101); buf->reserved = 0; buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); buf->reserved2 = 0; - buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); - buf->names[0].length = 0; - buf->names[1].type = 0; - buf->names[1].length = 0; + + if (!ses->domainName) { + rc = find_domain_name(ses); + if (rc) { + cERROR(1, "could not get domain/server name rc %d", rc); + return rc; + } + } /* calculate buf->ntlmv2_hash */ rc = calc_ntlmv2_hash(ses, nls_cp); - if (rc) + if (rc) { cERROR(1, "could not get v2 hash rc %d", rc); - CalcNTLMv2_response(ses, resp_buf); + return rc; + } + rc = CalcNTLMv2_response(ses->server, resp_buf); + if (rc) { + cERROR(1, "could not get v2 hash rc %d", rc); + return rc; + } - /* now calculate the MAC key for NTLMv2 */ - hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); - hmac_md5_update(resp_buf, 16, &context); - hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context); + if (!ses->server->ntlmssp.sdeschmacmd5) { + cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); + return -1; + } - memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf, - sizeof(struct ntlmv2_resp)); - 
ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp); + crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, + ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + + rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); + if (rc) { + cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n"); + return rc; + } + + crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, + resp_buf, CIFS_HMAC_MD5_HASH_SIZE); + + rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, + ses->server->session_key.data.ntlmv2.key); + + memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf, + sizeof(struct ntlmv2_resp)); + ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp); + + return rc; } -void CalcNTLMv2_response(const struct cifsSesInfo *ses, - char *v2_session_response) +int +calc_seckey(struct TCP_Server_Info *server) { - struct HMACMD5Context context; - /* rest of v2 struct already generated */ - memcpy(v2_session_response + 8, ses->server->cryptKey, 8); - hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); + int rc; + unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE]; + struct crypto_blkcipher *tfm_arc4; + struct scatterlist sgin, sgout; + struct blkcipher_desc desc; + + get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE); + + tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", + 0, CRYPTO_ALG_ASYNC); + if (!tfm_arc4 || IS_ERR(tfm_arc4)) { + cERROR(1, "could not allocate " "master crypto API arc4\n"); + return 1; + } + + desc.tfm = tfm_arc4; + + crypto_blkcipher_setkey(tfm_arc4, + server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE); + sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE); + sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); + rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE); - hmac_md5_update(v2_session_response+8, - sizeof(struct ntlmv2_resp) - 8, &context); + if (!rc) + memcpy(server->session_key.data.ntlmv2.key, + sec_key, CIFS_NTLMV2_SESSKEY_SIZE); + + 
crypto_free_blkcipher(tfm_arc4); + + return 0; +} - hmac_md5_final(v2_session_response, &context); -/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ +void +cifs_crypto_shash_release(struct TCP_Server_Info *server) +{ + if (server->ntlmssp.md5) + crypto_free_shash(server->ntlmssp.md5); + + if (server->ntlmssp.hmacmd5) + crypto_free_shash(server->ntlmssp.hmacmd5); + + kfree(server->ntlmssp.sdeschmacmd5); + + kfree(server->ntlmssp.sdescmd5); +} + +int +cifs_crypto_shash_allocate(struct TCP_Server_Info *server) +{ + int rc; + unsigned int size; + + server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); + if (!server->ntlmssp.hmacmd5 || + IS_ERR(server->ntlmssp.hmacmd5)) { + cERROR(1, "could not allocate crypto hmacmd5\n"); + return 1; + } + + server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0); + if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) { + cERROR(1, "could not allocate crypto md5\n"); + rc = 1; + goto cifs_crypto_shash_allocate_ret1; + } + + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->ntlmssp.hmacmd5); + server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); + if (!server->ntlmssp.sdeschmacmd5) { + cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n"); + rc = -ENOMEM; + goto cifs_crypto_shash_allocate_ret2; + } + server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5; + server->ntlmssp.sdeschmacmd5->shash.flags = 0x0; + + + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->ntlmssp.md5); + server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL); + if (!server->ntlmssp.sdescmd5) { + cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n"); + rc = -ENOMEM; + goto cifs_crypto_shash_allocate_ret3; + } + server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5; + server->ntlmssp.sdescmd5->shash.flags = 0x0; + + return 0; + +cifs_crypto_shash_allocate_ret3: + kfree(server->ntlmssp.sdeschmacmd5); + +cifs_crypto_shash_allocate_ret2: + crypto_free_shash(server->ntlmssp.md5); 
+ +cifs_crypto_shash_allocate_ret1: + crypto_free_shash(server->ntlmssp.hmacmd5); + + return rc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0cdfb8c32ac6..c9d0cfc086eb 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -25,6 +25,9 @@ #include <linux/workqueue.h> #include "cifs_fs_sb.h" #include "cifsacl.h" +#include <crypto/internal/hash.h> +#include <linux/scatterlist.h> + /* * The sizes of various internal tables and strings */ @@ -97,7 +100,7 @@ enum protocolEnum { /* Netbios frames protocol not supported at this time */ }; -struct mac_key { +struct session_key { unsigned int len; union { char ntlm[CIFS_SESS_KEY_SIZE + 16]; @@ -120,6 +123,21 @@ struct cifs_cred { struct cifs_ace *aces; }; +struct sdesc { + struct shash_desc shash; + char ctx[]; +}; + +struct ntlmssp_auth { + __u32 client_flags; + __u32 server_flags; + unsigned char ciphertext[CIFS_CPHTXT_SIZE]; + struct crypto_shash *hmacmd5; + struct crypto_shash *md5; + struct sdesc *sdeschmacmd5; + struct sdesc *sdescmd5; +}; + /* ***************************************************************** * Except the CIFS PDUs themselves all the @@ -182,11 +200,14 @@ struct TCP_Server_Info { /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* needed for CIFS PDU signature */ - struct mac_key mac_signing_key; + struct session_key session_key; char ntlmv2_hash[16]; unsigned long lstrp; /* when we got last response from this server */ u16 dialect; /* dialect index that server chose */ /* extended security flavors that server supports */ + unsigned int tilen; /* length of the target info blob */ + unsigned char *tiblob; /* target info blob in challenge response */ + struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ bool sec_kerberosu2u; /* supports U2U Kerberos */ diff --git 
a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 14d036d8db11..320e0fd0ba7b 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -134,6 +134,12 @@ * Size of the session key (crypto key encrypted with the password */ #define CIFS_SESS_KEY_SIZE (24) +#define CIFS_CLIENT_CHALLENGE_SIZE (8) +#define CIFS_SERVER_CHALLENGE_SIZE (8) +#define CIFS_HMAC_MD5_HASH_SIZE (16) +#define CIFS_CPHTXT_SIZE (16) +#define CIFS_NTLMV2_SESSKEY_SIZE (16) +#define CIFS_NTHASH_SIZE (16) /* * Maximum user name length @@ -663,7 +669,6 @@ struct ntlmv2_resp { __le64 time; __u64 client_chal; /* random */ __u32 reserved2; - struct ntlmssp2_name names[2]; /* array of name entries could follow ending in minimum 4 byte struct */ } __attribute__((packed)); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1f5450814087..1378d9133844 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -361,15 +361,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); extern int cifs_verify_signature(struct smb_hdr *, - const struct mac_key *mac_key, + struct TCP_Server_Info *server, __u32 expected_sequence_number); -extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, +extern int cifs_calculate_session_key(struct session_key *key, const char *rn, const char *pass); -extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, - const struct nls_table *); -extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); -extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, +extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, const struct nls_table *); +extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); +extern void cifs_crypto_shash_release(struct TCP_Server_Info *); +extern int calc_seckey(struct TCP_Server_Info *); #ifdef CONFIG_CIFS_WEAK_PW_HASH extern void calc_lanman_hash(const char *password, const char *cryptkey, bool 
encrypt, char *lnm_session_key); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c65c3419dd37..4bda920d1f75 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -604,11 +604,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else rc = -EINVAL; - if (server->sec_kerberos || server->sec_mskerberos) - server->secType = Kerberos; - else if (server->sec_ntlmssp) - server->secType = RawNTLMSSP; - else + if (server->secType == Kerberos) { + if (!server->sec_kerberos && + !server->sec_mskerberos) + rc = -EOPNOTSUPP; + } else if (server->secType == RawNTLMSSP) { + if (!server->sec_ntlmssp) + rc = -EOPNOTSUPP; + } else rc = -EOPNOTSUPP; } } else diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 95c2ea67edfb..ec0ea4a43bdb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1673,7 +1673,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) MAX_USERNAME_SIZE)) continue; if (strlen(vol->username) != 0 && - strncmp(ses->password, vol->password, + ses->password != NULL && + strncmp(ses->password, + vol->password ? 
vol->password : "", MAX_PASSWORD_SIZE)) continue; } @@ -1706,6 +1708,7 @@ cifs_put_smb_ses(struct cifsSesInfo *ses) CIFSSMBLogoff(xid, ses); _FreeXid(xid); } + cifs_crypto_shash_release(server); sesInfoFree(ses); cifs_put_tcp_session(server); } @@ -1785,13 +1788,23 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) ses->linux_uid = volume_info->linux_uid; ses->overrideSecFlg = volume_info->secFlg; + rc = cifs_crypto_shash_allocate(server); + if (rc) { + cERROR(1, "could not setup hash structures rc %d", rc); + goto get_ses_fail; + } + server->tilen = 0; + server->tiblob = NULL; + mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); if (!rc) rc = cifs_setup_session(xid, ses, volume_info->local_nls); mutex_unlock(&ses->session_mutex); - if (rc) + if (rc) { + cifs_crypto_shash_release(ses->server); goto get_ses_fail; + } /* success, put it on the list */ write_lock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 578d88c5b46e..f9ed0751cc12 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -305,8 +305,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { rc = -ENOMEM; - FreeXid(xid); - return rc; + goto cifs_create_out; } if (oplockEnabled) @@ -365,9 +364,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { - kfree(full_path); - FreeXid(xid); - return -ENOMEM; + rc = -ENOMEM; + goto cifs_create_out; } /* @@ -496,6 +494,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, struct cifsTconInfo *pTcon; char *full_path = NULL; struct inode *newinode = NULL; + int oplock = 0; + u16 fileHandle; + FILE_ALL_INFO *buf = NULL; + unsigned int bytes_written; + struct win_dev *pdev; if (!old_valid_dev(device_number)) return -EINVAL; @@ -506,9 +509,12 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, 
int mode, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if (full_path == NULL) + if (full_path == NULL) { rc = -ENOMEM; - else if (pTcon->unix_ext) { + goto mknod_out; + } + + if (pTcon->unix_ext) { struct cifs_unix_set_info_args args = { .mode = mode & ~current_umask(), .ctime = NO_CHANGE_64, @@ -527,87 +533,78 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) + goto mknod_out; - if (!rc) { - rc = cifs_get_inode_info_unix(&newinode, full_path, + rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); - if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; - else - direntry->d_op = &cifs_dentry_ops; - if (rc == 0) - d_instantiate(direntry, newinode); - } - } else { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - int oplock = 0; - u16 fileHandle; - FILE_ALL_INFO *buf; + if (pTcon->nocase) + direntry->d_op = &cifs_ci_dentry_ops; + else + direntry->d_op = &cifs_dentry_ops; - cFYI(1, "sfu compat create special file"); + if (rc == 0) + d_instantiate(direntry, newinode); + goto mknod_out; + } - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (buf == NULL) { - kfree(full_path); - rc = -ENOMEM; - FreeXid(xid); - return rc; - } + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) + goto mknod_out; - rc = CIFSSMBOpen(xid, pTcon, full_path, - FILE_CREATE, /* fail if exists */ - GENERIC_WRITE /* BB would - WRITE_OWNER | WRITE_DAC be better? 
*/, - /* Create a file and set the - file attribute to SYSTEM */ - CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, - &fileHandle, &oplock, buf, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - - /* BB FIXME - add handling for backlevel servers - which need legacy open and check for all - calls to SMBOpen for fallback to SMBLeagcyOpen */ - if (!rc) { - /* BB Do not bother to decode buf since no - local inode yet to put timestamps in, - but we can reuse it safely */ - unsigned int bytes_written; - struct win_dev *pdev; - pdev = (struct win_dev *)buf; - if (S_ISCHR(mode)) { - memcpy(pdev->type, "IntxCHR", 8); - pdev->major = - cpu_to_le64(MAJOR(device_number)); - pdev->minor = - cpu_to_le64(MINOR(device_number)); - rc = CIFSSMBWrite(xid, pTcon, - fileHandle, - sizeof(struct win_dev), - 0, &bytes_written, (char *)pdev, - NULL, 0); - } else if (S_ISBLK(mode)) { - memcpy(pdev->type, "IntxBLK", 8); - pdev->major = - cpu_to_le64(MAJOR(device_number)); - pdev->minor = - cpu_to_le64(MINOR(device_number)); - rc = CIFSSMBWrite(xid, pTcon, - fileHandle, - sizeof(struct win_dev), - 0, &bytes_written, (char *)pdev, - NULL, 0); - } /* else if(S_ISFIFO */ - CIFSSMBClose(xid, pTcon, fileHandle); - d_drop(direntry); - } - kfree(buf); - /* add code here to set EAs */ - } + + cFYI(1, "sfu compat create special file"); + + buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (buf == NULL) { + kfree(full_path); + rc = -ENOMEM; + FreeXid(xid); + return rc; } + /* FIXME: would WRITE_OWNER | WRITE_DAC be better? 
*/ + rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, + GENERIC_WRITE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, + &fileHandle, &oplock, buf, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) + goto mknod_out; + + /* BB Do not bother to decode buf since no local inode yet to put + * timestamps in, but we can reuse it safely */ + + pdev = (struct win_dev *)buf; + if (S_ISCHR(mode)) { + memcpy(pdev->type, "IntxCHR", 8); + pdev->major = + cpu_to_le64(MAJOR(device_number)); + pdev->minor = + cpu_to_le64(MINOR(device_number)); + rc = CIFSSMBWrite(xid, pTcon, + fileHandle, + sizeof(struct win_dev), + 0, &bytes_written, (char *)pdev, + NULL, 0); + } else if (S_ISBLK(mode)) { + memcpy(pdev->type, "IntxBLK", 8); + pdev->major = + cpu_to_le64(MAJOR(device_number)); + pdev->minor = + cpu_to_le64(MINOR(device_number)); + rc = CIFSSMBWrite(xid, pTcon, + fileHandle, + sizeof(struct win_dev), + 0, &bytes_written, (char *)pdev, + NULL, 0); + } /* else if (S_ISFIFO) */ + CIFSSMBClose(xid, pTcon, fileHandle); + d_drop(direntry); + + /* FIXME: add code here to set EAs */ + +mknod_out: kfree(full_path); + kfree(buf); FreeXid(xid); return rc; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index db11fdef0e92..de748c652d11 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -242,8 +242,7 @@ int cifs_open(struct inode *inode, struct file *file) full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { rc = -ENOMEM; - FreeXid(xid); - return rc; + goto out; } cFYI(1, "inode = 0x%p file flags are 0x%x for %s", diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4bc47e5b5f29..86a164f08a74 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -834,7 +834,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) xid, NULL); if (!inode) - return ERR_PTR(-ENOMEM); + return ERR_PTR(rc); #ifdef CONFIG_CIFS_FSCACHE /* populate tcon->resource_id */ diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 
49c9a4e75319..1db0f0746a5b 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -61,6 +61,19 @@ #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 #define NTLMSSP_NEGOTIATE_56 0x80000000 +/* Define AV Pair Field IDs */ +#define NTLMSSP_AV_EOL 0 +#define NTLMSSP_AV_NB_COMPUTER_NAME 1 +#define NTLMSSP_AV_NB_DOMAIN_NAME 2 +#define NTLMSSP_AV_DNS_COMPUTER_NAME 3 +#define NTLMSSP_AV_DNS_DOMAIN_NAME 4 +#define NTLMSSP_AV_DNS_TREE_NAME 5 +#define NTLMSSP_AV_FLAGS 6 +#define NTLMSSP_AV_TIMESTAMP 7 +#define NTLMSSP_AV_RESTRICTION 8 +#define NTLMSSP_AV_TARGET_NAME 9 +#define NTLMSSP_AV_CHANNEL_BINDINGS 10 + /* Although typedefs are not commonly used for structure definitions */ /* in the Linux kernel, in this particular case they are useful */ /* to more closely match the standards document for NTLMSSP from */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0a57cb7db5dd..795095f4eac6 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -383,6 +383,9 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft, static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifsSesInfo *ses) { + unsigned int tioffset; /* challeng message target info area */ + unsigned int tilen; /* challeng message target info area length */ + CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; if (blob_len < sizeof(CHALLENGE_MESSAGE)) { @@ -405,6 +408,20 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, /* BB spec says that if AvId field of MsvAvTimestamp is populated then we must set the MIC field of the AUTHENTICATE_MESSAGE */ + ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags); + + tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); + tilen = cpu_to_le16(pblob->TargetInfoArray.Length); + ses->server->tilen = tilen; + if (tilen) { + ses->server->tiblob = kmalloc(tilen, GFP_KERNEL); + if (!ses->server->tiblob) { + cERROR(1, "Challenge target info allocation failure"); + return -ENOMEM; + } + memcpy(ses->server->tiblob, bcc_ptr + 
tioffset, tilen); + } + return 0; } @@ -425,12 +442,13 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, /* BB is NTLMV2 session security format easier to use here? */ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; + NTLMSSP_NEGOTIATE_NTLM; if (ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - flags |= NTLMSSP_NEGOTIATE_SIGN; - if (ses->server->secMode & SECMODE_SIGN_REQUIRED) - flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + flags |= NTLMSSP_NEGOTIATE_SIGN | + NTLMSSP_NEGOTIATE_KEY_XCH | + NTLMSSP_NEGOTIATE_EXTENDED_SEC; + } sec_blob->NegotiateFlags |= cpu_to_le32(flags); @@ -451,10 +469,12 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, struct cifsSesInfo *ses, const struct nls_table *nls_cp, bool first) { + int rc; + unsigned int size; AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; __u32 flags; unsigned char *tmp; - char ntlm_session_key[CIFS_SESS_KEY_SIZE]; + struct ntlmv2_resp ntlmv2_response = {}; memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -477,19 +497,25 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - /* calculate session key, BB what about adding similar ntlmv2 path? 
*/ - SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); - if (first) - cifs_calculate_mac_key(&ses->server->mac_signing_key, - ntlm_session_key, ses->password); - - memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE); sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); - sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); - sec_blob->NtChallengeResponse.MaximumLength = - cpu_to_le16(CIFS_SESS_KEY_SIZE); + rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); + if (rc) { + cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc); + goto setup_ntlmv2_ret; + } + size = sizeof(struct ntlmv2_resp); + memcpy(tmp, (char *)&ntlmv2_response, size); + tmp += size; + if (ses->server->tilen > 0) { + memcpy(tmp, ses->server->tiblob, ses->server->tilen); + tmp += ses->server->tilen; + } else + ses->server->tilen = 0; - tmp += CIFS_SESS_KEY_SIZE; + sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + + ses->server->tilen); + sec_blob->NtChallengeResponse.MaximumLength = + cpu_to_le16(size + ses->server->tilen); if (ses->domainName == NULL) { sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); @@ -501,7 +527,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ - len += 2; /* trailing null */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); @@ -518,7 +543,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUCS((__le16 *)tmp, ses->userName, MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ - len += 2; /* trailing null */ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); @@ -530,9 +554,26 @@ static int 
build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); - sec_blob->SessionKey.Length = 0; - sec_blob->SessionKey.MaximumLength = 0; + if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && + !calc_seckey(ses->server)) { + memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); + sec_blob->SessionKey.MaximumLength = + cpu_to_le16(CIFS_CPHTXT_SIZE); + tmp += CIFS_CPHTXT_SIZE; + } else { + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.Length = 0; + sec_blob->SessionKey.MaximumLength = 0; + } + + ses->server->sequence_number = 0; + +setup_ntlmv2_ret: + if (ses->server->tilen > 0) + kfree(ses->server->tiblob); + return tmp - pbuffer; } @@ -546,15 +587,14 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, return; } -static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, +static int setup_ntlmssp_auth_req(char *ntlmsspblob, struct cifsSesInfo *ses, const struct nls_table *nls, bool first_time) { int bloblen; - bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls, + bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls, first_time); - pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen); return bloblen; } @@ -690,7 +730,7 @@ ssetup_ntlmssp_authenticate: if (first_time) /* should this be moved into common code with similar ntlmv2 path? 
*/ - cifs_calculate_mac_key(&ses->server->mac_signing_key, + cifs_calculate_session_key(&ses->server->session_key, ntlm_session_key, ses->password); /* copy session key */ @@ -729,12 +769,21 @@ ssetup_ntlmssp_authenticate: cpu_to_le16(sizeof(struct ntlmv2_resp)); /* calculate session key */ - setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); + rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); + if (rc) { + kfree(v2_sess_key); + goto ssetup_exit; + } /* FIXME: calculate MAC key */ memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp)); bcc_ptr += sizeof(struct ntlmv2_resp); kfree(v2_sess_key); + if (ses->server->tilen > 0) { + memcpy(bcc_ptr, ses->server->tiblob, + ses->server->tilen); + bcc_ptr += ses->server->tilen; + } if (ses->capabilities & CAP_UNICODE) { if (iov[0].iov_len % 2) { *bcc_ptr = 0; @@ -765,15 +814,15 @@ ssetup_ntlmssp_authenticate: } /* bail out if key is too long */ if (msg->sesskey_len > - sizeof(ses->server->mac_signing_key.data.krb5)) { + sizeof(ses->server->session_key.data.krb5)) { cERROR(1, "Kerberos signing key too long (%u bytes)", msg->sesskey_len); rc = -EOVERFLOW; goto ssetup_exit; } if (first_time) { - ses->server->mac_signing_key.len = msg->sesskey_len; - memcpy(ses->server->mac_signing_key.data.krb5, + ses->server->session_key.len = msg->sesskey_len; + memcpy(ses->server->session_key.data.krb5, msg->data, msg->sesskey_len); } pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; @@ -815,12 +864,28 @@ ssetup_ntlmssp_authenticate: if (phase == NtLmNegotiate) { setup_ntlmssp_neg_req(pSMB, ses); iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); + iov[1].iov_base = &pSMB->req.SecurityBlob[0]; } else if (phase == NtLmAuthenticate) { int blob_len; - blob_len = setup_ntlmssp_auth_req(pSMB, ses, - nls_cp, - first_time); + char *ntlmsspblob; + + ntlmsspblob = kmalloc(5 * + sizeof(struct _AUTHENTICATE_MESSAGE), + GFP_KERNEL); + if (!ntlmsspblob) { + cERROR(1, "Can't allocate NTLMSSP"); + rc = -ENOMEM; + goto ssetup_exit; + } + + blob_len = 
setup_ntlmssp_auth_req(ntlmsspblob, + ses, + nls_cp, + first_time); iov[1].iov_len = blob_len; + iov[1].iov_base = ntlmsspblob; + pSMB->req.SecurityBlobLength = + cpu_to_le16(blob_len); /* Make sure that we tell the server that we are using the uid that it just gave us back on the response (challenge) */ @@ -830,7 +895,6 @@ ssetup_ntlmssp_authenticate: rc = -ENOSYS; goto ssetup_exit; } - iov[1].iov_base = &pSMB->req.SecurityBlob[0]; /* unicode strings must be word aligned */ if ((iov[0].iov_len + iov[1].iov_len) % 2) { *bcc_ptr = 0; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 82f78c4d6978..e0588cdf4cc5 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(midQ->resp_buf, - &ses->server->mac_signing_key, + ses->server, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); @@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - &ses->server->mac_signing_key, + ses->server, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); @@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - &ses->server->mac_signing_key, + ses->server, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a53b130b366c..1e7a33028d33 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, } } else { inode = iget_locked(sb, CRAMINO(cramfs_inode)); - if (inode) { + if (inode && (inode->i_state & I_NEW)) { 
setup_inode(inode, cramfs_inode); unlock_new_inode(inode); } diff --git a/fs/dcache.c b/fs/dcache.c index 4d13bf50b7b1..83293be48149 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci); * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find + * Returns: dentry, or NULL * - * Searches the children of the parent dentry for the name in question. If - * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use dput to free the entry when it has - * finished using it. %NULL is returned on failure. - * - * __d_lookup is dcache_lock free. The hash list is protected using RCU. - * Memory barriers are used while updating and doing lockless traversal. - * To avoid races with d_move while rename is happening, d_lock is used. - * - * Overflows in memcmp(), while d_move, are avoided by keeping the length - * and name pointer in one structure pointed by d_qstr. - * - * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while - * lookup is going on. - * - * The dentry unused LRU is not updated even if lookup finds the required dentry - * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent and __dget_locked. This laziness saves lookup from dcache_lock - * acquisition. - * - * d_lookup() is protected against the concurrent renames in some unrelated - * directory using the seqlockt_t rename_lock. + * d_lookup searches the children of the parent dentry for the name in + * question. If the dentry is found its reference count is incremented and the + * dentry is returned. The caller must use dput to free the entry when it has + * finished using it. %NULL is returned if the dentry does not exist. 
*/ - struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; @@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); +/* + * __d_lookup - search for a dentry (racy) + * @parent: parent dentry + * @name: qstr of name we wish to find + * Returns: dentry, or NULL + * + * __d_lookup is like d_lookup, however it may (rarely) return a + * false-negative result due to unrelated rename activity. + * + * __d_lookup is slightly faster by avoiding rename_lock read seqlock, + * however it must be used carefully, eg. with a following d_lookup in + * the case of failure. + * + * __d_lookup callers must be commented. + */ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; @@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) struct hlist_node *node; struct dentry *dentry; + /* + * The hash list is protected using RCU. + * + * Take d_lock when comparing a candidate dentry, to avoid races + * with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. + */ rcu_read_lock(); hlist_for_each_entry_rcu(dentry, node, head, d_hash) { @@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) /* * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because we're - * about to compare the whole name anyway. + * changed things. Don't bother checking the hash because + * we're about to compare the whole name anyway. 
*/ if (dentry->d_parent != parent) goto next; @@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root, bool slash = false; int error = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -1954,7 +1964,7 @@ out: if (!error && !slash) error = prepend(buffer, buflen, "/", 1); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return error; global_root: @@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2) struct vfsmount *mnt = path1->mnt; struct dentry *dentry = path1->dentry; int res; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if (mnt != path2->mnt) { for (;;) { if (mnt->mnt_parent == mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } if (mnt->mnt_parent == path2->mnt) @@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2) dentry = mnt->mnt_mountpoint; } res = is_subdir(dentry, path2->dentry); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..2d9455282744 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. 
*/ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1117,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. 
*/ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/file_table.c b/fs/file_table.c index edecd36fed9b..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include <linux/cdev.h> #include <linux/fsnotify.h> #include <linux/sysctl.h> +#include <linux/lglock.h> #include <linux/percpu_counter.h> +#include <linux/percpu.h> #include <linux/ima.h> #include <asm/atomic.h> @@ -32,8 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! 
*/ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +251,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +static inline int file_list_cpu(struct file *file) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); } -void file_kill(struct file *file) +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. 
+ */ +void file_sb_list_add(struct file *file, struct super_block *sb) +{ + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); +} + +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - file_list_lock(); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? */ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - file_list_unlock(); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. 
*/ too_bad: - file_list_unlock(); + lg_global_unlock(files_lglock); return 0; } @@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -408,16 +476,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. */ + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - file_list_unlock(); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if 
(fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 99800e564157..6bc9e3a5a693 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, if (error < 0) goto failed; inode->i_mode = 
mode; + inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); acl = NULL; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index dd1e55535a4e..f7dc9b5f9ef8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name) __putname(name); return NULL; } - strncpy(name, root, PATH_MAX); + strlcpy(name, root, PATH_MAX); if (len > p - name) { __putname(name); return NULL; @@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) char *path = dentry_name(dentry); int err = -ENOMEM; if (path) { - int err = hostfs_do_readlink(path, link, PATH_MAX); + err = hostfs_do_readlink(path, link, PATH_MAX); if (err == PATH_MAX) err = -E2BIG; __putname(path); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..a6910e91cee8 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include <linux/lglock.h> + struct super_block; struct linux_binprm; struct path; @@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); -extern spinlock_t vfsmount_lock; +DECLARE_BRLOCK(vfsmount_lock); + /* * fs_struct.c @@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? 
If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); 
set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? 
If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/mbcache.c b/fs/mbcache.c index cf4e6cdfd15b..93444747237b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -80,6 +80,7 @@ struct mb_cache { struct list_head c_cache_list; const char *c_name; atomic_t c_entry_count; + int c_max_entries; int c_bucket_bits; struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; @@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits) if (!cache->c_entry_cache) goto fail2; + /* + * Set an upper limit on the number of cache entries so that the hash + * chains won't grow too long. + */ + cache->c_max_entries = bucket_count << 4; + spin_lock(&mb_cache_spinlock); list_add(&cache->c_cache_list, &mb_cache_list); spin_unlock(&mb_cache_spinlock); @@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache) kfree(cache); } - /* * mb_cache_entry_alloc() * @@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache) struct mb_cache_entry * mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) { - struct mb_cache_entry *ce; - - ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); - if (ce) { + struct mb_cache_entry *ce = NULL; + + if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { + spin_lock(&mb_cache_spinlock); + if (!list_empty(&mb_cache_lru_list)) { + ce = list_entry(mb_cache_lru_list.next, + struct mb_cache_entry, e_lru_list); + list_del_init(&ce->e_lru_list); + __mb_cache_entry_unhash(ce); + } + spin_unlock(&mb_cache_spinlock); + } + if (!ce) { + ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); + if (!ce) + return NULL; atomic_inc(&cache->c_entry_count); INIT_LIST_HEAD(&ce->e_lru_list); INIT_LIST_HEAD(&ce->e_block_list); ce->e_cache = cache; - ce->e_used = 1 + MB_CACHE_WRITER; ce->e_queued = 0; } + ce->e_used = 1 + MB_CACHE_WRITER; return ce; } diff --git a/fs/namei.c b/fs/namei.c index 17ea76bf2fbe..24896e833565 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -595,15 +595,16 @@ int 
follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd) } /* + * Allocate a dentry with name and parent, and perform a parent + * directory ->lookup on it. Returns the new dentry, or ERR_PTR + * on error. parent->d_inode->i_mutex must be held. d_lookup must + * have verified that no child exists while under i_mutex. + */ +static struct dentry *d_alloc_and_lookup(struct dentry *parent, + struct qstr *name, struct nameidata *nd) +{ + struct inode *inode = parent->d_inode; + struct dentry *dentry; + struct dentry *old; + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(inode))) + return ERR_PTR(-ENOENT); + + dentry = d_alloc(parent, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); + + old = inode->i_op->lookup(inode, dentry, nd); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + return dentry; +} + +/* * It's more convoluted than I'd like it to be, but... it's still fairly * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. @@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return err; } + /* + * Rename seqlock is not required here because in the off chance + * of a false negative due to a concurrent rename, we're going to + * do the non-racy lookup, below. 
+ */ dentry = __d_lookup(nd->path.dentry, name); if (!dentry) goto need_lookup; +found: if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; done: @@ -724,56 +760,28 @@ need_lookup: mutex_lock(&dir->i_mutex); /* * First re-do the cached lookup just in case it was created - * while we waited for the directory semaphore.. + * while we waited for the directory semaphore, or the first + * lookup failed due to an unrelated rename. * - * FIXME! This could use version numbering or similar to - * avoid unnecessary cache lookups. - * - * The "dcache_lock" is purely to protect the RCU list walker - * from concurrent renames at this point (we mustn't get false - * negatives from the RCU list walk here, unlike the optimistic - * fast walk). - * - * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup + * This could use version numbering or similar to avoid unnecessary + * cache lookups, but then we'd have to do the first lookup in the + * non-racy way. However in the common case here, everything should + * be hot in cache, so would it be a big win? */ dentry = d_lookup(parent, name); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(dir)) - goto out_unlock; - - new = d_alloc(parent, name); - dentry = ERR_PTR(-ENOMEM); - if (new) { - dentry = dir->i_op->lookup(dir, new, nd); - if (dentry) - dput(new); - else - dentry = new; - } -out_unlock: + if (likely(!dentry)) { + dentry = d_alloc_and_lookup(parent, name, nd); mutex_unlock(&dir->i_mutex); if (IS_ERR(dentry)) goto fail; goto done; } - /* * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. 
*/ mutex_unlock(&dir->i_mutex); - if (dentry->d_op && dentry->d_op->d_revalidate) { - dentry = do_revalidate(dentry, nd); - if (!dentry) - dentry = ERR_PTR(-ENOENT); - } - if (IS_ERR(dentry)) - goto fail; - goto done; + goto found; need_revalidate: dentry = do_revalidate(dentry, nd); @@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name, goto out; } - dentry = __d_lookup(base, name); - - /* lockess __d_lookup may fail due to concurrent d_move() - * in some unrelated directory, so try with d_lookup + /* + * Don't bother with __d_lookup: callers are for creat as + * well as unlink, so a lot of the time it would cost + * a double lookup. */ - if (!dentry) - dentry = d_lookup(base, name); + dentry = d_lookup(base, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) dentry = do_revalidate(dentry, nd); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(inode)) - goto out; - - new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - dentry = inode->i_op->lookup(inode, new, nd); - if (!dentry) - dentry = new; - else - dput(new); - } + if (!dentry) + dentry = d_alloc_and_lookup(base, name, nd); out: return dentry; } diff --git a/fs/namespace.c b/fs/namespace.c index 2e10cb19c5b0..de402eb6eafb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -11,6 +11,8 @@ #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/percpu.h> #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/kernel.h> @@ -38,12 +40,10 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* spinlock for vfsmount related operations, inplace of dcache_lock */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); - static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static 
DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); +/* + * vfsmount lock may be taken for read to prevent changes to the + * vfsmount hash, ie. during mountpoint lookups or walking back + * up the tree. + * + * It should be taken for write in all cases where the vfsmount + * tree or hash is modified or when a vfsmount structure is modified. + */ +DEFINE_BRLOCK(vfsmount_lock); + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* allocation is serialized by namespace_sem */ +/* + * allocation is serialized by namespace_sem, but we need the spinlock to + * serialize with freeing. + */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); if (res == -EAGAIN) goto retry; @@ -86,11 +99,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); } /* @@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store @@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags &= ~MNT_READONLY; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt) /* * find the first or last mount at @dentry on vfsmount @mnt depending on * @dir. If @dir is set return the first mount else return the last mount. + * vfsmount_lock must be held for read or write. */ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, int dir) @@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return child_mnt; } @@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt) return mnt->mnt_ns == current->nsproxy->mnt_ns; } +/* + * vfsmount lock must be held for write + */ static void touch_mnt_namespace(struct mnt_namespace *ns) { if (ns) { @@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void __touch_mnt_namespace(struct mnt_namespace *ns) { if (ns && ns->event != event) { @@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { old_path->dentry = mnt->mnt_mountpoint; @@ -482,6 +506,9 @@ static void 
detach_mnt(struct vfsmount *mnt, struct path *old_path) old_path->dentry->d_mounted--; } +/* + * vfsmount lock must be held for write + */ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *child_mnt) { @@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, dentry->d_mounted++; } +/* + * vfsmount lock must be held for write + */ static void attach_mnt(struct vfsmount *mnt, struct path *path) { mnt_set_mountpoint(path->mnt, path->dentry, mnt); @@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) } /* - * the caller must hold vfsmount_lock + * vfsmount lock must be held for write */ static void commit_tree(struct vfsmount *mnt) { @@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { repeat: - if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { - if (likely(!mnt->mnt_pinned)) { - spin_unlock(&vfsmount_lock); - __mntput(mnt); - return; - } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; - spin_unlock(&vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + if (atomic_add_unless(&mnt->mnt_count, -1, 1)) + return; + br_write_lock(vfsmount_lock); + if (!atomic_dec_and_test(&mnt->mnt_count)) { + br_write_unlock(vfsmount_lock); + return; + } + if (likely(!mnt->mnt_pinned)) { + br_write_unlock(vfsmount_lock); + __mntput(mnt); + return; } + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto repeat; } - EXPORT_SYMBOL(mntput_no_expire); void mnt_pin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_pinned++; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { 
atomic_inc(&mnt->mnt_count); mnt->mnt_pinned--; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p) struct mnt_namespace *ns = p->ns; int res = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (p->event != ns->event) { p->event = ns->event; res = 1; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } @@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head) if (mnt->mnt_parent != mnt) { struct dentry *dentry; struct vfsmount *m; - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); dput(dentry); mntput(m); } @@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head) } } +/* + * vfsmount lock must be held for write + * namespace_sem must be held for write + */ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { struct vfsmount *p; @@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags) } down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); 
event++; if (!(flags & MNT_DETACH)) @@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(res, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (err) goto out_cleanup_ids; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); + return 0; out_cleanup_ids: @@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); for (m = mnt; m; m = (recurse ? 
next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } @@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; path->mnt->mnt_flags = mnt_flags; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); touch_mnt_namespace(path->mnt->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } return err; } @@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -1830,6 +1871,8 @@ resume: /* * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint + * + * vfsmount_lock must be held for write */ static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) { @@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, 
kfree(new_ns); return ERR_PTR(-ENOMEM); } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; path_put(&root_parent); @@ -2279,7 +2322,7 @@ out1: out0: return error; out3: - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); goto out2; } @@ -2326,6 +2369,8 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + br_lock_init(vfsmount_lock); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 26a510a7be09..6c2aad49d731 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,7 +63,6 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 help This option enables support for version 4 of the NFS protocol (RFC 3530) in the 
kernel's NFS client. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29539ceeb745..e257172d438c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp) /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); + if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { + /* This is a mountpoint, so d_revalidate will never + * have been called, so we need to refresh the + * inode (for close-open consistency) ourselves. + */ + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } return res; } @@ -1103,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_TRUNC); + openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); /* * Note: we're not holding inode->i_mutex and so may be racing with diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d141a74ae82..eb51bd6201da 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync) have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); - if (!ret) + if (!ret && status < 0) ret = status; return ret; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ffbb98ddec3..089da5b5d20a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; - fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + int open_flags = nd->intent.open.flags; + fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, 
struct nameidata *nd) if (!IS_POSIXACL(dir)) attr.ia_mode &= ~current_umask(); } else { + open_flags &= ~O_EXCL; attr.ia_valid = 0; - BUG_ON(nd->intent.open.flags & O_CREAT); + BUG_ON(open_flags & O_CREAT); } cred = rpc_lookup_cred(); @@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { @@ -2273,8 +2275,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct out: if (page) __free_page(page); - if (locations) - kfree(locations); + kfree(locations); return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ee26316ad1f4..ec3966e4706b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + seq_printf(m, ",lookupcache=none"); + else + seq_printf(m, ",lookupcache=pos"); + } } /* diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 503b9da159a3..95932f523aef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,7 +69,6 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL - select RPCSEC_GSS_KRB5 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..922263393c76 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); @@ -400,9 +400,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); + err = -ENOMEM; sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); if (!sbi->s_ifile) - return -ENOMEM; + goto delist; down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, @@ -433,6 +434,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; + delist: down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); up_write(&nilfs->ns_super_sem); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 37de1f062d81..4317f177ea7c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -608,11 
+608,11 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EINVAL; } - if (swp) { + if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. " "using spare superblock.\n"); + if (swp) nilfs_swap_super_block(nilfs); - } nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); @@ -775,6 +775,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); if (ret < 0) return ret; @@ -785,7 +786,8 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, BLKDEV_IFL_BARRIER); + GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); return ret; } diff --git a/fs/open.c b/fs/open.c index 630715f9f73d..d74e1983e8dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/fs/pnode.c b/fs/pnode.c index 5cc564a83149..8066b8dd748f 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt) return 0; } +/* + * vfsmount lock must be held for write + */ void change_mnt_propagation(struct vfsmount *mnt, int type) { if (type == MS_SHARED) { @@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); 
umount_tree(child, 0, &umount_list); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); return ret; } @@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * other mounts its parent propagates to. * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. + * + * vfsmount lock must be held for read or write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { @@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt) * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. * @list: the list of mounts to be unmounted. + * + * vfsmount lock must be held for write */ int propagate_umount(struct list_head *list) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae35413dcbe1..caa758377d66 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode) dquot_drop(inode); inode->i_blocks = 0; reiserfs_write_unlock_once(inode->i_sb, depth); + return; no_delete: end_writeback(inode); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct 
list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - 
ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", 
inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, 
int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 15412fe15c3a..b552f816de15 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -852,8 +852,8 @@ xfs_convert_page( SetPageUptodate(page); if (count) { - wbc->nr_to_write--; - if (wbc->nr_to_write <= 0) + if (--wbc->nr_to_write <= 0 && + wbc->sync_mode == WB_SYNC_NONE) done = 1; } xfs_start_page_writeback(page, !page_dirty, count); @@ -1068,7 +1068,7 @@ xfs_vm_writepage( * by themselves. */ if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) - goto out_fail; + goto redirty; /* * We need a transaction if there are delalloc or unwritten buffers @@ -1080,7 +1080,7 @@ xfs_vm_writepage( */ xfs_count_page_state(page, &delalloc, &unwritten); if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) - goto out_fail; + goto redirty; /* Is this page beyond the end of the file? 
*/ offset = i_size_read(inode); @@ -1245,12 +1245,15 @@ error: if (iohead) xfs_cancel_ioend(iohead); + if (err == -EAGAIN) + goto redirty; + xfs_aops_discard_page(page); ClearPageUptodate(page); unlock_page(page); return err; -out_fail: +redirty: redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 15c35b62ff14..a4e07974955b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1226,6 +1226,7 @@ xfs_fs_statfs( struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; + __int64_t ffree; statp->f_type = XFS_SB_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -1249,7 +1250,11 @@ xfs_fs_statfs( statp->f_files = min_t(typeof(statp->f_files), statp->f_files, mp->m_maxicount); - statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + + /* make sure statp->f_ffree does not underflow */ + ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + statp->f_ffree = max_t(__int64_t, ffree, 0); + spin_unlock(&mp->m_sb_lock); if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || @@ -1402,7 +1407,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp); + return -xfs_fs_log_dummy(mp, SYNC_WAIT); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index dfcbd98d1599..d59c4a65d492 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -34,6 +34,7 @@ #include "xfs_inode_item.h" #include "xfs_quota.h" #include "xfs_trace.h" +#include "xfs_fsops.h" #include <linux/kthread.h> #include <linux/freezer.h> @@ -341,38 +342,6 @@ xfs_sync_attr( } STATIC int -xfs_commit_dummy_trans( - struct xfs_mount *mp, - uint flags) -{ - struct xfs_inode *ip = mp->m_rootip; - struct xfs_trans *tp; - int error; - - /* - * Put a dummy transaction in the log to tell recovery - * that all others are OK. 
- */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); - return error; -} - -STATIC int xfs_sync_fsdata( struct xfs_mount *mp) { @@ -432,7 +401,7 @@ xfs_quiesce_data( /* mark the log as covered if needed */ if (xfs_log_need_covered(mp)) - error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); + error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -563,7 +532,7 @@ xfs_flush_inodes( /* * Every sync period we need to unpin all items, reclaim inodes and sync * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle. + * filesystem is idle and not frozen. */ STATIC void xfs_sync_worker( @@ -577,8 +546,9 @@ xfs_sync_worker( xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); - if (xfs_log_need_covered(mp)) - error = xfs_commit_dummy_trans(mp, 0); + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp, 0); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index dbca5f5c37ba..43b1d5699335 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -604,31 +604,36 @@ out: return 0; } +/* + * Dump a transaction into the log that contains no real change. This is needed + * to be able to make the log dirty or stamp the current tail LSN into the log + * during the covering operation. 
+ * + * We cannot use an inode here for this - that will push dirty state back up + * into the VFS and then periodic inode flushing will prevent log covering from + * making progress. Hence we log a field in the superblock instead. + */ int xfs_fs_log_dummy( - xfs_mount_t *mp) + xfs_mount_t *mp, + int flags) { xfs_trans_t *tp; - xfs_inode_t *ip; int error; tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return error; } - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; + /* log the UUID because it is an unchanging field */ + xfs_mod_sb(tp, XFS_SB_UUID); + if (flags & SYNC_WAIT) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); } int diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 88435e0a77c9..a786c5212c1e 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern int xfs_fs_log_dummy(xfs_mount_t *mp); +extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); #endif /* __XFS_FSOPS_H__ */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index abf80ae1e95b..5371d2dc360e 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1213,7 +1213,6 @@ xfs_imap_lookup( struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; struct xfs_buf *agbp; - xfs_agino_t startino; int error; int i; @@ -1227,13 +1226,13 @@ xfs_imap_lookup( } /* - * derive and lookup the exact inode record for the given agino. 
If the - * record cannot be found, then it's an invalid inode number and we - * should abort. + * Lookup the inode record for the given agino. If the record cannot be + * found, then it's an invalid inode number and we should abort. Once + * we have a record, we need to ensure it contains the inode number + * we are looking up. */ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); - error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) error = xfs_inobt_get_rec(cur, &rec, &i); @@ -1246,6 +1245,11 @@ xfs_imap_lookup( if (error) return error; + /* check that the returned record contains the required inode */ + if (rec.ir_startino > agino || + rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) + return EINVAL; + /* for untrusted inodes check it is allocated first */ if ((flags & XFS_IGET_UNTRUSTED) && (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 68415cb4f23c..34798f391c49 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1914,6 +1914,11 @@ xfs_iunlink_remove( return 0; } +/* + * A big issue when freeing the inode cluster is that we _cannot_ skip any + * inodes that are in memory - they all must be marked stale and attached to + * the cluster buffer. + */ STATIC void xfs_ifree_cluster( xfs_inode_t *free_ip, @@ -1945,8 +1950,6 @@ xfs_ifree_cluster( } for (j = 0; j < nbufs; j++, inum += ninodes) { - int found = 0; - blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); @@ -1965,7 +1968,9 @@ xfs_ifree_cluster( /* * Walk the inodes already attached to the buffer and mark them * stale. These will all have the flush locks held, so an - * in-memory inode walk can't lock them. + * in-memory inode walk can't lock them.
By marking them all + * stale first, we will not attempt to lock them in the loop + * below as the XFS_ISTALE flag will be set. */ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); while (lip) { @@ -1977,11 +1982,11 @@ xfs_ifree_cluster( &iip->ili_flush_lsn, &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); - found++; } lip = lip->li_bio_list; } + /* * For each inode in memory attempt to add it to the inode * buffer and set it up for being staled on buffer IO @@ -1993,6 +1998,7 @@ xfs_ifree_cluster( * even trying to lock them. */ for (i = 0; i < ninodes; i++) { +retry: read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, (inum + i))); @@ -2003,38 +2009,36 @@ xfs_ifree_cluster( continue; } - /* don't try to lock/unlock the current inode */ + /* + * Don't try to lock/unlock the current inode, but we + * _cannot_ skip the other inodes that we did not find + * in the list attached to the buffer and are not + * already marked stale. If we can't lock it, back off + * and retry. + */ if (ip != free_ip && !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { read_unlock(&pag->pag_ici_lock); - continue; + delay(1); + goto retry; } read_unlock(&pag->pag_ici_lock); - if (!xfs_iflock_nowait(ip)) { - if (ip != free_ip) - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } - + xfs_iflock(ip); xfs_iflags_set(ip, XFS_ISTALE); - if (xfs_inode_clean(ip)) { - ASSERT(ip != free_ip); - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } + /* + * we don't need to attach clean inodes or those only + * with unlogged changes (which we throw away, anyway). 
+ */ iip = ip->i_itemp; - if (!iip) { - /* inode with unlogged changes only */ + if (!iip || xfs_inode_clean(ip)) { ASSERT(ip != free_ip); ip->i_update_core = 0; xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); continue; } - found++; iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; @@ -2049,8 +2053,7 @@ xfs_ifree_cluster( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if (found) - xfs_trans_stale_inode_buf(tp, bp); + xfs_trans_stale_inode_buf(tp, bp); xfs_trans_binval(tp, bp); } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 925d572bf0f4..33f718f92a48 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3015,7 +3015,8 @@ _xfs_log_force( XFS_STATS_INC(xs_log_force); - xlog_cil_push(log, 1); + if (log->l_cilp) + xlog_cil_force(log); spin_lock(&log->l_icloglock); @@ -3167,7 +3168,7 @@ _xfs_log_force_lsn( XFS_STATS_INC(xs_log_force); if (log->l_cilp) { - lsn = xlog_cil_push_lsn(log, lsn); + lsn = xlog_cil_force_lsn(log, lsn); if (lsn == NULLCOMMITLSN) return 0; } @@ -3724,7 +3725,7 @@ xfs_log_force_umount( * call below. 
*/ if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) - xlog_cil_push(log, 1); + xlog_cil_force(log); /* * We must hold both the GRANT lock and the LOG lock, diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 31e4ea2d19ac..ed575fb4b495 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -68,6 +68,7 @@ xlog_cil_init( ctx->sequence = 1; ctx->cil = cil; cil->xc_ctx = ctx; + cil->xc_current_sequence = ctx->sequence; cil->xc_log = log; log->l_cilp = cil; @@ -269,15 +270,10 @@ xlog_cil_insert( static void xlog_cil_format_items( struct log *log, - struct xfs_log_vec *log_vector, - struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn) + struct xfs_log_vec *log_vector) { struct xfs_log_vec *lv; - if (start_lsn) - *start_lsn = log->l_cilp->xc_ctx->sequence; - ASSERT(log_vector); for (lv = log_vector; lv; lv = lv->lv_next) { void *ptr; @@ -301,9 +297,24 @@ xlog_cil_format_items( ptr += vec->i_len; } ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); + } +} + +static void +xlog_cil_insert_items( + struct log *log, + struct xfs_log_vec *log_vector, + struct xlog_ticket *ticket, + xfs_lsn_t *start_lsn) +{ + struct xfs_log_vec *lv; + + if (start_lsn) + *start_lsn = log->l_cilp->xc_ctx->sequence; + ASSERT(log_vector); + for (lv = log_vector; lv; lv = lv->lv_next) xlog_cil_insert(log, ticket, lv->lv_item, lv); - } } static void @@ -321,80 +332,6 @@ xlog_cil_free_logvec( } /* - * Commit a transaction with the given vector to the Committed Item List. - * - * To do this, we need to format the item, pin it in memory if required and - * account for the space used by the transaction. Once we have done that we - * need to release the unused reservation for the transaction, attach the - * transaction to the checkpoint context so we carry the busy extents through - * to checkpoint completion, and then unlock all the items in the transaction. 
- * - * For more specific information about the order of operations in - * xfs_log_commit_cil() please refer to the comments in - * xfs_trans_commit_iclog(). - * - * Called with the context lock already held in read mode to lock out - * background commit, returns without it held once background commits are - * allowed again. - */ -int -xfs_log_commit_cil( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_log_vec *log_vector, - xfs_lsn_t *commit_lsn, - int flags) -{ - struct log *log = mp->m_log; - int log_flags = 0; - int push = 0; - - if (flags & XFS_TRANS_RELEASE_LOG_RES) - log_flags = XFS_LOG_REL_PERM_RESERV; - - if (XLOG_FORCED_SHUTDOWN(log)) { - xlog_cil_free_logvec(log_vector); - return XFS_ERROR(EIO); - } - - /* lock out background commit */ - down_read(&log->l_cilp->xc_ctx_lock); - xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); - - /* check we didn't blow the reservation */ - if (tp->t_ticket->t_curr_res < 0) - xlog_print_tic_res(log->l_mp, tp->t_ticket); - - /* attach the transaction to the CIL if it has any busy extents */ - if (!list_empty(&tp->t_busy)) { - spin_lock(&log->l_cilp->xc_cil_lock); - list_splice_init(&tp->t_busy, - &log->l_cilp->xc_ctx->busy_extents); - spin_unlock(&log->l_cilp->xc_cil_lock); - } - - tp->t_commit_lsn = *commit_lsn; - xfs_log_done(mp, tp->t_ticket, NULL, log_flags); - xfs_trans_unreserve_and_mod_sb(tp); - - /* check for background commit before unlock */ - if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) - push = 1; - up_read(&log->l_cilp->xc_ctx_lock); - - /* - * We need to push CIL every so often so we don't cache more than we - * can fit in the log. The limit really is that a checkpoint can't be - * more than half the log (the current checkpoint is not allowed to - * overwrite the previous checkpoint), but commit latency and memory - * usage limit this to a smaller size in most cases. 
- */ - if (push) - xlog_cil_push(log, 0); - return 0; -} - -/* * Mark all items committed and clear busy extents. We free the log vector * chains in a separate pass so that we unpin the log items as quickly as * possible. @@ -427,13 +364,23 @@ xlog_cil_committed( } /* - * Push the Committed Item List to the log. If the push_now flag is not set, - * then it is a background flush and so we can chose to ignore it. + * Push the Committed Item List to the log. If @push_seq is zero, then it + * is a background flush and so we can choose to ignore it. Otherwise, if the + * current sequence is the same as @push_seq we need to do a flush. If + * @push_seq is less than the current sequence, then it has already been + * flushed and we don't need to do anything - the caller will wait for it to + * complete if necessary. + * + * @push_seq is a value rather than a flag because that allows us to do an + * unlocked check of the sequence number for a match. Hence we can allow log + * forces to run racily and not issue pushes for the same sequence twice. If we + * get a race between multiple pushes for the same sequence they will block on + * the first one and then abort, hence avoiding needless pushes.
*/ -int +STATIC int xlog_cil_push( struct log *log, - int push_now) + xfs_lsn_t push_seq) { struct xfs_cil *cil = log->l_cilp; struct xfs_log_vec *lv; @@ -453,12 +400,14 @@ xlog_cil_push( if (!cil) return 0; + ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); + new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); /* lock out transaction commit, but don't block on background push */ if (!down_write_trylock(&cil->xc_ctx_lock)) { - if (!push_now) + if (!push_seq) goto out_free_ticket; down_write(&cil->xc_ctx_lock); } @@ -469,7 +418,11 @@ xlog_cil_push( goto out_skip; /* check for spurious background flush */ - if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) + if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) + goto out_skip; + + /* check for a previously pushed sequence */ + if (push_seq < cil->xc_ctx->sequence) goto out_skip; /* @@ -515,6 +468,13 @@ xlog_cil_push( cil->xc_ctx = new_ctx; /* + * mirror the new sequence into the cil structure so that we can do + * unlocked checks against the current sequence in log forces without + * risking dereferencing a freed context pointer. + */ + cil->xc_current_sequence = new_ctx->sequence; + + /* * The switch is now done, so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so @@ -626,6 +586,102 @@ out_abort: } /* + * Commit a transaction with the given vector to the Committed Item List. + * + * To do this, we need to format the item, pin it in memory if required and + * account for the space used by the transaction. Once we have done that we + * need to release the unused reservation for the transaction, attach the + * transaction to the checkpoint context so we carry the busy extents through + * to checkpoint completion, and then unlock all the items in the transaction.
+ * + * For more specific information about the order of operations in + * xfs_log_commit_cil() please refer to the comments in + * xfs_trans_commit_iclog(). + * + * Called with the context lock already held in read mode to lock out + * background commit, returns without it held once background commits are + * allowed again. + */ +int +xfs_log_commit_cil( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_log_vec *log_vector, + xfs_lsn_t *commit_lsn, + int flags) +{ + struct log *log = mp->m_log; + int log_flags = 0; + int push = 0; + + if (flags & XFS_TRANS_RELEASE_LOG_RES) + log_flags = XFS_LOG_REL_PERM_RESERV; + + if (XLOG_FORCED_SHUTDOWN(log)) { + xlog_cil_free_logvec(log_vector); + return XFS_ERROR(EIO); + } + + /* + * do all the hard work of formatting items (including memory + * allocation) outside the CIL context lock. This prevents stalling CIL + * pushes when we are low on memory and a transaction commit spends a + * lot of time in memory reclaim. + */ + xlog_cil_format_items(log, log_vector); + + /* lock out background commit */ + down_read(&log->l_cilp->xc_ctx_lock); + xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); + + /* check we didn't blow the reservation */ + if (tp->t_ticket->t_curr_res < 0) + xlog_print_tic_res(log->l_mp, tp->t_ticket); + + /* attach the transaction to the CIL if it has any busy extents */ + if (!list_empty(&tp->t_busy)) { + spin_lock(&log->l_cilp->xc_cil_lock); + list_splice_init(&tp->t_busy, + &log->l_cilp->xc_ctx->busy_extents); + spin_unlock(&log->l_cilp->xc_cil_lock); + } + + tp->t_commit_lsn = *commit_lsn; + xfs_log_done(mp, tp->t_ticket, NULL, log_flags); + xfs_trans_unreserve_and_mod_sb(tp); + + /* + * Once all the items of the transaction have been copied to the CIL, + * the items can be unlocked and freed. + * + * This needs to be done before we drop the CIL context lock because we + * have to update state in the log items and unlock them before they go + * to disk. 
If we don't, then the CIL checkpoint can race with us and + * we can run checkpoint completion before we've updated and unlocked + * the log items. This affects (at least) processing of stale buffers, + * inodes and EFIs. + */ + xfs_trans_free_items(tp, *commit_lsn, 0); + + /* check for background commit before unlock */ + if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) + push = 1; + + up_read(&log->l_cilp->xc_ctx_lock); + + /* + * We need to push CIL every so often so we don't cache more than we + * can fit in the log. The limit really is that a checkpoint can't be + * more than half the log (the current checkpoint is not allowed to + * overwrite the previous checkpoint), but commit latency and memory + * usage limit this to a smaller size in most cases. + */ + if (push) + xlog_cil_push(log, 0); + return 0; +} + +/* * Conditionally push the CIL based on the sequence passed in. * * We only need to push if we haven't already pushed the sequence @@ -639,39 +695,34 @@ out_abort: * commit lsn is there. It'll be empty, so this is broken for now. */ xfs_lsn_t -xlog_cil_push_lsn( +xlog_cil_force_lsn( struct log *log, - xfs_lsn_t push_seq) + xfs_lsn_t sequence) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; xfs_lsn_t commit_lsn = NULLCOMMITLSN; -restart: - down_write(&cil->xc_ctx_lock); - ASSERT(push_seq <= cil->xc_ctx->sequence); - - /* check to see if we need to force out the current context */ - if (push_seq == cil->xc_ctx->sequence) { - up_write(&cil->xc_ctx_lock); - xlog_cil_push(log, 1); - goto restart; - } + ASSERT(sequence <= cil->xc_current_sequence); + + /* + * check to see if we need to force out the current context. + * xlog_cil_push() handles racing pushes for the same sequence, + * so no need to deal with it here. + */ + if (sequence == cil->xc_current_sequence) + xlog_cil_push(log, sequence); /* * See if we can find a previous sequence still committing. 
- * We can drop the flush lock as soon as we have the cil lock - * because we are now only comparing contexts protected by - * the cil lock. - * * We need to wait for all previous sequence commits to complete * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. */ +restart: spin_lock(&cil->xc_cil_lock); - up_write(&cil->xc_ctx_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { - if (ctx->sequence > push_seq) + if (ctx->sequence > sequence) continue; if (!ctx->commit_lsn) { /* @@ -681,7 +732,7 @@ restart: sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); goto restart; } - if (ctx->sequence != push_seq) + if (ctx->sequence != sequence) continue; /* found it! */ commit_lsn = ctx->commit_lsn; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 8c072618965c..ced52b98b322 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -422,6 +422,7 @@ struct xfs_cil { struct rw_semaphore xc_ctx_lock; struct list_head xc_committing; sv_t xc_commit_wait; + xfs_lsn_t xc_current_sequence; }; /* @@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log); void xlog_cil_init_post_recovery(struct log *log); void xlog_cil_destroy(struct log *log); -int xlog_cil_push(struct log *log, int push_now); -xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); +/* + * CIL force routines + */ +xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); + +static inline void +xlog_cil_force(struct log *log) +{ + xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); +} /* * Unmount record type is used as a pseudo transaction type for the ticket. diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fdca7416c754..1c47edaea0d2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1167,7 +1167,7 @@ xfs_trans_del_item( * Unlock all of the items of a transaction and free all the descriptors * of that transaction. 
*/ -STATIC void +void xfs_trans_free_items( struct xfs_trans *tp, xfs_lsn_t commit_lsn, @@ -1653,9 +1653,6 @@ xfs_trans_commit_cil( return error; current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - - /* xfs_trans_free_items() unlocks them first */ - xfs_trans_free_items(tp, *commit_lsn, 0); xfs_trans_free(tp); return 0; } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index e2d93d8ead7b..62da86c90de5 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -25,7 +25,8 @@ struct xfs_trans; void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); - +void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, + int flags); void xfs_trans_item_committed(struct xfs_log_item *lip, xfs_lsn_t commit_lsn, int aborted); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |