summaryrefslogtreecommitdiffstats
path: root/fs/cifs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/cifs')
-rw-r--r--fs/cifs/Kconfig23
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cifs_debug.c208
-rw-r--r--fs/cifs/cifsacl.c2
-rw-r--r--fs/cifs/cifsencrypt.c3
-rw-r--r--fs/cifs/cifsfs.c18
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h35
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c26
-rw-r--r--fs/cifs/connect.c251
-rw-r--r--fs/cifs/file.c43
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/misc.c14
-rw-r--r--fs/cifs/smb1ops.c4
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2misc.c2
-rw-r--r--fs/cifs/smb2ops.c77
-rw-r--r--fs/cifs/smb2pdu.c578
-rw-r--r--fs/cifs/smb2pdu.h174
-rw-r--r--fs/cifs/smb2proto.h3
-rw-r--r--fs/cifs/smbdirect.c2612
-rw-r--r--fs/cifs/smbdirect.h338
-rw-r--r--fs/cifs/transport.c69
24 files changed, 4032 insertions, 460 deletions
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index d5b2e12b5d02..687da62daf4e 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -108,14 +108,13 @@ config CIFS_XATTR
depends on CIFS
help
Extended attributes are name:value pairs associated with inodes by
- the kernel or by users (see the attr(5) manual page, or visit
- <http://acl.bestbits.at/> for details). CIFS maps the name of
- extended attributes beginning with the user namespace prefix
- to SMB/CIFS EAs. EAs are stored on Windows servers without the
- user namespace prefix, but their names are seen by Linux cifs clients
- prefaced by the user namespace prefix. The system namespace
- (used by some filesystems to store ACLs) is not supported at
- this time.
+ the kernel or by users (see the attr(5) manual page for details).
+ CIFS maps the name of extended attributes beginning with the user
+ namespace prefix to SMB/CIFS EAs. EAs are stored on Windows
+ servers without the user namespace prefix, but their names are
+ seen by Linux cifs clients prefaced by the user namespace prefix.
+ The system namespace (used by some filesystems to store ACLs) is
+ not supported at this time.
If unsure, say Y.
@@ -196,6 +195,14 @@ config CIFS_SMB311
This dialect includes improved security negotiation features.
If unsure, say N
+config CIFS_SMB_DIRECT
+ bool "SMB Direct support (Experimental)"
+ depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y
+ help
+ Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
+ SMB Direct allows transferring SMB packets over RDMA. If unsure,
+ say N.
+
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 7134f182720b..7e4a1e2f0696 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -19,3 +19,5 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
+
+cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index cbb9534b89b4..e35e711db68e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -30,6 +30,9 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#ifdef CONFIG_CIFS_SMB_DIRECT
+#include "smbdirect.h"
+#endif
void
cifs_dump_mem(char *label, void *data, int length)
@@ -107,6 +110,36 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
}
#ifdef CONFIG_PROC_FS
+static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
+{
+ __u32 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
+
+ seq_printf(m, "%s Mounts: %d ", tcon->treeName, tcon->tc_count);
+ if (tcon->nativeFileSystem)
+ seq_printf(m, "Type: %s ", tcon->nativeFileSystem);
+ seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x\n\tPathComponentMax: %d Status: %d",
+ le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
+ le32_to_cpu(tcon->fsAttrInfo.Attributes),
+ le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
+ tcon->tidStatus);
+ if (dev_type == FILE_DEVICE_DISK)
+ seq_puts(m, " type: DISK ");
+ else if (dev_type == FILE_DEVICE_CD_ROM)
+ seq_puts(m, " type: CDROM ");
+ else
+ seq_printf(m, " type: %d ", dev_type);
+ if (tcon->seal)
+ seq_printf(m, " Encrypted");
+ if (tcon->unix_ext)
+ seq_printf(m, " POSIX Extensions");
+ if (tcon->ses->server->ops->dump_share_caps)
+ tcon->ses->server->ops->dump_share_caps(m, tcon);
+
+ if (tcon->need_reconnect)
+ seq_puts(m, "\tDISCONNECTED ");
+ seq_putc(m, '\n');
+}
+
static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
{
struct list_head *tmp1, *tmp2, *tmp3;
@@ -115,7 +148,6 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses;
struct cifs_tcon *tcon;
int i, j;
- __u32 dev_type;
seq_puts(m,
"Display Internal CIFS Data Structures for Debugging\n"
@@ -152,7 +184,76 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
- seq_printf(m, "\nNumber of credits: %d", server->credits);
+
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (!server->rdma)
+ goto skip_rdma;
+
+ seq_printf(m, "\nSMBDirect (in hex) protocol version: %x "
+ "transport status: %x",
+ server->smbd_conn->protocol,
+ server->smbd_conn->transport_status);
+ seq_printf(m, "\nConn receive_credit_max: %x "
+ "send_credit_target: %x max_send_size: %x",
+ server->smbd_conn->receive_credit_max,
+ server->smbd_conn->send_credit_target,
+ server->smbd_conn->max_send_size);
+ seq_printf(m, "\nConn max_fragmented_recv_size: %x "
+ "max_fragmented_send_size: %x max_receive_size:%x",
+ server->smbd_conn->max_fragmented_recv_size,
+ server->smbd_conn->max_fragmented_send_size,
+ server->smbd_conn->max_receive_size);
+ seq_printf(m, "\nConn keep_alive_interval: %x "
+ "max_readwrite_size: %x rdma_readwrite_threshold: %x",
+ server->smbd_conn->keep_alive_interval,
+ server->smbd_conn->max_readwrite_size,
+ server->smbd_conn->rdma_readwrite_threshold);
+ seq_printf(m, "\nDebug count_get_receive_buffer: %x "
+ "count_put_receive_buffer: %x count_send_empty: %x",
+ server->smbd_conn->count_get_receive_buffer,
+ server->smbd_conn->count_put_receive_buffer,
+ server->smbd_conn->count_send_empty);
+ seq_printf(m, "\nRead Queue count_reassembly_queue: %x "
+ "count_enqueue_reassembly_queue: %x "
+ "count_dequeue_reassembly_queue: %x "
+ "fragment_reassembly_remaining: %x "
+ "reassembly_data_length: %x "
+ "reassembly_queue_length: %x",
+ server->smbd_conn->count_reassembly_queue,
+ server->smbd_conn->count_enqueue_reassembly_queue,
+ server->smbd_conn->count_dequeue_reassembly_queue,
+ server->smbd_conn->fragment_reassembly_remaining,
+ server->smbd_conn->reassembly_data_length,
+ server->smbd_conn->reassembly_queue_length);
+ seq_printf(m, "\nCurrent Credits send_credits: %x "
+ "receive_credits: %x receive_credit_target: %x",
+ atomic_read(&server->smbd_conn->send_credits),
+ atomic_read(&server->smbd_conn->receive_credits),
+ server->smbd_conn->receive_credit_target);
+ seq_printf(m, "\nPending send_pending: %x send_payload_pending:"
+ " %x smbd_send_pending: %x smbd_recv_pending: %x",
+ atomic_read(&server->smbd_conn->send_pending),
+ atomic_read(&server->smbd_conn->send_payload_pending),
+ server->smbd_conn->smbd_send_pending,
+ server->smbd_conn->smbd_recv_pending);
+ seq_printf(m, "\nReceive buffers count_receive_queue: %x "
+ "count_empty_packet_queue: %x",
+ server->smbd_conn->count_receive_queue,
+ server->smbd_conn->count_empty_packet_queue);
+ seq_printf(m, "\nMR responder_resources: %x "
+ "max_frmr_depth: %x mr_type: %x",
+ server->smbd_conn->responder_resources,
+ server->smbd_conn->max_frmr_depth,
+ server->smbd_conn->mr_type);
+ seq_printf(m, "\nMR mr_ready_count: %x mr_used_count: %x",
+ atomic_read(&server->smbd_conn->mr_ready_count),
+ atomic_read(&server->smbd_conn->mr_used_count));
+skip_rdma:
+#endif
+ seq_printf(m, "\nNumber of credits: %d Dialect 0x%x",
+ server->credits, server->dialect);
+ if (server->sign)
+ seq_printf(m, " signed");
i++;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
@@ -176,6 +277,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
ses->ses_count, ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status);
}
+ if (server->rdma)
+ seq_printf(m, "RDMA\n\t");
seq_printf(m, "TCP status: %d\n\tLocal Users To "
"Server: %d SecMode: 0x%x Req On Wire: %d",
server->tcpStatus, server->srv_count,
@@ -189,35 +292,19 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_puts(m, "\n\tShares:");
j = 0;
+
+ seq_printf(m, "\n\t%d) IPC: ", j);
+ if (ses->tcon_ipc)
+ cifs_debug_tcon(m, ses->tcon_ipc);
+ else
+ seq_puts(m, "none\n");
+
list_for_each(tmp3, &ses->tcon_list) {
tcon = list_entry(tmp3, struct cifs_tcon,
tcon_list);
++j;
- dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
- seq_printf(m, "\n\t%d) %s Mounts: %d ", j,
- tcon->treeName, tcon->tc_count);
- if (tcon->nativeFileSystem) {
- seq_printf(m, "Type: %s ",
- tcon->nativeFileSystem);
- }
- seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
- "\n\tPathComponentMax: %d Status: %d",
- le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
- le32_to_cpu(tcon->fsAttrInfo.Attributes),
- le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
- tcon->tidStatus);
- if (dev_type == FILE_DEVICE_DISK)
- seq_puts(m, " type: DISK ");
- else if (dev_type == FILE_DEVICE_CD_ROM)
- seq_puts(m, " type: CDROM ");
- else
- seq_printf(m, " type: %d ", dev_type);
- if (server->ops->dump_share_caps)
- server->ops->dump_share_caps(m, tcon);
-
- if (tcon->need_reconnect)
- seq_puts(m, "\tDISCONNECTED ");
- seq_putc(m, '\n');
+ seq_printf(m, "\n\t%d) ", j);
+ cifs_debug_tcon(m, tcon);
}
seq_puts(m, "\n\tMIDs:\n");
@@ -374,6 +461,45 @@ static const struct file_operations cifs_stats_proc_fops = {
};
#endif /* STATS */
+#ifdef CONFIG_CIFS_SMB_DIRECT
+#define PROC_FILE_DEFINE(name) \
+static ssize_t name##_write(struct file *file, const char __user *buffer, \
+ size_t count, loff_t *ppos) \
+{ \
+ int rc; \
+ rc = kstrtoint_from_user(buffer, count, 10, & name); \
+ if (rc) \
+ return rc; \
+ return count; \
+} \
+static int name##_proc_show(struct seq_file *m, void *v) \
+{ \
+ seq_printf(m, "%d\n", name ); \
+ return 0; \
+} \
+static int name##_open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, name##_proc_show, NULL); \
+} \
+\
+static const struct file_operations cifs_##name##_proc_fops = { \
+ .open = name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+ .write = name##_write, \
+}
+
+PROC_FILE_DEFINE(rdma_readwrite_threshold);
+PROC_FILE_DEFINE(smbd_max_frmr_depth);
+PROC_FILE_DEFINE(smbd_keep_alive_interval);
+PROC_FILE_DEFINE(smbd_max_receive_size);
+PROC_FILE_DEFINE(smbd_max_fragmented_recv_size);
+PROC_FILE_DEFINE(smbd_max_send_size);
+PROC_FILE_DEFINE(smbd_send_credit_target);
+PROC_FILE_DEFINE(smbd_receive_credit_max);
+#endif
+
static struct proc_dir_entry *proc_fs_cifs;
static const struct file_operations cifsFYI_proc_fops;
static const struct file_operations cifs_lookup_cache_proc_fops;
@@ -401,6 +527,24 @@ cifs_proc_init(void)
&cifs_security_flags_proc_fops);
proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
&cifs_lookup_cache_proc_fops);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ proc_create("rdma_readwrite_threshold", 0, proc_fs_cifs,
+ &cifs_rdma_readwrite_threshold_proc_fops);
+ proc_create("smbd_max_frmr_depth", 0, proc_fs_cifs,
+ &cifs_smbd_max_frmr_depth_proc_fops);
+ proc_create("smbd_keep_alive_interval", 0, proc_fs_cifs,
+ &cifs_smbd_keep_alive_interval_proc_fops);
+ proc_create("smbd_max_receive_size", 0, proc_fs_cifs,
+ &cifs_smbd_max_receive_size_proc_fops);
+ proc_create("smbd_max_fragmented_recv_size", 0, proc_fs_cifs,
+ &cifs_smbd_max_fragmented_recv_size_proc_fops);
+ proc_create("smbd_max_send_size", 0, proc_fs_cifs,
+ &cifs_smbd_max_send_size_proc_fops);
+ proc_create("smbd_send_credit_target", 0, proc_fs_cifs,
+ &cifs_smbd_send_credit_target_proc_fops);
+ proc_create("smbd_receive_credit_max", 0, proc_fs_cifs,
+ &cifs_smbd_receive_credit_max_proc_fops);
+#endif
}
void
@@ -418,6 +562,16 @@ cifs_proc_clean(void)
remove_proc_entry("SecurityFlags", proc_fs_cifs);
remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ remove_proc_entry("rdma_readwrite_threshold", proc_fs_cifs);
+ remove_proc_entry("smbd_max_frmr_depth", proc_fs_cifs);
+ remove_proc_entry("smbd_keep_alive_interval", proc_fs_cifs);
+ remove_proc_entry("smbd_max_receive_size", proc_fs_cifs);
+ remove_proc_entry("smbd_max_fragmented_recv_size", proc_fs_cifs);
+ remove_proc_entry("smbd_max_send_size", proc_fs_cifs);
+ remove_proc_entry("smbd_send_credit_target", proc_fs_cifs);
+ remove_proc_entry("smbd_receive_credit_max", proc_fs_cifs);
+#endif
remove_proc_entry("fs/cifs", NULL);
}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index b98436f5c7c7..13a8a77322c9 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1125,7 +1125,7 @@ out:
return rc;
}
-/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
+/* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */
int
cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
struct inode *inode, const char *path,
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 68abbb0db608..f2b0a7f124da 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -325,9 +325,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
{
int i;
int rc;
- char password_with_pad[CIFS_ENCPWD_SIZE];
+ char password_with_pad[CIFS_ENCPWD_SIZE] = {0};
- memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
if (password)
strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 31b7565b1617..32cdea67bbfd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -327,6 +327,8 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
default:
seq_puts(s, "(unknown)");
}
+ if (server->rdma)
+ seq_puts(s, ",rdma");
}
static void
@@ -1068,6 +1070,7 @@ const struct file_operations cifs_file_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1086,6 +1089,7 @@ const struct file_operations cifs_file_strict_ops = {
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1105,6 +1109,7 @@ const struct file_operations cifs_file_direct_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
@@ -1122,6 +1127,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1139,6 +1145,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1157,6 +1164,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
@@ -1231,9 +1239,11 @@ cifs_init_request_bufs(void)
cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n",
CIFSMaxBufSize, CIFSMaxBufSize);
*/
- cifs_req_cachep = kmem_cache_create("cifs_request",
+ cifs_req_cachep = kmem_cache_create_usercopy("cifs_request",
CIFSMaxBufSize + max_hdr_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN, 0,
+ CIFSMaxBufSize + max_hdr_size,
+ NULL);
if (cifs_req_cachep == NULL)
return -ENOMEM;
@@ -1259,9 +1269,9 @@ cifs_init_request_bufs(void)
more SMBs to use small buffer alloc and is still much more
efficient to alloc 1 per page off the slab compared to 17K (5page)
alloc of large cifs buffers even when page debugging is on */
- cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq",
+ cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq",
MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL);
if (cifs_sm_req_cachep == NULL) {
mempool_destroy(cifs_req_poolp);
kmem_cache_destroy(cifs_req_cachep);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5a10e566f0e6..013ba2aed8d9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.10"
+#define CIFS_VERSION "2.11"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b16583594d1a..48f7c197cd2d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -64,8 +64,8 @@
#define RFC1001_NAME_LEN 15
#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1)
-/* currently length of NIP6_FMT */
-#define SERVER_NAME_LENGTH 40
+/* maximum length of ip addr as a string (including ipv6 and sctp) */
+#define SERVER_NAME_LENGTH 80
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
/* echo interval in seconds */
@@ -230,8 +230,14 @@ struct smb_version_operations {
__u64 (*get_next_mid)(struct TCP_Server_Info *);
/* data offset from read response message */
unsigned int (*read_data_offset)(char *);
- /* data length from read response message */
- unsigned int (*read_data_length)(char *);
+ /*
+ * Data length from read response message
+ * When in_remaining is true, the returned data length is in
+ * message field DataRemaining for out-of-band data read (e.g through
+ * Memory Registration RDMA write in SMBD).
+ * Otherwise, the returned data length is in message field DataLength.
+ */
+ unsigned int (*read_data_length)(char *, bool in_remaining);
/* map smb to linux error */
int (*map_error)(char *, bool);
/* find mid corresponding to the response message */
@@ -532,6 +538,7 @@ struct smb_vol {
bool nopersistent:1;
bool resilient:1; /* noresilient not required since not fored for CA */
bool domainauto:1;
+ bool rdma:1;
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -648,6 +655,10 @@ struct TCP_Server_Info {
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool large_buf; /* is current buffer large? */
+ /* use SMBD connection instead of socket */
+ bool rdma;
+ /* point to the SMBD connection if RDMA is used instead of socket */
+ struct smbd_connection *smbd_conn;
struct delayed_work echo; /* echo ping workqueue job */
char *smallbuf; /* pointer to current "small" buffer */
char *bigbuf; /* pointer to current "big" buffer */
@@ -822,12 +833,12 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
struct cifs_ses {
struct list_head smb_ses_list;
struct list_head tcon_list;
+ struct cifs_tcon *tcon_ipc;
struct mutex session_mutex;
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
enum statusEnum status;
unsigned overrideSecFlg; /* if non-zero override global sec flags */
- __u32 ipc_tid; /* special tid for connection to IPC share */
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
char *serverDomain; /* security realm of server */
@@ -835,8 +846,7 @@ struct cifs_ses {
kuid_t linux_uid; /* overriding owner of files on the mount */
kuid_t cred_uid; /* owner of credentials */
unsigned int capabilities;
- char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
- TCP names - will ipv6 and sctp addresses fit? */
+ char serverName[SERVER_NAME_LEN_WITH_NULL];
char *user_name; /* must not be null except during init of sess
and after mount option parsing we fill it */
char *domainName;
@@ -931,7 +941,9 @@ struct cifs_tcon {
FILE_SYSTEM_DEVICE_INFO fsDevInfo;
FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
FILE_SYSTEM_UNIX_INFO fsUnixInfo;
- bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */
+ bool ipc:1; /* set if connection to IPC$ share (always also pipe) */
+ bool pipe:1; /* set if connection to pipe share */
+ bool print:1; /* set if connection to printer share */
bool retry:1;
bool nocase:1;
bool seal:1; /* transport encryption for this mounted share */
@@ -944,7 +956,6 @@ struct cifs_tcon {
bool need_reopen_files:1; /* need to reopen tcon file handles */
bool use_resilient:1; /* use resilient instead of durable handles */
bool use_persistent:1; /* use persistent instead of durable handles */
- bool print:1; /* set if connection to printer share */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
@@ -1147,6 +1158,9 @@ struct cifs_readdata {
struct cifs_readdata *rdata,
struct iov_iter *iter);
struct kvec iov[2];
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ struct smbd_mr *mr;
+#endif
unsigned int pagesz;
unsigned int tailsz;
unsigned int credits;
@@ -1169,6 +1183,9 @@ struct cifs_writedata {
pid_t pid;
unsigned int bytes;
int result;
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ struct smbd_mr *mr;
+#endif
unsigned int pagesz;
unsigned int tailsz;
unsigned int credits;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4143c9dec463..93d565186698 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,6 +106,10 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
int * /* type of buf returned */, const int flags,
struct kvec * /* resp vec */);
+extern int smb2_send_recv(const unsigned int xid, struct cifs_ses *pses,
+ struct kvec *pkvec, int nvec_to_send,
+ int *pbuftype, const int flags,
+ struct kvec *presp);
extern int SendReceiveBlockingLock(const unsigned int xid,
struct cifs_tcon *ptcon,
struct smb_hdr *in_buf ,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 35dc5bf01ee2..9ceebf30eb22 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -43,6 +43,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "fscache.h"
+#include "smbdirect.h"
#ifdef CONFIG_CIFS_POSIX
static struct {
@@ -1454,6 +1455,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
struct cifs_readdata *rdata = mid->callback_data;
char *buf = server->smallbuf;
unsigned int buflen = get_rfc1002_length(buf) + 4;
+ bool use_rdma_mr = false;
cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
__func__, mid->mid, rdata->offset, rdata->bytes);
@@ -1542,8 +1544,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
rdata->iov[0].iov_base, server->total_read);
/* how much data is in the response? */
- data_len = server->ops->read_data_length(buf);
- if (data_offset + data_len > buflen) {
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ use_rdma_mr = rdata->mr;
+#endif
+ data_len = server->ops->read_data_length(buf, use_rdma_mr);
+ if (!use_rdma_mr && (data_offset + data_len > buflen)) {
/* data_len is corrupt -- discard frame */
rdata->result = -EIO;
return cifs_readv_discard(server, mid);
@@ -1923,6 +1928,12 @@ cifs_writedata_release(struct kref *refcount)
{
struct cifs_writedata *wdata = container_of(refcount,
struct cifs_writedata, refcount);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (wdata->mr) {
+ smbd_deregister_mr(wdata->mr);
+ wdata->mr = NULL;
+ }
+#endif
if (wdata->cfile)
cifsFileInfo_put(wdata->cfile);
@@ -4822,10 +4833,11 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
*target_nodes = NULL;
cifs_dbg(FYI, "In GetDFSRefer the path %s\n", search_name);
- if (ses == NULL)
+ if (ses == NULL || ses->tcon_ipc == NULL)
return -ENODEV;
+
getDFSRetry:
- rc = smb_init(SMB_COM_TRANSACTION2, 15, NULL, (void **) &pSMB,
+ rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
(void **) &pSMBr);
if (rc)
return rc;
@@ -4833,7 +4845,7 @@ getDFSRetry:
/* server pointer checked in called function,
but should never be null here anyway */
pSMB->hdr.Mid = get_next_mid(ses->server);
- pSMB->hdr.Tid = ses->ipc_tid;
+ pSMB->hdr.Tid = ses->tcon_ipc->tid;
pSMB->hdr.Uid = ses->Suid;
if (ses->capabilities & CAP_STATUS32)
pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS;
@@ -6331,9 +6343,7 @@ SetEARetry:
pSMB->InformationLevel =
cpu_to_le16(SMB_SET_FILE_EA);
- parm_data =
- (struct fealist *) (((char *) &pSMB->hdr.Protocol) +
- offset);
+ parm_data = (void *)pSMB + offsetof(struct smb_hdr, Protocol) + offset;
pSMB->ParameterOffset = cpu_to_le16(param_offset);
pSMB->DataOffset = cpu_to_le16(offset);
pSMB->SetupCount = 1;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0bfc2280436d..a726f524fb84 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -44,7 +44,6 @@
#include <net/ipv6.h>
#include <linux/parser.h>
#include <linux/bvec.h>
-
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
@@ -56,6 +55,7 @@
#include "rfc1002pdu.h"
#include "fscache.h"
#include "smb2proto.h"
+#include "smbdirect.h"
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -92,7 +92,7 @@ enum {
Opt_multiuser, Opt_sloppy, Opt_nosharesock,
Opt_persistent, Opt_nopersistent,
Opt_resilient, Opt_noresilient,
- Opt_domainauto,
+ Opt_domainauto, Opt_rdma,
/* Mount options which take numeric value */
Opt_backupuid, Opt_backupgid, Opt_uid,
@@ -183,6 +183,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_resilient, "resilienthandles"},
{ Opt_noresilient, "noresilienthandles"},
{ Opt_domainauto, "domainauto"},
+ { Opt_rdma, "rdma"},
{ Opt_backupuid, "backupuid=%s" },
{ Opt_backupgid, "backupgid=%s" },
@@ -353,11 +354,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
list_for_each(tmp, &server->smb_ses_list) {
ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
ses->need_reconnect = true;
- ses->ipc_tid = 0;
list_for_each(tmp2, &ses->tcon_list) {
tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
tcon->need_reconnect = true;
}
+ if (ses->tcon_ipc)
+ ses->tcon_ipc->need_reconnect = true;
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -405,7 +407,10 @@ cifs_reconnect(struct TCP_Server_Info *server)
/* we should try only the port we connected to before */
mutex_lock(&server->srv_mutex);
- rc = generic_ip_connect(server);
+ if (cifs_rdma_enabled(server))
+ rc = smbd_reconnect(server);
+ else
+ rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
mutex_unlock(&server->srv_mutex);
@@ -538,8 +543,10 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
if (server_unresponsive(server))
return -ECONNABORTED;
-
- length = sock_recvmsg(server->ssocket, smb_msg, 0);
+ if (cifs_rdma_enabled(server) && server->smbd_conn)
+ length = smbd_recv(server->smbd_conn, smb_msg);
+ else
+ length = sock_recvmsg(server->ssocket, smb_msg, 0);
if (server->tcpStatus == CifsExiting)
return -ESHUTDOWN;
@@ -700,7 +707,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
wake_up_all(&server->request_q);
/* give those requests time to exit */
msleep(125);
-
+ if (cifs_rdma_enabled(server) && server->smbd_conn) {
+ smbd_destroy(server->smbd_conn);
+ server->smbd_conn = NULL;
+ }
if (server->ssocket) {
sock_release(server->ssocket);
server->ssocket = NULL;
@@ -1550,6 +1560,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
case Opt_domainauto:
vol->domainauto = true;
break;
+ case Opt_rdma:
+ vol->rdma = true;
+ break;
/* Numeric Values */
case Opt_backupuid:
@@ -1707,7 +1720,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
tmp_end++;
if (!(tmp_end < end && tmp_end[1] == delim)) {
/* No it is not. Set the password to NULL */
- kfree(vol->password);
+ kzfree(vol->password);
vol->password = NULL;
break;
}
@@ -1745,7 +1758,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
options = end;
}
- kfree(vol->password);
+ kzfree(vol->password);
/* Now build new password string */
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
@@ -1951,6 +1964,19 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
+ if (vol->rdma && vol->vals->protocol_id < SMB30_PROT_ID) {
+ cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
+ goto cifs_parse_mount_err;
+ }
+
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (vol->rdma && vol->sign) {
+ cifs_dbg(VFS, "Currently SMB direct doesn't support signing."
+ " This is being fixed\n");
+ goto cifs_parse_mount_err;
+ }
+#endif
+
#ifndef CONFIG_KEYS
/* Muliuser mounts require CONFIG_KEYS support */
if (vol->multiuser) {
@@ -2162,6 +2188,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
if (server->echo_interval != vol->echo_interval * HZ)
return 0;
+ if (server->rdma != vol->rdma)
+ return 0;
+
return 1;
}
@@ -2260,6 +2289,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->noblocksnd = volume_info->noblocksnd;
tcp_ses->noautotune = volume_info->noautotune;
tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
+ tcp_ses->rdma = volume_info->rdma;
tcp_ses->in_flight = 0;
tcp_ses->credits = 1;
init_waitqueue_head(&tcp_ses->response_q);
@@ -2297,13 +2327,29 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->echo_interval = volume_info->echo_interval * HZ;
else
tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
-
+ if (tcp_ses->rdma) {
+#ifndef CONFIG_CIFS_SMB_DIRECT
+ cifs_dbg(VFS, "CONFIG_CIFS_SMB_DIRECT is not enabled\n");
+ rc = -ENOENT;
+ goto out_err_crypto_release;
+#endif
+ tcp_ses->smbd_conn = smbd_get_connection(
+ tcp_ses, (struct sockaddr *)&volume_info->dstaddr);
+ if (tcp_ses->smbd_conn) {
+ cifs_dbg(VFS, "RDMA transport established\n");
+ rc = 0;
+ goto smbd_connected;
+ } else {
+ rc = -ENOENT;
+ goto out_err_crypto_release;
+ }
+ }
rc = ip_connect(tcp_ses);
if (rc < 0) {
cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
goto out_err_crypto_release;
}
-
+smbd_connected:
/*
* since we're in a cifs function already, we know that
* this will succeed. No need for try_module_get().
@@ -2381,6 +2427,93 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
return 1;
}
+/**
+ * cifs_setup_ipc - helper to setup the IPC tcon for the session
+ *
+ * A new IPC connection is made and stored in the session
+ * tcon_ipc. The IPC tcon has the same lifetime as the session.
+ */
+static int
+cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
+{
+ int rc = 0, xid;
+ struct cifs_tcon *tcon;
+ struct nls_table *nls_codepage;
+ char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0};
+ bool seal = false;
+
+ /*
+ * If the mount request that resulted in the creation of the
+ * session requires encryption, force IPC to be encrypted too.
+ */
+ if (volume_info->seal) {
+ if (ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)
+ seal = true;
+ else {
+ cifs_dbg(VFS,
+ "IPC: server doesn't support encryption\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ tcon = tconInfoAlloc();
+ if (tcon == NULL)
+ return -ENOMEM;
+
+ snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->serverName);
+
+ /* cannot fail */
+ nls_codepage = load_nls_default();
+
+ xid = get_xid();
+ tcon->ses = ses;
+ tcon->ipc = true;
+ tcon->seal = seal;
+ rc = ses->server->ops->tree_connect(xid, ses, unc, tcon, nls_codepage);
+ free_xid(xid);
+
+ if (rc) {
+ cifs_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc);
+ tconInfoFree(tcon);
+ goto out;
+ }
+
+ cifs_dbg(FYI, "IPC tcon rc = %d ipc tid = %d\n", rc, tcon->tid);
+
+ ses->tcon_ipc = tcon;
+out:
+ unload_nls(nls_codepage);
+ return rc;
+}
+
+/**
+ * cifs_free_ipc - helper to release the session IPC tcon
+ *
+ * Needs to be called everytime a session is destroyed
+ */
+static int
+cifs_free_ipc(struct cifs_ses *ses)
+{
+ int rc = 0, xid;
+ struct cifs_tcon *tcon = ses->tcon_ipc;
+
+ if (tcon == NULL)
+ return 0;
+
+ if (ses->server->ops->tree_disconnect) {
+ xid = get_xid();
+ rc = ses->server->ops->tree_disconnect(xid, tcon);
+ free_xid(xid);
+ }
+
+ if (rc)
+ cifs_dbg(FYI, "failed to disconnect IPC tcon (rc=%d)\n", rc);
+
+ tconInfoFree(tcon);
+ ses->tcon_ipc = NULL;
+ return rc;
+}
+
static struct cifs_ses *
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
{
@@ -2421,6 +2554,8 @@ cifs_put_smb_ses(struct cifs_ses *ses)
ses->status = CifsExiting;
spin_unlock(&cifs_tcp_ses_lock);
+ cifs_free_ipc(ses);
+
if (ses->status == CifsExiting && server->ops->logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
@@ -2569,6 +2704,13 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
}
#endif /* CONFIG_KEYS */
+/**
+ * cifs_get_smb_ses - get a session matching @volume_info data from @server
+ *
+ * This function assumes it is being called from cifs_mount() where we
+ * already got a server reference (server refcount +1). See
+ * cifs_get_tcon() for refcount explanations.
+ */
static struct cifs_ses *
cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
{
@@ -2665,6 +2807,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
spin_unlock(&cifs_tcp_ses_lock);
free_xid(xid);
+
+ cifs_setup_ipc(ses, volume_info);
+
return ses;
get_ses_fail:
@@ -2709,8 +2854,16 @@ void
cifs_put_tcon(struct cifs_tcon *tcon)
{
unsigned int xid;
- struct cifs_ses *ses = tcon->ses;
+ struct cifs_ses *ses;
+
+ /*
+ * IPC tcon share the lifetime of their session and are
+ * destroyed in the session put function
+ */
+ if (tcon == NULL || tcon->ipc)
+ return;
+ ses = tcon->ses;
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
if (--tcon->tc_count > 0) {
@@ -2731,6 +2884,26 @@ cifs_put_tcon(struct cifs_tcon *tcon)
cifs_put_smb_ses(ses);
}
+/**
+ * cifs_get_tcon - get a tcon matching @volume_info data from @ses
+ *
+ * - tcon refcount is the number of mount points using the tcon.
+ * - ses refcount is the number of tcon using the session.
+ *
+ * 1. This function assumes it is being called from cifs_mount() where
+ * we already got a session reference (ses refcount +1).
+ *
+ * 2. Since we're in the context of adding a mount point, the end
+ * result should be either:
+ *
+ * a) a new tcon already allocated with refcount=1 (1 mount point) and
+ * its session refcount incremented (1 new tcon). This +1 was
+ * already done in (1).
+ *
+ * b) an existing tcon with refcount+1 (add a mount point to it) and
+ * identical ses refcount (no new tcon). Because of (1) we need to
+ * decrement the ses refcount.
+ */
static struct cifs_tcon *
cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
{
@@ -2739,8 +2912,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
tcon = cifs_find_tcon(ses, volume_info);
if (tcon) {
+ /*
+ * tcon has refcount already incremented but we need to
+ * decrement extra ses reference gotten by caller (case b)
+ */
cifs_dbg(FYI, "Found match on UNC path\n");
- /* existing tcon already has a reference */
cifs_put_smb_ses(ses);
return tcon;
}
@@ -2986,39 +3162,17 @@ get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path,
const struct nls_table *nls_codepage, unsigned int *num_referrals,
struct dfs_info3_param **referrals, int remap)
{
- char *temp_unc;
int rc = 0;
- if (!ses->server->ops->tree_connect || !ses->server->ops->get_dfs_refer)
+ if (!ses->server->ops->get_dfs_refer)
return -ENOSYS;
*num_referrals = 0;
*referrals = NULL;
- if (ses->ipc_tid == 0) {
- temp_unc = kmalloc(2 /* for slashes */ +
- strnlen(ses->serverName, SERVER_NAME_LEN_WITH_NULL * 2)
- + 1 + 4 /* slash IPC$ */ + 2, GFP_KERNEL);
- if (temp_unc == NULL)
- return -ENOMEM;
- temp_unc[0] = '\\';
- temp_unc[1] = '\\';
- strcpy(temp_unc + 2, ses->serverName);
- strcpy(temp_unc + 2 + strlen(ses->serverName), "\\IPC$");
- rc = ses->server->ops->tree_connect(xid, ses, temp_unc, NULL,
- nls_codepage);
- cifs_dbg(FYI, "Tcon rc = %d ipc_tid = %d\n", rc, ses->ipc_tid);
- kfree(temp_unc);
- }
- if (rc == 0)
- rc = ses->server->ops->get_dfs_refer(xid, ses, old_path,
- referrals, num_referrals,
- nls_codepage, remap);
- /*
- * BB - map targetUNCs to dfs_info3 structures, here or in
- * ses->server->ops->get_dfs_refer.
- */
-
+ rc = ses->server->ops->get_dfs_refer(xid, ses, old_path,
+ referrals, num_referrals,
+ nls_codepage, remap);
return rc;
}
@@ -3783,7 +3937,7 @@ try_mount_again:
tcon->unix_ext = 0; /* server does not support them */
/* do not care if a following call succeed - informational */
- if (!tcon->ipc && server->ops->qfs_tcon)
+ if (!tcon->pipe && server->ops->qfs_tcon)
server->ops->qfs_tcon(xid, tcon);
cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info);
@@ -3913,8 +4067,7 @@ out:
}
/*
- * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon
- * pointer may be NULL.
+ * Issue a TREE_CONNECT request.
*/
int
CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
@@ -3950,7 +4103,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
pSMB->AndXCommand = 0xFF;
pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
bcc_ptr = &pSMB->Password[0];
- if (!tcon || (ses->server->sec_mode & SECMODE_USER)) {
+ if (tcon->pipe || (ses->server->sec_mode & SECMODE_USER)) {
pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
*bcc_ptr = 0; /* password is null byte */
bcc_ptr++; /* skip password */
@@ -4022,7 +4175,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
0);
/* above now done in SendReceive */
- if ((rc == 0) && (tcon != NULL)) {
+ if (rc == 0) {
bool is_unicode;
tcon->tidStatus = CifsGood;
@@ -4042,7 +4195,8 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
(bcc_ptr[2] == 'C')) {
cifs_dbg(FYI, "IPC connection\n");
- tcon->ipc = 1;
+ tcon->ipc = true;
+ tcon->pipe = true;
}
} else if (length == 2) {
if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
@@ -4069,9 +4223,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
else
tcon->Flags = 0;
cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags);
- } else if ((rc == 0) && tcon == NULL) {
- /* all we need to save for IPC$ connection */
- ses->ipc_tid = smb_buffer_response->Tid;
}
cifs_buf_release(smb_buffer);
@@ -4235,7 +4386,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
out:
kfree(vol_info->username);
- kfree(vol_info->password);
+ kzfree(vol_info->password);
kfree(vol_info);
return tcon;
@@ -4387,7 +4538,7 @@ cifs_prune_tlinks(struct work_struct *work)
struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
prune_tlinks.work);
struct rb_root *root = &cifs_sb->tlink_tree;
- struct rb_node *node = rb_first(root);
+ struct rb_node *node;
struct rb_node *tmp;
struct tcon_link *tlink;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index df9f682708c6..7cee97b93a61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -42,7 +42,7 @@
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
-
+#include "smbdirect.h"
static inline int cifs_convert_flags(unsigned int flags)
{
@@ -2902,7 +2902,12 @@ cifs_readdata_release(struct kref *refcount)
{
struct cifs_readdata *rdata = container_of(refcount,
struct cifs_readdata, refcount);
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (rdata->mr) {
+ smbd_deregister_mr(rdata->mr);
+ rdata->mr = NULL;
+ }
+#endif
if (rdata->cfile)
cifsFileInfo_put(rdata->cfile);
@@ -3031,6 +3036,10 @@ uncached_fill_pages(struct TCP_Server_Info *server,
}
if (iter)
result = copy_page_from_iter(page, 0, n, iter);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ else if (rdata->mr)
+ result = n;
+#endif
else
result = cifs_read_page_from_socket(server, page, n);
if (result < 0)
@@ -3471,20 +3480,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = {
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
- int rc, xid;
+ int xid, rc = 0;
struct inode *inode = file_inode(file);
xid = get_xid();
- if (!CIFS_CACHE_READ(CIFS_I(inode))) {
+ if (!CIFS_CACHE_READ(CIFS_I(inode)))
rc = cifs_zap_mapping(inode);
- if (rc)
- return rc;
- }
-
- rc = generic_file_mmap(file, vma);
- if (rc == 0)
+ if (!rc)
+ rc = generic_file_mmap(file, vma);
+ if (!rc)
vma->vm_ops = &cifs_file_vm_ops;
+
free_xid(xid);
return rc;
}
@@ -3494,16 +3501,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
int rc, xid;
xid = get_xid();
+
rc = cifs_revalidate_file(file);
- if (rc) {
+ if (rc)
cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
rc);
- free_xid(xid);
- return rc;
- }
- rc = generic_file_mmap(file, vma);
- if (rc == 0)
+ if (!rc)
+ rc = generic_file_mmap(file, vma);
+ if (!rc)
vma->vm_ops = &cifs_file_vm_ops;
+
free_xid(xid);
return rc;
}
@@ -3600,6 +3607,10 @@ readpages_fill_pages(struct TCP_Server_Info *server,
if (iter)
result = copy_page_from_iter(page, 0, n, iter);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ else if (rdata->mr)
+ result = n;
+#endif
else
result = cifs_read_page_from_socket(server, page, n);
if (result < 0)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ecb99079363a..8f9a8cc7cc62 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1049,7 +1049,7 @@ iget_no_retry:
tcon->resource_id = CIFS_I(inode)->uniqueid;
#endif
- if (rc && tcon->ipc) {
+ if (rc && tcon->pipe) {
cifs_dbg(FYI, "ipc connection - fake read inode\n");
spin_lock(&inode->i_lock);
inode->i_mode |= S_IFDIR;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index eea93ac15ef0..a0dbced4a45c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -98,14 +98,11 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->serverOS);
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
- if (buf_to_free->password) {
- memset(buf_to_free->password, 0, strlen(buf_to_free->password));
- kfree(buf_to_free->password);
- }
+ kzfree(buf_to_free->password);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
- kfree(buf_to_free->auth_key.response);
- kfree(buf_to_free);
+ kzfree(buf_to_free->auth_key.response);
+ kzfree(buf_to_free);
}
struct cifs_tcon *
@@ -136,10 +133,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
}
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
- if (buf_to_free->password) {
- memset(buf_to_free->password, 0, strlen(buf_to_free->password));
- kfree(buf_to_free->password);
- }
+ kzfree(buf_to_free->password);
kfree(buf_to_free);
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index a723df3e0197..3d495e440c87 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -87,9 +87,11 @@ cifs_read_data_offset(char *buf)
}
static unsigned int
-cifs_read_data_length(char *buf)
+cifs_read_data_length(char *buf, bool in_remaining)
{
READ_RSP *rsp = (READ_RSP *)buf;
+ /* It's a bug reading remaining data for SMB1 packets */
+ WARN_ON(in_remaining);
return (le16_to_cpu(rsp->DataLengthHigh) << 16) +
le16_to_cpu(rsp->DataLength);
}
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index b4b1f0305f29..12af5dba742b 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -74,7 +74,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
nr_ioctl_req.Reserved = 0;
rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid,
fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY,
- true /* is_fsctl */, false /* use_ipc */,
+ true /* is_fsctl */,
(char *)&nr_ioctl_req, sizeof(nr_ioctl_req),
NULL, NULL /* no return info */);
if (rc == -EOPNOTSUPP) {
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 7b08a1446a7f..76d03abaa38c 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -578,7 +578,7 @@ smb2_is_valid_lease_break(char *buffer)
bool
smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
{
- struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
+ struct smb2_oplock_break_rsp *rsp = (struct smb2_oplock_break_rsp *)buffer;
struct list_head *tmp, *tmp1, *tmp2;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ed88ab8a4774..eb68e2fcc500 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -32,6 +32,7 @@
#include "smb2status.h"
#include "smb2glob.h"
#include "cifs_ioctl.h"
+#include "smbdirect.h"
static int
change_conf(struct TCP_Server_Info *server)
@@ -250,7 +251,11 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->rdma)
+ wsize = min_t(unsigned int,
+ wsize, server->smbd_conn->max_readwrite_size);
+#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
@@ -266,6 +271,11 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified rsize, or default */
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->rdma)
+ rsize = min_t(unsigned int,
+ rsize, server->smbd_conn->max_readwrite_size);
+#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
@@ -283,7 +293,6 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
- false /* use_ipc */,
NULL /* no data input */, 0 /* no data input */,
(char **)&out_buf, &ret_data_len);
if (rc != 0)
@@ -782,7 +791,6 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
- false /* use_ipc */,
NULL, 0 /* no input */,
(char **)&res_key, &ret_data_len);
@@ -848,8 +856,7 @@ smb2_copychunk_range(const unsigned int xid,
/* Request server copy to target from src identified by key */
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
- true /* is_fsctl */, false /* use_ipc */,
- (char *)pcchunk,
+ true /* is_fsctl */, (char *)pcchunk,
sizeof(struct copychunk_ioctl), (char **)&retbuf,
&ret_data_len);
if (rc == 0) {
@@ -947,9 +954,13 @@ smb2_read_data_offset(char *buf)
}
static unsigned int
-smb2_read_data_length(char *buf)
+smb2_read_data_length(char *buf, bool in_remaining)
{
struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf;
+
+ if (in_remaining)
+ return le32_to_cpu(rsp->DataRemaining);
+
return le32_to_cpu(rsp->DataLength);
}
@@ -1006,7 +1017,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
- true /* is_fctl */, false /* use_ipc */,
+ true /* is_fctl */,
&setsparse, 1, NULL, NULL);
if (rc) {
tcon->broken_sparse_sup = true;
@@ -1077,7 +1088,7 @@ smb2_duplicate_extents(const unsigned int xid,
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
- true /* is_fsctl */, false /* use_ipc */,
+ true /* is_fsctl */,
(char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file),
NULL,
@@ -1112,7 +1123,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SET_INTEGRITY_INFORMATION,
- true /* is_fsctl */, false /* use_ipc */,
+ true /* is_fsctl */,
(char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req),
NULL,
@@ -1132,7 +1143,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SRV_ENUMERATE_SNAPSHOTS,
- true /* is_fsctl */, false /* use_ipc */,
+ true /* is_fsctl */,
NULL, 0 /* no input data */,
(char **)&retbuf,
&ret_data_len);
@@ -1351,16 +1362,20 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name);
/*
- * Use any tcon from the current session. Here, the first one.
+ * Try to use the IPC tcon, otherwise just use any
*/
- spin_lock(&cifs_tcp_ses_lock);
- tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon,
- tcon_list);
- if (tcon)
- tcon->tc_count++;
- spin_unlock(&cifs_tcp_ses_lock);
+ tcon = ses->tcon_ipc;
+ if (tcon == NULL) {
+ spin_lock(&cifs_tcp_ses_lock);
+ tcon = list_first_entry_or_null(&ses->tcon_list,
+ struct cifs_tcon,
+ tcon_list);
+ if (tcon)
+ tcon->tc_count++;
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
- if (!tcon) {
+ if (tcon == NULL) {
cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n",
ses);
rc = -ENOTCONN;
@@ -1389,20 +1404,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len);
do {
- /* try first with IPC */
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_DFS_GET_REFERRALS,
- true /* is_fsctl */, true /* use_ipc */,
+ true /* is_fsctl */,
(char *)dfs_req, dfs_req_size,
(char **)&dfs_rsp, &dfs_rsp_size);
- if (rc == -ENOTCONN) {
- /* try with normal tcon */
- rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
- FSCTL_DFS_GET_REFERRALS,
- true /* is_fsctl */, false /*use_ipc*/,
- (char *)dfs_req, dfs_req_size,
- (char **)&dfs_rsp, &dfs_rsp_size);
- }
} while (rc == -EAGAIN);
if (rc) {
@@ -1421,7 +1427,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
}
out:
- if (tcon) {
+ if (tcon && !tcon->ipc) {
+ /* ipc tcons are not refcounted */
spin_lock(&cifs_tcp_ses_lock);
tcon->tc_count--;
spin_unlock(&cifs_tcp_ses_lock);
@@ -1713,8 +1720,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
- true /* is_fctl */, false /* use_ipc */,
- (char *)&fsctl_buf,
+ true /* is_fctl */, (char *)&fsctl_buf,
sizeof(struct file_zero_data_information), NULL, NULL);
free_xid(xid);
return rc;
@@ -1748,8 +1754,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
- true /* is_fctl */, false /* use_ipc */,
- (char *)&fsctl_buf,
+ true /* is_fctl */, (char *)&fsctl_buf,
sizeof(struct file_zero_data_information), NULL, NULL);
free_xid(xid);
return rc;
@@ -2411,6 +2416,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
struct iov_iter iter;
struct kvec iov;
int length;
+ bool use_rdma_mr = false;
if (shdr->Command != SMB2_READ) {
cifs_dbg(VFS, "only big read responses are supported\n");
@@ -2437,7 +2443,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
}
data_offset = server->ops->read_data_offset(buf) + 4;
- data_len = server->ops->read_data_length(buf);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ use_rdma_mr = rdata->mr;
+#endif
+ data_len = server->ops->read_data_length(buf, use_rdma_mr);
if (data_offset < server->vals->read_rsp_size) {
/*
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 01346b8b6edb..63778ac22fd9 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -48,6 +48,7 @@
#include "smb2glob.h"
#include "cifspdu.h"
#include "cifs_spnego.h"
+#include "smbdirect.h"
/*
* The following table defines the expected "StructureSize" of SMB2 requests
@@ -319,54 +320,16 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf,
*total_len = parmsize + sizeof(struct smb2_sync_hdr);
}
-/* init request without RFC1001 length at the beginning */
-static int
-smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
- void **request_buf, unsigned int *total_len)
-{
- int rc;
- struct smb2_sync_hdr *shdr;
-
- rc = smb2_reconnect(smb2_command, tcon);
- if (rc)
- return rc;
-
- /* BB eventually switch this to SMB2 specific small buf size */
- *request_buf = cifs_small_buf_get();
- if (*request_buf == NULL) {
- /* BB should we add a retry in here if not a writepage? */
- return -ENOMEM;
- }
-
- shdr = (struct smb2_sync_hdr *)(*request_buf);
-
- fill_small_buf(smb2_command, tcon, shdr, total_len);
-
- if (tcon != NULL) {
-#ifdef CONFIG_CIFS_STATS2
- uint16_t com_code = le16_to_cpu(smb2_command);
-
- cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
-#endif
- cifs_stats_inc(&tcon->num_smbs_sent);
- }
-
- return rc;
-}
-
/*
* Allocate and return pointer to an SMB request hdr, and set basic
* SMB information in the SMB header. If the return code is zero, this
- * function must have filled in request_buf pointer. The returned buffer
- * has RFC1001 length at the beginning.
+ * function must have filled in request_buf pointer.
*/
static int
-small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
- void **request_buf)
+smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
+ void **request_buf, unsigned int *total_len)
{
int rc;
- unsigned int total_len;
- struct smb2_pdu *pdu;
rc = smb2_reconnect(smb2_command, tcon);
if (rc)
@@ -379,12 +342,9 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
return -ENOMEM;
}
- pdu = (struct smb2_pdu *)(*request_buf);
-
- fill_small_buf(smb2_command, tcon, get_sync_hdr(pdu), &total_len);
-
- /* Note this is only network field converted to big endian */
- pdu->hdr.smb2_buf_length = cpu_to_be32(total_len);
+ fill_small_buf(smb2_command, tcon,
+ (struct smb2_sync_hdr *)(*request_buf),
+ total_len);
if (tcon != NULL) {
#ifdef CONFIG_CIFS_STATS2
@@ -398,8 +358,8 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
}
#ifdef CONFIG_CIFS_SMB311
-/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */
-#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) - 4 */
+/* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */
+#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */
#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
@@ -427,23 +387,25 @@ build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
}
static void
-assemble_neg_contexts(struct smb2_negotiate_req *req)
+assemble_neg_contexts(struct smb2_negotiate_req *req,
+ unsigned int *total_len)
{
-
- /* +4 is to account for the RFC1001 len field */
- char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4;
+ char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
/* Add 2 to size to round to 8 byte boundary */
+
pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
req->NegotiateContextCount = cpu_to_le16(2);
- inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
- + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
+
+ *total_len += 4 + sizeof(struct smb2_preauth_neg_context)
+ + sizeof(struct smb2_encryption_neg_context);
}
#else
-static void assemble_neg_contexts(struct smb2_negotiate_req *req)
+static void assemble_neg_contexts(struct smb2_negotiate_req *req,
+ unsigned int *total_len)
{
return;
}
@@ -477,6 +439,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
int blob_offset, blob_length;
char *security_blob;
int flags = CIFS_NEG_OP;
+ unsigned int total_len;
cifs_dbg(FYI, "Negotiate protocol\n");
@@ -485,30 +448,30 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
return -EIO;
}
- rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_NEGOTIATE, NULL, (void **) &req, &total_len);
if (rc)
return rc;
- req->hdr.sync_hdr.SessionId = 0;
+ req->sync_hdr.SessionId = 0;
if (strcmp(ses->server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
req->DialectCount = cpu_to_le16(2);
- inc_rfc1001_len(req, 4);
+ total_len += 4;
} else if (strcmp(ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
req->DialectCount = cpu_to_le16(3);
- inc_rfc1001_len(req, 6);
+ total_len += 6;
} else {
/* otherwise send specific dialect */
req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
req->DialectCount = cpu_to_le16(1);
- inc_rfc1001_len(req, 2);
+ total_len += 2;
}
/* only one of SMB2 signing flags may be set in SMB2 request */
@@ -528,13 +491,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
memcpy(req->ClientGUID, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
if (ses->server->vals->protocol_id == SMB311_PROT_ID)
- assemble_neg_contexts(req);
+ assemble_neg_contexts(req, &total_len);
}
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ iov[0].iov_len = total_len;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base;
/*
@@ -654,6 +616,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
cifs_dbg(FYI, "validate negotiate\n");
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (tcon->ses->server->rdma)
+ return 0;
+#endif
+
/*
* validation ioctl must be signed, so no point sending this if we
* can not sign it (ie are not known user). Even if signing is not
@@ -713,7 +680,6 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
- false /* use_ipc */,
(char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
(char **)&pneg_rsp, &rsplen);
@@ -733,8 +699,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
}
/* check validate negotiate info response matches what we got earlier */
- if (pneg_rsp->Dialect !=
- cpu_to_le16(tcon->ses->server->vals->protocol_id))
+ if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect))
goto vneg_out;
if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
@@ -806,20 +771,22 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
struct smb2_sess_setup_req *req;
struct TCP_Server_Info *server = ses->server;
+ unsigned int total_len;
- rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_SESSION_SETUP, NULL, (void **) &req,
+ &total_len);
if (rc)
return rc;
/* First session, not a reauthenticate */
- req->hdr.sync_hdr.SessionId = 0;
+ req->sync_hdr.SessionId = 0;
/* if reconnect, we need to send previous sess id, otherwise it is 0 */
req->PreviousSessionId = sess_data->previous_session;
req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */
- req->hdr.sync_hdr.CreditRequest = cpu_to_le16(3);
+ req->sync_hdr.CreditRequest = cpu_to_le16(3);
/* only one of SMB2 signing flags may be set in SMB2 request */
if (server->sign)
@@ -833,8 +800,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
req->Channel = 0; /* MBZ */
sess_data->iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field and 1 for pad */
- sess_data->iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ /* 1 for pad */
+ sess_data->iov[0].iov_len = total_len - 1;
/*
* This variable will be used to clear the buffer
* allocated above in case of any error in the calling function.
@@ -860,18 +827,15 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
/* Testing shows that buffer offset must be at location of Buffer[0] */
req->SecurityBufferOffset =
- cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
- 1 /* pad */ - 4 /* rfc1001 len */);
+ cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */);
req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
- inc_rfc1001_len(req, sess_data->iov[1].iov_len - 1 /* pad */);
-
/* BB add code to build os and lm fields */
- rc = SendReceive2(sess_data->xid, sess_data->ses,
- sess_data->iov, 2,
- &sess_data->buf0_type,
- CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
+ rc = smb2_send_recv(sess_data->xid, sess_data->ses,
+ sess_data->iov, 2,
+ &sess_data->buf0_type,
+ CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
cifs_small_buf_release(sess_data->iov[0].iov_base);
memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
@@ -1092,7 +1056,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
goto out;
req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
- req->hdr.sync_hdr.SessionId = ses->Suid;
+ req->sync_hdr.SessionId = ses->Suid;
rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
sess_data->nls_cp);
@@ -1202,6 +1166,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
int rc = 0;
struct TCP_Server_Info *server;
int flags = 0;
+ unsigned int total_len;
+ struct kvec iov[1];
+ struct kvec rsp_iov;
+ int resp_buf_type;
cifs_dbg(FYI, "disconnect session %p\n", ses);
@@ -1214,19 +1182,24 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
if (ses->need_reconnect)
goto smb2_session_already_dead;
- rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_LOGOFF, NULL, (void **) &req, &total_len);
if (rc)
return rc;
/* since no tcon, smb2_init can not do this, so do here */
- req->hdr.sync_hdr.SessionId = ses->Suid;
+ req->sync_hdr.SessionId = ses->Suid;
if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
flags |= CIFS_TRANSFORM_REQ;
else if (server->sign)
- req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
+ req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
+
+ flags |= CIFS_NO_RESP;
+
+ iov[0].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
- rc = SendReceiveNoRsp(xid, ses, (char *) req, flags);
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
/*
* No tcon so can't do
@@ -1265,6 +1238,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
int unc_path_len;
__le16 *unc_path = NULL;
int flags = 0;
+ unsigned int total_len;
cifs_dbg(FYI, "TCON\n");
@@ -1283,40 +1257,30 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
}
/* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
- if (tcon)
- tcon->tid = 0;
+ tcon->tid = 0;
- rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req,
+ &total_len);
if (rc) {
kfree(unc_path);
return rc;
}
- if (tcon == NULL) {
- if ((ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA))
- flags |= CIFS_TRANSFORM_REQ;
-
- /* since no tcon, smb2_init can not do this, so do here */
- req->hdr.sync_hdr.SessionId = ses->Suid;
- if (ses->server->sign)
- req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
- } else if (encryption_required(tcon))
+ if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field and 1 for pad */
- iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ /* 1 for pad */
+ iov[0].iov_len = total_len - 1;
/* Testing shows that buffer offset must be at location of Buffer[0] */
req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req)
- - 1 /* pad */ - 4 /* do not count rfc1001 len field */);
+ - 1 /* pad */);
req->PathLength = cpu_to_le16(unc_path_len - 2);
iov[1].iov_base = unc_path;
iov[1].iov_len = unc_path_len;
- inc_rfc1001_len(req, unc_path_len - 1 /* pad */);
-
- rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
@@ -1328,21 +1292,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
goto tcon_error_exit;
}
- if (tcon == NULL) {
- ses->ipc_tid = rsp->hdr.sync_hdr.TreeId;
- goto tcon_exit;
- }
-
switch (rsp->ShareType) {
case SMB2_SHARE_TYPE_DISK:
cifs_dbg(FYI, "connection to disk share\n");
break;
case SMB2_SHARE_TYPE_PIPE:
- tcon->ipc = true;
+ tcon->pipe = true;
cifs_dbg(FYI, "connection to pipe share\n");
break;
case SMB2_SHARE_TYPE_PRINT:
- tcon->ipc = true;
+ tcon->print = true;
cifs_dbg(FYI, "connection to printer\n");
break;
default:
@@ -1389,6 +1348,10 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
int rc = 0;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
+ unsigned int total_len;
+ struct kvec iov[1];
+ struct kvec rsp_iov;
+ int resp_buf_type;
cifs_dbg(FYI, "Tree Disconnect\n");
@@ -1398,14 +1361,20 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
return 0;
- rc = small_smb2_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- rc = SendReceiveNoRsp(xid, ses, (char *)req, flags);
+ flags |= CIFS_NO_RESP;
+
+ iov[0].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
+
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc)
cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE);
@@ -1505,11 +1474,10 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
if (!req->CreateContextsOffset)
req->CreateContextsOffset = cpu_to_le32(
- sizeof(struct smb2_create_req) - 4 +
+ sizeof(struct smb2_create_req) +
iov[num - 1].iov_len);
le32_add_cpu(&req->CreateContextsLength,
server->vals->create_lease_size);
- inc_rfc1001_len(&req->hdr, server->vals->create_lease_size);
*num_iovec = num + 1;
return 0;
}
@@ -1589,10 +1557,9 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec,
iov[num].iov_len = sizeof(struct create_durable_v2);
if (!req->CreateContextsOffset)
req->CreateContextsOffset =
- cpu_to_le32(sizeof(struct smb2_create_req) - 4 +
+ cpu_to_le32(sizeof(struct smb2_create_req) +
iov[1].iov_len);
le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2));
- inc_rfc1001_len(&req->hdr, sizeof(struct create_durable_v2));
*num_iovec = num + 1;
return 0;
}
@@ -1613,12 +1580,10 @@ add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec,
iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2);
if (!req->CreateContextsOffset)
req->CreateContextsOffset =
- cpu_to_le32(sizeof(struct smb2_create_req) - 4 +
+ cpu_to_le32(sizeof(struct smb2_create_req) +
iov[1].iov_len);
le32_add_cpu(&req->CreateContextsLength,
sizeof(struct create_durable_handle_reconnect_v2));
- inc_rfc1001_len(&req->hdr,
- sizeof(struct create_durable_handle_reconnect_v2));
*num_iovec = num + 1;
return 0;
}
@@ -1649,10 +1614,9 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
iov[num].iov_len = sizeof(struct create_durable);
if (!req->CreateContextsOffset)
req->CreateContextsOffset =
- cpu_to_le32(sizeof(struct smb2_create_req) - 4 +
+ cpu_to_le32(sizeof(struct smb2_create_req) +
iov[1].iov_len);
le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable));
- inc_rfc1001_len(&req->hdr, sizeof(struct create_durable));
*num_iovec = num + 1;
return 0;
}
@@ -1723,6 +1687,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
__u32 file_attributes = 0;
char *dhc_buf = NULL, *lc_buf = NULL;
int flags = 0;
+ unsigned int total_len;
cifs_dbg(FYI, "create/open\n");
@@ -1731,7 +1696,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
else
return -EIO;
- rc = small_smb2_init(SMB2_CREATE, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
+
if (rc)
return rc;
@@ -1752,12 +1718,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK);
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
/* -1 since last byte is buf[0] which is sent below (path) */
- iov[0].iov_len--;
+ iov[0].iov_len = total_len - 1;
- req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4);
+ req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req));
/* [MS-SMB2] 2.2.13 NameOffset:
* If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of
@@ -1770,7 +1734,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (tcon->share_flags & SHI1005_FLAGS_DFS) {
int name_len;
- req->hdr.sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
+ req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len,
tcon->treeName, path);
@@ -1797,8 +1761,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
iov[1].iov_len = uni_path_len;
iov[1].iov_base = path;
- /* -1 since last byte is buf[0] which was counted in smb2_buf_len */
- inc_rfc1001_len(req, uni_path_len - 1);
if (!server->oplocks)
*oplock = SMB2_OPLOCK_LEVEL_NONE;
@@ -1836,7 +1798,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
dhc_buf = iov[n_iov-1].iov_base;
}
- rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
+ &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
@@ -1877,7 +1840,7 @@ creat_exit:
*/
int
SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
- u64 volatile_fid, u32 opcode, bool is_fsctl, bool use_ipc,
+ u64 volatile_fid, u32 opcode, bool is_fsctl,
char *in_data, u32 indatalen,
char **out_data, u32 *plen /* returned data len */)
{
@@ -1891,6 +1854,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
int n_iov;
int rc = 0;
int flags = 0;
+ unsigned int total_len;
cifs_dbg(FYI, "SMB2 IOCTL\n");
@@ -1909,20 +1873,10 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (!ses || !(ses->server))
return -EIO;
- rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_IOCTL, tcon, (void **) &req, &total_len);
if (rc)
return rc;
- if (use_ipc) {
- if (ses->ipc_tid == 0) {
- cifs_small_buf_release(req);
- return -ENOTCONN;
- }
-
- cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 0x%x\n",
- req->hdr.sync_hdr.TreeId, ses->ipc_tid);
- req->hdr.sync_hdr.TreeId = ses->ipc_tid;
- }
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -1934,7 +1888,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
req->InputCount = cpu_to_le32(indatalen);
/* do not set InputOffset if no input data */
req->InputOffset =
- cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4);
+ cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer));
iov[1].iov_base = in_data;
iov[1].iov_len = indatalen;
n_iov = 2;
@@ -1969,21 +1923,20 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
* but if input data passed to ioctl, we do not
* want to double count this, so we do not send
* the dummy one byte of data in iovec[0] if sending
- * input data (in iovec[1]). We also must add 4 bytes
- * in first iovec to allow for rfc1002 length field.
+ * input data (in iovec[1]).
*/
if (indatalen) {
- iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
- inc_rfc1001_len(req, indatalen - 1);
+ iov[0].iov_len = total_len - 1;
} else
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ iov[0].iov_len = total_len;
/* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */
if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO)
- req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
+ req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
- rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
+ &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
@@ -2052,7 +2005,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
- false /* use_ipc */,
(char *)&fsctl_input /* data input */,
2 /* in data len */, &ret_data /* out data */, NULL);
@@ -2073,13 +2025,14 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buftype;
int rc = 0;
int flags = 0;
+ unsigned int total_len;
cifs_dbg(FYI, "Close\n");
if (!ses || !(ses->server))
return -EIO;
- rc = small_smb2_init(SMB2_CLOSE, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len);
if (rc)
return rc;
@@ -2090,10 +2043,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
req->VolatileFileId = volatile_fid;
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ iov[0].iov_len = total_len;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
@@ -2180,13 +2132,15 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
+ unsigned int total_len;
cifs_dbg(FYI, "Query Info\n");
if (!ses || !(ses->server))
return -EIO;
- rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
@@ -2203,15 +2157,14 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
* We do not use the input buffer (do not send extra byte)
*/
req->InputBufferOffset = 0;
- inc_rfc1001_len(req, -1);
req->OutputBufferLength = cpu_to_le32(output_len);
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ /* 1 for Buffer */
+ iov[0].iov_len = total_len - 1;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
@@ -2338,6 +2291,10 @@ void smb2_reconnect_server(struct work_struct *work)
tcon_exist = true;
}
}
+ if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) {
+ list_add_tail(&ses->tcon_ipc->rlist, &tmp_list);
+ tcon_exist = true;
+ }
}
/*
* Get the reference to server struct to be sure that the last call of
@@ -2376,6 +2333,8 @@ SMB2_echo(struct TCP_Server_Info *server)
struct kvec iov[2];
struct smb_rqst rqst = { .rq_iov = iov,
.rq_nvec = 2 };
+ unsigned int total_len;
+ __be32 rfc1002_marker;
cifs_dbg(FYI, "In echo request\n");
@@ -2385,17 +2344,17 @@ SMB2_echo(struct TCP_Server_Info *server)
return rc;
}
- rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
+ rc = smb2_plain_req_init(SMB2_ECHO, NULL, (void **)&req, &total_len);
if (rc)
return rc;
- req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
+ req->sync_hdr.CreditRequest = cpu_to_le16(1);
- /* 4 for rfc1002 length field */
iov[0].iov_len = 4;
- iov[0].iov_base = (char *)req;
- iov[1].iov_len = get_rfc1002_length(req);
- iov[1].iov_base = (char *)req + 4;
+ rfc1002_marker = cpu_to_be32(total_len);
+ iov[0].iov_base = &rfc1002_marker;
+ iov[1].iov_len = total_len;
+ iov[1].iov_base = (char *)req;
rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL,
server, CIFS_ECHO_OP);
@@ -2417,13 +2376,14 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
int resp_buftype;
int rc = 0;
int flags = 0;
+ unsigned int total_len;
cifs_dbg(FYI, "Flush\n");
if (!ses || !(ses->server))
return -EIO;
- rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_FLUSH, tcon, (void **) &req, &total_len);
if (rc)
return rc;
@@ -2434,10 +2394,9 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
req->VolatileFileId = volatile_fid;
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ iov[0].iov_len = total_len;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc != 0)
@@ -2453,18 +2412,21 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
*/
static int
smb2_new_read_req(void **buf, unsigned int *total_len,
- struct cifs_io_parms *io_parms, unsigned int remaining_bytes,
- int request_type)
+ struct cifs_io_parms *io_parms, struct cifs_readdata *rdata,
+ unsigned int remaining_bytes, int request_type)
{
int rc = -EACCES;
struct smb2_read_plain_req *req = NULL;
struct smb2_sync_hdr *shdr;
+ struct TCP_Server_Info *server;
rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, (void **) &req,
total_len);
if (rc)
return rc;
- if (io_parms->tcon->ses->server == NULL)
+
+ server = io_parms->tcon->ses->server;
+ if (server == NULL)
return -ECONNABORTED;
shdr = &req->sync_hdr;
@@ -2478,7 +2440,40 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
req->MinimumCount = 0;
req->Length = cpu_to_le32(io_parms->length);
req->Offset = cpu_to_le64(io_parms->offset);
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ /*
+ * If we want to do a RDMA write, fill in and append
+ * smbd_buffer_descriptor_v1 to the end of read request
+ */
+ if (server->rdma && rdata &&
+ rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) {
+
+ struct smbd_buffer_descriptor_v1 *v1;
+ bool need_invalidate =
+ io_parms->tcon->ses->server->dialect == SMB30_PROT_ID;
+
+ rdata->mr = smbd_register_mr(
+ server->smbd_conn, rdata->pages,
+ rdata->nr_pages, rdata->tailsz,
+ true, need_invalidate);
+ if (!rdata->mr)
+ return -ENOBUFS;
+
+ req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
+ if (need_invalidate)
+ req->Channel = SMB2_CHANNEL_RDMA_V1;
+ req->ReadChannelInfoOffset =
+ cpu_to_le16(offsetof(struct smb2_read_plain_req, Buffer));
+ req->ReadChannelInfoLength =
+ cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1));
+ v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
+ v1->offset = cpu_to_le64(rdata->mr->mr->iova);
+ v1->token = cpu_to_le32(rdata->mr->mr->rkey);
+ v1->length = cpu_to_le32(rdata->mr->mr->length);
+
+ *total_len += sizeof(*v1) - 1;
+ }
+#endif
if (request_type & CHAINED_REQUEST) {
if (!(request_type & END_OF_CHAIN)) {
/* next 8-byte aligned request */
@@ -2557,7 +2552,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
if (rdata->result != -ENODATA)
rdata->result = -EIO;
}
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ /*
+ * If this rdata has a memmory registered, the MR can be freed
+ * MR needs to be freed as soon as I/O finishes to prevent deadlock
+ * because they have limited number and are used for future I/Os
+ */
+ if (rdata->mr) {
+ smbd_deregister_mr(rdata->mr);
+ rdata->mr = NULL;
+ }
+#endif
if (rdata->result)
cifs_stats_fail_inc(tcon, SMB2_READ_HE);
@@ -2592,7 +2597,8 @@ smb2_async_readv(struct cifs_readdata *rdata)
server = io_parms.tcon->ses->server;
- rc = smb2_new_read_req((void **) &buf, &total_len, &io_parms, 0, 0);
+ rc = smb2_new_read_req(
+ (void **) &buf, &total_len, &io_parms, rdata, 0, 0);
if (rc) {
if (rc == -EAGAIN && rdata->credits) {
/* credits was reset by reconnect */
@@ -2650,31 +2656,24 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
struct smb2_read_plain_req *req = NULL;
struct smb2_read_rsp *rsp = NULL;
struct smb2_sync_hdr *shdr;
- struct kvec iov[2];
+ struct kvec iov[1];
struct kvec rsp_iov;
unsigned int total_len;
- __be32 req_len;
- struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = 2 };
int flags = CIFS_LOG_ERROR;
struct cifs_ses *ses = io_parms->tcon->ses;
*nbytes = 0;
- rc = smb2_new_read_req((void **)&req, &total_len, io_parms, 0, 0);
+ rc = smb2_new_read_req((void **)&req, &total_len, io_parms, NULL, 0, 0);
if (rc)
return rc;
if (encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
- req_len = cpu_to_be32(total_len);
-
- iov[0].iov_base = &req_len;
- iov[0].iov_len = sizeof(__be32);
- iov[1].iov_base = req;
- iov[1].iov_len = total_len;
+ iov[0].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
- rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
@@ -2755,7 +2754,19 @@ smb2_writev_callback(struct mid_q_entry *mid)
wdata->result = -EIO;
break;
}
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ /*
+ * If this wdata has a memory registered, the MR can be freed
+ * The number of MRs available is limited, it's important to recover
+ * used MR as soon as I/O is finished. Hold MR longer in the later
+ * I/O process can possibly result in I/O deadlock due to lack of MR
+ * to send request on I/O retry
+ */
+ if (wdata->mr) {
+ smbd_deregister_mr(wdata->mr);
+ wdata->mr = NULL;
+ }
+#endif
if (wdata->result)
cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
@@ -2776,8 +2787,10 @@ smb2_async_writev(struct cifs_writedata *wdata,
struct TCP_Server_Info *server = tcon->ses->server;
struct kvec iov[2];
struct smb_rqst rqst = { };
+ unsigned int total_len;
+ __be32 rfc1002_marker;
- rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_WRITE, tcon, (void **) &req, &total_len);
if (rc) {
if (rc == -EAGAIN && wdata->credits) {
/* credits was reset by reconnect */
@@ -2793,7 +2806,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- shdr = get_sync_hdr(req);
+ shdr = (struct smb2_sync_hdr *)req;
shdr->ProcessId = cpu_to_le32(wdata->cfile->pid);
req->PersistentFileId = wdata->cfile->fid.persistent_fid;
@@ -2802,16 +2815,51 @@ smb2_async_writev(struct cifs_writedata *wdata,
req->WriteChannelInfoLength = 0;
req->Channel = 0;
req->Offset = cpu_to_le64(wdata->offset);
- /* 4 for rfc1002 length field */
req->DataOffset = cpu_to_le16(
- offsetof(struct smb2_write_req, Buffer) - 4);
+ offsetof(struct smb2_write_req, Buffer));
req->RemainingBytes = 0;
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ /*
+ * If we want to do a server RDMA read, fill in and append
+ * smbd_buffer_descriptor_v1 to the end of write request
+ */
+ if (server->rdma && wdata->bytes >=
+ server->smbd_conn->rdma_readwrite_threshold) {
+
+ struct smbd_buffer_descriptor_v1 *v1;
+ bool need_invalidate = server->dialect == SMB30_PROT_ID;
+
+ wdata->mr = smbd_register_mr(
+ server->smbd_conn, wdata->pages,
+ wdata->nr_pages, wdata->tailsz,
+ false, need_invalidate);
+ if (!wdata->mr) {
+ rc = -ENOBUFS;
+ goto async_writev_out;
+ }
+ req->Length = 0;
+ req->DataOffset = 0;
+ req->RemainingBytes =
+ cpu_to_le32((wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz);
+ req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
+ if (need_invalidate)
+ req->Channel = SMB2_CHANNEL_RDMA_V1;
+ req->WriteChannelInfoOffset =
+ cpu_to_le16(offsetof(struct smb2_write_req, Buffer));
+ req->WriteChannelInfoLength =
+ cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1));
+ v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
+ v1->offset = cpu_to_le64(wdata->mr->mr->iova);
+ v1->token = cpu_to_le32(wdata->mr->mr->rkey);
+ v1->length = cpu_to_le32(wdata->mr->mr->length);
+ }
+#endif
/* 4 for rfc1002 length field and 1 for Buffer */
iov[0].iov_len = 4;
- iov[0].iov_base = req;
- iov[1].iov_len = get_rfc1002_length(req) - 1;
- iov[1].iov_base = (char *)req + 4;
+ rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes);
+ iov[0].iov_base = &rfc1002_marker;
+ iov[1].iov_len = total_len - 1;
+ iov[1].iov_base = (char *)req;
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
@@ -2819,13 +2867,22 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz;
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (wdata->mr) {
+ iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
+ rqst.rq_npages = 0;
+ }
+#endif
cifs_dbg(FYI, "async write at %llu %u bytes\n",
wdata->offset, wdata->bytes);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ /* For RDMA read, I/O size is in RemainingBytes not in Length */
+ if (!wdata->mr)
+ req->Length = cpu_to_le32(wdata->bytes);
+#else
req->Length = cpu_to_le32(wdata->bytes);
-
- inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
+#endif
if (wdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
@@ -2869,13 +2926,15 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
int resp_buftype;
struct kvec rsp_iov;
int flags = 0;
+ unsigned int total_len;
*nbytes = 0;
if (n_vec < 1)
return rc;
- rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_WRITE, io_parms->tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
@@ -2885,7 +2944,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
if (encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
- req->hdr.sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
+ req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
req->PersistentFileId = io_parms->persistent_fid;
req->VolatileFileId = io_parms->volatile_fid;
@@ -2894,20 +2953,16 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
req->Channel = 0;
req->Length = cpu_to_le32(io_parms->length);
req->Offset = cpu_to_le64(io_parms->offset);
- /* 4 for rfc1002 length field */
req->DataOffset = cpu_to_le16(
- offsetof(struct smb2_write_req, Buffer) - 4);
+ offsetof(struct smb2_write_req, Buffer));
req->RemainingBytes = 0;
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field and 1 for Buffer */
- iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ /* 1 for Buffer */
+ iov[0].iov_len = total_len - 1;
- /* length of entire message including data to be written */
- inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */);
-
- rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1,
- &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, io_parms->tcon->ses, iov, n_vec + 1,
+ &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
@@ -2984,13 +3039,15 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int output_size = CIFSMaxBufSize;
size_t info_buf_size;
int flags = 0;
+ unsigned int total_len;
if (ses && (ses->server))
server = ses->server;
else
return -EIO;
- rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
@@ -3022,7 +3079,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
memcpy(bufptr, &asteriks, len);
req->FileNameOffset =
- cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4);
+ cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1);
req->FileNameLength = cpu_to_le16(len);
/*
* BB could be 30 bytes or so longer if we used SMB2 specific
@@ -3033,15 +3090,13 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
req->OutputBufferLength = cpu_to_le32(output_size);
iov[0].iov_base = (char *)req;
- /* 4 for RFC1001 length and 1 for Buffer */
- iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ /* 1 for Buffer */
+ iov[0].iov_len = total_len - 1;
iov[1].iov_base = (char *)(req->Buffer);
iov[1].iov_len = len;
- inc_rfc1001_len(req, len - 1 /* Buffer */);
-
- rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base;
@@ -3110,6 +3165,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int i;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
+ unsigned int total_len;
if (!ses || !(ses->server))
return -EIO;
@@ -3121,7 +3177,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
if (!iov)
return -ENOMEM;
- rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_SET_INFO, tcon, (void **) &req, &total_len);
if (rc) {
kfree(iov);
return rc;
@@ -3130,7 +3186,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
+ req->sync_hdr.ProcessId = cpu_to_le32(pid);
req->InfoType = info_type;
req->FileInfoClass = info_class;
@@ -3138,27 +3194,25 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
req->VolatileFileId = volatile_fid;
req->AdditionalInformation = cpu_to_le32(additional_info);
- /* 4 for RFC1001 length and 1 for Buffer */
req->BufferOffset =
- cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4);
+ cpu_to_le16(sizeof(struct smb2_set_info_req) - 1);
req->BufferLength = cpu_to_le32(*size);
- inc_rfc1001_len(req, *size - 1 /* Buffer */);
-
memcpy(req->Buffer, *data, *size);
+ total_len += *size;
iov[0].iov_base = (char *)req;
- /* 4 for RFC1001 length */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ /* 1 for Buffer */
+ iov[0].iov_len = total_len - 1;
for (i = 1; i < num; i++) {
- inc_rfc1001_len(req, size[i]);
le32_add_cpu(&req->BufferLength, size[i]);
iov[i].iov_base = (char *)data[i];
iov[i].iov_len = size[i];
}
- rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, iov, num, &resp_buftype, flags,
+ &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
@@ -3310,11 +3364,17 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 oplock_level)
{
int rc;
- struct smb2_oplock_break *req = NULL;
+ struct smb2_oplock_break_req *req = NULL;
+ struct cifs_ses *ses = tcon->ses;
int flags = CIFS_OBREAK_OP;
+ unsigned int total_len;
+ struct kvec iov[1];
+ struct kvec rsp_iov;
+ int resp_buf_type;
cifs_dbg(FYI, "SMB2_oplock_break\n");
- rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
@@ -3324,9 +3384,14 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
req->VolatileFid = volatile_fid;
req->PersistentFid = persistent_fid;
req->OplockLevel = oplock_level;
- req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
+ req->sync_hdr.CreditRequest = cpu_to_le16(1);
- rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags);
+ flags |= CIFS_NO_RESP;
+
+ iov[0].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
+
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
@@ -3355,13 +3420,15 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
{
int rc;
struct smb2_query_info_req *req;
+ unsigned int total_len;
cifs_dbg(FYI, "Query FSInfo level %d\n", level);
if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
return -EIO;
- rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
@@ -3369,15 +3436,14 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
req->FileInfoClass = level;
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
- /* 4 for rfc1002 length field and 1 for pad */
+ /* 1 for pad */
req->InputBufferOffset =
- cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
+ cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
req->OutputBufferLength = cpu_to_le32(
outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4);
iov->iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov->iov_len = get_rfc1002_length(req) + 4;
+ iov->iov_len = total_len;
return 0;
}
@@ -3403,7 +3469,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -3459,7 +3525,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+ rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -3505,34 +3571,33 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buf_type;
unsigned int count;
int flags = CIFS_NO_RESP;
+ unsigned int total_len;
cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock);
- rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_LOCK, tcon, (void **) &req, &total_len);
if (rc)
return rc;
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
+ req->sync_hdr.ProcessId = cpu_to_le32(pid);
req->LockCount = cpu_to_le16(num_lock);
req->PersistentFileId = persist_fid;
req->VolatileFileId = volatile_fid;
count = num_lock * sizeof(struct smb2_lock_element);
- inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element));
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field and count for all locks */
- iov[0].iov_len = get_rfc1002_length(req) + 4 - count;
+ iov[0].iov_len = total_len - sizeof(struct smb2_lock_element);
iov[1].iov_base = (char *)buf;
iov[1].iov_len = count;
cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
- rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, flags,
- &rsp_iov);
+ rc = smb2_send_recv(xid, tcon->ses, iov, 2, &resp_buf_type, flags,
+ &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc);
@@ -3565,24 +3630,35 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc;
struct smb2_lease_ack *req = NULL;
+ struct cifs_ses *ses = tcon->ses;
int flags = CIFS_OBREAK_OP;
+ unsigned int total_len;
+ struct kvec iov[1];
+ struct kvec rsp_iov;
+ int resp_buf_type;
cifs_dbg(FYI, "SMB2_lease_break\n");
- rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
+ &total_len);
if (rc)
return rc;
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
+ req->sync_hdr.CreditRequest = cpu_to_le16(1);
req->StructureSize = cpu_to_le16(36);
- inc_rfc1001_len(req, 12);
+ total_len += 12;
memcpy(req->LeaseKey, lease_key, 16);
req->LeaseState = lease_state;
- rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags);
+ flags |= CIFS_NO_RESP;
+
+ iov[0].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
+
+ rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c2ec934be968..2a2b34ccaf49 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -192,10 +192,39 @@ struct smb2_symlink_err_rsp {
__u8 PathBuffer[0];
} __packed;
+/* SMB 3.1.1 and later dialects. See MS-SMB2 section 2.2.2.1 */
+struct smb2_error_context_rsp {
+ __le32 ErrorDataLength;
+ __le32 ErrorId;
+ __u8 ErrorContextData; /* ErrorDataLength long array */
+} __packed;
+
+/* Defines for Type field below (see MS-SMB2 2.2.2.2.2.1) */
+#define MOVE_DST_IPADDR_V4 cpu_to_le32(0x00000001)
+#define MOVE_DST_IPADDR_V6 cpu_to_le32(0x00000002)
+
+struct move_dst_ipaddr {
+ __le32 Type;
+ __u32 Reserved;
+ __u8 address[16]; /* IPv4 followed by 12 bytes rsvd or IPv6 address */
+} __packed;
+
+struct share_redirect_error_context_rsp {
+ __le32 StructureSize;
+ __le32 NotificationType;
+ __le32 ResourceNameOffset;
+ __le32 ResourceNameLength;
+ __le16 Flags;
+ __le16 TargetType;
+ __le32 IPAddrCount;
+ struct move_dst_ipaddr IpAddrMoveList[0];
+ /* __u8 ResourceName[] */ /* Name of share as counted Unicode string */
+} __packed;
+
#define SMB2_CLIENT_GUID_SIZE 16
struct smb2_negotiate_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 36 */
__le16 DialectCount;
__le16 SecurityMode;
@@ -282,7 +311,7 @@ struct smb2_negotiate_rsp {
#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04
struct smb2_sess_setup_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 25 */
__u8 Flags;
__u8 SecurityMode;
@@ -308,7 +337,7 @@ struct smb2_sess_setup_rsp {
} __packed;
struct smb2_logoff_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -320,10 +349,12 @@ struct smb2_logoff_rsp {
} __packed;
/* Flags/Reserved for SMB3.1.1 */
-#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001
+#define SMB2_TREE_CONNECT_FLAG_CLUSTER_RECONNECT cpu_to_le16(0x0001)
+#define SMB2_TREE_CONNECT_FLAG_REDIRECT_TO_OWNER cpu_to_le16(0x0002)
+#define SMB2_TREE_CONNECT_FLAG_EXTENSION_PRESENT cpu_to_le16(0x0004)
struct smb2_tree_connect_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 Reserved; /* Flags in SMB3.1.1 */
__le16 PathOffset;
@@ -331,6 +362,82 @@ struct smb2_tree_connect_req {
__u8 Buffer[1]; /* variable length */
} __packed;
+/* See MS-SMB2 section 2.2.9.2 */
+/* Context Types */
+#define SMB2_RESERVED_TREE_CONNECT_CONTEXT_ID 0x0000
+#define SMB2_REMOTED_IDENTITY_TREE_CONNECT_CONTEXT_ID cpu_to_le16(0x0001)
+
+struct tree_connect_contexts {
+ __le16 ContextType;
+ __le16 DataLength;
+ __le32 Reserved;
+ __u8 Data[0];
+} __packed;
+
+/* Remoted identity tree connect context structures - see MS-SMB2 2.2.9.2.1 */
+struct smb3_blob_data {
+ __le16 BlobSize;
+ __u8 BlobData[0];
+} __packed;
+
+/* Valid values for Attr */
+#define SE_GROUP_MANDATORY 0x00000001
+#define SE_GROUP_ENABLED_BY_DEFAULT 0x00000002
+#define SE_GROUP_ENABLED 0x00000004
+#define SE_GROUP_OWNER 0x00000008
+#define SE_GROUP_USE_FOR_DENY_ONLY 0x00000010
+#define SE_GROUP_INTEGRITY 0x00000020
+#define SE_GROUP_INTEGRITY_ENABLED 0x00000040
+#define SE_GROUP_RESOURCE 0x20000000
+#define SE_GROUP_LOGON_ID 0xC0000000
+
+/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */
+
+struct sid_array_data {
+ __le16 SidAttrCount;
+ /* SidAttrList - array of sid_attr_data structs */
+} __packed;
+
+struct luid_attr_data {
+
+} __packed;
+
+/*
+ * struct privilege_data is the same as BLOB_DATA - see MS-SMB2 2.2.9.2.1.5
+ * but with size of LUID_ATTR_DATA struct and BlobData set to LUID_ATTR DATA
+ */
+
+struct privilege_array_data {
+ __le16 PrivilegeCount;
+ /* array of privilege_data structs */
+} __packed;
+
+struct remoted_identity_tcon_context {
+ __le16 TicketType; /* must be 0x0001 */
+ __le16 TicketSize; /* total size of this struct */
+ __le16 User; /* offset to SID_ATTR_DATA struct with user info */
+ __le16 UserName; /* offset to null terminated Unicode username string */
+ __le16 Domain; /* offset to null terminated Unicode domain name */
+ __le16 Groups; /* offset to SID_ARRAY_DATA struct with group info */
+ __le16 RestrictedGroups; /* similar to above */
+ __le16 Privileges; /* offset to PRIVILEGE_ARRAY_DATA struct */
+ __le16 PrimaryGroup; /* offset to SID_ARRAY_DATA struct */
+ __le16 Owner; /* offset to BLOB_DATA struct */
+ __le16 DefaultDacl; /* offset to BLOB_DATA struct */
+ __le16 DeviceGroups; /* offset to SID_ARRAY_DATA struct */
+ __le16 UserClaims; /* offset to BLOB_DATA struct */
+ __le16 DeviceClaims; /* offset to BLOB_DATA struct */
+ __u8 TicketInfo[0]; /* variable length buf - remoted identity data */
+} __packed;
+
+struct smb2_tree_connect_req_extension {
+ __le32 TreeConnectContextOffset;
+ __le16 TreeConnectContextCount;
+ __u8 Reserved[10];
+ __u8 PathName[0]; /* variable sized array */
+ /* followed by array of TreeConnectContexts */
+} __packed;
+
struct smb2_tree_connect_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 16 */
@@ -365,7 +472,8 @@ struct smb2_tree_connect_rsp {
#define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000
#define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000
#define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000
-#define SHI1005_FLAGS_ALL 0x0000FF33
+#define SMB2_SHAREFLAG_IDENTITY_REMOTING 0x00040000 /* 3.1.1 */
+#define SHI1005_FLAGS_ALL 0x0004FF33
/* Possible share capabilities */
#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */
@@ -373,9 +481,10 @@ struct smb2_tree_connect_rsp {
#define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */
#define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */
#define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */
+#define SMB2_SHARE_CAP_REDIRECT_TO_OWNER cpu_to_le32(0x00000100) /* 3.1.1 */
struct smb2_tree_disconnect_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -496,7 +605,7 @@ struct smb2_tree_disconnect_rsp {
#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9
struct smb2_create_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
__u8 SecurityFlags;
__u8 RequestedOplockLevel;
@@ -556,6 +665,7 @@ struct create_context {
#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04)
#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET cpu_to_le32(0x00000004)
#define SMB2_LEASE_KEY_SIZE 16
@@ -753,7 +863,7 @@ struct duplicate_extents_to_file {
} __packed;
struct smb2_ioctl_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
__u16 Reserved;
__le32 CtlCode;
@@ -789,7 +899,7 @@ struct smb2_ioctl_rsp {
/* Currently defined values for close flags */
#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001)
struct smb2_close_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__le16 Flags;
__le32 Reserved;
@@ -812,7 +922,7 @@ struct smb2_close_rsp {
} __packed;
struct smb2_flush_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__le16 Reserved1;
__le32 Reserved2;
@@ -830,9 +940,9 @@ struct smb2_flush_rsp {
#define SMB2_READFLAG_READ_UNBUFFERED 0x01
/* Channel field for read and write: exactly one of following flags can be set*/
-#define SMB2_CHANNEL_NONE 0x00000000
-#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */
-#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */
+#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000)
+#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001) /* SMB3 or later */
+#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002) /* >= SMB3.02 */
/* SMB2 read request without RFC1001 length at the beginning */
struct smb2_read_plain_req {
@@ -847,8 +957,8 @@ struct smb2_read_plain_req {
__le32 MinimumCount;
__le32 Channel; /* MBZ except for SMB3 or later */
__le32 RemainingBytes;
- __le16 ReadChannelInfoOffset; /* Reserved MBZ */
- __le16 ReadChannelInfoLength; /* Reserved MBZ */
+ __le16 ReadChannelInfoOffset;
+ __le16 ReadChannelInfoLength;
__u8 Buffer[1];
} __packed;
@@ -868,7 +978,7 @@ struct smb2_read_rsp {
#define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */
struct smb2_write_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 49 */
__le16 DataOffset; /* offset from start of SMB2 header to write data */
__le32 Length;
@@ -877,8 +987,8 @@ struct smb2_write_req {
__u64 VolatileFileId; /* opaque endianness */
__le32 Channel; /* Reserved MBZ */
__le32 RemainingBytes;
- __le16 WriteChannelInfoOffset; /* Reserved MBZ */
- __le16 WriteChannelInfoLength; /* Reserved MBZ */
+ __le16 WriteChannelInfoOffset;
+ __le16 WriteChannelInfoLength;
__le32 Flags;
__u8 Buffer[1];
} __packed;
@@ -907,7 +1017,7 @@ struct smb2_lock_element {
} __packed;
struct smb2_lock_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 48 */
__le16 LockCount;
__le32 Reserved;
@@ -924,7 +1034,7 @@ struct smb2_lock_rsp {
} __packed;
struct smb2_echo_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__u16 Reserved;
} __packed;
@@ -942,7 +1052,7 @@ struct smb2_echo_rsp {
#define SMB2_REOPEN 0x10
struct smb2_query_directory_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 33 */
__u8 FileInformationClass;
__u8 Flags;
@@ -989,7 +1099,7 @@ struct smb2_query_directory_rsp {
#define SL_INDEX_SPECIFIED 0x00000004
struct smb2_query_info_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 41 */
__u8 InfoType;
__u8 FileInfoClass;
@@ -1013,7 +1123,7 @@ struct smb2_query_info_rsp {
} __packed;
struct smb2_set_info_req {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 33 */
__u8 InfoType;
__u8 FileInfoClass;
@@ -1031,7 +1141,19 @@ struct smb2_set_info_rsp {
__le16 StructureSize; /* Must be 2 */
} __packed;
-struct smb2_oplock_break {
+/* oplock break without an rfc1002 header */
+struct smb2_oplock_break_req {
+ struct smb2_sync_hdr sync_hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __u8 OplockLevel;
+ __u8 Reserved;
+ __le32 Reserved2;
+ __u64 PersistentFid;
+ __u64 VolatileFid;
+} __packed;
+
+/* oplock break with an rfc1002 header */
+struct smb2_oplock_break_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 24 */
__u8 OplockLevel;
@@ -1057,7 +1179,7 @@ struct smb2_lease_break {
} __packed;
struct smb2_lease_ack {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 36 */
__le16 Reserved;
__le32 Flags;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index e9ab5227e7a8..05287b01f596 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -125,8 +125,7 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
struct smb2_err_rsp **err_buf);
extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, bool use_ipc,
- char *in_data, u32 indatalen,
+ bool is_fsctl, char *in_data, u32 indatalen,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
new file mode 100644
index 000000000000..91710eb571fb
--- /dev/null
+++ b/fs/cifs/smbdirect.c
@@ -0,0 +1,2612 @@
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ *
+ * Author(s): Long Li <longli@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include "smbdirect.h"
+#include "cifs_debug.h"
+
+static struct smbd_response *get_empty_queue_buffer(
+ struct smbd_connection *info);
+static struct smbd_response *get_receive_buffer(
+ struct smbd_connection *info);
+static void put_receive_buffer(
+ struct smbd_connection *info,
+ struct smbd_response *response);
+static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
+static void destroy_receive_buffers(struct smbd_connection *info);
+
+static void put_empty_packet(
+ struct smbd_connection *info, struct smbd_response *response);
+static void enqueue_reassembly(
+ struct smbd_connection *info,
+ struct smbd_response *response, int data_length);
+static struct smbd_response *_get_first_reassembly(
+ struct smbd_connection *info);
+
+static int smbd_post_recv(
+ struct smbd_connection *info,
+ struct smbd_response *response);
+
+static int smbd_post_send_empty(struct smbd_connection *info);
+static int smbd_post_send_data(
+ struct smbd_connection *info,
+ struct kvec *iov, int n_vec, int remaining_data_length);
+static int smbd_post_send_page(struct smbd_connection *info,
+ struct page *page, unsigned long offset,
+ size_t size, int remaining_data_length);
+
+static void destroy_mr_list(struct smbd_connection *info);
+static int allocate_mr_list(struct smbd_connection *info);
+
+/* SMBD version number */
+#define SMBD_V1 0x0100
+
+/* Port numbers for SMBD transport */
+#define SMB_PORT 445
+#define SMBD_PORT 5445
+
+/* Address lookup and resolve timeout in ms */
+#define RDMA_RESOLVE_TIMEOUT 5000
+
+/* SMBD negotiation timeout in seconds */
+#define SMBD_NEGOTIATE_TIMEOUT 120
+
+/* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */
+#define SMBD_MIN_RECEIVE_SIZE 128
+#define SMBD_MIN_FRAGMENTED_SIZE 131072
+
+/*
+ * Default maximum number of RDMA read/write outstanding on this connection
+ * This value is possibly decreased during QP creation on hardware limit
+ */
+#define SMBD_CM_RESPONDER_RESOURCES 32
+
+/* Maximum number of retries on data transfer operations */
+#define SMBD_CM_RETRY 6
+/* No need to retry on Receiver Not Ready since SMBD manages credits */
+#define SMBD_CM_RNR_RETRY 0
+
+/*
+ * User configurable initial values per SMBD transport connection
+ * as defined in [MS-SMBD] 3.1.1.1
+ * Those may change after a SMBD negotiation
+ */
+/* The local peer's maximum number of credits to grant to the peer */
+int smbd_receive_credit_max = 255;
+
+/* The remote peer's credit request of local peer */
+int smbd_send_credit_target = 255;
+
+/* The maximum single message size can be sent to remote peer */
+int smbd_max_send_size = 1364;
+
+/* The maximum fragmented upper-layer payload receive size supported */
+int smbd_max_fragmented_recv_size = 1024 * 1024;
+
+/* The maximum single-message size which can be received */
+int smbd_max_receive_size = 8192;
+
+/* The timeout to initiate send of a keepalive message on idle */
+int smbd_keep_alive_interval = 120;
+
+/*
+ * User configurable initial values for RDMA transport
+ * The actual values used may be lower and are limited to hardware capabilities
+ */
+/* Default maximum number of SGEs in a RDMA write/read */
+int smbd_max_frmr_depth = 2048;
+
+/* If payload is less than this byte, use RDMA send/recv not read/write */
+int rdma_readwrite_threshold = 4096;
+
+/* Transport logging functions
+ * Logging are defined as classes. They can be OR'ed to define the actual
+ * logging level via module parameter smbd_logging_class
+ * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
+ * log_rdma_event()
+ */
+#define LOG_OUTGOING 0x1
+#define LOG_INCOMING 0x2
+#define LOG_READ 0x4
+#define LOG_WRITE 0x8
+#define LOG_RDMA_SEND 0x10
+#define LOG_RDMA_RECV 0x20
+#define LOG_KEEP_ALIVE 0x40
+#define LOG_RDMA_EVENT 0x80
+#define LOG_RDMA_MR 0x100
+static unsigned int smbd_logging_class;
+module_param(smbd_logging_class, uint, 0644);
+MODULE_PARM_DESC(smbd_logging_class,
+ "Logging class for SMBD transport 0x0 to 0x100");
+
+#define ERR 0x0
+#define INFO 0x1
+static unsigned int smbd_logging_level = ERR;
+module_param(smbd_logging_level, uint, 0644);
+MODULE_PARM_DESC(smbd_logging_level,
+ "Logging level for SMBD transport, 0 (default): error, 1: info");
+
+#define log_rdma(level, class, fmt, args...) \
+do { \
+ if (level <= smbd_logging_level || class & smbd_logging_class) \
+ cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
+} while (0)
+
+#define log_outgoing(level, fmt, args...) \
+ log_rdma(level, LOG_OUTGOING, fmt, ##args)
+#define log_incoming(level, fmt, args...) \
+ log_rdma(level, LOG_INCOMING, fmt, ##args)
+#define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args)
+#define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args)
+#define log_rdma_send(level, fmt, args...) \
+ log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
+#define log_rdma_recv(level, fmt, args...) \
+ log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
+#define log_keep_alive(level, fmt, args...) \
+ log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
+#define log_rdma_event(level, fmt, args...) \
+ log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
+#define log_rdma_mr(level, fmt, args...) \
+ log_rdma(level, LOG_RDMA_MR, fmt, ##args)
+
+/*
+ * Destroy the transport and related RDMA and memory resources
+ * Need to go through all the pending counters and make sure on one is using
+ * the transport while it is destroyed
+ */
+static void smbd_destroy_rdma_work(struct work_struct *work)
+{
+ struct smbd_response *response;
+ struct smbd_connection *info =
+ container_of(work, struct smbd_connection, destroy_work);
+ unsigned long flags;
+
+ log_rdma_event(INFO, "destroying qp\n");
+ ib_drain_qp(info->id->qp);
+ rdma_destroy_qp(info->id);
+
+ /* Unblock all I/O waiting on the send queue */
+ wake_up_interruptible_all(&info->wait_send_queue);
+
+ log_rdma_event(INFO, "cancelling idle timer\n");
+ cancel_delayed_work_sync(&info->idle_timer_work);
+ log_rdma_event(INFO, "cancelling send immediate work\n");
+ cancel_delayed_work_sync(&info->send_immediate_work);
+
+ log_rdma_event(INFO, "wait for all send to finish\n");
+ wait_event(info->wait_smbd_send_pending,
+ info->smbd_send_pending == 0);
+
+ log_rdma_event(INFO, "wait for all recv to finish\n");
+ wake_up_interruptible(&info->wait_reassembly_queue);
+ wait_event(info->wait_smbd_recv_pending,
+ info->smbd_recv_pending == 0);
+
+ log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
+ wait_event(info->wait_send_pending,
+ atomic_read(&info->send_pending) == 0);
+ wait_event(info->wait_send_payload_pending,
+ atomic_read(&info->send_payload_pending) == 0);
+
+ log_rdma_event(INFO, "freeing mr list\n");
+ wake_up_interruptible_all(&info->wait_mr);
+ wait_event(info->wait_for_mr_cleanup,
+ atomic_read(&info->mr_used_count) == 0);
+ destroy_mr_list(info);
+
+ /* It's not posssible for upper layer to get to reassembly */
+ log_rdma_event(INFO, "drain the reassembly queue\n");
+ do {
+ spin_lock_irqsave(&info->reassembly_queue_lock, flags);
+ response = _get_first_reassembly(info);
+ if (response) {
+ list_del(&response->list);
+ spin_unlock_irqrestore(
+ &info->reassembly_queue_lock, flags);
+ put_receive_buffer(info, response);
+ } else
+ spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
+ } while (response);
+
+ info->reassembly_data_length = 0;
+
+ log_rdma_event(INFO, "free receive buffers\n");
+ wait_event(info->wait_receive_queues,
+ info->count_receive_queue + info->count_empty_packet_queue
+ == info->receive_credit_max);
+ destroy_receive_buffers(info);
+
+ ib_free_cq(info->send_cq);
+ ib_free_cq(info->recv_cq);
+ ib_dealloc_pd(info->pd);
+ rdma_destroy_id(info->id);
+
+ /* free mempools */
+ mempool_destroy(info->request_mempool);
+ kmem_cache_destroy(info->request_cache);
+
+ mempool_destroy(info->response_mempool);
+ kmem_cache_destroy(info->response_cache);
+
+ info->transport_status = SMBD_DESTROYED;
+ wake_up_all(&info->wait_destroy);
+}
+
+static int smbd_process_disconnected(struct smbd_connection *info)
+{
+ schedule_work(&info->destroy_work);
+ return 0;
+}
+
+static void smbd_disconnect_rdma_work(struct work_struct *work)
+{
+ struct smbd_connection *info =
+ container_of(work, struct smbd_connection, disconnect_work);
+
+ if (info->transport_status == SMBD_CONNECTED) {
+ info->transport_status = SMBD_DISCONNECTING;
+ rdma_disconnect(info->id);
+ }
+}
+
+static void smbd_disconnect_rdma_connection(struct smbd_connection *info)
+{
+ queue_work(info->workqueue, &info->disconnect_work);
+}
+
+/* Upcall from RDMA CM */
+static int smbd_conn_upcall(
+ struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+ struct smbd_connection *info = id->context;
+
+ log_rdma_event(INFO, "event=%d status=%d\n",
+ event->event, event->status);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ info->ri_rc = 0;
+ complete(&info->ri_done);
+ break;
+
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ info->ri_rc = -EHOSTUNREACH;
+ complete(&info->ri_done);
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ info->ri_rc = -ENETUNREACH;
+ complete(&info->ri_done);
+ break;
+
+ case RDMA_CM_EVENT_ESTABLISHED:
+ log_rdma_event(INFO, "connected event=%d\n", event->event);
+ info->transport_status = SMBD_CONNECTED;
+ wake_up_interruptible(&info->conn_wait);
+ break;
+
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_REJECTED:
+ log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
+ info->transport_status = SMBD_DISCONNECTED;
+ wake_up_interruptible(&info->conn_wait);
+ break;
+
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ case RDMA_CM_EVENT_DISCONNECTED:
+ /* This happenes when we fail the negotiation */
+ if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
+ info->transport_status = SMBD_DISCONNECTED;
+ wake_up(&info->conn_wait);
+ break;
+ }
+
+ info->transport_status = SMBD_DISCONNECTED;
+ smbd_process_disconnected(info);
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Upcall from RDMA QP */
+static void
+smbd_qp_async_error_upcall(struct ib_event *event, void *context)
+{
+ struct smbd_connection *info = context;
+
+ log_rdma_event(ERR, "%s on device %s info %p\n",
+ ib_event_msg(event->event), event->device->name, info);
+
+ switch (event->event) {
+ case IB_EVENT_CQ_ERR:
+ case IB_EVENT_QP_FATAL:
+ smbd_disconnect_rdma_connection(info);
+
+ default:
+ break;
+ }
+}
+
+static inline void *smbd_request_payload(struct smbd_request *request)
+{
+ return (void *)request->packet;
+}
+
+static inline void *smbd_response_payload(struct smbd_response *response)
+{
+ return (void *)response->packet;
+}
+
+/* Called when a RDMA send is done */
+static void send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ int i;
+ struct smbd_request *request =
+ container_of(wc->wr_cqe, struct smbd_request, cqe);
+
+ log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n",
+ request, wc->status);
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
+ log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
+ wc->status, wc->opcode);
+ smbd_disconnect_rdma_connection(request->info);
+ }
+
+ for (i = 0; i < request->num_sge; i++)
+ ib_dma_unmap_single(request->info->id->device,
+ request->sge[i].addr,
+ request->sge[i].length,
+ DMA_TO_DEVICE);
+
+ if (request->has_payload) {
+ if (atomic_dec_and_test(&request->info->send_payload_pending))
+ wake_up(&request->info->wait_send_payload_pending);
+ } else {
+ if (atomic_dec_and_test(&request->info->send_pending))
+ wake_up(&request->info->wait_send_pending);
+ }
+
+ mempool_free(request, request->info->request_mempool);
+}
+
+static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
+{
+ log_rdma_event(INFO, "resp message min_version %u max_version %u "
+ "negotiated_version %u credits_requested %u "
+ "credits_granted %u status %u max_readwrite_size %u "
+ "preferred_send_size %u max_receive_size %u "
+ "max_fragmented_size %u\n",
+ resp->min_version, resp->max_version, resp->negotiated_version,
+ resp->credits_requested, resp->credits_granted, resp->status,
+ resp->max_readwrite_size, resp->preferred_send_size,
+ resp->max_receive_size, resp->max_fragmented_size);
+}
+
+/*
+ * Process a negotiation response message, according to [MS-SMBD]3.1.5.7
+ * response, packet_length: the negotiation response message
+ * return value: true if negotiation is a success, false if failed
+ */
+static bool process_negotiation_response(
+ struct smbd_response *response, int packet_length)
+{
+ struct smbd_connection *info = response->info;
+ struct smbd_negotiate_resp *packet = smbd_response_payload(response);
+
+ if (packet_length < sizeof(struct smbd_negotiate_resp)) {
+ log_rdma_event(ERR,
+ "error: packet_length=%d\n", packet_length);
+ return false;
+ }
+
+ if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
+ log_rdma_event(ERR, "error: negotiated_version=%x\n",
+ le16_to_cpu(packet->negotiated_version));
+ return false;
+ }
+ info->protocol = le16_to_cpu(packet->negotiated_version);
+
+ if (packet->credits_requested == 0) {
+ log_rdma_event(ERR, "error: credits_requested==0\n");
+ return false;
+ }
+ info->receive_credit_target = le16_to_cpu(packet->credits_requested);
+
+ if (packet->credits_granted == 0) {
+ log_rdma_event(ERR, "error: credits_granted==0\n");
+ return false;
+ }
+ atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted));
+
+ atomic_set(&info->receive_credits, 0);
+
+ if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
+ log_rdma_event(ERR, "error: preferred_send_size=%d\n",
+ le32_to_cpu(packet->preferred_send_size));
+ return false;
+ }
+ info->max_receive_size = le32_to_cpu(packet->preferred_send_size);
+
+ if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
+ log_rdma_event(ERR, "error: max_receive_size=%d\n",
+ le32_to_cpu(packet->max_receive_size));
+ return false;
+ }
+ info->max_send_size = min_t(int, info->max_send_size,
+ le32_to_cpu(packet->max_receive_size));
+
+ if (le32_to_cpu(packet->max_fragmented_size) <
+ SMBD_MIN_FRAGMENTED_SIZE) {
+ log_rdma_event(ERR, "error: max_fragmented_size=%d\n",
+ le32_to_cpu(packet->max_fragmented_size));
+ return false;
+ }
+ info->max_fragmented_send_size =
+ le32_to_cpu(packet->max_fragmented_size);
+ info->rdma_readwrite_threshold =
+ rdma_readwrite_threshold > info->max_fragmented_send_size ?
+ info->max_fragmented_send_size :
+ rdma_readwrite_threshold;
+
+
+ info->max_readwrite_size = min_t(u32,
+ le32_to_cpu(packet->max_readwrite_size),
+ info->max_frmr_depth * PAGE_SIZE);
+ info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;
+
+ return true;
+}
+
+/*
+ * Check and schedule to send an immediate packet
+ * This is used to extend credtis to remote peer to keep the transport busy
+ */
+static void check_and_send_immediate(struct smbd_connection *info)
+{
+ if (info->transport_status != SMBD_CONNECTED)
+ return;
+
+ info->send_immediate = true;
+
+ /*
+ * Promptly send a packet if our peer is running low on receive
+ * credits
+ */
+ if (atomic_read(&info->receive_credits) <
+ info->receive_credit_target - 1)
+ queue_delayed_work(
+ info->workqueue, &info->send_immediate_work, 0);
+}
+
+static void smbd_post_send_credits(struct work_struct *work)
+{
+ int ret = 0;
+ int use_receive_queue = 1;
+ int rc;
+ struct smbd_response *response;
+ struct smbd_connection *info =
+ container_of(work, struct smbd_connection,
+ post_send_credits_work);
+
+ if (info->transport_status != SMBD_CONNECTED) {
+ wake_up(&info->wait_receive_queues);
+ return;
+ }
+
+ if (info->receive_credit_target >
+ atomic_read(&info->receive_credits)) {
+ while (true) {
+ if (use_receive_queue)
+ response = get_receive_buffer(info);
+ else
+ response = get_empty_queue_buffer(info);
+ if (!response) {
+ /* now switch to emtpy packet queue */
+ if (use_receive_queue) {
+ use_receive_queue = 0;
+ continue;
+ } else
+ break;
+ }
+
+ response->type = SMBD_TRANSFER_DATA;
+ response->first_segment = false;
+ rc = smbd_post_recv(info, response);
+ if (rc) {
+ log_rdma_recv(ERR,
+ "post_recv failed rc=%d\n", rc);
+ put_receive_buffer(info, response);
+ break;
+ }
+
+ ret++;
+ }
+ }
+
+ spin_lock(&info->lock_new_credits_offered);
+ info->new_credits_offered += ret;
+ spin_unlock(&info->lock_new_credits_offered);
+
+ atomic_add(ret, &info->receive_credits);
+
+ /* Check if we can post new receive and grant credits to peer */
+ check_and_send_immediate(info);
+}
+
+static void smbd_recv_done_work(struct work_struct *work)
+{
+ struct smbd_connection *info =
+ container_of(work, struct smbd_connection, recv_done_work);
+
+ /*
+ * We may have new send credits granted from remote peer
+ * If any sender is blcoked on lack of credets, unblock it
+ */
+ if (atomic_read(&info->send_credits))
+ wake_up_interruptible(&info->wait_send_queue);
+
+ /*
+ * Check if we need to send something to remote peer to
+ * grant more credits or respond to KEEP_ALIVE packet
+ */
+ check_and_send_immediate(info);
+}
+
+/* Called from softirq, when recv is done */
+static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smbd_data_transfer *data_transfer;
+ struct smbd_response *response =
+ container_of(wc->wr_cqe, struct smbd_response, cqe);
+ struct smbd_connection *info = response->info;
+ int data_length = 0;
+
+ log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d "
+ "byte_len=%d pkey_index=%x\n",
+ response, response->type, wc->status, wc->opcode,
+ wc->byte_len, wc->pkey_index);
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
+ log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
+ wc->status, wc->opcode);
+ smbd_disconnect_rdma_connection(info);
+ goto error;
+ }
+
+ ib_dma_sync_single_for_cpu(
+ wc->qp->device,
+ response->sge.addr,
+ response->sge.length,
+ DMA_FROM_DEVICE);
+
+ switch (response->type) {
+ /* SMBD negotiation response */
+ case SMBD_NEGOTIATE_RESP:
+ dump_smbd_negotiate_resp(smbd_response_payload(response));
+ info->full_packet_received = true;
+ info->negotiate_done =
+ process_negotiation_response(response, wc->byte_len);
+ complete(&info->negotiate_completion);
+ break;
+
+ /* SMBD data transfer packet */
+ case SMBD_TRANSFER_DATA:
+ data_transfer = smbd_response_payload(response);
+ data_length = le32_to_cpu(data_transfer->data_length);
+
+ /*
+ * If this is a packet with data playload place the data in
+ * reassembly queue and wake up the reading thread
+ */
+ if (data_length) {
+ if (info->full_packet_received)
+ response->first_segment = true;
+
+ if (le32_to_cpu(data_transfer->remaining_data_length))
+ info->full_packet_received = false;
+ else
+ info->full_packet_received = true;
+
+ enqueue_reassembly(
+ info,
+ response,
+ data_length);
+ } else
+ put_empty_packet(info, response);
+
+ if (data_length)
+ wake_up_interruptible(&info->wait_reassembly_queue);
+
+ atomic_dec(&info->receive_credits);
+ info->receive_credit_target =
+ le16_to_cpu(data_transfer->credits_requested);
+ atomic_add(le16_to_cpu(data_transfer->credits_granted),
+ &info->send_credits);
+
+ log_incoming(INFO, "data flags %d data_offset %d "
+ "data_length %d remaining_data_length %d\n",
+ le16_to_cpu(data_transfer->flags),
+ le32_to_cpu(data_transfer->data_offset),
+ le32_to_cpu(data_transfer->data_length),
+ le32_to_cpu(data_transfer->remaining_data_length));
+
+ /* Send a KEEP_ALIVE response right away if requested */
+ info->keep_alive_requested = KEEP_ALIVE_NONE;
+ if (le16_to_cpu(data_transfer->flags) &
+ SMB_DIRECT_RESPONSE_REQUESTED) {
+ info->keep_alive_requested = KEEP_ALIVE_PENDING;
+ }
+
+ queue_work(info->workqueue, &info->recv_done_work);
+ return;
+
+ default:
+ log_rdma_recv(ERR,
+ "unexpected response type=%d\n", response->type);
+ }
+
+error:
+ put_receive_buffer(info, response);
+}
+
+static struct rdma_cm_id *smbd_create_id(
+ struct smbd_connection *info,
+ struct sockaddr *dstaddr, int port)
+{
+ struct rdma_cm_id *id;
+ int rc;
+ __be16 *sport;
+
+ id = rdma_create_id(&init_net, smbd_conn_upcall, info,
+ RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(id)) {
+ rc = PTR_ERR(id);
+ log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc);
+ return id;
+ }
+
+ if (dstaddr->sa_family == AF_INET6)
+ sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
+ else
+ sport = &((struct sockaddr_in *)dstaddr)->sin_port;
+
+ *sport = htons(port);
+
+ init_completion(&info->ri_done);
+ info->ri_rc = -ETIMEDOUT;
+
+ rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr,
+ RDMA_RESOLVE_TIMEOUT);
+ if (rc) {
+ log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
+ goto out;
+ }
+ wait_for_completion_interruptible_timeout(
+ &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
+ rc = info->ri_rc;
+ if (rc) {
+ log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
+ goto out;
+ }
+
+ info->ri_rc = -ETIMEDOUT;
+ rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
+ if (rc) {
+ log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
+ goto out;
+ }
+ wait_for_completion_interruptible_timeout(
+ &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
+ rc = info->ri_rc;
+ if (rc) {
+ log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
+ goto out;
+ }
+
+ return id;
+
+out:
+ rdma_destroy_id(id);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Test if FRWR (Fast Registration Work Requests) is supported on the device
+ * This implementation requries FRWR on RDMA read/write
+ * return value: true if it is supported
+ */
+static bool frwr_is_supported(struct ib_device_attr *attrs)
+{
+ if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
+ return false;
+ if (attrs->max_fast_reg_page_list_len == 0)
+ return false;
+ return true;
+}
+
+static int smbd_ia_open(
+ struct smbd_connection *info,
+ struct sockaddr *dstaddr, int port)
+{
+ int rc;
+
+ info->id = smbd_create_id(info, dstaddr, port);
+ if (IS_ERR(info->id)) {
+ rc = PTR_ERR(info->id);
+ goto out1;
+ }
+
+ if (!frwr_is_supported(&info->id->device->attrs)) {
+ log_rdma_event(ERR,
+ "Fast Registration Work Requests "
+ "(FRWR) is not supported\n");
+ log_rdma_event(ERR,
+ "Device capability flags = %llx "
+ "max_fast_reg_page_list_len = %u\n",
+ info->id->device->attrs.device_cap_flags,
+ info->id->device->attrs.max_fast_reg_page_list_len);
+ rc = -EPROTONOSUPPORT;
+ goto out2;
+ }
+ info->max_frmr_depth = min_t(int,
+ smbd_max_frmr_depth,
+ info->id->device->attrs.max_fast_reg_page_list_len);
+ info->mr_type = IB_MR_TYPE_MEM_REG;
+ if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ info->mr_type = IB_MR_TYPE_SG_GAPS;
+
+ info->pd = ib_alloc_pd(info->id->device, 0);
+ if (IS_ERR(info->pd)) {
+ rc = PTR_ERR(info->pd);
+ log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
+ goto out2;
+ }
+
+ return 0;
+
+out2:
+ rdma_destroy_id(info->id);
+ info->id = NULL;
+
+out1:
+ return rc;
+}
+
+/*
+ * Send a negotiation request message to the peer
+ * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
+ * After negotiation, the transport is connected and ready for
+ * carrying upper layer SMB payload
+ */
+static int smbd_post_send_negotiate_req(struct smbd_connection *info)
+{
+ struct ib_send_wr send_wr, *send_wr_fail;
+ int rc = -ENOMEM;
+ struct smbd_request *request;
+ struct smbd_negotiate_req *packet;
+
+ request = mempool_alloc(info->request_mempool, GFP_KERNEL);
+ if (!request)
+ return rc;
+
+ request->info = info;
+
+ packet = smbd_request_payload(request);
+ packet->min_version = cpu_to_le16(SMBD_V1);
+ packet->max_version = cpu_to_le16(SMBD_V1);
+ packet->reserved = 0;
+ packet->credits_requested = cpu_to_le16(info->send_credit_target);
+ packet->preferred_send_size = cpu_to_le32(info->max_send_size);
+ packet->max_receive_size = cpu_to_le32(info->max_receive_size);
+ packet->max_fragmented_size =
+ cpu_to_le32(info->max_fragmented_recv_size);
+
+ request->num_sge = 1;
+ request->sge[0].addr = ib_dma_map_single(
+ info->id->device, (void *)packet,
+ sizeof(*packet), DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
+ rc = -EIO;
+ goto dma_mapping_failed;
+ }
+
+ request->sge[0].length = sizeof(*packet);
+ request->sge[0].lkey = info->pd->local_dma_lkey;
+
+ ib_dma_sync_single_for_device(
+ info->id->device, request->sge[0].addr,
+ request->sge[0].length, DMA_TO_DEVICE);
+
+ request->cqe.done = send_done;
+
+ send_wr.next = NULL;
+ send_wr.wr_cqe = &request->cqe;
+ send_wr.sg_list = request->sge;
+ send_wr.num_sge = request->num_sge;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n",
+ request->sge[0].addr,
+ request->sge[0].length, request->sge[0].lkey);
+
+ request->has_payload = false;
+ atomic_inc(&info->send_pending);
+ rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
+ if (!rc)
+ return 0;
+
+ /* if we reach here, post send failed */
+ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
+ atomic_dec(&info->send_pending);
+ ib_dma_unmap_single(info->id->device, request->sge[0].addr,
+ request->sge[0].length, DMA_TO_DEVICE);
+
+dma_mapping_failed:
+ mempool_free(request, info->request_mempool);
+ return rc;
+}
+
+/*
+ * Extend the credits to remote peer
+ * This implements [MS-SMBD] 3.1.5.9
+ * The idea is that we should extend credits to remote peer as quickly as
+ * it's allowed, to maintain data flow. We allocate as much receive
+ * buffer as possible, and extend the receive credits to remote peer
+ * return value: the new credtis being granted.
+ */
+static int manage_credits_prior_sending(struct smbd_connection *info)
+{
+ int new_credits;
+
+ spin_lock(&info->lock_new_credits_offered);
+ new_credits = info->new_credits_offered;
+ info->new_credits_offered = 0;
+ spin_unlock(&info->lock_new_credits_offered);
+
+ return new_credits;
+}
+
+/*
+ * Check if we need to send a KEEP_ALIVE message
+ * The idle connection timer triggers a KEEP_ALIVE message when expires
+ * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send
+ * back a response.
+ * return value:
+ * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
+ * 0: otherwise
+ */
+static int manage_keep_alive_before_sending(struct smbd_connection *info)
+{
+ if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
+ info->keep_alive_requested = KEEP_ALIVE_SENT;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Build and prepare the SMBD packet header
+ * This function waits for avaialbe send credits and build a SMBD packet
+ * header. The caller then optional append payload to the packet after
+ * the header
+ * intput values
+ * size: the size of the payload
+ * remaining_data_length: remaining data to send if this is part of a
+ * fragmented packet
+ * output values
+ * request_out: the request allocated from this function
+ * return values: 0 on success, otherwise actual error code returned
+ */
+static int smbd_create_header(struct smbd_connection *info,
+ int size, int remaining_data_length,
+ struct smbd_request **request_out)
+{
+ struct smbd_request *request;
+ struct smbd_data_transfer *packet;
+ int header_length;
+ int rc;
+
+ /* Wait for send credits. A SMBD packet needs one credit */
+ rc = wait_event_interruptible(info->wait_send_queue,
+ atomic_read(&info->send_credits) > 0 ||
+ info->transport_status != SMBD_CONNECTED);
+ if (rc)
+ return rc;
+
+ if (info->transport_status != SMBD_CONNECTED) {
+ log_outgoing(ERR, "disconnected not sending\n");
+ return -ENOENT;
+ }
+ atomic_dec(&info->send_credits);
+
+ request = mempool_alloc(info->request_mempool, GFP_KERNEL);
+ if (!request) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ request->info = info;
+
+ /* Fill in the packet header */
+ packet = smbd_request_payload(request);
+ packet->credits_requested = cpu_to_le16(info->send_credit_target);
+ packet->credits_granted =
+ cpu_to_le16(manage_credits_prior_sending(info));
+ info->send_immediate = false;
+
+ packet->flags = 0;
+ if (manage_keep_alive_before_sending(info))
+ packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
+
+ packet->reserved = 0;
+ if (!size)
+ packet->data_offset = 0;
+ else
+ packet->data_offset = cpu_to_le32(24);
+ packet->data_length = cpu_to_le32(size);
+ packet->remaining_data_length = cpu_to_le32(remaining_data_length);
+ packet->padding = 0;
+
+ log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
+ "data_offset=%d data_length=%d remaining_data_length=%d\n",
+ le16_to_cpu(packet->credits_requested),
+ le16_to_cpu(packet->credits_granted),
+ le32_to_cpu(packet->data_offset),
+ le32_to_cpu(packet->data_length),
+ le32_to_cpu(packet->remaining_data_length));
+
+ /* Map the packet to DMA */
+ header_length = sizeof(struct smbd_data_transfer);
+ /* If this is a packet without payload, don't send padding */
+ if (!size)
+ header_length = offsetof(struct smbd_data_transfer, padding);
+
+ request->num_sge = 1;
+ request->sge[0].addr = ib_dma_map_single(info->id->device,
+ (void *)packet,
+ header_length,
+ DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
+ mempool_free(request, info->request_mempool);
+ rc = -EIO;
+ goto err;
+ }
+
+ request->sge[0].length = header_length;
+ request->sge[0].lkey = info->pd->local_dma_lkey;
+
+ *request_out = request;
+ return 0;
+
+err:
+ atomic_inc(&info->send_credits);
+ return rc;
+}
+
+static void smbd_destroy_header(struct smbd_connection *info,
+ struct smbd_request *request)
+{
+
+ ib_dma_unmap_single(info->id->device,
+ request->sge[0].addr,
+ request->sge[0].length,
+ DMA_TO_DEVICE);
+ mempool_free(request, info->request_mempool);
+ atomic_inc(&info->send_credits);
+}
+
+/* Post the send request */
+static int smbd_post_send(struct smbd_connection *info,
+ struct smbd_request *request, bool has_payload)
+{
+ struct ib_send_wr send_wr, *send_wr_fail;
+ int rc, i;
+
+ for (i = 0; i < request->num_sge; i++) {
+ log_rdma_send(INFO,
+ "rdma_request sge[%d] addr=%llu legnth=%u\n",
+ i, request->sge[0].addr, request->sge[0].length);
+ ib_dma_sync_single_for_device(
+ info->id->device,
+ request->sge[i].addr,
+ request->sge[i].length,
+ DMA_TO_DEVICE);
+ }
+
+ request->cqe.done = send_done;
+
+ send_wr.next = NULL;
+ send_wr.wr_cqe = &request->cqe;
+ send_wr.sg_list = request->sge;
+ send_wr.num_sge = request->num_sge;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ if (has_payload) {
+ request->has_payload = true;
+ atomic_inc(&info->send_payload_pending);
+ } else {
+ request->has_payload = false;
+ atomic_inc(&info->send_pending);
+ }
+
+ rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
+ if (rc) {
+ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
+ if (has_payload) {
+ if (atomic_dec_and_test(&info->send_payload_pending))
+ wake_up(&info->wait_send_payload_pending);
+ } else {
+ if (atomic_dec_and_test(&info->send_pending))
+ wake_up(&info->wait_send_pending);
+ }
+ } else
+ /* Reset timer for idle connection after packet is sent */
+ mod_delayed_work(info->workqueue, &info->idle_timer_work,
+ info->keep_alive_interval*HZ);
+
+ return rc;
+}
+
+static int smbd_post_send_sgl(struct smbd_connection *info,
+ struct scatterlist *sgl, int data_length, int remaining_data_length)
+{
+ int num_sgs;
+ int i, rc;
+ struct smbd_request *request;
+ struct scatterlist *sg;
+
+ rc = smbd_create_header(
+ info, data_length, remaining_data_length, &request);
+ if (rc)
+ return rc;
+
+ num_sgs = sgl ? sg_nents(sgl) : 0;
+ for_each_sg(sgl, sg, num_sgs, i) {
+ request->sge[i+1].addr =
+ ib_dma_map_page(info->id->device, sg_page(sg),
+ sg->offset, sg->length, DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(
+ info->id->device, request->sge[i+1].addr)) {
+ rc = -EIO;
+ request->sge[i+1].addr = 0;
+ goto dma_mapping_failure;
+ }
+ request->sge[i+1].length = sg->length;
+ request->sge[i+1].lkey = info->pd->local_dma_lkey;
+ request->num_sge++;
+ }
+
+ rc = smbd_post_send(info, request, data_length);
+ if (!rc)
+ return 0;
+
+dma_mapping_failure:
+ for (i = 1; i < request->num_sge; i++)
+ if (request->sge[i].addr)
+ ib_dma_unmap_single(info->id->device,
+ request->sge[i].addr,
+ request->sge[i].length,
+ DMA_TO_DEVICE);
+ smbd_destroy_header(info, request);
+ return rc;
+}
+
+/*
+ * Send a page
+ * page: the page to send
+ * offset: offset in the page to send
+ * size: length in the page to send
+ * remaining_data_length: remaining data to send in this payload
+ */
+static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
+ unsigned long offset, size_t size, int remaining_data_length)
+{
+ struct scatterlist sgl;
+
+ sg_init_table(&sgl, 1);
+ sg_set_page(&sgl, page, size, offset);
+
+ return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
+}
+
+/*
+ * Send an empty message
+ * Empty message is used to extend credits to peer to for keep live
+ * while there is no upper layer payload to send at the time
+ */
+static int smbd_post_send_empty(struct smbd_connection *info)
+{
+ info->count_send_empty++;
+ return smbd_post_send_sgl(info, NULL, 0, 0);
+}
+
+/*
+ * Send a data buffer
+ * iov: the iov array describing the data buffers
+ * n_vec: number of iov array
+ * remaining_data_length: remaining data to send following this packet
+ * in segmented SMBD packet
+ */
+static int smbd_post_send_data(
+ struct smbd_connection *info, struct kvec *iov, int n_vec,
+ int remaining_data_length)
+{
+ int i;
+ u32 data_length = 0;
+ struct scatterlist sgl[SMBDIRECT_MAX_SGE];
+
+ if (n_vec > SMBDIRECT_MAX_SGE) {
+ cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
+ return -ENOMEM;
+ }
+
+ sg_init_table(sgl, n_vec);
+ for (i = 0; i < n_vec; i++) {
+ data_length += iov[i].iov_len;
+ sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
+ }
+
+ return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
+}
+
+/*
+ * Post a receive request to the transport
+ * The remote peer can only send data when a receive request is posted
+ * The interaction is controlled by send/receive credit system
+ */
+static int smbd_post_recv(
+ struct smbd_connection *info, struct smbd_response *response)
+{
+ struct ib_recv_wr recv_wr, *recv_wr_fail = NULL;
+ int rc = -EIO;
+
+ response->sge.addr = ib_dma_map_single(
+ info->id->device, response->packet,
+ info->max_receive_size, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(info->id->device, response->sge.addr))
+ return rc;
+
+ response->sge.length = info->max_receive_size;
+ response->sge.lkey = info->pd->local_dma_lkey;
+
+ response->cqe.done = recv_done;
+
+ recv_wr.wr_cqe = &response->cqe;
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &response->sge;
+ recv_wr.num_sge = 1;
+
+ rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail);
+ if (rc) {
+ ib_dma_unmap_single(info->id->device, response->sge.addr,
+ response->sge.length, DMA_FROM_DEVICE);
+
+ log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
+ }
+
+ return rc;
+}
+
+/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
+static int smbd_negotiate(struct smbd_connection *info)
+{
+ int rc;
+ struct smbd_response *response = get_receive_buffer(info);
+
+ response->type = SMBD_NEGOTIATE_RESP;
+ rc = smbd_post_recv(info, response);
+ log_rdma_event(INFO,
+ "smbd_post_recv rc=%d iov.addr=%llx iov.length=%x "
+ "iov.lkey=%x\n",
+ rc, response->sge.addr,
+ response->sge.length, response->sge.lkey);
+ if (rc)
+ return rc;
+
+ init_completion(&info->negotiate_completion);
+ info->negotiate_done = false;
+ rc = smbd_post_send_negotiate_req(info);
+ if (rc)
+ return rc;
+
+ rc = wait_for_completion_interruptible_timeout(
+ &info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
+ log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);
+
+ if (info->negotiate_done)
+ return 0;
+
+ if (rc == 0)
+ rc = -ETIMEDOUT;
+ else if (rc == -ERESTARTSYS)
+ rc = -EINTR;
+ else
+ rc = -ENOTCONN;
+
+ return rc;
+}
+
+static void put_empty_packet(
+ struct smbd_connection *info, struct smbd_response *response)
+{
+ spin_lock(&info->empty_packet_queue_lock);
+ list_add_tail(&response->list, &info->empty_packet_queue);
+ info->count_empty_packet_queue++;
+ spin_unlock(&info->empty_packet_queue_lock);
+
+ queue_work(info->workqueue, &info->post_send_credits_work);
+}
+
+/*
+ * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
+ * This is a queue for reassembling upper layer payload and present to upper
+ * layer. All the inncoming payload go to the reassembly queue, regardless of
+ * if reassembly is required. The uuper layer code reads from the queue for all
+ * incoming payloads.
+ * Put a received packet to the reassembly queue
+ * response: the packet received
+ * data_length: the size of payload in this packet
+ */
+static void enqueue_reassembly(
+ struct smbd_connection *info,
+ struct smbd_response *response,
+ int data_length)
+{
+ spin_lock(&info->reassembly_queue_lock);
+ list_add_tail(&response->list, &info->reassembly_queue);
+ info->reassembly_queue_length++;
+ /*
+ * Make sure reassembly_data_length is updated after list and
+ * reassembly_queue_length are updated. On the dequeue side
+ * reassembly_data_length is checked without a lock to determine
+ * if reassembly_queue_length and list is up to date
+ */
+ virt_wmb();
+ info->reassembly_data_length += data_length;
+ spin_unlock(&info->reassembly_queue_lock);
+ info->count_reassembly_queue++;
+ info->count_enqueue_reassembly_queue++;
+}
+
+/*
+ * Get the first entry at the front of reassembly queue
+ * Caller is responsible for locking
+ * return value: the first entry if any, NULL if queue is empty
+ */
+static struct smbd_response *_get_first_reassembly(struct smbd_connection *info)
+{
+ struct smbd_response *ret = NULL;
+
+ if (!list_empty(&info->reassembly_queue)) {
+ ret = list_first_entry(
+ &info->reassembly_queue,
+ struct smbd_response, list);
+ }
+ return ret;
+}
+
+static struct smbd_response *get_empty_queue_buffer(
+ struct smbd_connection *info)
+{
+ struct smbd_response *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->empty_packet_queue_lock, flags);
+ if (!list_empty(&info->empty_packet_queue)) {
+ ret = list_first_entry(
+ &info->empty_packet_queue,
+ struct smbd_response, list);
+ list_del(&ret->list);
+ info->count_empty_packet_queue--;
+ }
+ spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Get a receive buffer
+ * For each remote send, we need to post a receive. The receive buffers are
+ * pre-allocated in advance.
+ * return value: the receive buffer, NULL if none is available
+ */
+static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
+{
+ struct smbd_response *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->receive_queue_lock, flags);
+ if (!list_empty(&info->receive_queue)) {
+ ret = list_first_entry(
+ &info->receive_queue,
+ struct smbd_response, list);
+ list_del(&ret->list);
+ info->count_receive_queue--;
+ info->count_get_receive_buffer++;
+ }
+ spin_unlock_irqrestore(&info->receive_queue_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Return a receive buffer
+ * Upon returning of a receive buffer, we can post new receive and extend
+ * more receive credits to remote peer. This is done immediately after a
+ * receive buffer is returned.
+ */
+static void put_receive_buffer(
+ struct smbd_connection *info, struct smbd_response *response)
+{
+ unsigned long flags;
+
+ ib_dma_unmap_single(info->id->device, response->sge.addr,
+ response->sge.length, DMA_FROM_DEVICE);
+
+ spin_lock_irqsave(&info->receive_queue_lock, flags);
+ list_add_tail(&response->list, &info->receive_queue);
+ info->count_receive_queue++;
+ info->count_put_receive_buffer++;
+ spin_unlock_irqrestore(&info->receive_queue_lock, flags);
+
+ queue_work(info->workqueue, &info->post_send_credits_work);
+}
+
+/* Preallocate all receive buffer on transport establishment */
+static int allocate_receive_buffers(struct smbd_connection *info, int num_buf)
+{
+ int i;
+ struct smbd_response *response;
+
+ INIT_LIST_HEAD(&info->reassembly_queue);
+ spin_lock_init(&info->reassembly_queue_lock);
+ info->reassembly_data_length = 0;
+ info->reassembly_queue_length = 0;
+
+ INIT_LIST_HEAD(&info->receive_queue);
+ spin_lock_init(&info->receive_queue_lock);
+ info->count_receive_queue = 0;
+
+ INIT_LIST_HEAD(&info->empty_packet_queue);
+ spin_lock_init(&info->empty_packet_queue_lock);
+ info->count_empty_packet_queue = 0;
+
+ init_waitqueue_head(&info->wait_receive_queues);
+
+ for (i = 0; i < num_buf; i++) {
+ response = mempool_alloc(info->response_mempool, GFP_KERNEL);
+ if (!response)
+ goto allocate_failed;
+
+ response->info = info;
+ list_add_tail(&response->list, &info->receive_queue);
+ info->count_receive_queue++;
+ }
+
+ return 0;
+
+allocate_failed:
+ while (!list_empty(&info->receive_queue)) {
+ response = list_first_entry(
+ &info->receive_queue,
+ struct smbd_response, list);
+ list_del(&response->list);
+ info->count_receive_queue--;
+
+ mempool_free(response, info->response_mempool);
+ }
+ return -ENOMEM;
+}
+
+static void destroy_receive_buffers(struct smbd_connection *info)
+{
+ struct smbd_response *response;
+
+ while ((response = get_receive_buffer(info)))
+ mempool_free(response, info->response_mempool);
+
+ while ((response = get_empty_queue_buffer(info)))
+ mempool_free(response, info->response_mempool);
+}
+
+/*
+ * Check and send an immediate or keep alive packet
+ * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1
+ * Connection.KeepaliveRequested and Connection.SendImmediate
+ * The idea is to extend credits to server as soon as it becomes available
+ */
+static void send_immediate_work(struct work_struct *work)
+{
+ struct smbd_connection *info = container_of(
+ work, struct smbd_connection,
+ send_immediate_work.work);
+
+ if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
+ info->send_immediate) {
+ log_keep_alive(INFO, "send an empty message\n");
+ smbd_post_send_empty(info);
+ }
+}
+
+/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
+static void idle_connection_timer(struct work_struct *work)
+{
+ struct smbd_connection *info = container_of(
+ work, struct smbd_connection,
+ idle_timer_work.work);
+
+ if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
+ log_keep_alive(ERR,
+ "error status info->keep_alive_requested=%d\n",
+ info->keep_alive_requested);
+ smbd_disconnect_rdma_connection(info);
+ return;
+ }
+
+ log_keep_alive(INFO, "about to send an empty idle message\n");
+ smbd_post_send_empty(info);
+
+ /* Setup the next idle timeout work */
+ queue_delayed_work(info->workqueue, &info->idle_timer_work,
+ info->keep_alive_interval*HZ);
+}
+
+/* Destroy this SMBD connection, called from upper layer */
+void smbd_destroy(struct smbd_connection *info)
+{
+ log_rdma_event(INFO, "destroying rdma session\n");
+
+ /* Kick off the disconnection process */
+ smbd_disconnect_rdma_connection(info);
+
+ log_rdma_event(INFO, "wait for transport being destroyed\n");
+ wait_event(info->wait_destroy,
+ info->transport_status == SMBD_DESTROYED);
+
+ destroy_workqueue(info->workqueue);
+ kfree(info);
+}
+
+/*
+ * Reconnect this SMBD connection, called from upper layer
+ * return value: 0 on success, or actual error code
+ */
+int smbd_reconnect(struct TCP_Server_Info *server)
+{
+ log_rdma_event(INFO, "reconnecting rdma session\n");
+
+ if (!server->smbd_conn) {
+ log_rdma_event(ERR, "rdma session already destroyed\n");
+ return -EINVAL;
+ }
+
+ /*
+ * This is possible if transport is disconnected and we haven't received
+ * notification from RDMA, but upper layer has detected timeout
+ */
+ if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
+ log_rdma_event(INFO, "disconnecting transport\n");
+ smbd_disconnect_rdma_connection(server->smbd_conn);
+ }
+
+ /* wait until the transport is destroyed */
+ wait_event(server->smbd_conn->wait_destroy,
+ server->smbd_conn->transport_status == SMBD_DESTROYED);
+
+ destroy_workqueue(server->smbd_conn->workqueue);
+ kfree(server->smbd_conn);
+
+ log_rdma_event(INFO, "creating rdma session\n");
+ server->smbd_conn = smbd_get_connection(
+ server, (struct sockaddr *) &server->dstaddr);
+
+ return server->smbd_conn ? 0 : -ENOENT;
+}
+
+static void destroy_caches_and_workqueue(struct smbd_connection *info)
+{
+ destroy_receive_buffers(info);
+ destroy_workqueue(info->workqueue);
+ mempool_destroy(info->response_mempool);
+ kmem_cache_destroy(info->response_cache);
+ mempool_destroy(info->request_mempool);
+ kmem_cache_destroy(info->request_cache);
+}
+
+#define MAX_NAME_LEN 80
+static int allocate_caches_and_workqueue(struct smbd_connection *info)
+{
+ char name[MAX_NAME_LEN];
+ int rc;
+
+ snprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
+ info->request_cache =
+ kmem_cache_create(
+ name,
+ sizeof(struct smbd_request) +
+ sizeof(struct smbd_data_transfer),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!info->request_cache)
+ return -ENOMEM;
+
+ info->request_mempool =
+ mempool_create(info->send_credit_target, mempool_alloc_slab,
+ mempool_free_slab, info->request_cache);
+ if (!info->request_mempool)
+ goto out1;
+
+ snprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
+ info->response_cache =
+ kmem_cache_create(
+ name,
+ sizeof(struct smbd_response) +
+ info->max_receive_size,
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!info->response_cache)
+ goto out2;
+
+ info->response_mempool =
+ mempool_create(info->receive_credit_max, mempool_alloc_slab,
+ mempool_free_slab, info->response_cache);
+ if (!info->response_mempool)
+ goto out3;
+
+ snprintf(name, MAX_NAME_LEN, "smbd_%p", info);
+ info->workqueue = create_workqueue(name);
+ if (!info->workqueue)
+ goto out4;
+
+ rc = allocate_receive_buffers(info, info->receive_credit_max);
+ if (rc) {
+ log_rdma_event(ERR, "failed to allocate receive buffers\n");
+ goto out5;
+ }
+
+ return 0;
+
+out5:
+ destroy_workqueue(info->workqueue);
+out4:
+ mempool_destroy(info->response_mempool);
+out3:
+ kmem_cache_destroy(info->response_cache);
+out2:
+ mempool_destroy(info->request_mempool);
+out1:
+ kmem_cache_destroy(info->request_cache);
+ return -ENOMEM;
+}
+
+/* Create a SMBD connection, called by upper layer */
+static struct smbd_connection *_smbd_get_connection(
+ struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port)
+{
+ int rc;
+ struct smbd_connection *info;
+ struct rdma_conn_param conn_param;
+ struct ib_qp_init_attr qp_attr;
+ struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
+ struct ib_port_immutable port_immutable;
+ u32 ird_ord_hdr[2];
+
+ info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
+ if (!info)
+ return NULL;
+
+ info->transport_status = SMBD_CONNECTING;
+ rc = smbd_ia_open(info, dstaddr, port);
+ if (rc) {
+ log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc);
+ goto create_id_failed;
+ }
+
+ if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
+ smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
+ log_rdma_event(ERR,
+ "consider lowering send_credit_target = %d. "
+ "Possible CQE overrun, device "
+ "reporting max_cpe %d max_qp_wr %d\n",
+ smbd_send_credit_target,
+ info->id->device->attrs.max_cqe,
+ info->id->device->attrs.max_qp_wr);
+ goto config_failed;
+ }
+
+ if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
+ smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
+ log_rdma_event(ERR,
+ "consider lowering receive_credit_max = %d. "
+ "Possible CQE overrun, device "
+ "reporting max_cpe %d max_qp_wr %d\n",
+ smbd_receive_credit_max,
+ info->id->device->attrs.max_cqe,
+ info->id->device->attrs.max_qp_wr);
+ goto config_failed;
+ }
+
+ info->receive_credit_max = smbd_receive_credit_max;
+ info->send_credit_target = smbd_send_credit_target;
+ info->max_send_size = smbd_max_send_size;
+ info->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
+ info->max_receive_size = smbd_max_receive_size;
+ info->keep_alive_interval = smbd_keep_alive_interval;
+
+ if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) {
+ log_rdma_event(ERR, "warning: device max_sge = %d too small\n",
+ info->id->device->attrs.max_sge);
+ log_rdma_event(ERR, "Queue Pair creation may fail\n");
+ }
+
+ info->send_cq = NULL;
+ info->recv_cq = NULL;
+ info->send_cq = ib_alloc_cq(info->id->device, info,
+ info->send_credit_target, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(info->send_cq)) {
+ info->send_cq = NULL;
+ goto alloc_cq_failed;
+ }
+
+ info->recv_cq = ib_alloc_cq(info->id->device, info,
+ info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(info->recv_cq)) {
+ info->recv_cq = NULL;
+ goto alloc_cq_failed;
+ }
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.event_handler = smbd_qp_async_error_upcall;
+ qp_attr.qp_context = info;
+ qp_attr.cap.max_send_wr = info->send_credit_target;
+ qp_attr.cap.max_recv_wr = info->receive_credit_max;
+ qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
+ qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
+ qp_attr.cap.max_inline_data = 0;
+ qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ qp_attr.qp_type = IB_QPT_RC;
+ qp_attr.send_cq = info->send_cq;
+ qp_attr.recv_cq = info->recv_cq;
+ qp_attr.port_num = ~0;
+
+ rc = rdma_create_qp(info->id, info->pd, &qp_attr);
+ if (rc) {
+ log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc);
+ goto create_qp_failed;
+ }
+
+ memset(&conn_param, 0, sizeof(conn_param));
+ conn_param.initiator_depth = 0;
+
+ conn_param.responder_resources =
+ info->id->device->attrs.max_qp_rd_atom
+ < SMBD_CM_RESPONDER_RESOURCES ?
+ info->id->device->attrs.max_qp_rd_atom :
+ SMBD_CM_RESPONDER_RESOURCES;
+ info->responder_resources = conn_param.responder_resources;
+ log_rdma_mr(INFO, "responder_resources=%d\n",
+ info->responder_resources);
+
+ /* Need to send IRD/ORD in private data for iWARP */
+ info->id->device->get_port_immutable(
+ info->id->device, info->id->port_num, &port_immutable);
+ if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
+ ird_ord_hdr[0] = info->responder_resources;
+ ird_ord_hdr[1] = 1;
+ conn_param.private_data = ird_ord_hdr;
+ conn_param.private_data_len = sizeof(ird_ord_hdr);
+ } else {
+ conn_param.private_data = NULL;
+ conn_param.private_data_len = 0;
+ }
+
+ conn_param.retry_count = SMBD_CM_RETRY;
+ conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
+ conn_param.flow_control = 0;
+ init_waitqueue_head(&info->wait_destroy);
+
+ log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
+ &addr_in->sin_addr, port);
+
+ init_waitqueue_head(&info->conn_wait);
+ rc = rdma_connect(info->id, &conn_param);
+ if (rc) {
+ log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
+ goto rdma_connect_failed;
+ }
+
+ wait_event_interruptible(
+ info->conn_wait, info->transport_status != SMBD_CONNECTING);
+
+ if (info->transport_status != SMBD_CONNECTED) {
+ log_rdma_event(ERR, "rdma_connect failed port=%d\n", port);
+ goto rdma_connect_failed;
+ }
+
+ log_rdma_event(INFO, "rdma_connect connected\n");
+
+ rc = allocate_caches_and_workqueue(info);
+ if (rc) {
+ log_rdma_event(ERR, "cache allocation failed\n");
+ goto allocate_cache_failed;
+ }
+
+ init_waitqueue_head(&info->wait_send_queue);
+ init_waitqueue_head(&info->wait_reassembly_queue);
+
+ INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
+ INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
+ queue_delayed_work(info->workqueue, &info->idle_timer_work,
+ info->keep_alive_interval*HZ);
+
+ init_waitqueue_head(&info->wait_smbd_send_pending);
+ info->smbd_send_pending = 0;
+
+ init_waitqueue_head(&info->wait_smbd_recv_pending);
+ info->smbd_recv_pending = 0;
+
+ init_waitqueue_head(&info->wait_send_pending);
+ atomic_set(&info->send_pending, 0);
+
+ init_waitqueue_head(&info->wait_send_payload_pending);
+ atomic_set(&info->send_payload_pending, 0);
+
+ INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
+ INIT_WORK(&info->destroy_work, smbd_destroy_rdma_work);
+ INIT_WORK(&info->recv_done_work, smbd_recv_done_work);
+ INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
+ info->new_credits_offered = 0;
+ spin_lock_init(&info->lock_new_credits_offered);
+
+ rc = smbd_negotiate(info);
+ if (rc) {
+ log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc);
+ goto negotiation_failed;
+ }
+
+ rc = allocate_mr_list(info);
+ if (rc) {
+ log_rdma_mr(ERR, "memory registration allocation failed\n");
+ goto allocate_mr_failed;
+ }
+
+ return info;
+
+allocate_mr_failed:
+ /* At this point, need to a full transport shutdown */
+ smbd_destroy(info);
+ return NULL;
+
+negotiation_failed:
+ cancel_delayed_work_sync(&info->idle_timer_work);
+ destroy_caches_and_workqueue(info);
+ info->transport_status = SMBD_NEGOTIATE_FAILED;
+ init_waitqueue_head(&info->conn_wait);
+ rdma_disconnect(info->id);
+ wait_event(info->conn_wait,
+ info->transport_status == SMBD_DISCONNECTED);
+
+allocate_cache_failed:
+rdma_connect_failed:
+ rdma_destroy_qp(info->id);
+
+create_qp_failed:
+alloc_cq_failed:
+ if (info->send_cq)
+ ib_free_cq(info->send_cq);
+ if (info->recv_cq)
+ ib_free_cq(info->recv_cq);
+
+config_failed:
+ ib_dealloc_pd(info->pd);
+ rdma_destroy_id(info->id);
+
+create_id_failed:
+ kfree(info);
+ return NULL;
+}
+
+struct smbd_connection *smbd_get_connection(
+ struct TCP_Server_Info *server, struct sockaddr *dstaddr)
+{
+ struct smbd_connection *ret;
+ int port = SMBD_PORT;
+
+try_again:
+ ret = _smbd_get_connection(server, dstaddr, port);
+
+ /* Try SMB_PORT if SMBD_PORT doesn't work */
+ if (!ret && port == SMBD_PORT) {
+ port = SMB_PORT;
+ goto try_again;
+ }
+ return ret;
+}
+
+/*
+ * Receive data from receive reassembly queue
+ * All the incoming data packets are placed in reassembly queue
+ * buf: the buffer to read data into
+ * size: the length of data to read
+ * return value: actual data read
+ * Note: this implementation copies the data from reassebmly queue to receive
+ * buffers used by upper layer. This is not the optimal code path. A better way
+ * to do it is to not have upper layer allocate its receive buffers but rather
+ * borrow the buffer from reassembly queue, and return it after data is
+ * consumed. But this will require more changes to upper layer code, and also
+ * need to consider packet boundaries while they still being reassembled.
+ */
+static int smbd_recv_buf(struct smbd_connection *info, char *buf,
+ unsigned int size)
+{
+ struct smbd_response *response;
+ struct smbd_data_transfer *data_transfer;
+ int to_copy, to_read, data_read, offset;
+ u32 data_length, remaining_data_length, data_offset;
+ int rc;
+
+again:
+ if (info->transport_status != SMBD_CONNECTED) {
+ log_read(ERR, "disconnected\n");
+ return -ENODEV;
+ }
+
+ /*
+ * No need to hold the reassembly queue lock all the time as we are
+ * the only one reading from the front of the queue. The transport
+ * may add more entries to the back of the queue at the same time
+ */
+ log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
+ info->reassembly_data_length);
+ if (info->reassembly_data_length >= size) {
+ int queue_length;
+ int queue_removed = 0;
+
+ /*
+ * Need to make sure reassembly_data_length is read before
+ * reading reassembly_queue_length and calling
+ * _get_first_reassembly. This call is lock free
+ * as we never read at the end of the queue which are being
+ * updated in SOFTIRQ as more data is received
+ */
+ virt_rmb();
+ queue_length = info->reassembly_queue_length;
+ data_read = 0;
+ to_read = size;
+ offset = info->first_entry_offset;
+ while (data_read < size) {
+ response = _get_first_reassembly(info);
+ data_transfer = smbd_response_payload(response);
+ data_length = le32_to_cpu(data_transfer->data_length);
+ remaining_data_length =
+ le32_to_cpu(
+ data_transfer->remaining_data_length);
+ data_offset = le32_to_cpu(data_transfer->data_offset);
+
+ /*
+ * The upper layer expects RFC1002 length at the
+ * beginning of the payload. Return it to indicate
+ * the total length of the packet. This minimize the
+ * change to upper layer packet processing logic. This
+ * will be eventually remove when an intermediate
+ * transport layer is added
+ */
+ if (response->first_segment && size == 4) {
+ unsigned int rfc1002_len =
+ data_length + remaining_data_length;
+ *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
+ data_read = 4;
+ response->first_segment = false;
+ log_read(INFO, "returning rfc1002 length %d\n",
+ rfc1002_len);
+ goto read_rfc1002_done;
+ }
+
+ to_copy = min_t(int, data_length - offset, to_read);
+ memcpy(
+ buf + data_read,
+ (char *)data_transfer + data_offset + offset,
+ to_copy);
+
+ /* move on to the next buffer? */
+ if (to_copy == data_length - offset) {
+ queue_length--;
+ /*
+ * No need to lock if we are not at the
+ * end of the queue
+ */
+ if (queue_length)
+ list_del(&response->list);
+ else {
+ spin_lock_irq(
+ &info->reassembly_queue_lock);
+ list_del(&response->list);
+ spin_unlock_irq(
+ &info->reassembly_queue_lock);
+ }
+ queue_removed++;
+ info->count_reassembly_queue--;
+ info->count_dequeue_reassembly_queue++;
+ put_receive_buffer(info, response);
+ offset = 0;
+ log_read(INFO, "put_receive_buffer offset=0\n");
+ } else
+ offset += to_copy;
+
+ to_read -= to_copy;
+ data_read += to_copy;
+
+ log_read(INFO, "_get_first_reassembly memcpy %d bytes "
+ "data_transfer_length-offset=%d after that "
+ "to_read=%d data_read=%d offset=%d\n",
+ to_copy, data_length - offset,
+ to_read, data_read, offset);
+ }
+
+ spin_lock_irq(&info->reassembly_queue_lock);
+ info->reassembly_data_length -= data_read;
+ info->reassembly_queue_length -= queue_removed;
+ spin_unlock_irq(&info->reassembly_queue_lock);
+
+ info->first_entry_offset = offset;
+ log_read(INFO, "returning to thread data_read=%d "
+ "reassembly_data_length=%d first_entry_offset=%d\n",
+ data_read, info->reassembly_data_length,
+ info->first_entry_offset);
+read_rfc1002_done:
+ return data_read;
+ }
+
+ log_read(INFO, "wait_event on more data\n");
+ rc = wait_event_interruptible(
+ info->wait_reassembly_queue,
+ info->reassembly_data_length >= size ||
+ info->transport_status != SMBD_CONNECTED);
+ /* Don't return any data if interrupted */
+ if (rc)
+ return -ENODEV;
+
+ goto again;
+}
+
+/*
+ * Receive a page from receive reassembly queue
+ * page: the page to read data into
+ * to_read: the length of data to read
+ * return value: actual data read
+ */
+static int smbd_recv_page(struct smbd_connection *info,
+ struct page *page, unsigned int to_read)
+{
+ int ret;
+ char *to_address;
+
+ /* make sure we have the page ready for read */
+ ret = wait_event_interruptible(
+ info->wait_reassembly_queue,
+ info->reassembly_data_length >= to_read ||
+ info->transport_status != SMBD_CONNECTED);
+ if (ret)
+ return 0;
+
+ /* now we can read from reassembly queue and not sleep */
+ to_address = kmap_atomic(page);
+
+ log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
+ page, to_address, to_read);
+
+ ret = smbd_recv_buf(info, to_address, to_read);
+ kunmap_atomic(to_address);
+
+ return ret;
+}
+
+/*
+ * Receive data from transport
+ * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
+ * return: total bytes read, or 0. SMB Direct will not do partial read.
+ */
+int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
+{
+ char *buf;
+ struct page *page;
+ unsigned int to_read;
+ int rc;
+
+ info->smbd_recv_pending++;
+
+ switch (msg->msg_iter.type) {
+ case READ | ITER_KVEC:
+ buf = msg->msg_iter.kvec->iov_base;
+ to_read = msg->msg_iter.kvec->iov_len;
+ rc = smbd_recv_buf(info, buf, to_read);
+ break;
+
+ case READ | ITER_BVEC:
+ page = msg->msg_iter.bvec->bv_page;
+ to_read = msg->msg_iter.bvec->bv_len;
+ rc = smbd_recv_page(info, page, to_read);
+ break;
+
+ default:
+ /* It's a bug in upper layer to get there */
+ cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
+ msg->msg_iter.type);
+ rc = -EIO;
+ }
+
+ info->smbd_recv_pending--;
+ wake_up(&info->wait_smbd_recv_pending);
+
+ /* SMBDirect will read it all or nothing */
+ if (rc > 0)
+ msg->msg_iter.count = 0;
+ return rc;
+}
+
+/*
+ * Send data to transport
+ * Each rqst is transported as a SMBDirect payload
+ * rqst: the data to write
+ * return value: 0 if successfully write, otherwise error code
+ */
+int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+{
+ struct kvec vec;
+ int nvecs;
+ int size;
+ int buflen = 0, remaining_data_length;
+ int start, i, j;
+ int max_iov_size =
+ info->max_send_size - sizeof(struct smbd_data_transfer);
+ struct kvec iov[SMBDIRECT_MAX_SGE];
+ int rc;
+
+ info->smbd_send_pending++;
+ if (info->transport_status != SMBD_CONNECTED) {
+ rc = -ENODEV;
+ goto done;
+ }
+
+ /*
+ * This usually means a configuration error
+ * We use RDMA read/write for packet size > rdma_readwrite_threshold
+ * as long as it's properly configured we should never get into this
+ * situation
+ */
+ if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
+ log_write(ERR, "maximum send segment %x exceeding %x\n",
+ rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ /*
+ * Remove the RFC1002 length defined in MS-SMB2 section 2.1
+ * It is used only for TCP transport
+ * In future we may want to add a transport layer under protocol
+ * layer so this will only be issued to TCP transport
+ */
+ iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
+ iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
+ buflen += iov[0].iov_len;
+
+ /* total up iov array first */
+ for (i = 1; i < rqst->rq_nvec; i++) {
+ iov[i].iov_base = rqst->rq_iov[i].iov_base;
+ iov[i].iov_len = rqst->rq_iov[i].iov_len;
+ buflen += iov[i].iov_len;
+ }
+
+ /* add in the page array if there is one */
+ if (rqst->rq_npages) {
+ buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+ buflen += rqst->rq_tailsz;
+ }
+
+ if (buflen + sizeof(struct smbd_data_transfer) >
+ info->max_fragmented_send_size) {
+ log_write(ERR, "payload size %d > max size %d\n",
+ buflen, info->max_fragmented_send_size);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ remaining_data_length = buflen;
+
+ log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+ "rq_tailsz=%d buflen=%d\n",
+ rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+ rqst->rq_tailsz, buflen);
+
+ start = i = iov[0].iov_len ? 0 : 1;
+ buflen = 0;
+ while (true) {
+ buflen += iov[i].iov_len;
+ if (buflen > max_iov_size) {
+ if (i > start) {
+ remaining_data_length -=
+ (buflen-iov[i].iov_len);
+ log_write(INFO, "sending iov[] from start=%d "
+ "i=%d nvecs=%d "
+ "remaining_data_length=%d\n",
+ start, i, i-start,
+ remaining_data_length);
+ rc = smbd_post_send_data(
+ info, &iov[start], i-start,
+ remaining_data_length);
+ if (rc)
+ goto done;
+ } else {
+ /* iov[start] is too big, break it */
+ nvecs = (buflen+max_iov_size-1)/max_iov_size;
+ log_write(INFO, "iov[%d] iov_base=%p buflen=%d"
+ " break to %d vectors\n",
+ start, iov[start].iov_base,
+ buflen, nvecs);
+ for (j = 0; j < nvecs; j++) {
+ vec.iov_base =
+ (char *)iov[start].iov_base +
+ j*max_iov_size;
+ vec.iov_len = max_iov_size;
+ if (j == nvecs-1)
+ vec.iov_len =
+ buflen -
+ max_iov_size*(nvecs-1);
+ remaining_data_length -= vec.iov_len;
+ log_write(INFO,
+ "sending vec j=%d iov_base=%p"
+ " iov_len=%zu "
+ "remaining_data_length=%d\n",
+ j, vec.iov_base, vec.iov_len,
+ remaining_data_length);
+ rc = smbd_post_send_data(
+ info, &vec, 1,
+ remaining_data_length);
+ if (rc)
+ goto done;
+ }
+ i++;
+ }
+ start = i;
+ buflen = 0;
+ } else {
+ i++;
+ if (i == rqst->rq_nvec) {
+ /* send out all remaining vecs */
+ remaining_data_length -= buflen;
+ log_write(INFO,
+ "sending iov[] from start=%d i=%d "
+ "nvecs=%d remaining_data_length=%d\n",
+ start, i, i-start,
+ remaining_data_length);
+ rc = smbd_post_send_data(info, &iov[start],
+ i-start, remaining_data_length);
+ if (rc)
+ goto done;
+ break;
+ }
+ }
+ log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
+ }
+
+ /* now sending pages if there are any */
+ for (i = 0; i < rqst->rq_npages; i++) {
+ buflen = (i == rqst->rq_npages-1) ?
+ rqst->rq_tailsz : rqst->rq_pagesz;
+ nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+ log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
+ buflen, nvecs);
+ for (j = 0; j < nvecs; j++) {
+ size = max_iov_size;
+ if (j == nvecs-1)
+ size = buflen - j*max_iov_size;
+ remaining_data_length -= size;
+ log_write(INFO, "sending pages i=%d offset=%d size=%d"
+ " remaining_data_length=%d\n",
+ i, j*max_iov_size, size, remaining_data_length);
+ rc = smbd_post_send_page(
+ info, rqst->rq_pages[i], j*max_iov_size,
+ size, remaining_data_length);
+ if (rc)
+ goto done;
+ }
+ }
+
+done:
+ /*
+ * As an optimization, we don't wait for individual I/O to finish
+ * before sending the next one.
+ * Send them all and wait for pending send count to get to 0
+ * that means all the I/Os have been out and we are good to return
+ */
+
+ wait_event(info->wait_send_payload_pending,
+ atomic_read(&info->send_payload_pending) == 0);
+
+ info->smbd_send_pending--;
+ wake_up(&info->wait_smbd_send_pending);
+
+ return rc;
+}
+
+static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smbd_mr *mr;
+ struct ib_cqe *cqe;
+
+ if (wc->status) {
+ log_rdma_mr(ERR, "status=%d\n", wc->status);
+ cqe = wc->wr_cqe;
+ mr = container_of(cqe, struct smbd_mr, cqe);
+ smbd_disconnect_rdma_connection(mr->conn);
+ }
+}
+
+/*
+ * The work queue function that recovers MRs
+ * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used
+ * again. Both calls are slow, so finish them in a workqueue. This will not
+ * block I/O path.
+ * There is one workqueue that recovers MRs, there is no need to lock as the
+ * I/O requests calling smbd_register_mr will never update the links in the
+ * mr_list.
+ */
+static void smbd_mr_recovery_work(struct work_struct *work)
+{
+ struct smbd_connection *info =
+ container_of(work, struct smbd_connection, mr_recovery_work);
+ struct smbd_mr *smbdirect_mr;
+ int rc;
+
+ list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
+ if (smbdirect_mr->state == MR_INVALIDATED ||
+ smbdirect_mr->state == MR_ERROR) {
+
+ if (smbdirect_mr->state == MR_INVALIDATED) {
+ ib_dma_unmap_sg(
+ info->id->device, smbdirect_mr->sgl,
+ smbdirect_mr->sgl_count,
+ smbdirect_mr->dir);
+ smbdirect_mr->state = MR_READY;
+ } else if (smbdirect_mr->state == MR_ERROR) {
+
+ /* recover this MR entry */
+ rc = ib_dereg_mr(smbdirect_mr->mr);
+ if (rc) {
+ log_rdma_mr(ERR,
+ "ib_dereg_mr faield rc=%x\n",
+ rc);
+ smbd_disconnect_rdma_connection(info);
+ }
+
+ smbdirect_mr->mr = ib_alloc_mr(
+ info->pd, info->mr_type,
+ info->max_frmr_depth);
+ if (IS_ERR(smbdirect_mr->mr)) {
+ log_rdma_mr(ERR,
+ "ib_alloc_mr failed mr_type=%x "
+ "max_frmr_depth=%x\n",
+ info->mr_type,
+ info->max_frmr_depth);
+ smbd_disconnect_rdma_connection(info);
+ }
+
+ smbdirect_mr->state = MR_READY;
+ }
+ /* smbdirect_mr->state is updated by this function
+ * and is read and updated by I/O issuing CPUs trying
+ * to get a MR, the call to atomic_inc_return
+ * implicates a memory barrier and guarantees this
+ * value is updated before waking up any calls to
+ * get_mr() from the I/O issuing CPUs
+ */
+ if (atomic_inc_return(&info->mr_ready_count) == 1)
+ wake_up_interruptible(&info->wait_mr);
+ }
+ }
+}
+
+static void destroy_mr_list(struct smbd_connection *info)
+{
+ struct smbd_mr *mr, *tmp;
+
+ cancel_work_sync(&info->mr_recovery_work);
+ list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
+ if (mr->state == MR_INVALIDATED)
+ ib_dma_unmap_sg(info->id->device, mr->sgl,
+ mr->sgl_count, mr->dir);
+ ib_dereg_mr(mr->mr);
+ kfree(mr->sgl);
+ kfree(mr);
+ }
+}
+
+/*
+ * Allocate MRs used for RDMA read/write
+ * The number of MRs will not exceed hardware capability in responder_resources
+ * All MRs are kept in mr_list. The MR can be recovered after it's used
+ * Recovery is done in smbd_mr_recovery_work. The content of list entry changes
+ * as MRs are used and recovered for I/O, but the list links will not change
+ */
+static int allocate_mr_list(struct smbd_connection *info)
+{
+ int i;
+ struct smbd_mr *smbdirect_mr, *tmp;
+
+ INIT_LIST_HEAD(&info->mr_list);
+ init_waitqueue_head(&info->wait_mr);
+ spin_lock_init(&info->mr_list_lock);
+ atomic_set(&info->mr_ready_count, 0);
+ atomic_set(&info->mr_used_count, 0);
+ init_waitqueue_head(&info->wait_for_mr_cleanup);
+ /* Allocate more MRs (2x) than hardware responder_resources */
+ for (i = 0; i < info->responder_resources * 2; i++) {
+ smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
+ if (!smbdirect_mr)
+ goto out;
+ smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type,
+ info->max_frmr_depth);
+ if (IS_ERR(smbdirect_mr->mr)) {
+ log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x "
+ "max_frmr_depth=%x\n",
+ info->mr_type, info->max_frmr_depth);
+ goto out;
+ }
+ smbdirect_mr->sgl = kcalloc(
+ info->max_frmr_depth,
+ sizeof(struct scatterlist),
+ GFP_KERNEL);
+ if (!smbdirect_mr->sgl) {
+ log_rdma_mr(ERR, "failed to allocate sgl\n");
+ ib_dereg_mr(smbdirect_mr->mr);
+ goto out;
+ }
+ smbdirect_mr->state = MR_READY;
+ smbdirect_mr->conn = info;
+
+ list_add_tail(&smbdirect_mr->list, &info->mr_list);
+ atomic_inc(&info->mr_ready_count);
+ }
+ INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
+ return 0;
+
+out:
+ kfree(smbdirect_mr);
+
+ list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
+ ib_dereg_mr(smbdirect_mr->mr);
+ kfree(smbdirect_mr->sgl);
+ kfree(smbdirect_mr);
+ }
+ return -ENOMEM;
+}
+
+/*
+ * Get a MR from mr_list. This function waits until there is at least one
+ * MR available in the list. It may access the list while the
+ * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock
+ * as they never modify the same places. However, there may be several CPUs
+ * issueing I/O trying to get MR at the same time, mr_list_lock is used to
+ * protect this situation.
+ */
+static struct smbd_mr *get_mr(struct smbd_connection *info)
+{
+ struct smbd_mr *ret;
+ int rc;
+again:
+ rc = wait_event_interruptible(info->wait_mr,
+ atomic_read(&info->mr_ready_count) ||
+ info->transport_status != SMBD_CONNECTED);
+ if (rc) {
+ log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc);
+ return NULL;
+ }
+
+ if (info->transport_status != SMBD_CONNECTED) {
+ log_rdma_mr(ERR, "info->transport_status=%x\n",
+ info->transport_status);
+ return NULL;
+ }
+
+ spin_lock(&info->mr_list_lock);
+ list_for_each_entry(ret, &info->mr_list, list) {
+ if (ret->state == MR_READY) {
+ ret->state = MR_REGISTERED;
+ spin_unlock(&info->mr_list_lock);
+ atomic_dec(&info->mr_ready_count);
+ atomic_inc(&info->mr_used_count);
+ return ret;
+ }
+ }
+
+ spin_unlock(&info->mr_list_lock);
+ /*
+ * It is possible that we could fail to get MR because other processes may
+ * try to acquire a MR at the same time. If this is the case, retry it.
+ */
+ goto again;
+}
+
+/*
+ * Register memory for RDMA read/write
+ * pages[]: the list of pages to register memory with
+ * num_pages: the number of pages to register
+ * tailsz: if non-zero, the bytes to register in the last page
+ * writing: true if this is a RDMA write (SMB read), false for RDMA read
+ * need_invalidate: true if this MR needs to be locally invalidated after I/O
+ * return value: the MR registered, NULL if failed.
+ */
+struct smbd_mr *smbd_register_mr(
+ struct smbd_connection *info, struct page *pages[], int num_pages,
+ int tailsz, bool writing, bool need_invalidate)
+{
+ struct smbd_mr *smbdirect_mr;
+ int rc, i;
+ enum dma_data_direction dir;
+ struct ib_reg_wr *reg_wr;
+ struct ib_send_wr *bad_wr;
+
+ if (num_pages > info->max_frmr_depth) {
+ log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
+ num_pages, info->max_frmr_depth);
+ return NULL;
+ }
+
+ smbdirect_mr = get_mr(info);
+ if (!smbdirect_mr) {
+ log_rdma_mr(ERR, "get_mr returning NULL\n");
+ return NULL;
+ }
+ smbdirect_mr->need_invalidate = need_invalidate;
+ smbdirect_mr->sgl_count = num_pages;
+ sg_init_table(smbdirect_mr->sgl, num_pages);
+
+ for (i = 0; i < num_pages - 1; i++)
+ sg_set_page(&smbdirect_mr->sgl[i], pages[i], PAGE_SIZE, 0);
+
+ sg_set_page(&smbdirect_mr->sgl[i], pages[i],
+ tailsz ? tailsz : PAGE_SIZE, 0);
+
+ dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ smbdirect_mr->dir = dir;
+ rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir);
+ if (!rc) {
+ log_rdma_mr(INFO, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
+ num_pages, dir, rc);
+ goto dma_map_error;
+ }
+
+ rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages,
+ NULL, PAGE_SIZE);
+ if (rc != num_pages) {
+ log_rdma_mr(INFO,
+ "ib_map_mr_sg failed rc = %x num_pages = %x\n",
+ rc, num_pages);
+ goto map_mr_error;
+ }
+
+ ib_update_fast_reg_key(smbdirect_mr->mr,
+ ib_inc_rkey(smbdirect_mr->mr->rkey));
+ reg_wr = &smbdirect_mr->wr;
+ reg_wr->wr.opcode = IB_WR_REG_MR;
+ smbdirect_mr->cqe.done = register_mr_done;
+ reg_wr->wr.wr_cqe = &smbdirect_mr->cqe;
+ reg_wr->wr.num_sge = 0;
+ reg_wr->wr.send_flags = IB_SEND_SIGNALED;
+ reg_wr->mr = smbdirect_mr->mr;
+ reg_wr->key = smbdirect_mr->mr->rkey;
+ reg_wr->access = writing ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_REMOTE_READ;
+
+ /*
+ * There is no need for waiting for complemtion on ib_post_send
+ * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
+ * on the next ib_post_send when we actaully send I/O to remote peer
+ */
+ rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr);
+ if (!rc)
+ return smbdirect_mr;
+
+ log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
+ rc, reg_wr->key);
+
+ /* If all failed, attempt to recover this MR by setting it MR_ERROR*/
+map_mr_error:
+ ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgl,
+ smbdirect_mr->sgl_count, smbdirect_mr->dir);
+
+dma_map_error:
+ smbdirect_mr->state = MR_ERROR;
+ if (atomic_dec_and_test(&info->mr_used_count))
+ wake_up(&info->wait_for_mr_cleanup);
+
+ return NULL;
+}
+
+static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smbd_mr *smbdirect_mr;
+ struct ib_cqe *cqe;
+
+ cqe = wc->wr_cqe;
+ smbdirect_mr = container_of(cqe, struct smbd_mr, cqe);
+ smbdirect_mr->state = MR_INVALIDATED;
+ if (wc->status != IB_WC_SUCCESS) {
+ log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status);
+ smbdirect_mr->state = MR_ERROR;
+ }
+ complete(&smbdirect_mr->invalidate_done);
+}
+
+/*
+ * Deregister a MR after I/O is done
+ * This function may wait if remote invalidation is not used
+ * and we have to locally invalidate the buffer to prevent data is being
+ * modified by remote peer after upper layer consumes it
+ */
+int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
+{
+ struct ib_send_wr *wr, *bad_wr;
+ struct smbd_connection *info = smbdirect_mr->conn;
+ int rc = 0;
+
+ if (smbdirect_mr->need_invalidate) {
+ /* Need to finish local invalidation before returning */
+ wr = &smbdirect_mr->inv_wr;
+ wr->opcode = IB_WR_LOCAL_INV;
+ smbdirect_mr->cqe.done = local_inv_done;
+ wr->wr_cqe = &smbdirect_mr->cqe;
+ wr->num_sge = 0;
+ wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
+ wr->send_flags = IB_SEND_SIGNALED;
+
+ init_completion(&smbdirect_mr->invalidate_done);
+ rc = ib_post_send(info->id->qp, wr, &bad_wr);
+ if (rc) {
+ log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
+ smbd_disconnect_rdma_connection(info);
+ goto done;
+ }
+ wait_for_completion(&smbdirect_mr->invalidate_done);
+ smbdirect_mr->need_invalidate = false;
+ } else
+ /*
+ * For remote invalidation, just set it to MR_INVALIDATED
+ * and defer to mr_recovery_work to recover the MR for next use
+ */
+ smbdirect_mr->state = MR_INVALIDATED;
+
+ /*
+ * Schedule the work to do MR recovery for future I/Os
+ * MR recovery is slow and we don't want it to block the current I/O
+ */
+ queue_work(info->workqueue, &info->mr_recovery_work);
+
+done:
+ if (atomic_dec_and_test(&info->mr_used_count))
+ wake_up(&info->wait_for_mr_cleanup);
+
+ return rc;
+}
diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
new file mode 100644
index 000000000000..f9038daea194
--- /dev/null
+++ b/fs/cifs/smbdirect.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ *
+ * Author(s): Long Li <longli@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+#ifndef _SMBDIRECT_H
+#define _SMBDIRECT_H
+
+#ifdef CONFIG_CIFS_SMB_DIRECT
+#define cifs_rdma_enabled(server) ((server)->rdma)
+
+#include "cifsglob.h"
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#include <linux/mempool.h>
+
+extern int rdma_readwrite_threshold;
+extern int smbd_max_frmr_depth;
+extern int smbd_keep_alive_interval;
+extern int smbd_max_receive_size;
+extern int smbd_max_fragmented_recv_size;
+extern int smbd_max_send_size;
+extern int smbd_send_credit_target;
+extern int smbd_receive_credit_max;
+
+enum keep_alive_status {
+ KEEP_ALIVE_NONE,
+ KEEP_ALIVE_PENDING,
+ KEEP_ALIVE_SENT,
+};
+
+enum smbd_connection_status {
+ SMBD_CREATED,
+ SMBD_CONNECTING,
+ SMBD_CONNECTED,
+ SMBD_NEGOTIATE_FAILED,
+ SMBD_DISCONNECTING,
+ SMBD_DISCONNECTED,
+ SMBD_DESTROYED
+};
+
+/*
+ * The context for the SMBDirect transport
+ * Everything related to the transport is here. It has several logical parts
+ * 1. RDMA related structures
+ * 2. SMBDirect connection parameters
+ * 3. Memory registrations
+ * 4. Receive and reassembly queues for data receive path
+ * 5. mempools for allocating packets
+ */
+struct smbd_connection {
+ enum smbd_connection_status transport_status;
+
+ /* RDMA related */
+ struct rdma_cm_id *id;
+ struct ib_qp_init_attr qp_attr;
+ struct ib_pd *pd;
+ struct ib_cq *send_cq, *recv_cq;
+ struct ib_device_attr dev_attr;
+ int ri_rc;
+ struct completion ri_done;
+ wait_queue_head_t conn_wait;
+ wait_queue_head_t wait_destroy;
+
+ struct completion negotiate_completion;
+ bool negotiate_done;
+
+ struct work_struct destroy_work;
+ struct work_struct disconnect_work;
+ struct work_struct recv_done_work;
+ struct work_struct post_send_credits_work;
+
+ spinlock_t lock_new_credits_offered;
+ int new_credits_offered;
+
+ /* Connection parameters defined in [MS-SMBD] 3.1.1.1 */
+ int receive_credit_max;
+ int send_credit_target;
+ int max_send_size;
+ int max_fragmented_recv_size;
+ int max_fragmented_send_size;
+ int max_receive_size;
+ int keep_alive_interval;
+ int max_readwrite_size;
+ enum keep_alive_status keep_alive_requested;
+ int protocol;
+ atomic_t send_credits;
+ atomic_t receive_credits;
+ int receive_credit_target;
+ int fragment_reassembly_remaining;
+
+ /* Memory registrations */
+ /* Maximum number of RDMA read/write outstanding on this connection */
+ int responder_resources;
+ /* Maximum number of SGEs in a RDMA write/read */
+ int max_frmr_depth;
+ /*
+ * If payload is less than or equal to the threshold,
+ * use RDMA send/recv to send upper layer I/O.
+ * If payload is more than the threshold,
+ * use RDMA read/write through memory registration for I/O.
+ */
+ int rdma_readwrite_threshold;
+ enum ib_mr_type mr_type;
+ struct list_head mr_list;
+ spinlock_t mr_list_lock;
+ /* The number of available MRs ready for memory registration */
+ atomic_t mr_ready_count;
+ atomic_t mr_used_count;
+ wait_queue_head_t wait_mr;
+ struct work_struct mr_recovery_work;
+ /* Used by transport to wait until all MRs are returned */
+ wait_queue_head_t wait_for_mr_cleanup;
+
+ /* Activity accoutning */
+ /* Pending reqeusts issued from upper layer */
+ int smbd_send_pending;
+ wait_queue_head_t wait_smbd_send_pending;
+
+ int smbd_recv_pending;
+ wait_queue_head_t wait_smbd_recv_pending;
+
+ atomic_t send_pending;
+ wait_queue_head_t wait_send_pending;
+ atomic_t send_payload_pending;
+ wait_queue_head_t wait_send_payload_pending;
+
+ /* Receive queue */
+ struct list_head receive_queue;
+ int count_receive_queue;
+ spinlock_t receive_queue_lock;
+
+ struct list_head empty_packet_queue;
+ int count_empty_packet_queue;
+ spinlock_t empty_packet_queue_lock;
+
+ wait_queue_head_t wait_receive_queues;
+
+ /* Reassembly queue */
+ struct list_head reassembly_queue;
+ spinlock_t reassembly_queue_lock;
+ wait_queue_head_t wait_reassembly_queue;
+
+ /* total data length of reassembly queue */
+ int reassembly_data_length;
+ int reassembly_queue_length;
+ /* the offset to first buffer in reassembly queue */
+ int first_entry_offset;
+
+ bool send_immediate;
+
+ wait_queue_head_t wait_send_queue;
+
+ /*
+ * Indicate if we have received a full packet on the connection
+ * This is used to identify the first SMBD packet of a assembled
+ * payload (SMB packet) in reassembly queue so we can return a
+ * RFC1002 length to upper layer to indicate the length of the SMB
+ * packet received
+ */
+ bool full_packet_received;
+
+ struct workqueue_struct *workqueue;
+ struct delayed_work idle_timer_work;
+ struct delayed_work send_immediate_work;
+
+ /* Memory pool for preallocating buffers */
+ /* request pool for RDMA send */
+ struct kmem_cache *request_cache;
+ mempool_t *request_mempool;
+
+ /* response pool for RDMA receive */
+ struct kmem_cache *response_cache;
+ mempool_t *response_mempool;
+
+ /* for debug purposes */
+ unsigned int count_get_receive_buffer;
+ unsigned int count_put_receive_buffer;
+ unsigned int count_reassembly_queue;
+ unsigned int count_enqueue_reassembly_queue;
+ unsigned int count_dequeue_reassembly_queue;
+ unsigned int count_send_empty;
+};
+
+enum smbd_message_type {
+ SMBD_NEGOTIATE_RESP,
+ SMBD_TRANSFER_DATA,
+};
+
+#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
+
+/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */
+struct smbd_negotiate_req {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */
+struct smbd_negotiate_resp {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 negotiated_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le32 status;
+ __le32 max_readwrite_size;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */
+struct smbd_data_transfer {
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le16 flags;
+ __le16 reserved;
+ __le32 remaining_data_length;
+ __le32 data_offset;
+ __le32 data_length;
+ __le32 padding;
+ __u8 buffer[];
+} __packed;
+
+/* The packet fields for a registered RDMA buffer */
+struct smbd_buffer_descriptor_v1 {
+ __le64 offset;
+ __le32 token;
+ __le32 length;
+} __packed;
+
+/* Default maximum number of SGEs in a RDMA send/recv */
+#define SMBDIRECT_MAX_SGE 16
+/* The context for a SMBD request */
+struct smbd_request {
+ struct smbd_connection *info;
+ struct ib_cqe cqe;
+
+ /* true if this request carries upper layer payload */
+ bool has_payload;
+
+ /* the SGE entries for this packet */
+ struct ib_sge sge[SMBDIRECT_MAX_SGE];
+ int num_sge;
+
+ /* SMBD packet header follows this structure */
+ u8 packet[];
+};
+
+/* The context for a SMBD response */
+struct smbd_response {
+ struct smbd_connection *info;
+ struct ib_cqe cqe;
+ struct ib_sge sge;
+
+ enum smbd_message_type type;
+
+ /* Link to receive queue or reassembly queue */
+ struct list_head list;
+
+ /* Indicate if this is the 1st packet of a payload */
+ bool first_segment;
+
+ /* SMBD packet header and payload follows this structure */
+ u8 packet[];
+};
+
+/* Create a SMBDirect session */
+struct smbd_connection *smbd_get_connection(
+ struct TCP_Server_Info *server, struct sockaddr *dstaddr);
+
+/* Reconnect SMBDirect session */
+int smbd_reconnect(struct TCP_Server_Info *server);
+/* Destroy SMBDirect session */
+void smbd_destroy(struct smbd_connection *info);
+
+/* Interface for carrying upper layer I/O through send/recv */
+int smbd_recv(struct smbd_connection *info, struct msghdr *msg);
+int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst);
+
+enum mr_state {
+ MR_READY,
+ MR_REGISTERED,
+ MR_INVALIDATED,
+ MR_ERROR
+};
+
+struct smbd_mr {
+ struct smbd_connection *conn;
+ struct list_head list;
+ enum mr_state state;
+ struct ib_mr *mr;
+ struct scatterlist *sgl;
+ int sgl_count;
+ enum dma_data_direction dir;
+ union {
+ struct ib_reg_wr wr;
+ struct ib_send_wr inv_wr;
+ };
+ struct ib_cqe cqe;
+ bool need_invalidate;
+ struct completion invalidate_done;
+};
+
+/* Interfaces to register and deregister MR for RDMA read/write */
+struct smbd_mr *smbd_register_mr(
+ struct smbd_connection *info, struct page *pages[], int num_pages,
+ int tailsz, bool writing, bool need_invalidate);
+int smbd_deregister_mr(struct smbd_mr *mr);
+
+#else
+#define cifs_rdma_enabled(server) 0
+struct smbd_connection {};
+static inline void *smbd_get_connection(
+ struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;}
+static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; }
+static inline void smbd_destroy(struct smbd_connection *info) {}
+static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; }
+static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; }
+#endif
+
+#endif
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 7efbab013957..9779b3292d8e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -37,6 +37,10 @@
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
+#include "smbdirect.h"
+
+/* Max number of iovectors we can use off the stack when sending requests. */
+#define CIFS_MAX_IOV_SIZE 8
void
cifs_wake_up_task(struct mid_q_entry *mid)
@@ -229,7 +233,10 @@ __smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
struct socket *ssocket = server->ssocket;
struct msghdr smb_msg;
int val = 1;
-
+ if (cifs_rdma_enabled(server) && server->smbd_conn) {
+ rc = smbd_send(server->smbd_conn, rqst);
+ goto smbd_done;
+ }
if (ssocket == NULL)
return -ENOTSOCK;
@@ -298,7 +305,7 @@ uncork:
*/
server->tcpStatus = CifsNeedReconnect;
}
-
+smbd_done:
if (rc < 0 && rc != -EINTR)
cifs_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
@@ -803,12 +810,16 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
const int flags, struct kvec *resp_iov)
{
struct smb_rqst rqst;
- struct kvec *new_iov;
+ struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
int rc;
- new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL);
- if (!new_iov)
- return -ENOMEM;
+ if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
+ new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1),
+ GFP_KERNEL);
+ if (!new_iov)
+ return -ENOMEM;
+ } else
+ new_iov = s_iov;
/* 1st iov is a RFC1001 length followed by the rest of the packet */
memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
@@ -823,7 +834,51 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
rqst.rq_nvec = n_vec + 1;
rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov);
- kfree(new_iov);
+ if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
+ kfree(new_iov);
+ return rc;
+}
+
+/* Like SendReceive2 but iov[0] does not contain an rfc1002 header */
+int
+smb2_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
+ const int flags, struct kvec *resp_iov)
+{
+ struct smb_rqst rqst;
+ struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
+ int rc;
+ int i;
+ __u32 count;
+ __be32 rfc1002_marker;
+
+ if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
+ new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1),
+ GFP_KERNEL);
+ if (!new_iov)
+ return -ENOMEM;
+ } else
+ new_iov = s_iov;
+
+ /* 1st iov is an RFC1002 Session Message length */
+ memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
+
+ count = 0;
+ for (i = 1; i < n_vec + 1; i++)
+ count += new_iov[i].iov_len;
+
+ rfc1002_marker = cpu_to_be32(count);
+
+ new_iov[0].iov_base = &rfc1002_marker;
+ new_iov[0].iov_len = 4;
+
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = new_iov;
+ rqst.rq_nvec = n_vec + 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov);
+ if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
+ kfree(new_iov);
return rc;
}