diff options
Diffstat (limited to 'drivers/infiniband')
68 files changed, 12685 insertions, 4592 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 3cc3ff0cccb1..32cdfb30e9b4 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -1,12 +1,23 @@ menu "InfiniBand support" config INFINIBAND + depends on PCI || BROKEN tristate "InfiniBand support" ---help--- Core support for InfiniBand (IB). Make sure to also select any protocols you wish to use as well as drivers for your InfiniBand hardware. +config INFINIBAND_USER_VERBS + tristate "InfiniBand userspace verbs support" + depends on INFINIBAND + ---help--- + Userspace InfiniBand verbs support. This is the kernel side + of userspace verbs, which allows userspace processes to + directly access InfiniBand hardware for fast-path + operations. You will also need libibverbs and a hardware + driver library from <http://www.openib.org>. + source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d2dbfb52c0a3..678a7e097f32 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,12 +1,18 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ + ib_cm.o ib_umad.o ib_ucm.o +obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o -ib_mad-y := mad.o smi.o agent.o +ib_mad-y := mad.o smi.o agent.o mad_rmpp.o ib_sa-y := sa_query.o +ib_cm-y := cm.o + ib_umad-y := user_mad.o + +ib_ucm-y := ucm.o + +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 23d1957c4b29..5ac86f566dc0 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -1,9 +1,10 @@ /* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -40,7 +41,7 @@ #include <asm/bug.h> -#include <ib_smi.h> +#include <rdma/ib_smi.h> #include "smi.h" #include "agent_priv.h" @@ -134,7 +135,7 @@ static int agent_mad_send(struct ib_mad_agent *mad_agent, sizeof(mad_priv->mad), DMA_TO_DEVICE); gather_list.length = sizeof(mad_priv->mad); - gather_list.lkey = (*port_priv->mr).lkey; + gather_list.lkey = mad_agent->mr->lkey; send_wr.next = NULL; send_wr.opcode = IB_WR_SEND; @@ -156,10 +157,10 @@ static int agent_mad_send(struct ib_mad_agent *mad_agent, /* Should sgid be looked up ? */ ah_attr.grh.sgid_index = 0; ah_attr.grh.hop_limit = grh->hop_limit; - ah_attr.grh.flow_label = be32_to_cpup( - &grh->version_tclass_flow) & 0xfffff; - ah_attr.grh.traffic_class = (be32_to_cpup( - &grh->version_tclass_flow) >> 20) & 0xff; + ah_attr.grh.flow_label = be32_to_cpu( + grh->version_tclass_flow) & 0xfffff; + ah_attr.grh.traffic_class = (be32_to_cpu( + grh->version_tclass_flow) >> 20) & 0xff; memcpy(ah_attr.grh.dgid.raw, grh->sgid.raw, sizeof(ah_attr.grh.dgid)); @@ -322,22 +323,12 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error3; } - port_priv->mr = ib_get_dma_mr(port_priv->smp_agent->qp->pd, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(port_priv->mr)) { - printk(KERN_ERR SPFX "Couldn't get DMA MR\n"); - ret = PTR_ERR(port_priv->mr); - goto error4; - } - spin_lock_irqsave(&ib_agent_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_agent_port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); return 0; -error4: - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); error3: ib_unregister_mad_agent(port_priv->smp_agent); error2: @@ -361,8 +352,6 @@ int ib_agent_port_close(struct ib_device *device, int port_num) list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - ib_dereg_mr(port_priv->mr); - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); ib_unregister_mad_agent(port_priv->smp_agent); kfree(port_priv); diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h index 17a0cce5813c..2ec6d7f1b7d0 100644 --- a/drivers/infiniband/core/agent_priv.h +++ b/drivers/infiniband/core/agent_priv.h @@ -1,9 +1,9 @@ /* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,7 +33,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: agent_priv.h 1389 2004-12-27 22:56:47Z roland $ + * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $ */ #ifndef __IB_AGENT_PRIV_H__ @@ -57,7 +57,6 @@ struct ib_agent_port_private { int port_num; struct ib_mad_agent *smp_agent; /* SM class */ struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */ - struct ib_mr *mr; }; #endif /* __IB_AGENT_PRIV_H__ */ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 3042360c97e1..f014e639088c 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1,5 +1,8 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,12 +35,11 @@ * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/version.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> -#include <ib_cache.h> +#include <rdma/ib_cache.h> #include "core_priv.h" diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c new file mode 100644 index 000000000000..4de93ba274a6 --- /dev/null +++ b/drivers/infiniband/core/cm.c @@ -0,0 +1,3327 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $ + */ +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include <rdma/ib_cache.h> +#include <rdma/ib_cm.h> +#include "cm_msgs.h" + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("InfiniBand CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static void cm_add_one(struct ib_device *device); +static void cm_remove_one(struct ib_device *device); + +static struct ib_client cm_client = { + .name = "cm", + .add = cm_add_one, + .remove = cm_remove_one +}; + +static struct ib_cm { + spinlock_t lock; + struct list_head device_list; + rwlock_t device_lock; + struct rb_root listen_service_table; + u64 listen_service_id; + /* struct rb_root peer_service_table; todo: fix peer to peer */ + struct rb_root remote_qp_table; + struct rb_root remote_id_table; + struct rb_root remote_sidr_table; + struct idr local_id_table; + struct workqueue_struct *wq; +} cm; + +struct cm_port { + struct cm_device *cm_dev; + struct ib_mad_agent *mad_agent; + u8 port_num; +}; + +struct cm_device { + struct list_head list; + struct ib_device *device; + __be64 ca_guid; + struct cm_port port[0]; +}; + +struct cm_av { + struct cm_port *port; + union ib_gid dgid; + struct ib_ah_attr ah_attr; + u16 pkey_index; + u8 packet_life_time; +}; + +struct cm_work { + struct work_struct work; + struct list_head list; + struct cm_port *port; + struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ + __be32 local_id; /* Established / timewait */ + __be32 remote_id; + struct ib_cm_event cm_event; + struct ib_sa_path_rec path[0]; +}; + +struct cm_timewait_info { + struct cm_work work; /* Must be first. */ + struct rb_node remote_qp_node; + struct rb_node remote_id_node; + __be64 remote_ca_guid; + __be32 remote_qpn; + u8 inserted_remote_qp; + u8 inserted_remote_id; +}; + +struct cm_id_private { + struct ib_cm_id id; + + struct rb_node service_node; + struct rb_node sidr_id_node; + spinlock_t lock; + wait_queue_head_t wait; + atomic_t refcount; + + struct ib_mad_send_buf *msg; + struct cm_timewait_info *timewait_info; + /* todo: use alternate port on send failure */ + struct cm_av av; + struct cm_av alt_av; + + void *private_data; + __be64 tid; + __be32 local_qpn; + __be32 remote_qpn; + __be32 sq_psn; + __be32 rq_psn; + int timeout_ms; + enum ib_mtu path_mtu; + u8 private_data_len; + u8 max_cm_retries; + u8 peer_to_peer; + u8 responder_resources; + u8 initiator_depth; + u8 local_ack_timeout; + u8 retry_count; + u8 rnr_retry_count; + u8 service_timeout; + + struct list_head work_list; + atomic_t work_count; +}; + +static void cm_work_handler(void *data); + +static inline void cm_deref_id(struct cm_id_private *cm_id_priv) +{ + if (atomic_dec_and_test(&cm_id_priv->refcount)) + wake_up(&cm_id_priv->wait); +} + +static int cm_alloc_msg(struct cm_id_private *cm_id_priv, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_agent *mad_agent; + struct ib_mad_send_buf *m; + struct ib_ah *ah; + + mad_agent = cm_id_priv->av.port->mad_agent; + ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + m = ib_create_send_mad(mad_agent, 1, cm_id_priv->av.pkey_index, + ah, 0, sizeof(struct ib_mad_hdr), + sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + GFP_ATOMIC); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + + /* Timeout set by caller if response is expected. */ + m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; + + atomic_inc(&cm_id_priv->refcount); + m->context[0] = cm_id_priv; + *msg = m; + return 0; +} + +static int cm_alloc_response_msg(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_send_buf *m; + struct ib_ah *ah; + + ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, + mad_recv_wc->recv_buf.grh, port->port_num); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, + ah, 0, sizeof(struct ib_mad_hdr), + sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + GFP_ATOMIC); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + *msg = m; + return 0; +} + +static void cm_free_msg(struct ib_mad_send_buf *msg) +{ + ib_destroy_ah(msg->send_wr.wr.ud.ah); + if (msg->context[0]) + cm_deref_id(msg->context[0]); + ib_free_send_mad(msg); +} + +static void * cm_copy_private_data(const void *private_data, + u8 private_data_len) +{ + void *data; + + if (!private_data || !private_data_len) + return NULL; + + data = kmalloc(private_data_len, GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + memcpy(data, private_data, private_data_len); + return data; +} + +static void cm_set_private_data(struct cm_id_private *cm_id_priv, + void *private_data, u8 private_data_len) +{ + if (cm_id_priv->private_data && cm_id_priv->private_data_len) + kfree(cm_id_priv->private_data); + + cm_id_priv->private_data = private_data; + cm_id_priv->private_data_len = private_data_len; +} + +static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num, + u16 dlid, u8 sl, u16 src_path_bits) +{ + memset(ah_attr, 0, sizeof ah_attr); + ah_attr->dlid = dlid; + ah_attr->sl = sl; + ah_attr->src_path_bits = src_path_bits; + ah_attr->port_num = port_num; +} + +static void cm_init_av_for_response(struct cm_port *port, + struct ib_wc *wc, struct cm_av *av) +{ + av->port = port; + av->pkey_index = wc->pkey_index; + cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid, + wc->sl, wc->dlid_path_bits); +} + +static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) +{ + struct cm_device *cm_dev; + struct cm_port *port = NULL; + unsigned long flags; + int ret; + u8 p; + + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (!ib_find_cached_gid(cm_dev->device, &path->sgid, + &p, NULL)) { + port = &cm_dev->port[p-1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + + if (!port) + return -EINVAL; + + ret = ib_find_cached_pkey(cm_dev->device, port->port_num, + be16_to_cpu(path->pkey), &av->pkey_index); + if (ret) + return ret; + + av->port = port; + cm_set_ah_attr(&av->ah_attr, av->port->port_num, + be16_to_cpu(path->dlid), path->sl, + be16_to_cpu(path->slid) & 0x7F); + av->packet_life_time = path->packet_life_time; + return 0; +} + +static int cm_alloc_id(struct cm_id_private *cm_id_priv) +{ + unsigned long flags; + int ret; + + do { + spin_lock_irqsave(&cm.lock, flags); + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1, + (__force int *) &cm_id_priv->id.local_id); + spin_unlock_irqrestore(&cm.lock, flags); + } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + return ret; +} + +static void cm_free_id(__be32 local_id) +{ + unsigned long flags; + + spin_lock_irqsave(&cm.lock, flags); + idr_remove(&cm.local_id_table, (__force int) local_id); + spin_unlock_irqrestore(&cm.lock, flags); +} + +static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); + if (cm_id_priv) { + if (cm_id_priv->id.remote_id == remote_id) + atomic_inc(&cm_id_priv->refcount); + else + cm_id_priv = NULL; + } + + return cm_id_priv; +} + +static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) +{ + struct cm_id_private *cm_id_priv; + unsigned long flags; + + spin_lock_irqsave(&cm.lock, flags); + cm_id_priv = cm_get_id(local_id, remote_id); + spin_unlock_irqrestore(&cm.lock, flags); + + return cm_id_priv; +} + +static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) +{ + struct rb_node **link = &cm.listen_service_table.rb_node; + struct rb_node *parent = NULL; + struct cm_id_private *cur_cm_id_priv; + __be64 service_id = cm_id_priv->id.service_id; + __be64 service_mask = cm_id_priv->id.service_mask; + + while (*link) { + parent = *link; + cur_cm_id_priv = rb_entry(parent, struct cm_id_private, + service_node); + if ((cur_cm_id_priv->id.service_mask & service_id) == + (service_mask & cur_cm_id_priv->id.service_id)) + return cm_id_priv; + if (service_id < cur_cm_id_priv->id.service_id) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + rb_link_node(&cm_id_priv->service_node, parent, link); + rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); + return NULL; +} + +static struct cm_id_private * cm_find_listen(__be64 service_id) +{ + struct rb_node *node = cm.listen_service_table.rb_node; + struct cm_id_private *cm_id_priv; + + while (node) { + cm_id_priv = rb_entry(node, struct cm_id_private, service_node); + if ((cm_id_priv->id.service_mask & service_id) == + (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) + return cm_id_priv; + if (service_id < cm_id_priv->id.service_id) + node = node->rb_left; + else + node = node->rb_right; + } + return NULL; +} + +static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info + *timewait_info) +{ + struct rb_node **link = &cm.remote_id_table.rb_node; + struct rb_node *parent = NULL; + struct cm_timewait_info *cur_timewait_info; + __be64 remote_ca_guid = timewait_info->remote_ca_guid; + __be32 remote_id = timewait_info->work.remote_id; + + while (*link) { + parent = *link; + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_id_node); + if (remote_id < cur_timewait_info->work.remote_id) + link = &(*link)->rb_left; + else if (remote_id > cur_timewait_info->work.remote_id) + link = &(*link)->rb_right; + else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_left; + else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_right; + else + return cur_timewait_info; + } + timewait_info->inserted_remote_id = 1; + rb_link_node(&timewait_info->remote_id_node, parent, link); + rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); + return NULL; +} + +static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, + __be32 remote_id) +{ + struct rb_node *node = cm.remote_id_table.rb_node; + struct cm_timewait_info *timewait_info; + + while (node) { + timewait_info = rb_entry(node, struct cm_timewait_info, + remote_id_node); + if (remote_id < timewait_info->work.remote_id) + node = node->rb_left; + else if (remote_id > timewait_info->work.remote_id) + node = node->rb_right; + else if (remote_ca_guid < timewait_info->remote_ca_guid) + node = node->rb_left; + else if (remote_ca_guid > timewait_info->remote_ca_guid) + node = node->rb_right; + else + return timewait_info; + } + return NULL; +} + +static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info + *timewait_info) +{ + struct rb_node **link = &cm.remote_qp_table.rb_node; + struct rb_node *parent = NULL; + struct cm_timewait_info *cur_timewait_info; + __be64 remote_ca_guid = timewait_info->remote_ca_guid; + __be32 remote_qpn = timewait_info->remote_qpn; + + while (*link) { + parent = *link; + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_qp_node); + if (remote_qpn < cur_timewait_info->remote_qpn) + link = &(*link)->rb_left; + else if (remote_qpn > cur_timewait_info->remote_qpn) + link = &(*link)->rb_right; + else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_left; + else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_right; + else + return cur_timewait_info; + } + timewait_info->inserted_remote_qp = 1; + rb_link_node(&timewait_info->remote_qp_node, parent, link); + rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); + return NULL; +} + +static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private + *cm_id_priv) +{ + struct rb_node **link = &cm.remote_sidr_table.rb_node; + struct rb_node *parent = NULL; + struct cm_id_private *cur_cm_id_priv; + union ib_gid *port_gid = &cm_id_priv->av.dgid; + __be32 remote_id = cm_id_priv->id.remote_id; + + while (*link) { + parent = *link; + cur_cm_id_priv = rb_entry(parent, struct cm_id_private, + sidr_id_node); + if (remote_id < cur_cm_id_priv->id.remote_id) + link = &(*link)->rb_left; + else if (remote_id > cur_cm_id_priv->id.remote_id) + link = &(*link)->rb_right; + else { + int cmp; + cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, + sizeof *port_gid); + if (cmp < 0) + link = &(*link)->rb_left; + else if (cmp > 0) + link = &(*link)->rb_right; + else + return cur_cm_id_priv; + } + } + rb_link_node(&cm_id_priv->sidr_id_node, parent, link); + rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + return NULL; +} + +static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, + enum ib_cm_sidr_status status) +{ + struct ib_cm_sidr_rep_param param; + + memset(¶m, 0, sizeof param); + param.status = status; + ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); +} + +struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, + void *context) +{ + struct cm_id_private *cm_id_priv; + int ret; + + cm_id_priv = kmalloc(sizeof *cm_id_priv, GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + memset(cm_id_priv, 0, sizeof *cm_id_priv); + cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + ret = cm_alloc_id(cm_id_priv); + if (ret) + goto error; + + spin_lock_init(&cm_id_priv->lock); + init_waitqueue_head(&cm_id_priv->wait); + INIT_LIST_HEAD(&cm_id_priv->work_list); + atomic_set(&cm_id_priv->work_count, -1); + atomic_set(&cm_id_priv->refcount, 1); + return &cm_id_priv->id; + +error: + kfree(cm_id_priv); + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(ib_create_cm_id); + +static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv) +{ + struct cm_work *work; + + if (list_empty(&cm_id_priv->work_list)) + return NULL; + + work = list_entry(cm_id_priv->work_list.next, struct cm_work, list); + list_del(&work->list); + return work; +} + +static void cm_free_work(struct cm_work *work) +{ + if (work->mad_recv_wc) + ib_free_recv_mad(work->mad_recv_wc); + kfree(work); +} + +static inline int cm_convert_to_ms(int iba_time) +{ + /* approximate conversion to ms from 4.096us x 2^iba_time */ + return 1 << max(iba_time - 8, 0); +} + +static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) +{ + unsigned long flags; + + if (!timewait_info->inserted_remote_id && + !timewait_info->inserted_remote_qp) + return; + + spin_lock_irqsave(&cm.lock, flags); + if (timewait_info->inserted_remote_id) { + rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); + timewait_info->inserted_remote_id = 0; + } + + if (timewait_info->inserted_remote_qp) { + rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); + timewait_info->inserted_remote_qp = 0; + } + spin_unlock_irqrestore(&cm.lock, flags); +} + +static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) +{ + struct cm_timewait_info *timewait_info; + + timewait_info = kmalloc(sizeof *timewait_info, GFP_KERNEL); + if (!timewait_info) + return ERR_PTR(-ENOMEM); + memset(timewait_info, 0, sizeof *timewait_info); + + timewait_info->work.local_id = local_id; + INIT_WORK(&timewait_info->work.work, cm_work_handler, + &timewait_info->work); + timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; + return timewait_info; +} + +static void cm_enter_timewait(struct cm_id_private *cm_id_priv) +{ + int wait_time; + + /* + * The cm_id could be destroyed by the user before we exit timewait. + * To protect against this, we search for the cm_id after exiting + * timewait before notifying the user that we've exited timewait. + */ + cm_id_priv->id.state = IB_CM_TIMEWAIT; + wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout); + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + cm_id_priv->timewait_info = NULL; +} + +static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) +{ + cm_id_priv->id.state = IB_CM_IDLE; + if (cm_id_priv->timewait_info) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; + } +} + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + struct cm_work *work; + unsigned long flags; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); +retest: + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) { + case IB_CM_LISTEN: + cm_id->state = IB_CM_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_lock_irqsave(&cm.lock, flags); + rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); + spin_unlock_irqrestore(&cm.lock, flags); + break; + case IB_CM_SIDR_REQ_SENT: + cm_id->state = IB_CM_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + break; + case IB_CM_SIDR_REQ_RCVD: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); + break; + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + /* Fall through */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, + &cm_id_priv->av.port->cm_dev->ca_guid, + sizeof cm_id_priv->av.port->cm_dev->ca_guid, + NULL, 0); + break; + case IB_CM_ESTABLISHED: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_dreq(cm_id, NULL, 0); + goto retest; + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + cm_enter_timewait(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + break; + case IB_CM_DREQ_RCVD: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_drep(cm_id, NULL, 0); + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + break; + } + + cm_free_id(cm_id->local_id); + atomic_dec(&cm_id_priv->refcount); + wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount)); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) + cm_free_work(work); + if (cm_id_priv->private_data && cm_id_priv->private_data_len) + kfree(cm_id_priv->private_data); + kfree(cm_id_priv); +} +EXPORT_SYMBOL(ib_destroy_cm_id); + +int ib_cm_listen(struct ib_cm_id *cm_id, + __be64 service_id, + __be64 service_mask) +{ + struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + unsigned long flags; + int ret = 0; + + service_mask = service_mask ? service_mask : + __constant_cpu_to_be64(~0ULL); + service_id &= service_mask; + if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && + (service_id != IB_CM_ASSIGN_SERVICE_ID)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + BUG_ON(cm_id->state != IB_CM_IDLE); + + cm_id->state = IB_CM_LISTEN; + + spin_lock_irqsave(&cm.lock, flags); + if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + cm_id->service_id = cpu_to_be64(cm.listen_service_id++); + cm_id->service_mask = __constant_cpu_to_be64(~0ULL); + } else { + cm_id->service_id = service_id; + cm_id->service_mask = service_mask; + } + cur_cm_id_priv = cm_insert_listen(cm_id_priv); + spin_unlock_irqrestore(&cm.lock, flags); + + if (cur_cm_id_priv) { + cm_id->state = IB_CM_IDLE; + ret = -EBUSY; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_listen); + +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, + enum cm_msg_sequence msg_seq) +{ + u64 hi_tid, low_tid; + + hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; + low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | + (msg_seq << 30)); + return cpu_to_be64(hi_tid | low_tid); +} + +static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, + __be16 attr_id, __be64 tid) +{ + hdr->base_version = IB_MGMT_BASE_VERSION; + hdr->mgmt_class = IB_MGMT_CLASS_CM; + hdr->class_version = IB_CM_CLASS_VERSION; + hdr->method = IB_MGMT_METHOD_SEND; + hdr->attr_id = attr_id; + hdr->tid = tid; +} + +static void cm_format_req(struct cm_req_msg *req_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_req_param *param) +{ + cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + + req_msg->local_comm_id = cm_id_priv->id.local_id; + req_msg->service_id = param->service_id; + req_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid; + cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_init_depth(req_msg, param->initiator_depth); + cm_req_set_remote_resp_timeout(req_msg, + param->remote_cm_response_timeout); + cm_req_set_qp_type(req_msg, param->qp_type); + cm_req_set_flow_ctrl(req_msg, param->flow_control); + cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); + cm_req_set_local_resp_timeout(req_msg, + param->local_cm_response_timeout); + cm_req_set_retry_count(req_msg, param->retry_count); + req_msg->pkey = param->primary_path->pkey; + cm_req_set_path_mtu(req_msg, param->primary_path->mtu); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); + cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); + cm_req_set_srq(req_msg, param->srq); + + req_msg->primary_local_lid = param->primary_path->slid; + req_msg->primary_remote_lid = param->primary_path->dlid; + req_msg->primary_local_gid = param->primary_path->sgid; + req_msg->primary_remote_gid = param->primary_path->dgid; + cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label); + cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate); + req_msg->primary_traffic_class = param->primary_path->traffic_class; + req_msg->primary_hop_limit = param->primary_path->hop_limit; + cm_req_set_primary_sl(req_msg, param->primary_path->sl); + cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ + cm_req_set_primary_local_ack_timeout(req_msg, + min(31, param->primary_path->packet_life_time + 1)); + + if (param->alternate_path) { + req_msg->alt_local_lid = param->alternate_path->slid; + req_msg->alt_remote_lid = param->alternate_path->dlid; + req_msg->alt_local_gid = param->alternate_path->sgid; + req_msg->alt_remote_gid = param->alternate_path->dgid; + cm_req_set_alt_flow_label(req_msg, + param->alternate_path->flow_label); + cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate); + req_msg->alt_traffic_class = param->alternate_path->traffic_class; + req_msg->alt_hop_limit = param->alternate_path->hop_limit; + cm_req_set_alt_sl(req_msg, param->alternate_path->sl); + cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ + cm_req_set_alt_local_ack_timeout(req_msg, + min(31, param->alternate_path->packet_life_time + 1)); + } + + if (param->private_data && param->private_data_len) + memcpy(req_msg->private_data, param->private_data, + param->private_data_len); +} + +static inline int cm_validate_req_param(struct ib_cm_req_param *param) +{ + /* peer-to-peer not supported */ + if (param->peer_to_peer) + return -EINVAL; + + if (!param->primary_path) + return -EINVAL; + + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) + return -EINVAL; + + if (param->private_data && + param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) + return -EINVAL; + + if (param->alternate_path && + (param->alternate_path->pkey != param->primary_path->pkey || + param->alternate_path->mtu != param->primary_path->mtu)) + return -EINVAL; + + return 0; +} + +int ib_send_cm_req(struct ib_cm_id *cm_id, + struct ib_cm_req_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_send_wr *bad_send_wr; + struct cm_req_msg *req_msg; + unsigned long flags; + int ret; + + ret = cm_validate_req_param(param); + if (ret) + return ret; + + /* Verify that we're not in timewait. */ + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto out; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> + id.local_id); + if (IS_ERR(cm_id_priv->timewait_info)) + goto out; + + ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); + if (ret) + goto error1; + if (param->alternate_path) { + ret = cm_init_av_by_path(param->alternate_path, + &cm_id_priv->alt_av); + if (ret) + goto error1; + } + cm_id->service_id = param->service_id; + cm_id->service_mask = __constant_cpu_to_be64(~0ULL); + cm_id_priv->timeout_ms = cm_convert_to_ms( + param->primary_path->packet_life_time) * 2 + + cm_convert_to_ms( + param->remote_cm_response_timeout); + cm_id_priv->max_cm_retries = param->max_cm_retries; + cm_id_priv->initiator_depth = param->initiator_depth; + cm_id_priv->responder_resources = param->responder_resources; + cm_id_priv->retry_count = param->retry_count; + cm_id_priv->path_mtu = param->primary_path->mtu; + + ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); + if (ret) + goto error1; + + req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; + cm_format_req(req_msg, cm_id_priv, param); + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; + + cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->local_ack_timeout = + cm_req_get_primary_local_ack_timeout(req_msg); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &cm_id_priv->msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto error2; + } + BUG_ON(cm_id->state != IB_CM_IDLE); + cm_id->state = IB_CM_REQ_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error2: cm_free_msg(cm_id_priv->msg); +error1: kfree(cm_id_priv->timewait_info); +out: return ret; +} +EXPORT_SYMBOL(ib_send_cm_req); + +static int cm_issue_rej(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + enum ib_cm_rej_reason reason, + enum cm_msg_response msg_rejected, + void *ari, u8 ari_length) +{ + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + struct cm_rej_msg *rej_msg, *rcv_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + /* We just need common CM header information. Cast to any message. */ + rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; + rej_msg = (struct cm_rej_msg *) msg->mad; + + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); + rej_msg->remote_comm_id = rcv_msg->local_comm_id; + rej_msg->local_comm_id = rcv_msg->remote_comm_id; + cm_rej_set_msg_rejected(rej_msg, msg_rejected); + rej_msg->reason = cpu_to_be16(reason); + + if (ari && ari_length) { + cm_rej_set_reject_info_len(rej_msg, ari_length); + memcpy(rej_msg->ari, ari, ari_length); + } + + ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); + if (ret) + cm_free_msg(msg); + + return ret; +} + +static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, + __be32 local_qpn, __be32 remote_qpn) +{ + return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || + ((local_ca_guid == remote_ca_guid) && + (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); +} + +static inline void cm_format_paths_from_req(struct cm_req_msg *req_msg, + struct ib_sa_path_rec *primary_path, + struct ib_sa_path_rec *alt_path) +{ + memset(primary_path, 0, sizeof *primary_path); + primary_path->dgid = req_msg->primary_local_gid; + primary_path->sgid = req_msg->primary_remote_gid; + primary_path->dlid = req_msg->primary_local_lid; + primary_path->slid = req_msg->primary_remote_lid; + primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); + primary_path->hop_limit = req_msg->primary_hop_limit; + primary_path->traffic_class = req_msg->primary_traffic_class; + primary_path->reversible = 1; + primary_path->pkey = req_msg->pkey; + primary_path->sl = cm_req_get_primary_sl(req_msg); + primary_path->mtu_selector = IB_SA_EQ; + primary_path->mtu = cm_req_get_path_mtu(req_msg); + primary_path->rate_selector = IB_SA_EQ; + primary_path->rate = cm_req_get_primary_packet_rate(req_msg); + primary_path->packet_life_time_selector = IB_SA_EQ; + primary_path->packet_life_time = + cm_req_get_primary_local_ack_timeout(req_msg); + primary_path->packet_life_time -= (primary_path->packet_life_time > 0); + + if (req_msg->alt_local_lid) { + memset(alt_path, 0, sizeof *alt_path); + alt_path->dgid = req_msg->alt_local_gid; + alt_path->sgid = req_msg->alt_remote_gid; + alt_path->dlid = req_msg->alt_local_lid; + alt_path->slid = req_msg->alt_remote_lid; + alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); + alt_path->hop_limit = req_msg->alt_hop_limit; + alt_path->traffic_class = req_msg->alt_traffic_class; + alt_path->reversible = 1; + alt_path->pkey = req_msg->pkey; + alt_path->sl = cm_req_get_alt_sl(req_msg); + alt_path->mtu_selector = IB_SA_EQ; + alt_path->mtu = cm_req_get_path_mtu(req_msg); + alt_path->rate_selector = IB_SA_EQ; + alt_path->rate = cm_req_get_alt_packet_rate(req_msg); + alt_path->packet_life_time_selector = IB_SA_EQ; + alt_path->packet_life_time = + cm_req_get_alt_local_ack_timeout(req_msg); + alt_path->packet_life_time -= (alt_path->packet_life_time > 0); + } +} + +static void cm_format_req_event(struct cm_work *work, + struct cm_id_private *cm_id_priv, + struct ib_cm_id *listen_id) +{ + struct cm_req_msg *req_msg; + struct ib_cm_req_event_param *param; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.req_rcvd; + param->listen_id = listen_id; + param->device = cm_id_priv->av.port->mad_agent->device; + param->port = cm_id_priv->av.port->port_num; + param->primary_path = &work->path[0]; + if (req_msg->alt_local_lid) + param->alternate_path = &work->path[1]; + else + param->alternate_path = NULL; + param->remote_ca_guid = req_msg->local_ca_guid; + param->remote_qkey = be32_to_cpu(req_msg->local_qkey); + param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); + param->qp_type = cm_req_get_qp_type(req_msg); + param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); + param->responder_resources = cm_req_get_init_depth(req_msg); + param->initiator_depth = cm_req_get_resp_res(req_msg); + param->local_cm_response_timeout = + cm_req_get_remote_resp_timeout(req_msg); + param->flow_control = cm_req_get_flow_ctrl(req_msg); + param->remote_cm_response_timeout = + cm_req_get_local_resp_timeout(req_msg); + param->retry_count = cm_req_get_retry_count(req_msg); + param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + param->srq = cm_req_get_srq(req_msg); + work->cm_event.private_data = &req_msg->private_data; +} + +static void cm_process_work(struct cm_id_private *cm_id_priv, + struct cm_work *work) +{ + unsigned long flags; + int ret; + + /* We will typically only have the current event to report. */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); + cm_free_work(work); + + while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { + spin_lock_irqsave(&cm_id_priv->lock, flags); + work = cm_dequeue_work(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + BUG_ON(!work); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, + &work->cm_event); + cm_free_work(work); + } + cm_deref_id(cm_id_priv); + if (ret) + ib_destroy_cm_id(&cm_id_priv->id); +} + +static void cm_format_mra(struct cm_mra_msg *mra_msg, + struct cm_id_private *cm_id_priv, + enum cm_msg_response msg_mraed, u8 service_timeout, + const void *private_data, u8 private_data_len) +{ + cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); + cm_mra_set_msg_mraed(mra_msg, msg_mraed); + mra_msg->local_comm_id = cm_id_priv->id.local_id; + mra_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_mra_set_service_timeout(mra_msg, service_timeout); + + if (private_data && private_data_len) + memcpy(mra_msg->private_data, private_data, private_data_len); +} + +static void cm_format_rej(struct cm_rej_msg *rej_msg, + struct cm_id_private *cm_id_priv, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); + rej_msg->remote_comm_id = cm_id_priv->id.remote_id; + + switch(cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + rej_msg->local_comm_id = 0; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + break; + case IB_CM_MRA_REQ_SENT: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + break; + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); + break; + default: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); + break; + } + + rej_msg->reason = cpu_to_be16(reason); + if (ari && ari_length) { + cm_rej_set_reject_info_len(rej_msg, ari_length); + memcpy(rej_msg->ari, ari, ari_length); + } + + if (private_data && private_data_len) + memcpy(rej_msg->private_data, private_data, private_data_len); +} + +static void cm_dup_req_handler(struct cm_work *work, + struct cm_id_private *cm_id_priv) +{ + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + /* Quick state check to discard duplicate REQs. */ + if (cm_id_priv->id.state == IB_CM_REQ_RCVD) + return; + + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + if (ret) + return; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_MRA_REQ_SENT: + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + break; + case IB_CM_TIMEWAIT: + cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, + IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); + break; + default: + goto unlock; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); + if (ret) + goto free; + return; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +free: cm_free_msg(msg); +} + +static struct cm_id_private * cm_match_req(struct cm_work *work, + struct cm_id_private *cm_id_priv) +{ + struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; + struct cm_timewait_info *timewait_info; + struct cm_req_msg *req_msg; + unsigned long flags; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + + /* Check for duplicate REQ and stale connections. */ + spin_lock_irqsave(&cm.lock, flags); + timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); + if (!timewait_info) + timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); + + if (timewait_info) { + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock_irqrestore(&cm.lock, flags); + if (cur_cm_id_priv) { + cm_dup_req_handler(work, cur_cm_id_priv); + cm_deref_id(cur_cm_id_priv); + } else + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, + NULL, 0); + goto error; + } + + /* Find matching listen request. */ + listen_cm_id_priv = cm_find_listen(req_msg->service_id); + if (!listen_cm_id_priv) { + spin_unlock_irqrestore(&cm.lock, flags); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, + NULL, 0); + goto error; + } + atomic_inc(&listen_cm_id_priv->refcount); + atomic_inc(&cm_id_priv->refcount); + cm_id_priv->id.state = IB_CM_REQ_RCVD; + atomic_inc(&cm_id_priv->work_count); + spin_unlock_irqrestore(&cm.lock, flags); + return listen_cm_id_priv; + +error: cm_cleanup_timewait(cm_id_priv->timewait_info); + return NULL; +} + +static int cm_req_handler(struct cm_work *work) +{ + struct ib_cm_id *cm_id; + struct cm_id_private *cm_id_priv, *listen_cm_id_priv; + struct cm_req_msg *req_msg; + int ret; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + + cm_id = ib_create_cm_id(NULL, NULL); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + cm_id_priv->id.remote_id = req_msg->local_comm_id; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + &cm_id_priv->av); + cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> + id.local_id); + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); + goto error1; + } + cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; + cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; + cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); + + listen_cm_id_priv = cm_match_req(work, cm_id_priv); + if (!listen_cm_id_priv) { + ret = -EINVAL; + goto error2; + } + + cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = listen_cm_id_priv->id.context; + cm_id_priv->id.service_id = req_msg->service_id; + cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); + + cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); + if (ret) + goto error3; + if (req_msg->alt_local_lid) { + ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); + if (ret) + goto error3; + } + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->timeout_ms = cm_convert_to_ms( + cm_req_get_local_resp_timeout(req_msg)); + cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); + cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); + cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); + cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); + cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->local_ack_timeout = + cm_req_get_primary_local_ack_timeout(req_msg); + cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); + cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + + cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); + cm_process_work(cm_id_priv, work); + cm_deref_id(listen_cm_id_priv); + return 0; + +error3: atomic_dec(&cm_id_priv->refcount); + cm_deref_id(listen_cm_id_priv); + cm_cleanup_timewait(cm_id_priv->timewait_info); +error2: kfree(cm_id_priv->timewait_info); +error1: ib_destroy_cm_id(&cm_id_priv->id); + return ret; +} + +static void cm_format_rep(struct cm_rep_msg *rep_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_rep_param *param) +{ + cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); + rep_msg->local_comm_id = cm_id_priv->id.local_id; + rep_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); + rep_msg->resp_resources = param->responder_resources; + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay); + cm_rep_set_failover(rep_msg, param->failover_accepted); + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); + cm_rep_set_srq(rep_msg, param->srq); + rep_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid; + + if (param->private_data && param->private_data_len) + memcpy(rep_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_rep(struct ib_cm_id *cm_id, + struct ib_cm_rep_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct cm_rep_msg *rep_msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (param->private_data && + param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_REQ_RCVD && + cm_id->state != IB_CM_MRA_REQ_SENT) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + rep_msg = (struct cm_rep_msg *) msg->mad; + cm_format_rep(rep_msg, cm_id_priv, param); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->state = IB_CM_REP_SENT; + cm_id_priv->msg = msg; + cm_id_priv->initiator_depth = param->initiator_depth; + cm_id_priv->responder_resources = param->responder_resources; + cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rep); + +static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); + rtu_msg->local_comm_id = cm_id_priv->id.local_id; + rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; + + if (private_data && private_data_len) + memcpy(rtu_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_rtu(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + void *data; + int ret; + + if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_REP_RCVD && + cm_id->state != IB_CM_MRA_REP_SENT) { + ret = -EINVAL; + goto error; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error; + + cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + kfree(data); + return ret; + } + + cm_id->state = IB_CM_ESTABLISHED; + cm_set_private_data(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rtu); + +static void cm_format_rep_event(struct cm_work *work) +{ + struct cm_rep_msg *rep_msg; + struct ib_cm_rep_event_param *param; + + rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.rep_rcvd; + param->remote_ca_guid = rep_msg->local_ca_guid; + param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); + param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); + param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); + param->responder_resources = rep_msg->initiator_depth; + param->initiator_depth = rep_msg->resp_resources; + param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + param->failover_accepted = cm_rep_get_failover(rep_msg); + param->flow_control = cm_rep_get_flow_ctrl(rep_msg); + param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + param->srq = cm_rep_get_srq(rep_msg); + work->cm_event.private_data = &rep_msg->private_data; +} + +static void cm_dup_rep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rep_msg *rep_msg; + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, + rep_msg->local_comm_id); + if (!cm_id_priv) + return; + + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + if (ret) + goto deref; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state == IB_CM_ESTABLISHED) + cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + else + goto unlock; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); + if (ret) + goto free; + goto deref; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +free: cm_free_msg(msg); +deref: cm_deref_id(cm_id_priv); +} + +static int cm_rep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rep_msg *rep_msg; + unsigned long flags; + int ret; + + rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); + if (!cm_id_priv) { + cm_dup_rep_handler(work); + return -EINVAL; + } + + cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; + cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); + + spin_lock_irqsave(&cm.lock, flags); + /* Check for duplicate REP. */ + if (cm_insert_remote_id(cm_id_priv->timewait_info)) { + spin_unlock_irqrestore(&cm.lock, flags); + ret = -EINVAL; + goto error; + } + /* Check for a stale connection. */ + if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { + spin_unlock_irqrestore(&cm.lock, flags); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, + NULL, 0); + ret = -EINVAL; + goto error; + } + spin_unlock_irqrestore(&cm.lock, flags); + + cm_format_rep_event(work); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto error; + } + cm_id_priv->id.state = IB_CM_REP_RCVD; + cm_id_priv->id.remote_id = rep_msg->local_comm_id; + cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->initiator_depth = rep_msg->resp_resources; + cm_id_priv->responder_resources = rep_msg->initiator_depth; + cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + + /* todo: handle peer_to_peer */ + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +error: cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_deref_id(cm_id_priv); + return ret; +} + +static int cm_establish_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + /* See comment in ib_cm_establish about lookup. */ + cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_rtu_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rtu_msg *rtu_msg; + unsigned long flags; + int ret; + + rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, + rtu_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &rtu_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_REP_SENT && + cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_ESTABLISHED; + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + dreq_msg->local_comm_id = cm_id_priv->id.local_id; + dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); + + if (private_data && private_data_len) + memcpy(dreq_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_dreq(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) { + cm_enter_timewait(cm_id_priv); + goto out; + } + + cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + cm_enter_timewait(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->state = IB_CM_DREQ_SENT; + cm_id_priv->msg = msg; +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_dreq); + +static void cm_format_drep(struct cm_drep_msg *drep_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); + drep_msg->local_comm_id = cm_id_priv->id.local_id; + drep_msg->remote_comm_id = cm_id_priv->id.remote_id; + + if (private_data && private_data_len) + memcpy(drep_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_drep(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + void *data; + int ret; + + if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_DREQ_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return -EINVAL; + } + + cm_set_private_data(cm_id_priv, data, private_data_len); + cm_enter_timewait(cm_id_priv); + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_drep); + +static int cm_dreq_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_dreq_msg *dreq_msg; + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, + dreq_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &dreq_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) + goto unlock; + + switch (cm_id_priv->id.state) { + case IB_CM_REP_SENT: + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + break; + case IB_CM_ESTABLISHED: + case IB_CM_MRA_REP_RCVD: + break; + case IB_CM_TIMEWAIT: + if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + goto unlock; + + cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr)) + cm_free_msg(msg); + goto deref; + default: + goto unlock; + } + cm_id_priv->id.state = IB_CM_DREQ_RCVD; + cm_id_priv->tid = dreq_msg->hdr.tid; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +deref: cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_drep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_drep_msg *drep_msg; + unsigned long flags; + int ret; + + drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, + drep_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &drep_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_DREQ_SENT && + cm_id_priv->id.state != IB_CM_DREQ_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_enter_timewait(cm_id_priv); + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +int ib_send_cm_rej(struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || + (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (!ret) + cm_format_rej((struct cm_rej_msg *) msg->mad, + cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + + cm_reset_to_idle(cm_id_priv); + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (!ret) + cm_format_rej((struct cm_rej_msg *) msg->mad, + cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + + cm_enter_timewait(cm_id_priv); + break; + default: + ret = -EINVAL; + goto out; + } + + if (ret) + goto out; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + cm_free_msg(msg); + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rej); + +static void cm_format_rej_event(struct cm_work *work) +{ + struct cm_rej_msg *rej_msg; + struct ib_cm_rej_event_param *param; + + rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.rej_rcvd; + param->ari = rej_msg->ari; + param->ari_length = cm_rej_get_reject_info_len(rej_msg); + param->reason = __be16_to_cpu(rej_msg->reason); + work->cm_event.private_data = &rej_msg->private_data; +} + +static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + unsigned long flags; + __be32 remote_id; + + remote_id = rej_msg->local_comm_id; + + if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { + spin_lock_irqsave(&cm.lock, flags); + timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), + remote_id); + if (!timewait_info) { + spin_unlock_irqrestore(&cm.lock, flags); + return NULL; + } + cm_id_priv = idr_find(&cm.local_id_table, + (__force int) timewait_info->work.local_id); + if (cm_id_priv) { + if (cm_id_priv->id.remote_id == remote_id) + atomic_inc(&cm_id_priv->refcount); + else + cm_id_priv = NULL; + } + spin_unlock_irqrestore(&cm.lock, flags); + } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) + cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); + else + cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); + + return cm_id_priv; +} + +static int cm_rej_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rej_msg *rej_msg; + unsigned long flags; + int ret; + + rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_rejected_id(rej_msg); + if (!cm_id_priv) + return -EINVAL; + + cm_format_rej_event(work); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + /* fall through */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) + cm_enter_timewait(cm_id_priv); + else + cm_reset_to_idle(cm_id_priv); + break; + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + /* fall through */ + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_ESTABLISHED: + cm_enter_timewait(cm_id_priv); + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto out; + } + + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +int ib_send_cm_mra(struct ib_cm_id *cm_id, + u8 service_timeout, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + void *data; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch(cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REQ, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + goto error2; + cm_id->state = IB_CM_MRA_REQ_SENT; + break; + case IB_CM_REP_RCVD: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REP, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + goto error2; + cm_id->state = IB_CM_MRA_REP_SENT; + break; + case IB_CM_ESTABLISHED: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_OTHER, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + goto error2; + cm_id->lap_state = IB_CM_MRA_LAP_SENT; + break; + default: + ret = -EINVAL; + goto error1; + } + cm_id_priv->service_timeout = service_timeout; + cm_set_private_data(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return ret; + +error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + cm_free_msg(msg); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_mra); + +static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) +{ + switch (cm_mra_get_msg_mraed(mra_msg)) { + case CM_MSG_RESPONSE_REQ: + return cm_acquire_id(mra_msg->remote_comm_id, 0); + case CM_MSG_RESPONSE_REP: + case CM_MSG_RESPONSE_OTHER: + return cm_acquire_id(mra_msg->remote_comm_id, + mra_msg->local_comm_id); + default: + return NULL; + } +} + +static int cm_mra_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_mra_msg *mra_msg; + unsigned long flags; + int timeout, ret; + + mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_mraed_id(mra_msg); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &mra_msg->private_data; + work->cm_event.param.mra_rcvd.service_timeout = + cm_mra_get_service_timeout(mra_msg); + timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + + cm_convert_to_ms(cm_id_priv->av.packet_life_time); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; + break; + case IB_CM_REP_SENT: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; + break; + case IB_CM_ESTABLISHED: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || + cm_id_priv->id.lap_state != IB_CM_LAP_SENT || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; + break; + default: + goto out; + } + + cm_id_priv->msg->context[1] = (void *) (unsigned long) + cm_id_priv->id.state; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_lap(struct cm_lap_msg *lap_msg, + struct cm_id_private *cm_id_priv, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + lap_msg->local_comm_id = cm_id_priv->id.local_id; + lap_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); + /* todo: need remote CM response timeout */ + cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); + lap_msg->alt_local_lid = alternate_path->slid; + lap_msg->alt_remote_lid = alternate_path->dlid; + lap_msg->alt_local_gid = alternate_path->sgid; + lap_msg->alt_remote_gid = alternate_path->dgid; + cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); + cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); + lap_msg->alt_hop_limit = alternate_path->hop_limit; + cm_lap_set_packet_rate(lap_msg, alternate_path->rate); + cm_lap_set_sl(lap_msg, alternate_path->sl); + cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ + cm_lap_set_local_ack_timeout(lap_msg, + min(31, alternate_path->packet_life_time + 1)); + + if (private_data && private_data_len) + memcpy(lap_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_lap(struct ib_cm_id *cm_id, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED || + cm_id->lap_state != IB_CM_LAP_IDLE) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, + alternate_path, private_data, private_data_len); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->lap_state = IB_CM_LAP_SENT; + cm_id_priv->msg = msg; + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_lap); + +static void cm_format_path_from_lap(struct ib_sa_path_rec *path, + struct cm_lap_msg *lap_msg) +{ + memset(path, 0, sizeof *path); + path->dgid = lap_msg->alt_local_gid; + path->sgid = lap_msg->alt_remote_gid; + path->dlid = lap_msg->alt_local_lid; + path->slid = lap_msg->alt_remote_lid; + path->flow_label = cm_lap_get_flow_label(lap_msg); + path->hop_limit = lap_msg->alt_hop_limit; + path->traffic_class = cm_lap_get_traffic_class(lap_msg); + path->reversible = 1; + /* pkey is same as in REQ */ + path->sl = cm_lap_get_sl(lap_msg); + path->mtu_selector = IB_SA_EQ; + /* mtu is same as in REQ */ + path->rate_selector = IB_SA_EQ; + path->rate = cm_lap_get_packet_rate(lap_msg); + path->packet_life_time_selector = IB_SA_EQ; + path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); + path->packet_life_time -= (path->packet_life_time > 0); +} + +static int cm_lap_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_lap_msg *lap_msg; + struct ib_cm_lap_event_param *param; + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + /* todo: verify LAP request and send reject APR if invalid. */ + lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, + lap_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + param = &work->cm_event.param.lap_rcvd; + param->alternate_path = &work->path[0]; + cm_format_path_from_lap(param->alternate_path, lap_msg); + work->cm_event.private_data = &lap_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) + goto unlock; + + switch (cm_id_priv->id.lap_state) { + case IB_CM_LAP_IDLE: + break; + case IB_CM_MRA_LAP_SENT: + if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + goto unlock; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_OTHER, + cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr)) + cm_free_msg(msg); + goto deref; + default: + goto unlock; + } + + cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; + cm_id_priv->tid = lap_msg->hdr.tid; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +deref: cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_apr(struct cm_apr_msg *apr_msg, + struct cm_id_private *cm_id_priv, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); + apr_msg->local_comm_id = cm_id_priv->id.local_id; + apr_msg->remote_comm_id = cm_id_priv->id.remote_id; + apr_msg->ap_status = (u8) status; + + if (info && info_length) { + apr_msg->info_length = info_length; + memcpy(apr_msg->info, info, info_length); + } + + if (private_data && private_data_len) + memcpy(apr_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_apr(struct ib_cm_id *cm_id, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || + (info && info_length > IB_CM_APR_INFO_LENGTH)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED || + (cm_id->lap_state != IB_CM_LAP_RCVD && + cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, + info, info_length, private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->lap_state = IB_CM_LAP_IDLE; +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_apr); + +static int cm_apr_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_apr_msg *apr_msg; + unsigned long flags; + int ret; + + apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, + apr_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; /* Unmatched reply. */ + + work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; + work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; + work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; + work->cm_event.private_data = &apr_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED || + (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && + cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + cm_id_priv->msg = NULL; + + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_timewait_handler(struct cm_work *work) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + timewait_info = (struct cm_timewait_info *)work; + cm_cleanup_timewait(timewait_info); + + cm_id_priv = cm_acquire_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_TIMEWAIT || + cm_id_priv->remote_qpn != timewait_info->remote_qpn) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_req_param *param) +{ + cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + sidr_req_msg->request_id = cm_id_priv->id.local_id; + sidr_req_msg->pkey = cpu_to_be16(param->pkey); + sidr_req_msg->service_id = param->service_id; + + if (param->private_data && param->private_data_len) + memcpy(sidr_req_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, + struct ib_cm_sidr_req_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (!param->path || (param->private_data && + param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av); + if (ret) + goto out; + + cm_id->service_id = param->service_id; + cm_id->service_mask = __constant_cpu_to_be64(~0ULL); + cm_id_priv->timeout_ms = param->timeout_ms; + cm_id_priv->max_cm_retries = param->max_cm_retries; + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, + param); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state == IB_CM_IDLE) + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + else + ret = -EINVAL; + + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + goto out; + } + cm_id->state = IB_CM_SIDR_REQ_SENT; + cm_id_priv->msg = msg; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +out: + return ret; +} +EXPORT_SYMBOL(ib_send_cm_sidr_req); + +static void cm_format_sidr_req_event(struct cm_work *work, + struct ib_cm_id *listen_id) +{ + struct cm_sidr_req_msg *sidr_req_msg; + struct ib_cm_sidr_req_event_param *param; + + sidr_req_msg = (struct cm_sidr_req_msg *) + work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.sidr_req_rcvd; + param->pkey = __be16_to_cpu(sidr_req_msg->pkey); + param->listen_id = listen_id; + param->device = work->port->mad_agent->device; + param->port = work->port->port_num; + work->cm_event.private_data = &sidr_req_msg->private_data; +} + +static int cm_sidr_req_handler(struct cm_work *work) +{ + struct ib_cm_id *cm_id; + struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + struct cm_sidr_req_msg *sidr_req_msg; + struct ib_wc *wc; + unsigned long flags; + + cm_id = ib_create_cm_id(NULL, NULL); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + /* Record SGID/SLID and request ID for lookup. */ + sidr_req_msg = (struct cm_sidr_req_msg *) + work->mad_recv_wc->recv_buf.mad; + wc = work->mad_recv_wc->wc; + cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); + cm_id_priv->av.dgid.global.interface_id = 0; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + &cm_id_priv->av); + cm_id_priv->id.remote_id = sidr_req_msg->request_id; + cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; + cm_id_priv->tid = sidr_req_msg->hdr.tid; + atomic_inc(&cm_id_priv->work_count); + + spin_lock_irqsave(&cm.lock, flags); + cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); + if (cur_cm_id_priv) { + spin_unlock_irqrestore(&cm.lock, flags); + goto out; /* Duplicate message. */ + } + cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); + if (!cur_cm_id_priv) { + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + spin_unlock_irqrestore(&cm.lock, flags); + /* todo: reply with no match */ + goto out; /* No match. */ + } + atomic_inc(&cur_cm_id_priv->refcount); + spin_unlock_irqrestore(&cm.lock, flags); + + cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = cur_cm_id_priv->id.context; + cm_id_priv->id.service_id = sidr_req_msg->service_id; + cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); + + cm_format_sidr_req_event(work, &cur_cm_id_priv->id); + cm_process_work(cm_id_priv, work); + cm_deref_id(cur_cm_id_priv); + return 0; +out: + ib_destroy_cm_id(&cm_id_priv->id); + return -EINVAL; +} + +static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_rep_param *param) +{ + cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, + cm_id_priv->tid); + sidr_rep_msg->request_id = cm_id_priv->id.remote_id; + sidr_rep_msg->status = param->status; + cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); + sidr_rep_msg->service_id = cm_id_priv->id.service_id; + sidr_rep_msg->qkey = cpu_to_be32(param->qkey); + + if (param->info && param->info_length) + memcpy(sidr_rep_msg->info, param->info, param->info_length); + + if (param->private_data && param->private_data_len) + memcpy(sidr_rep_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || + (param->private_data && + param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { + ret = -EINVAL; + goto error; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error; + + cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, + param); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + cm_id->state = IB_CM_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + spin_lock_irqsave(&cm.lock, flags); + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + spin_unlock_irqrestore(&cm.lock, flags); + return 0; + +error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_sidr_rep); + +static void cm_format_sidr_rep_event(struct cm_work *work) +{ + struct cm_sidr_rep_msg *sidr_rep_msg; + struct ib_cm_sidr_rep_event_param *param; + + sidr_rep_msg = (struct cm_sidr_rep_msg *) + work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.sidr_rep_rcvd; + param->status = sidr_rep_msg->status; + param->qkey = be32_to_cpu(sidr_rep_msg->qkey); + param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); + param->info = &sidr_rep_msg->info; + param->info_len = sidr_rep_msg->info_length; + work->cm_event.private_data = &sidr_rep_msg->private_data; +} + +static int cm_sidr_rep_handler(struct cm_work *work) +{ + struct cm_sidr_rep_msg *sidr_rep_msg; + struct cm_id_private *cm_id_priv; + unsigned long flags; + + sidr_rep_msg = (struct cm_sidr_rep_msg *) + work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); + if (!cm_id_priv) + return -EINVAL; /* Unmatched reply. */ + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + cm_format_sidr_rep_event(work); + cm_process_work(cm_id_priv, work); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_process_send_error(struct ib_mad_send_buf *msg, + enum ib_wc_status wc_status) +{ + struct cm_id_private *cm_id_priv; + struct ib_cm_event cm_event; + enum ib_cm_state state; + unsigned long flags; + int ret; + + memset(&cm_event, 0, sizeof cm_event); + cm_id_priv = msg->context[0]; + + /* Discard old sends or ones without a response. */ + spin_lock_irqsave(&cm_id_priv->lock, flags); + state = (enum ib_cm_state) (unsigned long) msg->context[1]; + if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) + goto discard; + + switch (state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + cm_reset_to_idle(cm_id_priv); + cm_event.event = IB_CM_REQ_ERROR; + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + cm_reset_to_idle(cm_id_priv); + cm_event.event = IB_CM_REP_ERROR; + break; + case IB_CM_DREQ_SENT: + cm_enter_timewait(cm_id_priv); + cm_event.event = IB_CM_DREQ_ERROR; + break; + case IB_CM_SIDR_REQ_SENT: + cm_id_priv->id.state = IB_CM_IDLE; + cm_event.event = IB_CM_SIDR_REQ_ERROR; + break; + default: + goto discard; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_event.param.send_status = wc_status; + + /* No other events can occur on the cm_id at this point. */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); + cm_free_msg(msg); + if (ret) + ib_destroy_cm_id(&cm_id_priv->id); + return; +discard: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); +} + +static void cm_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_mad_send_buf *msg; + + msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id; + + switch (mad_send_wc->status) { + case IB_WC_SUCCESS: + case IB_WC_WR_FLUSH_ERR: + cm_free_msg(msg); + break; + default: + if (msg->context[0] && msg->context[1]) + cm_process_send_error(msg, mad_send_wc->status); + else + cm_free_msg(msg); + break; + } +} + +static void cm_work_handler(void *data) +{ + struct cm_work *work = data; + int ret; + + switch (work->cm_event.event) { + case IB_CM_REQ_RECEIVED: + ret = cm_req_handler(work); + break; + case IB_CM_MRA_RECEIVED: + ret = cm_mra_handler(work); + break; + case IB_CM_REJ_RECEIVED: + ret = cm_rej_handler(work); + break; + case IB_CM_REP_RECEIVED: + ret = cm_rep_handler(work); + break; + case IB_CM_RTU_RECEIVED: + ret = cm_rtu_handler(work); + break; + case IB_CM_USER_ESTABLISHED: + ret = cm_establish_handler(work); + break; + case IB_CM_DREQ_RECEIVED: + ret = cm_dreq_handler(work); + break; + case IB_CM_DREP_RECEIVED: + ret = cm_drep_handler(work); + break; + case IB_CM_SIDR_REQ_RECEIVED: + ret = cm_sidr_req_handler(work); + break; + case IB_CM_SIDR_REP_RECEIVED: + ret = cm_sidr_rep_handler(work); + break; + case IB_CM_LAP_RECEIVED: + ret = cm_lap_handler(work); + break; + case IB_CM_APR_RECEIVED: + ret = cm_apr_handler(work); + break; + case IB_CM_TIMEWAIT_EXIT: + ret = cm_timewait_handler(work); + break; + default: + ret = -EINVAL; + break; + } + if (ret) + cm_free_work(work); +} + +int ib_cm_establish(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + struct cm_work *work; + unsigned long flags; + int ret = 0; + + work = kmalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) + { + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + cm_id->state = IB_CM_ESTABLISHED; + break; + case IB_CM_ESTABLISHED: + ret = -EISCONN; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) { + kfree(work); + goto out; + } + + /* + * The CM worker thread may try to destroy the cm_id before it + * can execute this work item. To prevent potential deadlock, + * we need to find the cm_id once we're in the context of the + * worker thread, rather than holding a reference on it. + */ + INIT_WORK(&work->work, cm_work_handler, work); + work->local_id = cm_id->local_id; + work->remote_id = cm_id->remote_id; + work->mad_recv_wc = NULL; + work->cm_event.event = IB_CM_USER_ESTABLISHED; + queue_work(cm.wq, &work->work); +out: + return ret; +} +EXPORT_SYMBOL(ib_cm_establish); + +static void cm_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct cm_work *work; + enum ib_cm_event_type event; + int paths = 0; + + switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { + case CM_REQ_ATTR_ID: + paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> + alt_local_lid != 0); + event = IB_CM_REQ_RECEIVED; + break; + case CM_MRA_ATTR_ID: + event = IB_CM_MRA_RECEIVED; + break; + case CM_REJ_ATTR_ID: + event = IB_CM_REJ_RECEIVED; + break; + case CM_REP_ATTR_ID: + event = IB_CM_REP_RECEIVED; + break; + case CM_RTU_ATTR_ID: + event = IB_CM_RTU_RECEIVED; + break; + case CM_DREQ_ATTR_ID: + event = IB_CM_DREQ_RECEIVED; + break; + case CM_DREP_ATTR_ID: + event = IB_CM_DREP_RECEIVED; + break; + case CM_SIDR_REQ_ATTR_ID: + event = IB_CM_SIDR_REQ_RECEIVED; + break; + case CM_SIDR_REP_ATTR_ID: + event = IB_CM_SIDR_REP_RECEIVED; + break; + case CM_LAP_ATTR_ID: + paths = 1; + event = IB_CM_LAP_RECEIVED; + break; + case CM_APR_ATTR_ID: + event = IB_CM_APR_RECEIVED; + break; + default: + ib_free_recv_mad(mad_recv_wc); + return; + } + + work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, + GFP_KERNEL); + if (!work) { + ib_free_recv_mad(mad_recv_wc); + return; + } + + INIT_WORK(&work->work, cm_work_handler, work); + work->cm_event.event = event; + work->mad_recv_wc = mad_recv_wc; + work->port = (struct cm_port *)mad_agent->context; + queue_work(cm.wq, &work->work); +} + +static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | IB_QP_PORT; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + if (cm_id_priv->responder_resources) + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + qp_attr->pkey_index = cm_id_priv->av.pkey_index; + qp_attr->port_num = cm_id_priv->av.port->port_num; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | + IB_QP_DEST_QPN | IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + qp_attr->ah_attr = cm_id_priv->av.ah_attr; + qp_attr->path_mtu = cm_id_priv->path_mtu; + qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); + qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); + qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_ALT_PATH; + qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; + } + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = cm_id_priv->local_ack_timeout; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + qp_attr->path_mig_state = IB_MIG_REARM; + } + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct cm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + case IB_QPS_RTR: + ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_init_qp_attr); + +static __be64 cm_get_ca_guid(struct ib_device *device) +{ + struct ib_device_attr *device_attr; + __be64 guid; + int ret; + + device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); + if (!device_attr) + return 0; + + ret = ib_query_device(device, device_attr); + guid = ret ? 0 : device_attr->node_guid; + kfree(device_attr); + return guid; +} + +static void cm_add_one(struct ib_device *device) +{ + struct cm_device *cm_dev; + struct cm_port *port; + struct ib_mad_reg_req reg_req = { + .mgmt_class = IB_MGMT_CLASS_CM, + .mgmt_class_version = IB_CM_CLASS_VERSION + }; + struct ib_port_modify port_modify = { + .set_port_cap_mask = IB_PORT_CM_SUP + }; + unsigned long flags; + int ret; + u8 i; + + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * + device->phys_port_cnt, GFP_KERNEL); + if (!cm_dev) + return; + + cm_dev->device = device; + cm_dev->ca_guid = cm_get_ca_guid(device); + if (!cm_dev->ca_guid) + goto error1; + + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); + for (i = 1; i <= device->phys_port_cnt; i++) { + port = &cm_dev->port[i-1]; + port->cm_dev = cm_dev; + port->port_num = i; + port->mad_agent = ib_register_mad_agent(device, i, + IB_QPT_GSI, + ®_req, + 0, + cm_send_handler, + cm_recv_handler, + port); + if (IS_ERR(port->mad_agent)) + goto error2; + + ret = ib_modify_port(device, i, 0, &port_modify); + if (ret) + goto error3; + } + ib_set_client_data(device, &cm_client, cm_dev); + + write_lock_irqsave(&cm.device_lock, flags); + list_add_tail(&cm_dev->list, &cm.device_list); + write_unlock_irqrestore(&cm.device_lock, flags); + return; + +error3: + ib_unregister_mad_agent(port->mad_agent); +error2: + port_modify.set_port_cap_mask = 0; + port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + while (--i) { + port = &cm_dev->port[i-1]; + ib_modify_port(device, port->port_num, 0, &port_modify); + ib_unregister_mad_agent(port->mad_agent); + } +error1: + kfree(cm_dev); +} + +static void cm_remove_one(struct ib_device *device) +{ + struct cm_device *cm_dev; + struct cm_port *port; + struct ib_port_modify port_modify = { + .clr_port_cap_mask = IB_PORT_CM_SUP + }; + unsigned long flags; + int i; + + cm_dev = ib_get_client_data(device, &cm_client); + if (!cm_dev) + return; + + write_lock_irqsave(&cm.device_lock, flags); + list_del(&cm_dev->list); + write_unlock_irqrestore(&cm.device_lock, flags); + + for (i = 1; i <= device->phys_port_cnt; i++) { + port = &cm_dev->port[i-1]; + ib_modify_port(device, port->port_num, 0, &port_modify); + ib_unregister_mad_agent(port->mad_agent); + } + kfree(cm_dev); +} + +static int __init ib_cm_init(void) +{ + int ret; + + memset(&cm, 0, sizeof cm); + INIT_LIST_HEAD(&cm.device_list); + rwlock_init(&cm.device_lock); + spin_lock_init(&cm.lock); + cm.listen_service_table = RB_ROOT; + cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); + cm.remote_id_table = RB_ROOT; + cm.remote_qp_table = RB_ROOT; + cm.remote_sidr_table = RB_ROOT; + idr_init(&cm.local_id_table); + idr_pre_get(&cm.local_id_table, GFP_KERNEL); + + cm.wq = create_workqueue("ib_cm"); + if (!cm.wq) + return -ENOMEM; + + ret = ib_register_client(&cm_client); + if (ret) + goto error; + + return 0; +error: + destroy_workqueue(cm.wq); + return ret; +} + +static void __exit ib_cm_cleanup(void) +{ + flush_workqueue(cm.wq); + destroy_workqueue(cm.wq); + ib_unregister_client(&cm_client); +} + +module_init(ib_cm_init); +module_exit(ib_cm_cleanup); + diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h new file mode 100644 index 000000000000..813ab70bf6d5 --- /dev/null +++ b/drivers/infiniband/core/cm_msgs.h @@ -0,0 +1,817 @@ +/* + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING the madirectory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use source and binary forms, with or + * withmodification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retathe above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE + * SOFTWARE. + */ +#if !defined(CM_MSGS_H) +#define CM_MSGS_H + +#include <rdma/ib_mad.h> + +/* + * Parameters to routines below should be in network-byte order, and values + * are returned in network-byte order. + */ + +#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ + +#define CM_REQ_ATTR_ID __constant_htons(0x0010) +#define CM_MRA_ATTR_ID __constant_htons(0x0011) +#define CM_REJ_ATTR_ID __constant_htons(0x0012) +#define CM_REP_ATTR_ID __constant_htons(0x0013) +#define CM_RTU_ATTR_ID __constant_htons(0x0014) +#define CM_DREQ_ATTR_ID __constant_htons(0x0015) +#define CM_DREP_ATTR_ID __constant_htons(0x0016) +#define CM_SIDR_REQ_ATTR_ID __constant_htons(0x0017) +#define CM_SIDR_REP_ATTR_ID __constant_htons(0x0018) +#define CM_LAP_ATTR_ID __constant_htons(0x0019) +#define CM_APR_ATTR_ID __constant_htons(0x001A) + +enum cm_msg_sequence { + CM_MSG_SEQUENCE_REQ, + CM_MSG_SEQUENCE_LAP, + CM_MSG_SEQUENCE_DREQ, + CM_MSG_SEQUENCE_SIDR +}; + +struct cm_req_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 rsvd4; + __be64 service_id; + __be64 local_ca_guid; + __be32 rsvd24; + __be32 local_qkey; + /* local QPN:24, responder resources:8 */ + __be32 offset32; + /* local EECN:24, initiator depth:8 */ + __be32 offset36; + /* + * remote EECN:24, remote CM response timeout:5, + * transport service type:2, end-to-end flow control:1 + */ + __be32 offset40; + /* starting PSN:24, local CM response timeout:5, retry count:3 */ + __be32 offset44; + __be16 pkey; + /* path MTU:4, RDC exists:1, RNR retry count:3. */ + u8 offset50; + /* max CM Retries:4, SRQ:1, rsvd:3 */ + u8 offset51; + + __be16 primary_local_lid; + __be16 primary_remote_lid; + union ib_gid primary_local_gid; + union ib_gid primary_remote_gid; + /* flow label:20, rsvd:6, packet rate:6 */ + __be32 primary_offset88; + u8 primary_traffic_class; + u8 primary_hop_limit; + /* SL:4, subnet local:1, rsvd:3 */ + u8 primary_offset94; + /* local ACK timeout:5, rsvd:3 */ + u8 primary_offset95; + + __be16 alt_local_lid; + __be16 alt_remote_lid; + union ib_gid alt_local_gid; + union ib_gid alt_remote_gid; + /* flow label:20, rsvd:6, packet rate:6 */ + __be32 alt_offset132; + u8 alt_traffic_class; + u8 alt_hop_limit; + /* SL:4, subnet local:1, rsvd:3 */ + u8 alt_offset138; + /* local ACK timeout:5, rsvd:3 */ + u8 alt_offset139; + + u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) +{ + return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8); +} + +static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn) +{ + req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(req_msg->offset32) & + 0x000000FF)); +} + +static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg) +{ + return (u8) be32_to_cpu(req_msg->offset32); +} + +static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res) +{ + req_msg->offset32 = cpu_to_be32(resp_res | + (be32_to_cpu(req_msg->offset32) & + 0xFFFFFF00)); +} + +static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg) +{ + return (u8) be32_to_cpu(req_msg->offset36); +} + +static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg, + u8 init_depth) +{ + req_msg->offset36 = cpu_to_be32(init_depth | + (be32_to_cpu(req_msg->offset36) & + 0xFFFFFF00)); +} + +static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg) +{ + return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3); +} + +static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg, + u8 resp_timeout) +{ + req_msg->offset40 = cpu_to_be32((resp_timeout << 3) | + (be32_to_cpu(req_msg->offset40) & + 0xFFFFFF07)); +} + +static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) +{ + u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1; + switch(transport_type) { + case 0: return IB_QPT_RC; + case 1: return IB_QPT_UC; + default: return 0; + } +} + +static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, + enum ib_qp_type qp_type) +{ + switch(qp_type) { + case IB_QPT_UC: + req_msg->offset40 = cpu_to_be32((be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9) | 0x2); + default: + req_msg->offset40 = cpu_to_be32(be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9); + } +} + +static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg) +{ + return be32_to_cpu(req_msg->offset40) & 0x1; +} + +static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg, + u8 flow_ctrl) +{ + req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) | + (be32_to_cpu(req_msg->offset40) & + 0xFFFFFFFE)); +} + +static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) +{ + return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8); +} + +static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg, + __be32 starting_psn) +{ + req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | + (be32_to_cpu(req_msg->offset44) & 0x000000FF)); +} + +static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg) +{ + return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3); +} + +static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg, + u8 resp_timeout) +{ + req_msg->offset44 = cpu_to_be32((resp_timeout << 3) | + (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07)); +} + +static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg) +{ + return (u8) (be32_to_cpu(req_msg->offset44) & 0x7); +} + +static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg, + u8 retry_count) +{ + req_msg->offset44 = cpu_to_be32((retry_count & 0x7) | + (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8)); +} + +static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg) +{ + return req_msg->offset50 >> 4; +} + +static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu) +{ + req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4)); +} + +static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg) +{ + return req_msg->offset50 & 0x7; +} + +static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg, + u8 rnr_retry_count) +{ + req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) | + (rnr_retry_count & 0x7)); +} + +static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg) +{ + return req_msg->offset51 >> 4; +} + +static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg, + u8 retries) +{ + req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4)); +} + +static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg) +{ + return (req_msg->offset51 & 0x8) >> 3; +} + +static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq) +{ + req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) | + ((srq & 0x1) << 3)); +} + +static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) +{ + return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12); +} + +static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg, + __be32 flow_label) +{ + req_msg->primary_offset88 = cpu_to_be32( + (be32_to_cpu(req_msg->primary_offset88) & + 0x00000FFF) | + (be32_to_cpu(flow_label) << 12)); +} + +static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg) +{ + return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F); +} + +static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg, + u8 rate) +{ + req_msg->primary_offset88 = cpu_to_be32( + (be32_to_cpu(req_msg->primary_offset88) & + 0xFFFFFFC0) | (rate & 0x3F)); +} + +static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->primary_offset94 >> 4); +} + +static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl) +{ + req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) | + (sl << 4)); +} + +static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg) +{ + return (u8) ((req_msg->primary_offset94 & 0x08) >> 3); +} + +static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg, + u8 subnet_local) +{ + req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) | + ((subnet_local & 0x1) << 3)); +} + +static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->primary_offset95 >> 3); +} + +static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg, + u8 local_ack_timeout) +{ + req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) | + (local_ack_timeout << 3)); +} + +static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg) +{ + return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12); +} + +static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg, + __be32 flow_label) +{ + req_msg->alt_offset132 = cpu_to_be32( + (be32_to_cpu(req_msg->alt_offset132) & + 0x00000FFF) | + (be32_to_cpu(flow_label) << 12)); +} + +static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg) +{ + return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F); +} + +static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg, + u8 rate) +{ + req_msg->alt_offset132 = cpu_to_be32( + (be32_to_cpu(req_msg->alt_offset132) & + 0xFFFFFFC0) | (rate & 0x3F)); +} + +static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->alt_offset138 >> 4); +} + +static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl) +{ + req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) | + (sl << 4)); +} + +static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg) +{ + return (u8) ((req_msg->alt_offset138 & 0x08) >> 3); +} + +static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg, + u8 subnet_local) +{ + req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) | + ((subnet_local & 0x1) << 3)); +} + +static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->alt_offset139 >> 3); +} + +static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg, + u8 local_ack_timeout) +{ + req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) | + (local_ack_timeout << 3)); +} + +/* Message REJected or MRAed */ +enum cm_msg_response { + CM_MSG_RESPONSE_REQ = 0x0, + CM_MSG_RESPONSE_REP = 0x1, + CM_MSG_RESPONSE_OTHER = 0x2 +}; + + struct cm_mra_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + /* message MRAed:2, rsvd:6 */ + u8 offset8; + /* service timeout:5, rsvd:3 */ + u8 offset9; + + u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) +{ + return (u8) (mra_msg->offset8 >> 6); +} + +static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg) +{ + mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6)); +} + +static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg) +{ + return (u8) (mra_msg->offset9 >> 3); +} + +static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg, + u8 service_timeout) +{ + mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) | + (service_timeout << 3)); +} + +struct cm_rej_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + /* message REJected:2, rsvd:6 */ + u8 offset8; + /* reject info length:7, rsvd:1. */ + u8 offset9; + __be16 reason; + u8 ari[IB_CM_REJ_ARI_LENGTH]; + + u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) +{ + return (u8) (rej_msg->offset8 >> 6); +} + +static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg) +{ + rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6)); +} + +static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg) +{ + return (u8) (rej_msg->offset9 >> 1); +} + +static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg, + u8 len) +{ + rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1)); +} + +struct cm_rep_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + __be32 local_qkey; + /* local QPN:24, rsvd:8 */ + __be32 offset12; + /* local EECN:24, rsvd:8 */ + __be32 offset16; + /* starting PSN:24 rsvd:8 */ + __be32 offset20; + u8 resp_resources; + u8 initiator_depth; + /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ + u8 offset26; + /* RNR retry count:3, SRQ:1, rsvd:5 */ + u8 offset27; + __be64 local_ca_guid; + + u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8); +} + +static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) +{ + rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); +} + +static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); +} + +static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg, + __be32 starting_psn) +{ + rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | + (be32_to_cpu(rep_msg->offset20) & 0x000000FF)); +} + +static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg) +{ + return (u8) (rep_msg->offset26 >> 3); +} + +static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg, + u8 target_ack_delay) +{ + rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) | + (target_ack_delay << 3)); +} + +static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg) +{ + return (u8) ((rep_msg->offset26 & 0x06) >> 1); +} + +static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover) +{ + rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) | + ((failover & 0x3) << 1)); +} + +static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg) +{ + return (u8) (rep_msg->offset26 & 0x01); +} + +static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg, + u8 flow_ctrl) +{ + rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) | + (flow_ctrl & 0x1)); +} + +static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg) +{ + return (u8) (rep_msg->offset27 >> 5); +} + +static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg, + u8 rnr_retry_count) +{ + rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) | + (rnr_retry_count << 5)); +} + +static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg) +{ + return (u8) ((rep_msg->offset27 >> 4) & 0x1); +} + +static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq) +{ + rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) | + ((srq & 0x1) << 4)); +} + +struct cm_rtu_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + + u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +struct cm_dreq_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + /* remote QPN/EECN:24, rsvd:8 */ + __be32 offset8; + + u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) +{ + return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8); +} + +static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn) +{ + dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(dreq_msg->offset8) & 0x000000FF)); +} + +struct cm_drep_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + + u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +struct cm_lap_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + + __be32 rsvd8; + /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ + __be32 offset12; + __be32 rsvd16; + + __be16 alt_local_lid; + __be16 alt_remote_lid; + union ib_gid alt_local_gid; + union ib_gid alt_remote_gid; + /* flow label:20, rsvd:4, traffic class:8 */ + __be32 offset56; + u8 alt_hop_limit; + /* rsvd:2, packet rate:6 */ + u8 offset61; + /* SL:4, subnet local:1, rsvd:3 */ + u8 offset62; + /* local ACK timeout:5, rsvd:3 */ + u8 offset63; + + u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) +{ + return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8); +} + +static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn) +{ + lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(lap_msg->offset12) & + 0x000000FF)); +} + +static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg) +{ + return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3); +} + +static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg, + u8 resp_timeout) +{ + lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) | + (be32_to_cpu(lap_msg->offset12) & + 0xFFFFFF07)); +} + +static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) +{ + return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12); +} + +static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg, + __be32 flow_label) +{ + lap_msg->offset56 = cpu_to_be32( + (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) | + (be32_to_cpu(flow_label) << 12)); +} + +static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg) +{ + return (u8) be32_to_cpu(lap_msg->offset56); +} + +static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg, + u8 traffic_class) +{ + lap_msg->offset56 = cpu_to_be32(traffic_class | + (be32_to_cpu(lap_msg->offset56) & + 0xFFFFFF00)); +} + +static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg) +{ + return lap_msg->offset61 & 0x3F; +} + +static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg, + u8 packet_rate) +{ + lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0); +} + +static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg) +{ + return lap_msg->offset62 >> 4; +} + +static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl) +{ + lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F); +} + +static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg) +{ + return (lap_msg->offset62 >> 3) & 0x1; +} + +static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg, + u8 subnet_local) +{ + lap_msg->offset62 = ((subnet_local & 0x1) << 3) | + (lap_msg->offset61 & 0xF7); +} +static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg) +{ + return lap_msg->offset63 >> 3; +} + +static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg, + u8 local_ack_timeout) +{ + lap_msg->offset63 = (local_ack_timeout << 3) | + (lap_msg->offset63 & 0x07); +} + +struct cm_apr_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; + + u8 info_length; + u8 ap_status; + u8 info[IB_CM_APR_INFO_LENGTH]; + + u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +struct cm_sidr_req_msg { + struct ib_mad_hdr hdr; + + __be32 request_id; + __be16 pkey; + __be16 rsvd; + __be64 service_id; + + u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +struct cm_sidr_rep_msg { + struct ib_mad_hdr hdr; + + __be32 request_id; + u8 status; + u8 info_length; + __be16 rsvd; + /* QPN:24, rsvd:8 */ + __be32 offset8; + __be64 service_id; + __be32 qkey; + u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; + + u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) +{ + return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8); +} + +static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg, + __be32 qpn) +{ + sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(sidr_rep_msg->offset8) & + 0x000000FF)); +} + +#endif /* CM_MSGS_H */ diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 797049626ff6..7ad47a4b166b 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -38,7 +38,7 @@ #include <linux/list.h> #include <linux/spinlock.h> -#include <ib_verbs.h> +#include <rdma/ib_verbs.h> int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9197e92d708a..d3cf84e01587 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 328feae2a5be..d34a6f1c4f4c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: fmr_pool.c 1349 2004-12-16 21:09:43Z roland $ + * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $ */ #include <linux/errno.h> @@ -38,7 +39,7 @@ #include <linux/jhash.h> #include <linux/kthread.h> -#include <ib_fmr_pool.h> +#include <rdma/ib_fmr_pool.h> #include "core_priv.h" @@ -329,10 +330,11 @@ EXPORT_SYMBOL(ib_create_fmr_pool); * * Destroy an FMR pool and free all associated resources. */ -int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) +void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) { struct ib_pool_fmr *fmr; struct ib_pool_fmr *tmp; + LIST_HEAD(fmr_list); int i; kthread_stop(pool->thread); @@ -340,6 +342,11 @@ int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + if (fmr->remap_count) { + INIT_LIST_HEAD(&fmr_list); + list_add_tail(&fmr->fmr->list, &fmr_list); + ib_unmap_fmr(&fmr_list); + } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); @@ -352,8 +359,6 @@ int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) kfree(pool->cache_bucket); kfree(pool); - - return 0; } EXPORT_SYMBOL(ib_destroy_fmr_pool); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 23628c622a50..a4a4d9c1eef3 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,12 +31,12 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $ */ - #include <linux/dma-mapping.h> #include "mad_priv.h" +#include "mad_rmpp.h" #include "smi.h" #include "agent.h" @@ -45,6 +47,7 @@ MODULE_AUTHOR("Sean Hefty"); kmem_cache_t *ib_mad_cache; + static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -58,16 +61,12 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, - struct ib_mad *mad, int solicited); + struct ib_mad *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); -static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, - struct ib_mad_send_wc *mad_send_wc); static void timeout_sends(void *data); -static void cancel_sends(void *data); static void local_completions(void *data); -static int solicited_mad(struct ib_mad *mad); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class); @@ -197,8 +196,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (qpn == -1) goto error1; - if (rmpp_version) - goto error1; /* XXX: until RMPP implemented */ + if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) + goto error1; /* Validate MAD registration request if supplied */ if (mad_reg_req) { @@ -261,22 +260,29 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, ret = ERR_PTR(-ENOMEM); goto error1; } + memset(mad_agent_priv, 0, sizeof *mad_agent_priv); + + mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mad_agent_priv->agent.mr)) { + ret = ERR_PTR(-ENOMEM); + goto error2; + } if (mad_reg_req) { reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL); if (!reg_req) { ret = ERR_PTR(-ENOMEM); - goto error2; + goto error3; } /* Make a copy of the MAD registration request */ memcpy(reg_req, mad_reg_req, sizeof *reg_req); } /* Now, fill in the various structures */ - memset(mad_agent_priv, 0, sizeof *mad_agent_priv); mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; mad_agent_priv->reg_req = reg_req; - mad_agent_priv->rmpp_version = rmpp_version; + mad_agent_priv->agent.rmpp_version = rmpp_version; mad_agent_priv->agent.device = device; mad_agent_priv->agent.recv_handler = recv_handler; mad_agent_priv->agent.send_handler = send_handler; @@ -301,7 +307,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error3; + goto error4; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -317,14 +323,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error3; + goto error4; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); - goto error3; + goto error4; } } @@ -335,22 +341,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, spin_lock_init(&mad_agent_priv->lock); INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_LIST_HEAD(&mad_agent_priv->done_list); + INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions, mad_agent_priv); - INIT_LIST_HEAD(&mad_agent_priv->canceled_list); - INIT_WORK(&mad_agent_priv->canceled_work, cancel_sends, mad_agent_priv); atomic_set(&mad_agent_priv->refcount, 1); init_waitqueue_head(&mad_agent_priv->wait); return &mad_agent_priv->agent; -error3: +error4: spin_unlock_irqrestore(&port_priv->reg_lock, flags); kfree(reg_req); -error2: +error3: kfree(mad_agent_priv); +error2: + ib_dereg_mr(mad_agent_priv->agent.mr); error1: return ret; } @@ -487,18 +495,16 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) * MADs, preventing us from queuing additional work */ cancel_mads(mad_agent_priv); - port_priv = mad_agent_priv->qp_info->port_priv; - cancel_delayed_work(&mad_agent_priv->timed_work); - flush_workqueue(port_priv->wq); spin_lock_irqsave(&port_priv->reg_lock, flags); remove_mad_reg_req(mad_agent_priv); list_del(&mad_agent_priv->agent_list); spin_unlock_irqrestore(&port_priv->reg_lock, flags); - /* XXX: Cleanup pending RMPP receives for this agent */ + flush_workqueue(port_priv->wq); + ib_cancel_rmpp_recvs(mad_agent_priv); atomic_dec(&mad_agent_priv->refcount); wait_event(mad_agent_priv->wait, @@ -506,6 +512,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) if (mad_agent_priv->reg_req) kfree(mad_agent_priv->reg_req); + ib_dereg_mr(mad_agent_priv->agent.mr); kfree(mad_agent_priv); } @@ -551,6 +558,13 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) } EXPORT_SYMBOL(ib_unregister_mad_agent); +static inline int response_mad(struct ib_mad *mad) +{ + /* Trap represses are responses although response bit is reset */ + return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || + (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); +} + static void dequeue_mad(struct ib_mad_list_head *mad_list) { struct ib_mad_queue *mad_queue; @@ -643,7 +657,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_smp *smp, struct ib_send_wr *send_wr) { - int ret, solicited; + int ret; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -679,7 +693,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - build_smp_wc(send_wr->wr_id, smp->dr_slid, send_wr->wr.ud.pkey_index, + build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid), + send_wr->wr.ud.pkey_index, send_wr->wr.ud.port_num, &mad_wc); /* No GRH for DR SMP */ @@ -689,11 +704,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - /* - * See if response is solicited and - * there is a recv handler - */ - if (solicited_mad(&mad_priv->mad.mad) && + if (response_mad(&mad_priv->mad.mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -710,15 +721,13 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ - solicited = solicited_mad(&mad_priv->mad.mad); port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { mad_priv->mad.mad.mad_hdr.tid = ((struct ib_mad *)smp)->mad_hdr.tid; recv_mad_agent = find_mad_agent(port_priv, - &mad_priv->mad.mad, - solicited); + &mad_priv->mad.mad); } if (!port_priv || !recv_mad_agent) { kmem_cache_free(ib_mad_cache, mad_priv); @@ -750,43 +759,133 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, list_add_tail(&local->completion_list, &mad_agent_priv->local_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); queue_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->local_work); + &mad_agent_priv->local_work); ret = 1; out: return ret; } -static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *mad_send_wr) +static int get_buf_length(int hdr_len, int data_len) +{ + int seg_size, pad; + + seg_size = sizeof(struct ib_mad) - hdr_len; + if (data_len && seg_size) { + pad = seg_size - data_len % seg_size; + if (pad == seg_size) + pad = 0; + } else + pad = seg_size; + return hdr_len + data_len + pad; +} + +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + struct ib_ah *ah, int rmpp_active, + int hdr_len, int data_len, + unsigned int __nocast gfp_mask) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf *send_buf; + int buf_size; + void *buf; + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, agent); + buf_size = get_buf_length(hdr_len, data_len); + + if ((!mad_agent->rmpp_version && + (rmpp_active || buf_size > sizeof(struct ib_mad))) || + (!rmpp_active && buf_size > sizeof(struct ib_mad))) + return ERR_PTR(-EINVAL); + + buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); + if (!buf) + return ERR_PTR(-ENOMEM); + memset(buf, 0, sizeof *send_buf + buf_size); + + send_buf = buf + buf_size; + send_buf->mad = buf; + + send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, + buf, buf_size, DMA_TO_DEVICE); + pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); + send_buf->sge.length = buf_size; + send_buf->sge.lkey = mad_agent->mr->lkey; + + send_buf->send_wr.wr_id = (unsigned long) send_buf; + send_buf->send_wr.sg_list = &send_buf->sge; + send_buf->send_wr.num_sge = 1; + send_buf->send_wr.opcode = IB_WR_SEND; + send_buf->send_wr.send_flags = IB_SEND_SIGNALED; + send_buf->send_wr.wr.ud.ah = ah; + send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr; + send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; + send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + send_buf->send_wr.wr.ud.pkey_index = pkey_index; + + if (rmpp_active) { + struct ib_rmpp_mad *rmpp_mad; + rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - + offsetof(struct ib_rmpp_mad, data) + data_len); + rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, + IB_MGMT_RMPP_FLAG_ACTIVE); + } + + send_buf->mad_agent = mad_agent; + atomic_inc(&mad_agent_priv->refcount); + return send_buf; +} +EXPORT_SYMBOL(ib_create_send_mad); + +void ib_free_send_mad(struct ib_mad_send_buf *send_buf) +{ + struct ib_mad_agent_private *mad_agent_priv; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); + + dma_unmap_single(send_buf->mad_agent->device->dma_device, + pci_unmap_addr(send_buf, mapping), + send_buf->sge.length, DMA_TO_DEVICE); + kfree(send_buf->mad); + + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); +} +EXPORT_SYMBOL(ib_free_send_mad); + +int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct ib_send_wr *bad_send_wr; + struct list_head *list; unsigned long flags; int ret; - /* Replace user's WR ID with our own to find WR upon completion */ - qp_info = mad_agent_priv->qp_info; - mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id; + /* Set WR ID to find mad_send_wr upon completion */ + qp_info = mad_send_wr->mad_agent_priv->qp_info; mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; spin_lock_irqsave(&qp_info->send_queue.lock, flags); - if (qp_info->send_queue.count++ < qp_info->send_queue.max_active) { - list_add_tail(&mad_send_wr->mad_list.list, - &qp_info->send_queue.list); - spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - ret = ib_post_send(mad_agent_priv->agent.qp, + if (qp_info->send_queue.count < qp_info->send_queue.max_active) { + ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, &mad_send_wr->send_wr, &bad_send_wr); - if (ret) { - printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); - dequeue_mad(&mad_send_wr->mad_list); - } + list = &qp_info->send_queue.list; } else { - list_add_tail(&mad_send_wr->mad_list.list, - &qp_info->overflow_list); - spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); ret = 0; + list = &qp_info->overflow_list; } + + if (!ret) { + qp_info->send_queue.count++; + list_add_tail(&mad_send_wr->mad_list.list, list); + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); return ret; } @@ -860,18 +959,19 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, ret = -ENOMEM; goto error2; } + memset(mad_send_wr, 0, sizeof *mad_send_wr); mad_send_wr->send_wr = *send_wr; mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; memcpy(mad_send_wr->sg_list, send_wr->sg_list, sizeof *send_wr->sg_list * send_wr->num_sge); - mad_send_wr->send_wr.next = NULL; + mad_send_wr->wr_id = send_wr->wr_id; mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; - mad_send_wr->agent = mad_agent; + mad_send_wr->mad_agent_priv = mad_agent_priv; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. ud.timeout_ms); - mad_send_wr->retry = 0; + mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; /* One reference for each work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -883,8 +983,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, &mad_agent_priv->send_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ret = ib_send_mad(mad_agent_priv, mad_send_wr); - if (ret) { + if (mad_agent_priv->agent.rmpp_version) { + ret = ib_send_rmpp_mad(mad_send_wr); + if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) + ret = ib_send_mad(mad_send_wr); + } else + ret = ib_send_mad(mad_send_wr); + if (ret < 0) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&mad_send_wr->agent_list); @@ -910,41 +1015,28 @@ EXPORT_SYMBOL(ib_post_send_mad); */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) { - struct ib_mad_recv_buf *entry; + struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *priv; + struct list_head free_list; - mad_priv_hdr = container_of(mad_recv_wc, - struct ib_mad_private_header, - recv_wc); - priv = container_of(mad_priv_hdr, struct ib_mad_private, header); + INIT_LIST_HEAD(&free_list); + list_splice_init(&mad_recv_wc->rmpp_list, &free_list); - /* - * Walk receive buffer list associated with this WC - * No need to remove them from list of receive buffers - */ - list_for_each_entry(entry, &mad_recv_wc->recv_buf.list, list) { - /* Free previous receive buffer */ - kmem_cache_free(ib_mad_cache, priv); + list_for_each_entry_safe(mad_recv_buf, temp_recv_buf, + &free_list, list) { + mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc, + recv_buf); mad_priv_hdr = container_of(mad_recv_wc, struct ib_mad_private_header, recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); + kmem_cache_free(ib_mad_cache, priv); } - - /* Free last buffer */ - kmem_cache_free(ib_mad_cache, priv); } EXPORT_SYMBOL(ib_free_recv_mad); -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, - void *buf) -{ - printk(KERN_ERR PFX "ib_coalesce_recv_mad() not implemented yet\n"); -} -EXPORT_SYMBOL(ib_coalesce_recv_mad); - struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, u8 rmpp_version, ib_mad_send_handler send_handler, @@ -1338,42 +1430,15 @@ out: return; } -static int response_mad(struct ib_mad *mad) -{ - /* Trap represses are responses although response bit is reset */ - return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); -} - -static int solicited_mad(struct ib_mad *mad) -{ - /* CM MADs are never solicited */ - if (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CM) { - return 0; - } - - /* XXX: Determine whether MAD is using RMPP */ - - /* Not using RMPP */ - /* Is this MAD a response to a previous MAD ? */ - return response_mad(mad); -} - static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, - struct ib_mad *mad, - int solicited) + struct ib_mad *mad) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - - /* - * Whether MAD was solicited determines type of routing to - * MAD client. - */ - if (solicited) { + if (response_mad(mad)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1477,21 +1542,20 @@ out: return valid; } -/* - * Return start of fully reassembled MAD, or NULL, if MAD isn't assembled yet - */ -static struct ib_mad_private * -reassemble_recv(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_private *recv) +static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_hdr *mad_hdr) { - /* Until we have RMPP, all receives are reassembled!... */ - INIT_LIST_HEAD(&recv->header.recv_wc.recv_buf.list); - return recv; + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; + return !mad_agent_priv->agent.rmpp_version || + !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE) || + (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } -static struct ib_mad_send_wr_private* -find_send_req(struct ib_mad_agent_private *mad_agent_priv, - u64 tid) +struct ib_mad_send_wr_private* +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) { struct ib_mad_send_wr_private *mad_send_wr; @@ -1507,7 +1571,9 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv, */ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->tid == tid && mad_send_wr->timeout) { + if (is_data_mad(mad_agent_priv, + mad_send_wr->send_wr.wr.ud.mad_hdr) && + mad_send_wr->tid == tid && mad_send_wr->timeout) { /* Verify request has not been canceled */ return (mad_send_wr->status == IB_WC_SUCCESS) ? mad_send_wr : NULL; @@ -1516,43 +1582,55 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv, return NULL; } +void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) +{ + mad_send_wr->timeout = 0; + if (mad_send_wr->refcount == 1) { + list_del(&mad_send_wr->agent_list); + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->done_list); + } +} + static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_private *recv, - int solicited) + struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; - - /* Fully reassemble receive before processing */ - recv = reassemble_recv(mad_agent_priv, recv); - if (!recv) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); - return; + __be64 tid; + + INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); + list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); + if (mad_agent_priv->agent.rmpp_version) { + mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, + mad_recv_wc); + if (!mad_recv_wc) { + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + return; + } } /* Complete corresponding request */ - if (solicited) { + if (response_mad(mad_recv_wc->recv_buf.mad)) { + tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid; spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_req(mad_agent_priv, - recv->mad.mad.mad_hdr.tid); + mad_send_wr = ib_find_send_mad(mad_agent_priv, tid); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ib_free_recv_mad(&recv->header.recv_wc); + ib_free_recv_mad(mad_recv_wc); if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); return; } - /* Timeout = 0 means that we won't wait for a response */ - mad_send_wr->timeout = 0; + ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ - recv->header.recv_wc.wc->wr_id = mad_send_wr->wr_id; - mad_agent_priv->agent.recv_handler( - &mad_agent_priv->agent, - &recv->header.recv_wc); + mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; @@ -1560,9 +1638,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_send_wc.wr_id = mad_send_wr->wr_id; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } else { - mad_agent_priv->agent.recv_handler( - &mad_agent_priv->agent, - &recv->header.recv_wc); + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); } @@ -1576,7 +1653,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_private *recv, *response; struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; - int solicited; response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!response) @@ -1662,11 +1738,9 @@ local: } } - /* Determine corresponding MAD agent for incoming receive MAD */ - solicited = solicited_mad(&recv->mad.mad); - mad_agent = find_mad_agent(port_priv, &recv->mad.mad, solicited); + mad_agent = find_mad_agent(port_priv, &recv->mad.mad); if (mad_agent) { - ib_mad_complete_recv(mad_agent, recv, solicited); + ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv * or via recv_handler in ib_mad_complete_recv() @@ -1710,26 +1784,31 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) } } -static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *mad_send_wr ) +static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) { + struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *temp_mad_send_wr; struct list_head *list_item; unsigned long delay; + mad_agent_priv = mad_send_wr->mad_agent_priv; list_del(&mad_send_wr->agent_list); delay = mad_send_wr->timeout; mad_send_wr->timeout += jiffies; - list_for_each_prev(list_item, &mad_agent_priv->wait_list) { - temp_mad_send_wr = list_entry(list_item, - struct ib_mad_send_wr_private, - agent_list); - if (time_after(mad_send_wr->timeout, - temp_mad_send_wr->timeout)) - break; + if (delay) { + list_for_each_prev(list_item, &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry(list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } } + else + list_item = &mad_agent_priv->wait_list; list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ @@ -1740,19 +1819,32 @@ static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv, } } +void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, + int timeout_ms) +{ + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + wait_for_response(mad_send_wr); +} + /* * Process a send work completion */ -static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, - struct ib_mad_send_wc *mad_send_wc) +void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_agent_private *mad_agent_priv; unsigned long flags; + int ret; - mad_agent_priv = container_of(mad_send_wr->agent, - struct ib_mad_agent_private, agent); - + mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); + if (mad_agent_priv->agent.rmpp_version) { + ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); + if (ret == IB_RMPP_RESULT_CONSUMED) + goto done; + } else + ret = IB_RMPP_RESULT_UNHANDLED; + if (mad_send_wc->status != IB_WC_SUCCESS && mad_send_wr->status == IB_WC_SUCCESS) { mad_send_wr->status = mad_send_wc->status; @@ -1762,10 +1854,9 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (--mad_send_wr->refcount > 0) { if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && mad_send_wr->status == IB_WC_SUCCESS) { - wait_for_response(mad_agent_priv, mad_send_wr); + wait_for_response(mad_send_wr); } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - return; + goto done; } /* Remove send from MAD agent and notify client of completion */ @@ -1775,14 +1866,18 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - mad_send_wc); + if (ret != IB_RMPP_RESULT_INTERNAL) + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + mad_send_wc); /* Release reference on agent taken when sending */ if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); kfree(mad_send_wr); + return; +done: + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, @@ -1961,6 +2056,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); + /* Empty local completion list as well */ + list_splice_init(&mad_agent_priv->local_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ @@ -1980,8 +2077,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) } static struct ib_mad_send_wr_private* -find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, - u64 wr_id) +find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) { struct ib_mad_send_wr_private *mad_send_wr; @@ -1993,79 +2089,50 @@ find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->wr_id == wr_id) + if (is_data_mad(mad_agent_priv, + mad_send_wr->send_wr.wr.ud.mad_hdr) && + mad_send_wr->wr_id == wr_id) return mad_send_wr; } return NULL; } -void cancel_sends(void *data) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc mad_send_wc; - unsigned long flags; - - mad_agent_priv = data; - - mad_send_wc.status = IB_WC_WR_FLUSH_ERR; - mad_send_wc.vendor_err = 0; - - spin_lock_irqsave(&mad_agent_priv->lock, flags); - while (!list_empty(&mad_agent_priv->canceled_list)) { - mad_send_wr = list_entry(mad_agent_priv->canceled_list.next, - struct ib_mad_send_wr_private, - agent_list); - - list_del(&mad_send_wr->agent_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - mad_send_wc.wr_id = mad_send_wr->wr_id; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); - - kfree(mad_send_wr); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); - spin_lock_irqsave(&mad_agent_priv->lock, flags); - } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -} - -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - u64 wr_id) +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; + int active; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); - if (!mad_send_wr) { + if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - goto out; + return -EINVAL; } - if (mad_send_wr->status == IB_WC_SUCCESS) - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - - if (mad_send_wr->refcount != 0) { + active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); + if (!timeout_ms) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - goto out; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } - list_del(&mad_send_wr->agent_list); - list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->canceled_list); - adjust_timeout(mad_agent_priv); + mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; + if (active) + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + else + ib_reset_mad_timeout(mad_send_wr, timeout_ms); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return 0; +} +EXPORT_SYMBOL(ib_modify_mad); - queue_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->canceled_work); -out: - return; +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) +{ + ib_modify_mad(mad_agent, wr_id, 0); } EXPORT_SYMBOL(ib_cancel_mad); @@ -2075,6 +2142,7 @@ static void local_completions(void *data) struct ib_mad_local_private *local; struct ib_mad_agent_private *recv_mad_agent; unsigned long flags; + int recv = 0; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; @@ -2090,22 +2158,25 @@ static void local_completions(void *data) recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); - kmem_cache_free(ib_mad_cache, local->mad_priv); goto local_send_completion; } + recv = 1; /* * Defined behavior is to complete response * before request */ - build_smp_wc(local->wr_id, IB_LID_PERMISSIVE, + build_smp_wc(local->wr_id, + be16_to_cpu(IB_LID_PERMISSIVE), 0 /* pkey index */, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); - INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.recv_buf.list); + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); + list_add(&local->mad_priv->header.recv_wc.recv_buf.list, + &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = &local->mad_priv->mad.mad; @@ -2136,11 +2207,47 @@ local_send_completion: spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&local->completion_list); atomic_dec(&mad_agent_priv->refcount); + if (!recv) + kmem_cache_free(ib_mad_cache, local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } +static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) +{ + int ret; + + if (!mad_send_wr->retries--) + return -ETIMEDOUT; + + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. + wr.ud.timeout_ms); + + if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { + ret = ib_retry_rmpp(mad_send_wr); + switch (ret) { + case IB_RMPP_RESULT_UNHANDLED: + ret = ib_send_mad(mad_send_wr); + break; + case IB_RMPP_RESULT_CONSUMED: + ret = 0; + break; + default: + ret = -ECOMM; + break; + } + } else + ret = ib_send_mad(mad_send_wr); + + if (!ret) { + mad_send_wr->refcount++; + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->send_list); + } + return ret; +} + static void timeout_sends(void *data) { struct ib_mad_agent_private *mad_agent_priv; @@ -2149,8 +2256,6 @@ static void timeout_sends(void *data) unsigned long flags, delay; mad_agent_priv = (struct ib_mad_agent_private *)data; - - mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; mad_send_wc.vendor_err = 0; spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -2170,8 +2275,16 @@ static void timeout_sends(void *data) } list_del(&mad_send_wr->agent_list); + if (mad_send_wr->status == IB_WC_SUCCESS && + !retry_send(mad_send_wr)) + continue; + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (mad_send_wr->status == IB_WC_SUCCESS) + mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + else + mad_send_wc.status = mad_send_wr->status; mad_send_wc.wr_id = mad_send_wr->wr_id; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -2183,7 +2296,7 @@ static void timeout_sends(void *data) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } -static void ib_mad_thread_completion_handler(struct ib_cq *cq) +static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg) { struct ib_mad_port_private *port_priv = cq->cq_context; @@ -2447,14 +2560,6 @@ static int ib_mad_port_open(struct ib_device *device, unsigned long flags; char name[sizeof "ib_mad123"]; - /* First, check if port already open at MAD layer */ - port_priv = ib_get_mad_port(device, port_num); - if (port_priv) { - printk(KERN_DEBUG PFX "%s port %d already open\n", - device->name, port_num); - return 0; - } - /* Create new device info */ port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { @@ -2471,8 +2576,7 @@ static int ib_mad_port_open(struct ib_device *device, cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2; port_priv->cq = ib_create_cq(port_priv->device, - (ib_comp_handler) - ib_mad_thread_completion_handler, + ib_mad_thread_completion_handler, NULL, port_priv, cq_size); if (IS_ERR(port_priv->cq)) { printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n"); @@ -2579,7 +2683,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int ret, num_ports, cur_port, i, ret2; + int num_ports, cur_port, i; if (device->node_type == IB_NODE_SWITCH) { num_ports = 1; @@ -2589,47 +2693,37 @@ static void ib_mad_init_device(struct ib_device *device) cur_port = 1; } for (i = 0; i < num_ports; i++, cur_port++) { - ret = ib_mad_port_open(device, cur_port); - if (ret) { + if (ib_mad_port_open(device, cur_port)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", device->name, cur_port); goto error_device_open; } - ret = ib_agent_port_open(device, cur_port); - if (ret) { + if (ib_agent_port_open(device, cur_port)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", device->name, cur_port); goto error_device_open; } } - - goto error_device_query; + return; error_device_open: while (i > 0) { cur_port--; - ret2 = ib_agent_port_close(device, cur_port); - if (ret2) { + if (ib_agent_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", device->name, cur_port); - } - ret2 = ib_mad_port_close(device, cur_port); - if (ret2) { + if (ib_mad_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", device->name, cur_port); - } i--; } - -error_device_query: - return; } static void ib_mad_remove_device(struct ib_device *device) { - int ret = 0, i, num_ports, cur_port, ret2; + int i, num_ports, cur_port; if (device->node_type == IB_NODE_SWITCH) { num_ports = 1; @@ -2639,21 +2733,13 @@ static void ib_mad_remove_device(struct ib_device *device) cur_port = 1; } for (i = 0; i < num_ports; i++, cur_port++) { - ret2 = ib_agent_port_close(device, cur_port); - if (ret2) { + if (ib_agent_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", device->name, cur_port); - if (!ret) - ret = ret2; - } - ret2 = ib_mad_port_close(device, cur_port); - if (ret2) { + if (ib_mad_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", device->name, cur_port); - if (!ret) - ret = ret2; - } } } @@ -2709,3 +2795,4 @@ static void __exit ib_mad_cleanup_module(void) module_init(ib_mad_init_module); module_exit(ib_mad_cleanup_module); + diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 008cbcb94b15..f1ba794e0daa 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad_priv.h 1389 2004-12-27 22:56:47Z roland $ + * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ @@ -38,8 +40,8 @@ #include <linux/pci.h> #include <linux/kthread.h> #include <linux/workqueue.h> -#include <ib_mad.h> -#include <ib_smi.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> #define PFX "ib_mad: " @@ -92,16 +94,15 @@ struct ib_mad_agent_private { spinlock_t lock; struct list_head send_list; struct list_head wait_list; + struct list_head done_list; struct work_struct timed_work; unsigned long timeout; struct list_head local_list; struct work_struct local_work; - struct list_head canceled_list; - struct work_struct canceled_work; + struct list_head rmpp_list; atomic_t refcount; wait_queue_head_t wait; - u8 rmpp_version; }; struct ib_mad_snoop_private { @@ -116,15 +117,24 @@ struct ib_mad_snoop_private { struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; - struct ib_mad_agent *agent; + struct ib_mad_agent_private *mad_agent_priv; struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ - u64 tid; + __be64 tid; unsigned long timeout; + int retries; int retry; int refcount; enum ib_wc_status status; + + /* RMPP control */ + int last_ack; + int seg_num; + int newwin; + int total_seg; + int data_offset; + int pad; }; struct ib_mad_local_private { @@ -134,7 +144,7 @@ struct ib_mad_local_private { struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ - u64 tid; + __be64 tid; }; struct ib_mad_mgmt_method_table { @@ -197,4 +207,17 @@ struct ib_mad_port_private { extern kmem_cache_t *ib_mad_cache; +int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); + +struct ib_mad_send_wr_private * +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid); + +void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc); + +void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); + +void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, + int timeout_ms); + #endif /* __IB_MAD_PRIV_H__ */ diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c new file mode 100644 index 000000000000..43fd805e0265 --- /dev/null +++ b/drivers/infiniband/core/mad_rmpp.c @@ -0,0 +1,944 @@ +/* + * Copyright (c) 2005 Intel Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ + */ + +#include <linux/dma-mapping.h> + +#include "mad_priv.h" +#include "mad_rmpp.h" + +enum rmpp_state { + RMPP_STATE_ACTIVE, + RMPP_STATE_TIMEOUT, + RMPP_STATE_COMPLETE +}; + +struct mad_rmpp_recv { + struct ib_mad_agent_private *agent; + struct list_head list; + struct work_struct timeout_work; + struct work_struct cleanup_work; + wait_queue_head_t wait; + enum rmpp_state state; + spinlock_t lock; + atomic_t refcount; + + struct ib_ah *ah; + struct ib_mad_recv_wc *rmpp_wc; + struct ib_mad_recv_buf *cur_seg_buf; + int last_ack; + int seg_num; + int newwin; + + __be64 tid; + u32 src_qp; + u16 slid; + u8 mgmt_class; + u8 class_version; + u8 method; +}; + +static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + atomic_dec(&rmpp_recv->refcount); + wait_event(rmpp_recv->wait, !atomic_read(&rmpp_recv->refcount)); + ib_destroy_ah(rmpp_recv->ah); + kfree(rmpp_recv); +} + +void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) +{ + struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + cancel_delayed_work(&rmpp_recv->timeout_work); + cancel_delayed_work(&rmpp_recv->cleanup_work); + } + spin_unlock_irqrestore(&agent->lock, flags); + + flush_workqueue(agent->qp_info->port_priv->wq); + + list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, + &agent->rmpp_list, list) { + list_del(&rmpp_recv->list); + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); + destroy_rmpp_recv(rmpp_recv); + } +} + +static int data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return offsetof(struct ib_sa_mad, data); + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return offsetof(struct ib_vendor_mad, data); + else + return offsetof(struct ib_rmpp_mad, data); +} + +static void format_ack(struct ib_rmpp_mad *ack, + struct ib_rmpp_mad *data, + struct mad_rmpp_recv *rmpp_recv) +{ + unsigned long flags; + + memcpy(&ack->mad_hdr, &data->mad_hdr, + data_offset(data->mad_hdr.mgmt_class)); + + ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; + ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + rmpp_recv->last_ack = rmpp_recv->seg_num; + ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); + ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +} + +static void ack_recv(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + int hdr_len, ret; + + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); + msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, rmpp_recv->ah, 1, + hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); + if (!msg) + return; + + format_ack((struct ib_rmpp_mad *) msg->mad, + (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); + ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, + &bad_send_wr); + if (ret) + ib_free_send_mad(msg); +} + +static int alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_send_buf *m; + struct ib_ah *ah; + int hdr_len; + + ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, + recv_wc->recv_buf.grh, agent->port_num); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); + m = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, ah, 1, hdr_len, + sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + *msg = m; + return 0; +} + +static void free_msg(struct ib_mad_send_buf *msg) +{ + ib_destroy_ah(msg->send_wr.wr.ud.ah); + ib_free_send_mad(msg); +} + +static void nack_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc, u8 rmpp_status) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + struct ib_send_wr *bad_send_wr; + int ret; + + ret = alloc_response_msg(&agent->agent, recv_wc, &msg); + if (ret) + return; + + rmpp_mad = (struct ib_rmpp_mad *) msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, + data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status; + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = 0; + + ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); + if (ret) + free_msg(msg); +} + +static void recv_timeout_handler(void *data) +{ + struct mad_rmpp_recv *rmpp_recv = data; + struct ib_mad_recv_wc *rmpp_wc; + unsigned long flags; + + spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state != RMPP_STATE_ACTIVE) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } + rmpp_recv->state = RMPP_STATE_TIMEOUT; + list_del(&rmpp_recv->list); + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + + rmpp_wc = rmpp_recv->rmpp_wc; + nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L); + destroy_rmpp_recv(rmpp_recv); + ib_free_recv_mad(rmpp_wc); +} + +static void recv_cleanup_handler(void *data) +{ + struct mad_rmpp_recv *rmpp_recv = data; + unsigned long flags; + + spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + list_del(&rmpp_recv->list); + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + destroy_rmpp_recv(rmpp_recv); +} + +static struct mad_rmpp_recv * +create_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_hdr *mad_hdr; + + rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL); + if (!rmpp_recv) + return NULL; + + rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd, + mad_recv_wc->wc, + mad_recv_wc->recv_buf.grh, + agent->agent.port_num); + if (IS_ERR(rmpp_recv->ah)) + goto error; + + rmpp_recv->agent = agent; + init_waitqueue_head(&rmpp_recv->wait); + INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv); + INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv); + spin_lock_init(&rmpp_recv->lock); + rmpp_recv->state = RMPP_STATE_ACTIVE; + atomic_set(&rmpp_recv->refcount, 1); + + rmpp_recv->rmpp_wc = mad_recv_wc; + rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; + rmpp_recv->newwin = 1; + rmpp_recv->seg_num = 1; + rmpp_recv->last_ack = 0; + + mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; + rmpp_recv->tid = mad_hdr->tid; + rmpp_recv->src_qp = mad_recv_wc->wc->src_qp; + rmpp_recv->slid = mad_recv_wc->wc->slid; + rmpp_recv->mgmt_class = mad_hdr->mgmt_class; + rmpp_recv->class_version = mad_hdr->class_version; + rmpp_recv->method = mad_hdr->method; + return rmpp_recv; + +error: kfree(rmpp_recv); + return NULL; +} + +static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + if (atomic_dec_and_test(&rmpp_recv->refcount)) + wake_up(&rmpp_recv->wait); +} + +static struct mad_rmpp_recv * +find_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; + + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->tid == mad_hdr->tid && + rmpp_recv->src_qp == mad_recv_wc->wc->src_qp && + rmpp_recv->slid == mad_recv_wc->wc->slid && + rmpp_recv->mgmt_class == mad_hdr->mgmt_class && + rmpp_recv->class_version == mad_hdr->class_version && + rmpp_recv->method == mad_hdr->method) + return rmpp_recv; + } + return NULL; +} + +static struct mad_rmpp_recv * +acquire_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv) + atomic_inc(&rmpp_recv->refcount); + spin_unlock_irqrestore(&agent->lock, flags); + return rmpp_recv; +} + +static struct mad_rmpp_recv * +insert_rmpp_recv(struct ib_mad_agent_private *agent, + struct mad_rmpp_recv *rmpp_recv) +{ + struct mad_rmpp_recv *cur_rmpp_recv; + + cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc); + if (!cur_rmpp_recv) + list_add_tail(&rmpp_recv->list, &agent->rmpp_list); + + return cur_rmpp_recv; +} + +static inline int get_last_flag(struct ib_mad_recv_buf *seg) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *) seg->mad; + return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST; +} + +static inline int get_seg_num(struct ib_mad_recv_buf *seg) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *) seg->mad; + return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); +} + +static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list, + struct ib_mad_recv_buf *seg) +{ + if (seg->list.next == rmpp_list) + return NULL; + + return container_of(seg->list.next, struct ib_mad_recv_buf, list); +} + +static inline int window_size(struct ib_mad_agent_private *agent) +{ + return max(agent->qp_info->recv_queue.max_active >> 3, 1); +} + +static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, + int seg_num) +{ + struct ib_mad_recv_buf *seg_buf; + int cur_seg_num; + + list_for_each_entry_reverse(seg_buf, rmpp_list, list) { + cur_seg_num = get_seg_num(seg_buf); + if (seg_num > cur_seg_num) + return seg_buf; + if (seg_num == cur_seg_num) + break; + } + return NULL; +} + +static void update_seg_num(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_buf *new_buf) +{ + struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list; + + while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) { + rmpp_recv->cur_seg_buf = new_buf; + rmpp_recv->seg_num++; + new_buf = get_next_seg(rmpp_list, new_buf); + } +} + +static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_rmpp_mad *rmpp_mad; + int hdr_size, data_size, pad; + + rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; + + hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class); + data_size = sizeof(struct ib_rmpp_mad) - hdr_size; + pad = data_size - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > data_size || pad < 0) + pad = 0; + + return hdr_size + rmpp_recv->seg_num * data_size - pad; +} + +static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_mad_recv_wc *rmpp_wc; + + ack_recv(rmpp_recv, rmpp_recv->rmpp_wc); + if (rmpp_recv->seg_num > 1) + cancel_delayed_work(&rmpp_recv->timeout_work); + + rmpp_wc = rmpp_recv->rmpp_wc; + rmpp_wc->mad_len = get_mad_len(rmpp_recv); + /* 10 seconds until we can find the packet lifetime */ + queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq, + &rmpp_recv->cleanup_work, msecs_to_jiffies(10000)); + return rmpp_wc; +} + +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf) +{ + struct ib_mad_recv_buf *seg_buf; + struct ib_rmpp_mad *rmpp_mad; + void *data; + int size, len, offset; + u8 flags; + + len = mad_recv_wc->mad_len; + if (len <= sizeof(struct ib_mad)) { + memcpy(buf, mad_recv_wc->recv_buf.mad, len); + return; + } + + offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class); + + list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) { + rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad; + flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr); + + if (flags & IB_MGMT_RMPP_FLAG_FIRST) { + data = rmpp_mad; + size = sizeof(*rmpp_mad); + } else { + data = (void *) rmpp_mad + offset; + if (flags & IB_MGMT_RMPP_FLAG_LAST) + size = len; + else + size = sizeof(*rmpp_mad) - offset; + } + + memcpy(buf, data, size); + len -= size; + buf += size; + } +} +EXPORT_SYMBOL(ib_coalesce_recv_mad); + +static struct ib_mad_recv_wc * +continue_rmpp(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_recv_buf *prev_buf; + struct ib_mad_recv_wc *done_wc; + int seg_num; + unsigned long flags; + + rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc); + if (!rmpp_recv) + goto drop1; + + seg_num = get_seg_num(&mad_recv_wc->recv_buf); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) || + (seg_num > rmpp_recv->newwin)) + goto drop3; + + if ((seg_num <= rmpp_recv->last_ack) || + (rmpp_recv->state == RMPP_STATE_COMPLETE)) { + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + ack_recv(rmpp_recv, mad_recv_wc); + goto drop2; + } + + prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num); + if (!prev_buf) + goto drop3; + + done_wc = NULL; + list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list); + if (rmpp_recv->cur_seg_buf == prev_buf) { + update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf); + if (get_last_flag(rmpp_recv->cur_seg_buf)) { + rmpp_recv->state = RMPP_STATE_COMPLETE; + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + done_wc = complete_rmpp(rmpp_recv); + goto out; + } else if (rmpp_recv->seg_num == rmpp_recv->newwin) { + rmpp_recv->newwin += window_size(agent); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + ack_recv(rmpp_recv, mad_recv_wc); + goto out; + } + } + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +out: + deref_rmpp_recv(rmpp_recv); + return done_wc; + +drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags); +drop2: deref_rmpp_recv(rmpp_recv); +drop1: ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static struct ib_mad_recv_wc * +start_rmpp(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + unsigned long flags; + + rmpp_recv = create_rmpp_recv(agent, mad_recv_wc); + if (!rmpp_recv) { + ib_free_recv_mad(mad_recv_wc); + return NULL; + } + + spin_lock_irqsave(&agent->lock, flags); + if (insert_rmpp_recv(agent, rmpp_recv)) { + spin_unlock_irqrestore(&agent->lock, flags); + /* duplicate first MAD */ + destroy_rmpp_recv(rmpp_recv); + return continue_rmpp(agent, mad_recv_wc); + } + atomic_inc(&rmpp_recv->refcount); + + if (get_last_flag(&mad_recv_wc->recv_buf)) { + rmpp_recv->state = RMPP_STATE_COMPLETE; + spin_unlock_irqrestore(&agent->lock, flags); + complete_rmpp(rmpp_recv); + } else { + spin_unlock_irqrestore(&agent->lock, flags); + /* 40 seconds until we can find the packet lifetimes */ + queue_delayed_work(agent->qp_info->port_priv->wq, + &rmpp_recv->timeout_work, + msecs_to_jiffies(40000)); + rmpp_recv->newwin += window_size(agent); + ack_recv(rmpp_recv, mad_recv_wc); + mad_recv_wc = NULL; + } + deref_rmpp_recv(rmpp_recv); + return mad_recv_wc; +} + +static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr) +{ + return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset + + (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) * + (mad_send_wr->seg_num - 1); +} + +static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int timeout; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); + + if (mad_send_wr->seg_num == 1) { + rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; + rmpp_mad->rmpp_hdr.paylen_newwin = + cpu_to_be32(mad_send_wr->total_seg * + (sizeof(struct ib_rmpp_mad) - + offsetof(struct ib_rmpp_mad, data))); + mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); + } else { + mad_send_wr->send_wr.num_sge = 2; + mad_send_wr->sg_list[0].length = mad_send_wr->data_offset; + mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr); + mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) - + mad_send_wr->data_offset; + mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey; + } + + if (mad_send_wr->seg_num == mad_send_wr->total_seg) { + rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; + rmpp_mad->rmpp_hdr.paylen_newwin = + cpu_to_be32(sizeof(struct ib_rmpp_mad) - + offsetof(struct ib_rmpp_mad, data) - + mad_send_wr->pad); + } + + /* 2 seconds for an ACK until we can find the packet lifetime */ + timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; + if (!timeout || timeout > 2000) + mad_send_wr->timeout = msecs_to_jiffies(2000); + mad_send_wr->seg_num++; + return ib_send_mad(mad_send_wr); +} + +static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, + u8 rmpp_status) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc wc; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + mad_send_wr = ib_find_send_mad(agent, tid); + if (!mad_send_wr) + goto out; /* Unmatched send */ + + if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) + goto out; /* Send is already done */ + + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&agent->lock, flags); + + wc.status = IB_WC_REM_ABORT_ERR; + wc.vendor_err = rmpp_status; + wc.wr_id = mad_send_wr->wr_id; + ib_mad_complete_send_wr(mad_send_wr, &wc); + return; +out: + spin_unlock_irqrestore(&agent->lock, flags); +} + +static void process_rmpp_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_rmpp_mad *rmpp_mad; + unsigned long flags; + int seg_num, newwin, ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + if (rmpp_mad->rmpp_hdr.rmpp_status) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + return; + } + + seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); + newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (newwin < seg_num) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_W2S); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); + return; + } + + spin_lock_irqsave(&agent->lock, flags); + mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid); + if (!mad_send_wr) + goto out; /* Unmatched ACK */ + + if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) + goto out; /* Send is already done */ + + if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) { + spin_unlock_irqrestore(&agent->lock, flags); + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_S2B); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); + return; + } + + if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack) + goto out; /* Old ACK */ + + if (seg_num > mad_send_wr->last_ack) { + mad_send_wr->last_ack = seg_num; + mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; + } + mad_send_wr->newwin = newwin; + if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + /* If no response is expected, the ACK completes the send */ + if (!mad_send_wr->send_wr.wr.ud.timeout_ms) { + struct ib_mad_send_wc wc; + + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&agent->lock, flags); + + wc.status = IB_WC_SUCCESS; + wc.vendor_err = 0; + wc.wr_id = mad_send_wr->wr_id; + ib_mad_complete_send_wr(mad_send_wr, &wc); + return; + } + if (mad_send_wr->refcount == 1) + ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> + send_wr.wr.ud.timeout_ms); + } else if (mad_send_wr->refcount == 1 && + mad_send_wr->seg_num < mad_send_wr->newwin && + mad_send_wr->seg_num <= mad_send_wr->total_seg) { + /* Send failure will just result in a timeout/retry */ + ret = send_next_seg(mad_send_wr); + if (ret) + goto out; + + mad_send_wr->refcount++; + list_del(&mad_send_wr->agent_list); + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->send_list); + } +out: + spin_unlock_irqrestore(&agent->lock, flags); +} + +static struct ib_mad_recv_wc * +process_rmpp_data(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_hdr *rmpp_hdr; + u8 rmpp_status; + + rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr; + + if (rmpp_hdr->rmpp_status) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS; + goto bad; + } + + if (rmpp_hdr->seg_num == __constant_htonl(1)) { + if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; + goto bad; + } + return start_rmpp(agent, mad_recv_wc); + } else { + if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; + goto bad; + } + return continue_rmpp(agent, mad_recv_wc); + } +bad: + nack_recv(agent, mad_recv_wc, rmpp_status); + ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static void process_rmpp_stop(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + + if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + } else + abort_send(agent, rmpp_mad->mad_hdr.tid, + rmpp_mad->rmpp_hdr.rmpp_status); +} + +static void process_rmpp_abort(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + + if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN || + rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + } else + abort_send(agent, rmpp_mad->mad_hdr.tid, + rmpp_mad->rmpp_hdr.rmpp_status); +} + +struct ib_mad_recv_wc * +ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE)) + return mad_recv_wc; + + if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) { + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_UNV); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); + goto out; + } + + switch (rmpp_mad->rmpp_hdr.rmpp_type) { + case IB_MGMT_RMPP_TYPE_DATA: + return process_rmpp_data(agent, mad_recv_wc); + case IB_MGMT_RMPP_TYPE_ACK: + process_rmpp_ack(agent, mad_recv_wc); + break; + case IB_MGMT_RMPP_TYPE_STOP: + process_rmpp_stop(agent, mad_recv_wc); + break; + case IB_MGMT_RMPP_TYPE_ABORT: + process_rmpp_abort(agent, mad_recv_wc); + break; + default: + abort_send(agent, rmpp_mad->mad_hdr.tid, + IB_MGMT_RMPP_STATUS_BADT); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); + break; + } +out: + ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int i, total_len, ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) + return IB_RMPP_RESULT_INTERNAL; + + if (mad_send_wr->send_wr.num_sge > 1) + return -EINVAL; /* TODO: support num_sge > 1 */ + + mad_send_wr->seg_num = 1; + mad_send_wr->newwin = 1; + mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class); + + total_len = 0; + for (i = 0; i < mad_send_wr->send_wr.num_sge; i++) + total_len += mad_send_wr->send_wr.sg_list[i].length; + + mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / + (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); + mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - + be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + + /* We need to wait for the final ACK even if there isn't a response */ + mad_send_wr->refcount += (mad_send_wr->timeout == 0); + ret = send_next_seg(mad_send_wr); + if (!ret) + return IB_RMPP_RESULT_CONSUMED; + return ret; +} + +int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + struct ib_mad_send_buf *msg; + int ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { + msg = (struct ib_mad_send_buf *) (unsigned long) + mad_send_wc->wr_id; + if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK) + ib_free_send_mad(msg); + else + free_msg(msg); + return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ + } + + if (mad_send_wc->status != IB_WC_SUCCESS || + mad_send_wr->status != IB_WC_SUCCESS) + return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */ + + if (!mad_send_wr->timeout) + return IB_RMPP_RESULT_PROCESSED; /* Response received */ + + if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + mad_send_wr->timeout = + msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); + return IB_RMPP_RESULT_PROCESSED; /* Send done */ + } + + if (mad_send_wr->seg_num > mad_send_wr->newwin || + mad_send_wr->seg_num > mad_send_wr->total_seg) + return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ + + ret = send_next_seg(mad_send_wr); + if (ret) { + mad_send_wc->status = IB_WC_GENERAL_ERR; + return IB_RMPP_RESULT_PROCESSED; + } + return IB_RMPP_RESULT_CONSUMED; +} + +int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ + + if (mad_send_wr->last_ack == mad_send_wr->total_seg) + return IB_RMPP_RESULT_PROCESSED; + + mad_send_wr->seg_num = mad_send_wr->last_ack + 1; + ret = send_next_seg(mad_send_wr); + if (ret) + return IB_RMPP_RESULT_PROCESSED; + + return IB_RMPP_RESULT_CONSUMED; +} diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h new file mode 100644 index 000000000000..c4924dfb8e75 --- /dev/null +++ b/drivers/infiniband/core/mad_rmpp.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2005 Intel Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $ + */ + +#ifndef __MAD_RMPP_H__ +#define __MAD_RMPP_H__ + +enum { + IB_RMPP_RESULT_PROCESSED, + IB_RMPP_RESULT_CONSUMED, + IB_RMPP_RESULT_INTERNAL, + IB_RMPP_RESULT_UNHANDLED +}; + +int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr); + +struct ib_mad_recv_wc * +ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc); + +int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc); + +void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); + +int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); + +#endif /* __MAD_RMPP_H__ */ diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 5f15feffeae2..35df5010e723 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,7 +33,7 @@ * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <ib_pack.h> +#include <rdma/ib_pack.h> static u64 value_read(int offset, int size, void *structure) { @@ -96,7 +97,7 @@ void ib_pack(const struct ib_field *desc, else val = 0; - mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift); + mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift); addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); } else { @@ -176,7 +177,7 @@ void ib_unpack(const struct ib_field *desc, __be64 *addr; shift = 64 - desc[i].offset_bits - desc[i].size_bits; - mask = ((1ull << desc[i].size_bits) - 1) << shift; + mask = (~0ull >> (64 - desc[i].size_bits)) << shift; addr = (__be64 *) buf + desc[i].offset_words; val = (be64_to_cpup(addr) & mask) >> shift; value_write(desc[i].struct_offset_bytes, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 276e1a53010d..126ac80db7b8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $ */ #include <linux/module.h> @@ -43,33 +44,13 @@ #include <linux/kref.h> #include <linux/idr.h> -#include <ib_pack.h> -#include <ib_sa.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_sa.h> MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); MODULE_LICENSE("Dual BSD/GPL"); -/* - * These two structures must be packed because they have 64-bit fields - * that are only 32-bit aligned. 64-bit architectures will lay them - * out wrong otherwise. (And unfortunately they are sent on the wire - * so we can't change the layout) - */ -struct ib_sa_hdr { - u64 sm_key; - u16 attr_offset; - u16 reserved; - ib_sa_comp_mask comp_mask; -} __attribute__ ((packed)); - -struct ib_sa_mad { - struct ib_mad_hdr mad_hdr; - struct ib_rmpp_hdr rmpp_hdr; - struct ib_sa_hdr sa_hdr; - u8 data[200]; -} __attribute__ ((packed)); - struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; @@ -77,7 +58,6 @@ struct ib_sa_sm_ah { struct ib_sa_port { struct ib_mad_agent *agent; - struct ib_mr *mr; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; spinlock_t ah_lock; @@ -100,6 +80,12 @@ struct ib_sa_query { int id; }; +struct ib_sa_service_query { + void (*callback)(int, struct ib_sa_service_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_path_query { void (*callback)(int, struct ib_sa_path_rec *, void *); void *context; @@ -341,6 +327,54 @@ static const struct ib_field mcmember_rec_table[] = { .size_bits = 23 }, }; +#define SERVICE_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ + .field_name = "sa_service_rec:" #field + +static const struct ib_field service_rec_table[] = { + { SERVICE_REC_FIELD(id), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 64 }, + { SERVICE_REC_FIELD(gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(pkey), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 16 }, + { SERVICE_REC_FIELD(lease), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 32 }, + { SERVICE_REC_FIELD(key), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(name), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 64*8 }, + { SERVICE_REC_FIELD(data8), + .offset_words = 28, + .offset_bits = 0, + .size_bits = 16*8 }, + { SERVICE_REC_FIELD(data16), + .offset_words = 32, + .offset_bits = 0, + .size_bits = 8*16 }, + { SERVICE_REC_FIELD(data32), + .offset_words = 36, + .offset_bits = 0, + .size_bits = 4*32 }, + { SERVICE_REC_FIELD(data64), + .offset_words = 40, + .offset_bits = 0, + .size_bits = 2*64 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -463,7 +497,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms) .mad_hdr = &query->mad->mad_hdr, .remote_qpn = 1, .remote_qkey = IB_QP1_QKEY, - .timeout_ms = timeout_ms + .timeout_ms = timeout_ms, } } }; @@ -492,7 +526,7 @@ retry: sizeof (struct ib_sa_mad), DMA_TO_DEVICE); gather_list.length = sizeof (struct ib_sa_mad); - gather_list.lkey = port->mr->lkey; + gather_list.lkey = port->agent->mr->lkey; pci_unmap_addr_set(query, mapping, gather_list.addr); ret = ib_post_send_mad(port->agent, &wr, &bad_wr); @@ -507,7 +541,13 @@ retry: spin_unlock_irqrestore(&idr_lock, flags); } - return ret; + /* + * It's not safe to dereference query any more, because the + * send may already have completed and freed the query in + * another context. So use wr.wr_id, which has a copy of the + * query's id. + */ + return ret ? ret : wr.wr_id; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, @@ -560,7 +600,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, int gfp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -598,17 +638,126 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, rec, query->sa_query.mad->data); *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms); - if (ret) { + if (ret < 0) { *sa_query = NULL; kfree(query->sa_query.mad); kfree(query); } - return ret ? ret : query->sa_query.id; + return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); +static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_service_query *query = + container_of(sa_query, struct ib_sa_service_query, sa_query); + + if (mad) { + struct ib_sa_service_rec rec; + + ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) +{ + kfree(sa_query->mad); + kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); +} + +/** + * ib_sa_service_rec_query - Start Service Record operation + * @device:device to send request on + * @port_num: port number to send request on + * @method:SA method - should be get, set, or delete + * @rec:Service Record to send in request + * @comp_mask:component mask to send in request + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when request completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @sa_query:request context, used to cancel request + * + * Send a Service Record set/get/delete to the SA to register, + * unregister or query a service record. + * The callback function will be called when the request completes (or + * fails); status is 0 for a successful response, -EINTR if the query + * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error + * occurred sending the query. The resp parameter of the callback is + * only valid if status is 0. + * + * If the return value of ib_sa_service_rec_query() is negative, it is an + * error code. Otherwise it is a request ID that can be used to cancel + * the query. + */ +int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_service_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; + struct ib_mad_agent *agent = port->agent; + int ret; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) + return -EINVAL; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); + if (!query->sa_query.mad) { + kfree(query); + return -ENOMEM; + } + + query->callback = callback; + query->context = context; + + init_mad(query->sa_query.mad, agent); + + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + query->sa_query.port = port; + query->sa_query.mad->mad_hdr.method = method; + query->sa_query.mad->mad_hdr.attr_id = + cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), + rec, query->sa_query.mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms); + if (ret < 0) { + *sa_query = NULL; + kfree(query->sa_query.mad); + kfree(query); + } + + return ret; +} +EXPORT_SYMBOL(ib_sa_service_rec_query); + static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) @@ -636,7 +785,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, int gfp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), @@ -674,14 +823,15 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, rec, query->sa_query.mad->data); *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms); - if (ret) { + if (ret < 0) { *sa_query = NULL; kfree(query->sa_query.mad); kfree(query); } - return ret ? ret : query->sa_query.id; + return ret; } EXPORT_SYMBOL(ib_sa_mcmember_rec_query); @@ -772,7 +922,6 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { - sa_dev->port[i].mr = NULL; sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; spin_lock_init(&sa_dev->port[i].ah_lock); @@ -784,13 +933,6 @@ static void ib_sa_add_one(struct ib_device *device) if (IS_ERR(sa_dev->port[i].agent)) goto err; - sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(sa_dev->port[i].mr)) { - ib_unregister_mad_agent(sa_dev->port[i].agent); - goto err; - } - INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah, &sa_dev->port[i]); } @@ -814,10 +956,8 @@ static void ib_sa_add_one(struct ib_device *device) return; err: - while (--i >= 0) { - ib_dereg_mr(sa_dev->port[i].mr); + while (--i >= 0) ib_unregister_mad_agent(sa_dev->port[i].agent); - } kfree(sa_dev); diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index b4b284324a33..35852e794e26 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -1,9 +1,10 @@ /* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,7 +37,7 @@ * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $ */ -#include <ib_smi.h> +#include <rdma/ib_smi.h> #include "smi.h" /* diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 90d51b179abe..fae1c2dcee51 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,7 +36,7 @@ #include "core_priv.h" -#include <ib_mad.h> +#include <rdma/ib_mad.h> struct ib_port { struct kobject kobj; @@ -253,14 +255,14 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, return ret; return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((u16 *) gid.raw)[0]), - be16_to_cpu(((u16 *) gid.raw)[1]), - be16_to_cpu(((u16 *) gid.raw)[2]), - be16_to_cpu(((u16 *) gid.raw)[3]), - be16_to_cpu(((u16 *) gid.raw)[4]), - be16_to_cpu(((u16 *) gid.raw)[5]), - be16_to_cpu(((u16 *) gid.raw)[6]), - be16_to_cpu(((u16 *) gid.raw)[7])); + be16_to_cpu(((__be16 *) gid.raw)[0]), + be16_to_cpu(((__be16 *) gid.raw)[1]), + be16_to_cpu(((__be16 *) gid.raw)[2]), + be16_to_cpu(((__be16 *) gid.raw)[3]), + be16_to_cpu(((__be16 *) gid.raw)[4]), + be16_to_cpu(((__be16 *) gid.raw)[5]), + be16_to_cpu(((__be16 *) gid.raw)[6]), + be16_to_cpu(((__be16 *) gid.raw)[7])); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, @@ -332,11 +334,11 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, break; case 16: ret = sprintf(buf, "%u\n", - be16_to_cpup((u16 *)(out_mad->data + 40 + offset / 8))); + be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8))); break; case 32: ret = sprintf(buf, "%u\n", - be32_to_cpup((u32 *)(out_mad->data + 40 + offset / 8))); + be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8))); break; default: ret = 0; @@ -598,10 +600,10 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) return ret; return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((u16 *) &attr.sys_image_guid)[0]), - be16_to_cpu(((u16 *) &attr.sys_image_guid)[1]), - be16_to_cpu(((u16 *) &attr.sys_image_guid)[2]), - be16_to_cpu(((u16 *) &attr.sys_image_guid)[3])); + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]), + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]), + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]), + be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3])); } static ssize_t show_node_guid(struct class_device *cdev, char *buf) @@ -615,10 +617,10 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) return ret; return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((u16 *) &attr.node_guid)[0]), - be16_to_cpu(((u16 *) &attr.node_guid)[1]), - be16_to_cpu(((u16 *) &attr.node_guid)[2]), - be16_to_cpu(((u16 *) &attr.node_guid)[3])); + be16_to_cpu(((__be16 *) &attr.node_guid)[0]), + be16_to_cpu(((__be16 *) &attr.node_guid)[1]), + be16_to_cpu(((__be16 *) &attr.node_guid)[2]), + be16_to_cpu(((__be16 *) &attr.node_guid)[3])); } static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c new file mode 100644 index 000000000000..79595826ccc7 --- /dev/null +++ b/drivers/infiniband/core/ucm.c @@ -0,0 +1,1237 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $ + */ +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/cdev.h> + +#include <asm/uaccess.h> + +#include "ucm.h" + +MODULE_AUTHOR("Libor Michalek"); +MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); +MODULE_LICENSE("Dual BSD/GPL"); + +static int ucm_debug_level; + +module_param_named(debug_level, ucm_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + +enum { + IB_UCM_MAJOR = 231, + IB_UCM_MINOR = 255 +}; + +#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) + +#define PFX "UCM: " + +#define ucm_dbg(format, arg...) \ + do { \ + if (ucm_debug_level > 0) \ + printk(KERN_DEBUG PFX format, ## arg); \ + } while (0) + +static struct semaphore ctx_id_mutex; +static struct idr ctx_id_table; +static int ctx_id_rover = 0; + +static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) +{ + struct ib_ucm_context *ctx; + + down(&ctx_id_mutex); + ctx = idr_find(&ctx_id_table, id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + atomic_inc(&ctx->ref); + up(&ctx_id_mutex); + + return ctx; +} + +static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) +{ + if (atomic_dec_and_test(&ctx->ref)) + wake_up(&ctx->wait); +} + +static ssize_t ib_ucm_destroy_ctx(struct ib_ucm_file *file, int id) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_event *uevent; + + down(&ctx_id_mutex); + ctx = idr_find(&ctx_id_table, id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + idr_remove(&ctx_id_table, ctx->id); + up(&ctx_id_mutex); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + atomic_dec(&ctx->ref); + wait_event(ctx->wait, !atomic_read(&ctx->ref)); + + /* No new events will be generated after destroying the cm_id. */ + if (!IS_ERR(ctx->cm_id)) + ib_destroy_cm_id(ctx->cm_id); + + /* Cleanup events not yet reported to the user. */ + down(&file->mutex); + list_del(&ctx->file_list); + while (!list_empty(&ctx->events)) { + + uevent = list_entry(ctx->events.next, + struct ib_ucm_event, ctx_list); + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + + /* clear incoming connections. */ + if (uevent->cm_id) + ib_destroy_cm_id(uevent->cm_id); + + kfree(uevent); + } + up(&file->mutex); + + kfree(ctx); + return 0; +} + +static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) +{ + struct ib_ucm_context *ctx; + int result; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + atomic_set(&ctx->ref, 1); + init_waitqueue_head(&ctx->wait); + ctx->file = file; + + INIT_LIST_HEAD(&ctx->events); + + list_add_tail(&ctx->file_list, &file->ctxs); + + ctx_id_rover = (ctx_id_rover + 1) & INT_MAX; +retry: + result = idr_pre_get(&ctx_id_table, GFP_KERNEL); + if (!result) + goto error; + + down(&ctx_id_mutex); + result = idr_get_new_above(&ctx_id_table, ctx, ctx_id_rover, &ctx->id); + up(&ctx_id_mutex); + + if (result == -EAGAIN) + goto retry; + if (result) + goto error; + + ucm_dbg("Allocated CM ID <%d>\n", ctx->id); + + return ctx; +error: + list_del(&ctx->file_list); + kfree(ctx); + + return NULL; +} +/* + * Event portion of the API, handle CM events + * and allow event polling. + */ +static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, + struct ib_sa_path_rec *kpath) +{ + if (!kpath || !upath) + return; + + memcpy(upath->dgid, kpath->dgid.raw, sizeof *upath->dgid); + memcpy(upath->sgid, kpath->sgid.raw, sizeof *upath->sgid); + + upath->dlid = kpath->dlid; + upath->slid = kpath->slid; + upath->raw_traffic = kpath->raw_traffic; + upath->flow_label = kpath->flow_label; + upath->hop_limit = kpath->hop_limit; + upath->traffic_class = kpath->traffic_class; + upath->reversible = kpath->reversible; + upath->numb_path = kpath->numb_path; + upath->pkey = kpath->pkey; + upath->sl = kpath->sl; + upath->mtu_selector = kpath->mtu_selector; + upath->mtu = kpath->mtu; + upath->rate_selector = kpath->rate_selector; + upath->rate = kpath->rate; + upath->packet_life_time = kpath->packet_life_time; + upath->preference = kpath->preference; + + upath->packet_life_time_selector = + kpath->packet_life_time_selector; +} + +static void ib_ucm_event_req_get(struct ib_ucm_context *ctx, + struct ib_ucm_req_event_resp *ureq, + struct ib_cm_req_event_param *kreq) +{ + ureq->listen_id = ctx->id; + + ureq->remote_ca_guid = kreq->remote_ca_guid; + ureq->remote_qkey = kreq->remote_qkey; + ureq->remote_qpn = kreq->remote_qpn; + ureq->qp_type = kreq->qp_type; + ureq->starting_psn = kreq->starting_psn; + ureq->responder_resources = kreq->responder_resources; + ureq->initiator_depth = kreq->initiator_depth; + ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; + ureq->flow_control = kreq->flow_control; + ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; + ureq->retry_count = kreq->retry_count; + ureq->rnr_retry_count = kreq->rnr_retry_count; + ureq->srq = kreq->srq; + + ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); + ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); +} + +static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, + struct ib_cm_rep_event_param *krep) +{ + urep->remote_ca_guid = krep->remote_ca_guid; + urep->remote_qkey = krep->remote_qkey; + urep->remote_qpn = krep->remote_qpn; + urep->starting_psn = krep->starting_psn; + urep->responder_resources = krep->responder_resources; + urep->initiator_depth = krep->initiator_depth; + urep->target_ack_delay = krep->target_ack_delay; + urep->failover_accepted = krep->failover_accepted; + urep->flow_control = krep->flow_control; + urep->rnr_retry_count = krep->rnr_retry_count; + urep->srq = krep->srq; +} + +static void ib_ucm_event_sidr_req_get(struct ib_ucm_context *ctx, + struct ib_ucm_sidr_req_event_resp *ureq, + struct ib_cm_sidr_req_event_param *kreq) +{ + ureq->listen_id = ctx->id; + ureq->pkey = kreq->pkey; +} + +static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, + struct ib_cm_sidr_rep_event_param *krep) +{ + urep->status = krep->status; + urep->qkey = krep->qkey; + urep->qpn = krep->qpn; +}; + +static int ib_ucm_event_process(struct ib_ucm_context *ctx, + struct ib_cm_event *evt, + struct ib_ucm_event *uvt) +{ + void *info = NULL; + + switch (evt->event) { + case IB_CM_REQ_RECEIVED: + ib_ucm_event_req_get(ctx, &uvt->resp.u.req_resp, + &evt->param.req_rcvd); + uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; + uvt->resp.present = IB_UCM_PRES_PRIMARY; + uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? + IB_UCM_PRES_ALTERNATE : 0); + break; + case IB_CM_REP_RECEIVED: + ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, + &evt->param.rep_rcvd); + uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + break; + case IB_CM_RTU_RECEIVED: + uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + break; + case IB_CM_DREQ_RECEIVED: + uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + break; + case IB_CM_DREP_RECEIVED: + uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + break; + case IB_CM_MRA_RECEIVED: + uvt->resp.u.mra_resp.timeout = + evt->param.mra_rcvd.service_timeout; + uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; + break; + case IB_CM_REJ_RECEIVED: + uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; + uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.rej_rcvd.ari_length; + info = evt->param.rej_rcvd.ari; + break; + case IB_CM_LAP_RECEIVED: + ib_ucm_event_path_get(&uvt->resp.u.lap_resp.path, + evt->param.lap_rcvd.alternate_path); + uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; + uvt->resp.present = IB_UCM_PRES_ALTERNATE; + break; + case IB_CM_APR_RECEIVED: + uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; + uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.apr_rcvd.info_len; + info = evt->param.apr_rcvd.apr_info; + break; + case IB_CM_SIDR_REQ_RECEIVED: + ib_ucm_event_sidr_req_get(ctx, &uvt->resp.u.sidr_req_resp, + &evt->param.sidr_req_rcvd); + uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; + break; + case IB_CM_SIDR_REP_RECEIVED: + ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, + &evt->param.sidr_rep_rcvd); + uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.sidr_rep_rcvd.info_len; + info = evt->param.sidr_rep_rcvd.info; + break; + default: + uvt->resp.u.send_status = evt->param.send_status; + break; + } + + if (uvt->data_len) { + uvt->data = kmalloc(uvt->data_len, GFP_KERNEL); + if (!uvt->data) + goto err1; + + memcpy(uvt->data, evt->private_data, uvt->data_len); + uvt->resp.present |= IB_UCM_PRES_DATA; + } + + if (uvt->info_len) { + uvt->info = kmalloc(uvt->info_len, GFP_KERNEL); + if (!uvt->info) + goto err2; + + memcpy(uvt->info, info, uvt->info_len); + uvt->resp.present |= IB_UCM_PRES_INFO; + } + return 0; + +err2: + kfree(uvt->data); +err1: + return -ENOMEM; +} + +static int ib_ucm_event_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ib_ucm_event *uevent; + struct ib_ucm_context *ctx; + int result = 0; + int id; + + ctx = cm_id->context; + + if (event->event == IB_CM_REQ_RECEIVED || + event->event == IB_CM_SIDR_REQ_RECEIVED) + id = IB_UCM_CM_ID_INVALID; + else + id = ctx->id; + + uevent = kmalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) + goto err1; + + memset(uevent, 0, sizeof(*uevent)); + uevent->resp.id = id; + uevent->resp.event = event->event; + + result = ib_ucm_event_process(ctx, event, uevent); + if (result) + goto err2; + + uevent->ctx = ctx; + uevent->cm_id = (id == IB_UCM_CM_ID_INVALID) ? cm_id : NULL; + + down(&ctx->file->mutex); + list_add_tail(&uevent->file_list, &ctx->file->events); + list_add_tail(&uevent->ctx_list, &ctx->events); + wake_up_interruptible(&ctx->file->poll_wait); + up(&ctx->file->mutex); + return 0; + +err2: + kfree(uevent); +err1: + /* Destroy new cm_id's */ + return (id == IB_UCM_CM_ID_INVALID); +} + +static ssize_t ib_ucm_event(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_event_get cmd; + struct ib_ucm_event *uevent = NULL; + int result = 0; + DEFINE_WAIT(wait); + + if (out_len < sizeof(struct ib_ucm_event_resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + /* + * wait + */ + down(&file->mutex); + + while (list_empty(&file->events)) { + + if (file->filp->f_flags & O_NONBLOCK) { + result = -EAGAIN; + break; + } + + if (signal_pending(current)) { + result = -ERESTARTSYS; + break; + } + + prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); + + up(&file->mutex); + schedule(); + down(&file->mutex); + + finish_wait(&file->poll_wait, &wait); + } + + if (result) + goto done; + + uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); + + if (!uevent->cm_id) + goto user; + + ctx = ib_ucm_ctx_alloc(file); + if (!ctx) { + result = -ENOMEM; + goto done; + } + + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; + + uevent->resp.id = ctx->id; + +user: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &uevent->resp, sizeof(uevent->resp))) { + result = -EFAULT; + goto done; + } + + if (uevent->data) { + + if (cmd.data_len < uevent->data_len) { + result = -ENOMEM; + goto done; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.data, + uevent->data, uevent->data_len)) { + result = -EFAULT; + goto done; + } + } + + if (uevent->info) { + + if (cmd.info_len < uevent->info_len) { + result = -ENOMEM; + goto done; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.info, + uevent->info, uevent->info_len)) { + result = -EFAULT; + goto done; + } + } + + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + + kfree(uevent->data); + kfree(uevent->info); + kfree(uevent); +done: + up(&file->mutex); + return result; +} + + +static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_create_id cmd; + struct ib_ucm_create_id_resp resp; + struct ib_ucm_context *ctx; + int result; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + down(&file->mutex); + ctx = ib_ucm_ctx_alloc(file); + up(&file->mutex); + if (!ctx) + return -ENOMEM; + + ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); + if (IS_ERR(ctx->cm_id)) { + result = PTR_ERR(ctx->cm_id); + goto err; + } + + resp.id = ctx->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + result = -EFAULT; + goto err; + } + + return 0; + +err: + ib_ucm_destroy_ctx(file, ctx->id); + return result; +} + +static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_destroy_id cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + return ib_ucm_destroy_ctx(file, cmd.id); +} + +static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_attr_id_resp resp; + struct ib_ucm_attr_id cmd; + struct ib_ucm_context *ctx; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.service_id = ctx->cm_id->service_id; + resp.service_mask = ctx->cm_id->service_mask; + resp.local_id = ctx->cm_id->local_id; + resp.remote_id = ctx->cm_id->remote_id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + + ib_ucm_ctx_put(ctx); + return result; +} + +static ssize_t ib_ucm_listen(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_listen cmd; + struct ib_ucm_context *ctx; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); + ib_ucm_ctx_put(ctx); + return result; +} + +static ssize_t ib_ucm_establish(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_establish cmd; + struct ib_ucm_context *ctx; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + result = ib_cm_establish(ctx->cm_id); + ib_ucm_ctx_put(ctx); + return result; +} + +static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) +{ + void *data; + + *dest = NULL; + + if (!len) + return 0; + + data = kmalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + if (copy_from_user(data, (void __user *)(unsigned long)src, len)) { + kfree(data); + return -EFAULT; + } + + *dest = data; + return 0; +} + +static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) +{ + struct ib_ucm_path_rec ucm_path; + struct ib_sa_path_rec *sa_path; + + *path = NULL; + + if (!src) + return 0; + + sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); + if (!sa_path) + return -ENOMEM; + + if (copy_from_user(&ucm_path, (void __user *)(unsigned long)src, + sizeof(ucm_path))) { + + kfree(sa_path); + return -EFAULT; + } + + memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof sa_path->dgid); + memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof sa_path->sgid); + + sa_path->dlid = ucm_path.dlid; + sa_path->slid = ucm_path.slid; + sa_path->raw_traffic = ucm_path.raw_traffic; + sa_path->flow_label = ucm_path.flow_label; + sa_path->hop_limit = ucm_path.hop_limit; + sa_path->traffic_class = ucm_path.traffic_class; + sa_path->reversible = ucm_path.reversible; + sa_path->numb_path = ucm_path.numb_path; + sa_path->pkey = ucm_path.pkey; + sa_path->sl = ucm_path.sl; + sa_path->mtu_selector = ucm_path.mtu_selector; + sa_path->mtu = ucm_path.mtu; + sa_path->rate_selector = ucm_path.rate_selector; + sa_path->rate = ucm_path.rate; + sa_path->packet_life_time = ucm_path.packet_life_time; + sa_path->preference = ucm_path.preference; + + sa_path->packet_life_time_selector = + ucm_path.packet_life_time_selector; + + *path = sa_path; + return 0; +} + +static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_req_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_req cmd; + int result; + + param.private_data = NULL; + param.primary_path = NULL; + param.alternate_path = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); + if (result) + goto done; + + param.private_data_len = cmd.len; + param.service_id = cmd.sid; + param.qp_num = cmd.qpn; + param.qp_type = cmd.qp_type; + param.starting_psn = cmd.psn; + param.peer_to_peer = cmd.peer_to_peer; + param.responder_resources = cmd.responder_resources; + param.initiator_depth = cmd.initiator_depth; + param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; + param.flow_control = cmd.flow_control; + param.local_cm_response_timeout = cmd.local_cm_response_timeout; + param.retry_count = cmd.retry_count; + param.rnr_retry_count = cmd.rnr_retry_count; + param.max_cm_retries = cmd.max_cm_retries; + param.srq = cmd.srq; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_req(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(param.private_data); + kfree(param.primary_path); + kfree(param.alternate_path); + return result; +} + +static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_rep_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_rep cmd; + int result; + + param.private_data = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + return result; + + param.qp_num = cmd.qpn; + param.starting_psn = cmd.psn; + param.private_data_len = cmd.len; + param.responder_resources = cmd.responder_resources; + param.initiator_depth = cmd.initiator_depth; + param.target_ack_delay = cmd.target_ack_delay; + param.failover_accepted = cmd.failover_accepted; + param.flow_control = cmd.flow_control; + param.rnr_retry_count = cmd.rnr_retry_count; + param.srq = cmd.srq; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_rep(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + + kfree(param.private_data); + return result; +} + +static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, + const char __user *inbuf, int in_len, + int (*func)(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len)) +{ + struct ib_ucm_private_data cmd; + struct ib_ucm_context *ctx; + const void *private_data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); + if (result) + return result; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = func(ctx->cm_id, private_data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + + kfree(private_data); + return result; +} + +static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); +} + +static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); +} + +static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); +} + +static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, + const char __user *inbuf, int in_len, + int (*func)(struct ib_cm_id *cm_id, + int status, + const void *info, + u8 info_len, + const void *data, + u8 data_len)) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_info cmd; + const void *data = NULL; + const void *info = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); + if (result) + goto done; + + result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); + if (result) + goto done; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = func(ctx->cm_id, cmd.status, info, cmd.info_len, + data, cmd.data_len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(data); + kfree(info); + return result; +} + +static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); +} + +static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); +} + +static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_mra cmd; + const void *data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); + if (result) + return result; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + + kfree(data); + return result; +} + +static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_sa_path_rec *path = NULL; + struct ib_ucm_lap cmd; + const void *data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(&path, cmd.path); + if (result) + goto done; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(data); + kfree(path); + return result; +} + +static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_sidr_req_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_sidr_req cmd; + int result; + + param.private_data = NULL; + param.path = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.path, cmd.path); + if (result) + goto done; + + param.private_data_len = cmd.len; + param.service_id = cmd.sid; + param.timeout_ms = cmd.timeout; + param.max_cm_retries = cmd.max_cm_retries; + param.pkey = cmd.pkey; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(param.private_data); + kfree(param.path); + return result; +} + +static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_sidr_rep_param param; + struct ib_ucm_sidr_rep cmd; + struct ib_ucm_context *ctx; + int result; + + param.info = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, + cmd.data, cmd.data_len); + if (result) + goto done; + + result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); + if (result) + goto done; + + param.qp_num = cmd.qpn; + param.qkey = cmd.qkey; + param.status = cmd.status; + param.info_length = cmd.info_len; + param.private_data_len = cmd.data_len; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(param.private_data); + kfree(param.info); + return result; +} + +static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) = { + [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, + [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, + [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, + [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, + [IB_USER_CM_CMD_ESTABLISH] = ib_ucm_establish, + [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, + [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, + [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, + [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, + [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, + [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, + [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, + [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, + [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, + [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, + [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, + [IB_USER_CM_CMD_EVENT] = ib_ucm_event, +}; + +static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct ib_ucm_file *file = filp->private_data; + struct ib_ucm_cmd_hdr hdr; + ssize_t result; + + if (len < sizeof(hdr)) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + + ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n", + hdr.cmd, hdr.in, hdr.out, len); + + if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) + return -EINVAL; + + if (hdr.in + sizeof(hdr) > len) + return -EINVAL; + + result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), + hdr.in, hdr.out); + if (!result) + result = len; + + return result; +} + +static unsigned int ib_ucm_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct ib_ucm_file *file = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &file->poll_wait, wait); + + if (!list_empty(&file->events)) + mask = POLLIN | POLLRDNORM; + + return mask; +} + +static int ib_ucm_open(struct inode *inode, struct file *filp) +{ + struct ib_ucm_file *file; + + file = kmalloc(sizeof(*file), GFP_KERNEL); + if (!file) + return -ENOMEM; + + INIT_LIST_HEAD(&file->events); + INIT_LIST_HEAD(&file->ctxs); + init_waitqueue_head(&file->poll_wait); + + init_MUTEX(&file->mutex); + + filp->private_data = file; + file->filp = filp; + + ucm_dbg("Created struct\n"); + + return 0; +} + +static int ib_ucm_close(struct inode *inode, struct file *filp) +{ + struct ib_ucm_file *file = filp->private_data; + struct ib_ucm_context *ctx; + + down(&file->mutex); + while (!list_empty(&file->ctxs)) { + + ctx = list_entry(file->ctxs.next, + struct ib_ucm_context, file_list); + + up(&file->mutex); + ib_ucm_destroy_ctx(file, ctx->id); + down(&file->mutex); + } + up(&file->mutex); + kfree(file); + return 0; +} + +static struct file_operations ib_ucm_fops = { + .owner = THIS_MODULE, + .open = ib_ucm_open, + .release = ib_ucm_close, + .write = ib_ucm_write, + .poll = ib_ucm_poll, +}; + + +static struct class *ib_ucm_class; +static struct cdev ib_ucm_cdev; + +static int __init ib_ucm_init(void) +{ + int result; + + result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); + if (result) { + ucm_dbg("Error <%d> registering dev\n", result); + goto err_chr; + } + + cdev_init(&ib_ucm_cdev, &ib_ucm_fops); + + result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); + if (result) { + ucm_dbg("Error <%d> adding cdev\n", result); + goto err_cdev; + } + + ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); + if (IS_ERR(ib_ucm_class)) { + result = PTR_ERR(ib_ucm_class); + ucm_dbg("Error <%d> creating class\n", result); + goto err_class; + } + + class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm"); + + idr_init(&ctx_id_table); + init_MUTEX(&ctx_id_mutex); + + return 0; +err_class: + cdev_del(&ib_ucm_cdev); +err_cdev: + unregister_chrdev_region(IB_UCM_DEV, 1); +err_chr: + return result; +} + +static void __exit ib_ucm_cleanup(void) +{ + class_device_destroy(ib_ucm_class, IB_UCM_DEV); + class_destroy(ib_ucm_class); + cdev_del(&ib_ucm_cdev); + unregister_chrdev_region(IB_UCM_DEV, 1); +} + +module_init(ib_ucm_init); +module_exit(ib_ucm_cleanup); diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h new file mode 100644 index 000000000000..c8819b928a1b --- /dev/null +++ b/drivers/infiniband/core/ucm.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $ + */ + +#ifndef UCM_H +#define UCM_H + +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/idr.h> + +#include <rdma/ib_cm.h> +#include <rdma/ib_user_cm.h> + +#define IB_UCM_CM_ID_INVALID 0xffffffff + +struct ib_ucm_file { + struct semaphore mutex; + struct file *filp; + + struct list_head ctxs; /* list of active connections */ + struct list_head events; /* list of pending events */ + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ + + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; + /* + * new connection identifiers needs to be saved until + * userspace can get a handle on them. + */ + struct ib_cm_id *cm_id; +}; + +#endif /* UCM_H */ diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index dc4eb1db5e96..527b23450ab3 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,7 +35,7 @@ #include <linux/errno.h> -#include <ib_pack.h> +#include <rdma/ib_pack.h> #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ @@ -194,6 +195,7 @@ void ib_ud_header_init(int payload_bytes, struct ib_ud_header *header) { int header_len; + u16 packet_length; memset(header, 0, sizeof *header); @@ -208,7 +210,7 @@ void ib_ud_header_init(int payload_bytes, header->lrh.link_version = 0; header->lrh.link_next_header = grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; - header->lrh.packet_length = (IB_LRH_BYTES + + packet_length = (IB_LRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + @@ -217,8 +219,7 @@ void ib_ud_header_init(int payload_bytes, header->grh_present = grh_present; if (grh_present) { - header->lrh.packet_length += IB_GRH_BYTES / 4; - + packet_length += IB_GRH_BYTES / 4; header->grh.ip_version = 6; header->grh.payload_length = cpu_to_be16((IB_BTH_BYTES + @@ -229,7 +230,7 @@ void ib_ud_header_init(int payload_bytes, header->grh.next_header = 0x1b; } - cpu_to_be16s(&header->lrh.packet_length); + header->lrh.packet_length = cpu_to_be16(packet_length); if (header->immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 9d912d6877ff..7c2f03057ddb 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $ */ #include <linux/module.h> @@ -47,8 +49,8 @@ #include <asm/uaccess.h> #include <asm/semaphore.h> -#include <ib_mad.h> -#include <ib_user_mad.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_user_mad.h> MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); @@ -94,10 +96,12 @@ struct ib_umad_file { }; struct ib_umad_packet { - struct ib_user_mad mad; struct ib_ah *ah; + struct ib_mad_send_buf *msg; struct list_head list; + int length; DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_user_mad mad; }; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); @@ -114,10 +118,10 @@ static int queue_packet(struct ib_umad_file *file, int ret = 1; down_read(&file->agent_mutex); - for (packet->mad.id = 0; - packet->mad.id < IB_UMAD_MAX_AGENTS; - packet->mad.id++) - if (agent == file->agent[packet->mad.id]) { + for (packet->mad.hdr.id = 0; + packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; + packet->mad.hdr.id++) + if (agent == file->agent[packet->mad.hdr.id]) { spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); @@ -135,22 +139,30 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *packet = + struct ib_umad_packet *timeout, *packet = (void *) (unsigned long) send_wc->wr_id; - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - ib_destroy_ah(packet->ah); + ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); + ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - packet->mad.status = ETIMEDOUT; + timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), + GFP_KERNEL); + if (!timeout) + goto out; - if (!queue_packet(file, agent, packet)) - return; - } + memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); + timeout->length = sizeof (struct ib_mad_hdr); + timeout->mad.hdr.id = packet->mad.hdr.id; + timeout->mad.hdr.status = ETIMEDOUT; + memcpy(timeout->mad.data, packet->mad.data, + sizeof (struct ib_mad_hdr)); + + if (!queue_packet(file, agent, timeout)) + return; + } +out: kfree(packet); } @@ -159,30 +171,35 @@ static void recv_handler(struct ib_mad_agent *agent, { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; + int length; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) goto out; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + length = mad_recv_wc->mad_len; + packet = kmalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; - memset(packet, 0, sizeof *packet); + memset(packet, 0, sizeof *packet + length); + packet->length = length; + + ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); - memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data); - packet->mad.status = 0; - packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.sl = mad_recv_wc->wc->sl; - packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits; - packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); - if (packet->mad.grh_present) { + packet->mad.hdr.status = 0; + packet->mad.hdr.length = length + sizeof (struct ib_user_mad); + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); + if (packet->mad.hdr.grh_present) { /* XXX parse GRH */ - packet->mad.gid_index = 0; - packet->mad.hop_limit = 0; - packet->mad.traffic_class = 0; - memset(packet->mad.gid, 0, 16); - packet->mad.flow_label = 0; + packet->mad.hdr.gid_index = 0; + packet->mad.hdr.hop_limit = 0; + packet->mad.hdr.traffic_class = 0; + memset(packet->mad.hdr.gid, 0, 16); + packet->mad.hdr.flow_label = 0; } if (queue_packet(file, agent, packet)) @@ -199,7 +216,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad)) + if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) return -EINVAL; spin_lock_irq(&file->recv_lock); @@ -222,12 +239,25 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, spin_unlock_irq(&file->recv_lock); - if (copy_to_user(buf, &packet->mad, sizeof packet->mad)) + if (count < packet->length + sizeof (struct ib_user_mad)) { + /* Return length needed (and first RMPP segment) if too small */ + if (copy_to_user(buf, &packet->mad, + sizeof (struct ib_user_mad) + sizeof (struct ib_mad))) + ret = -EFAULT; + else + ret = -ENOSPC; + } else if (copy_to_user(buf, &packet->mad, + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else - ret = sizeof packet->mad; - - kfree(packet); + ret = packet->length + sizeof (struct ib_user_mad); + if (ret < 0) { + /* Requeue packet */ + spin_lock_irq(&file->recv_lock); + list_add(&packet->list, &file->recv_list); + spin_unlock_irq(&file->recv_lock); + } else + kfree(packet); return ret; } @@ -238,69 +268,57 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - }; + struct ib_send_wr *bad_wr; + struct ib_rmpp_mad *rmpp_mad; u8 method; - u64 *tid; - int ret; + __be64 *tid; + int ret, length, hdr_len, data_len, rmpp_hdr_size; + int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) return -EINVAL; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + length = count - sizeof (struct ib_user_mad); + packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr), GFP_KERNEL); if (!packet) return -ENOMEM; - if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) { - kfree(packet); - return -EFAULT; + if (copy_from_user(&packet->mad, buf, + sizeof (struct ib_user_mad) + + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr))) { + ret = -EFAULT; + goto err; } - if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id < 0 || + packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } + packet->length = length; + down_read(&file->agent_mutex); - agent = file->agent[packet->mad.id]; + agent = file->agent[packet->mad.hdr.id]; if (!agent) { ret = -EINVAL; goto err_up; } - /* - * If userspace is generating a request that will generate a - * response, we need to make sure the high-order part of the - * transaction ID matches the agent being used to send the - * MAD. - */ - method = ((struct ib_mad_hdr *) packet->mad.data)->method; - - if (!(method & IB_MGMT_METHOD_RESP) && - method != IB_MGMT_METHOD_TRAP_REPRESS && - method != IB_MGMT_METHOD_SEND) { - tid = &((struct ib_mad_hdr *) packet->mad.data)->tid; - *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | - (be64_to_cpup(tid) & 0xffffffff)); - } - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = be16_to_cpu(packet->mad.lid); - ah_attr.sl = packet->mad.sl; - ah_attr.src_path_bits = packet->mad.path_bits; + ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); + ah_attr.sl = packet->mad.hdr.sl; + ah_attr.src_path_bits = packet->mad.hdr.path_bits; ah_attr.port_num = file->port->port_num; - if (packet->mad.grh_present) { + if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; - memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16); - ah_attr.grh.flow_label = packet->mad.flow_label; - ah_attr.grh.hop_limit = packet->mad.hop_limit; - ah_attr.grh.traffic_class = packet->mad.traffic_class; + memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); @@ -309,34 +327,104 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_up; } - gather_list.addr = dma_map_single(agent->device->dma_device, - packet->mad.data, - sizeof packet->mad.data, - DMA_TO_DEVICE); - gather_list.length = sizeof packet->mad.data; - gather_list.lkey = file->mr[packet->mad.id]->lkey; - pci_unmap_addr_set(packet, mapping, gather_list.addr); + rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; + if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) { + /* RMPP active */ + if (!agent->rmpp_version) { + ret = -EINVAL; + goto err_ah; + } + /* Validate that management class can support RMPP */ + if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { + hdr_len = offsetof(struct ib_sa_mad, data); + data_len = length; + } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { + hdr_len = offsetof(struct ib_vendor_mad, data); + data_len = length - hdr_len; + } else { + ret = -EINVAL; + goto err_ah; + } + rmpp_active = 1; + } else { + if (length > sizeof(struct ib_mad)) { + ret = -EINVAL; + goto err_ah; + } + hdr_len = offsetof(struct ib_mad, data); + data_len = length - hdr_len; + } + + packet->msg = ib_create_send_mad(agent, + be32_to_cpu(packet->mad.hdr.qpn), + 0, packet->ah, rmpp_active, + hdr_len, data_len, + GFP_KERNEL); + if (IS_ERR(packet->msg)) { + ret = PTR_ERR(packet->msg); + goto err_ah; + } - wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data; - wr.wr.ud.ah = packet->ah; - wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn); - wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey); - wr.wr.ud.timeout_ms = packet->mad.timeout_ms; + packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; - wr.wr_id = (unsigned long) packet; + /* Override send WR WRID initialized in ib_create_send_mad */ + packet->msg->send_wr.wr_id = (unsigned long) packet; - ret = ib_post_send_mad(agent, &wr, &bad_wr); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - goto err_up; + if (!rmpp_active) { + /* Copy message from user into send buffer */ + if (copy_from_user(packet->msg->mad, + buf + sizeof(struct ib_user_mad), length)) { + ret = -EFAULT; + goto err_msg; + } + } else { + rmpp_hdr_size = sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr); + + /* Only copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, + sizeof(struct ib_mad_hdr)); + + /* Now, copy rest of message from user into send buffer */ + if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, + buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, + length - rmpp_hdr_size)) { + ret = -EFAULT; + goto err_msg; + } + } + + /* + * If userspace is generating a request that will generate a + * response, we need to make sure the high-order part of the + * transaction ID matches the agent being used to send the + * MAD. + */ + method = packet->msg->mad->mad_hdr.method; + + if (!(method & IB_MGMT_METHOD_RESP) && + method != IB_MGMT_METHOD_TRAP_REPRESS && + method != IB_MGMT_METHOD_SEND) { + tid = &packet->msg->mad->mad_hdr.tid; + *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | + (be64_to_cpup(tid) & 0xffffffff)); } + ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); + if (ret) + goto err_msg; + up_read(&file->agent_mutex); - return sizeof packet->mad; + return sizeof (struct ib_user_mad_hdr) + packet->length; + +err_msg: + ib_free_send_mad(packet->msg); + +err_ah: + ib_destroy_ah(packet->ah); err_up: up_read(&file->agent_mutex); @@ -399,7 +487,8 @@ found: agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, - 0, send_handler, recv_handler, file); + ureq.rmpp_version, + send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto out; @@ -460,8 +549,8 @@ out: return ret; } -static long ib_umad_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) +static long ib_umad_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: @@ -517,14 +606,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp) } static struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, - .compat_ioctl = ib_umad_ioctl, - .open = ib_umad_open, - .release = ib_umad_close + .compat_ioctl = ib_umad_ioctl, + .open = ib_umad_open, + .release = ib_umad_close }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h new file mode 100644 index 000000000000..180b3d4765e4 --- /dev/null +++ b/drivers/infiniband/core/uverbs.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $ + */ + +#ifndef UVERBS_H +#define UVERBS_H + +/* Include device.h and fs.h until cdev.h is self-sufficient */ +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/kref.h> +#include <linux/idr.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +struct ib_uverbs_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; + int num_comp; +}; + +struct ib_uverbs_event_file { + struct kref ref; + struct ib_uverbs_file *uverbs_file; + spinlock_t lock; + int fd; + int is_async; + wait_queue_head_t poll_wait; + struct fasync_struct *async_queue; + struct list_head event_list; +}; + +struct ib_uverbs_file { + struct kref ref; + struct ib_uverbs_device *device; + struct ib_ucontext *ucontext; + struct ib_event_handler event_handler; + struct ib_uverbs_event_file async_file; + struct ib_uverbs_event_file comp_file[1]; +}; + +struct ib_uverbs_async_event { + struct ib_uverbs_async_event_desc desc; + struct list_head list; +}; + +struct ib_uverbs_comp_event { + struct ib_uverbs_comp_event_desc desc; + struct list_head list; +}; + +struct ib_uobject_mr { + struct ib_uobject uobj; + struct page *page_list; + struct scatterlist *sg_list; +}; + +extern struct semaphore ib_uverbs_idr_mutex; +extern struct idr ib_uverbs_pd_idr; +extern struct idr ib_uverbs_mr_idr; +extern struct idr ib_uverbs_mw_idr; +extern struct idr ib_uverbs_ah_idr; +extern struct idr ib_uverbs_cq_idr; +extern struct idr ib_uverbs_qp_idr; +extern struct idr ib_uverbs_srq_idr; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write); +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); + +#define IB_UVERBS_DECLARE_CMD(name) \ + ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ + const char __user *buf, int in_len, \ + int out_len) + +IB_UVERBS_DECLARE_CMD(query_params); +IB_UVERBS_DECLARE_CMD(get_context); +IB_UVERBS_DECLARE_CMD(query_device); +IB_UVERBS_DECLARE_CMD(query_port); +IB_UVERBS_DECLARE_CMD(query_gid); +IB_UVERBS_DECLARE_CMD(query_pkey); +IB_UVERBS_DECLARE_CMD(alloc_pd); +IB_UVERBS_DECLARE_CMD(dealloc_pd); +IB_UVERBS_DECLARE_CMD(reg_mr); +IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(destroy_cq); +IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(modify_qp); +IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(attach_mcast); +IB_UVERBS_DECLARE_CMD(detach_mcast); +IB_UVERBS_DECLARE_CMD(create_srq); +IB_UVERBS_DECLARE_CMD(modify_srq); +IB_UVERBS_DECLARE_CMD(destroy_srq); + +#endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c new file mode 100644 index 000000000000..ebccf9f38af9 --- /dev/null +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -0,0 +1,1184 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ + */ + +#include <asm/uaccess.h> + +#include "uverbs.h" + +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (void __user *) (ibuf); \ + (udata)->outbuf = (void __user *) (obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + +ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_params cmd; + struct ib_uverbs_query_params_resp resp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + resp.num_cq_events = file->device->num_comp; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_get_context cmd; + struct ib_uverbs_get_context_resp resp; + struct ib_udata udata; + struct ib_device *ibdev = file->device->ib_dev; + int i; + int ret = in_len; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + file->ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(file->ucontext)) { + ret = PTR_ERR(file->ucontext); + file->ucontext = NULL; + return ret; + } + + file->ucontext->device = ibdev; + INIT_LIST_HEAD(&file->ucontext->pd_list); + INIT_LIST_HEAD(&file->ucontext->mr_list); + INIT_LIST_HEAD(&file->ucontext->mw_list); + INIT_LIST_HEAD(&file->ucontext->cq_list); + INIT_LIST_HEAD(&file->ucontext->qp_list); + INIT_LIST_HEAD(&file->ucontext->srq_list); + INIT_LIST_HEAD(&file->ucontext->ah_list); + spin_lock_init(&file->ucontext->lock); + + resp.async_fd = file->async_file.fd; + for (i = 0; i < file->device->num_comp; ++i) + if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + + i * sizeof (__u32), + &file->comp_file[i].fd, sizeof (__u32))) + goto err; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + goto err; + + return in_len; + +err: + ibdev->dealloc_ucontext(file->ucontext); + file->ucontext = NULL; + + return -EFAULT; +} + +ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_device cmd; + struct ib_uverbs_query_device_resp resp; + struct ib_device_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_device(file->device->ib_dev, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.fw_ver = attr.fw_ver; + resp.node_guid = attr.node_guid; + resp.sys_image_guid = attr.sys_image_guid; + resp.max_mr_size = attr.max_mr_size; + resp.page_size_cap = attr.page_size_cap; + resp.vendor_id = attr.vendor_id; + resp.vendor_part_id = attr.vendor_part_id; + resp.hw_ver = attr.hw_ver; + resp.max_qp = attr.max_qp; + resp.max_qp_wr = attr.max_qp_wr; + resp.device_cap_flags = attr.device_cap_flags; + resp.max_sge = attr.max_sge; + resp.max_sge_rd = attr.max_sge_rd; + resp.max_cq = attr.max_cq; + resp.max_cqe = attr.max_cqe; + resp.max_mr = attr.max_mr; + resp.max_pd = attr.max_pd; + resp.max_qp_rd_atom = attr.max_qp_rd_atom; + resp.max_ee_rd_atom = attr.max_ee_rd_atom; + resp.max_res_rd_atom = attr.max_res_rd_atom; + resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; + resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; + resp.atomic_cap = attr.atomic_cap; + resp.max_ee = attr.max_ee; + resp.max_rdd = attr.max_rdd; + resp.max_mw = attr.max_mw; + resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; + resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; + resp.max_mcast_grp = attr.max_mcast_grp; + resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; + resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; + resp.max_ah = attr.max_ah; + resp.max_fmr = attr.max_fmr; + resp.max_map_per_fmr = attr.max_map_per_fmr; + resp.max_srq = attr.max_srq; + resp.max_srq_wr = attr.max_srq_wr; + resp.max_srq_sge = attr.max_srq_sge; + resp.max_pkeys = attr.max_pkeys; + resp.local_ca_ack_delay = attr.local_ca_ack_delay; + resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_port cmd; + struct ib_uverbs_query_port_resp resp; + struct ib_port_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.state = attr.state; + resp.max_mtu = attr.max_mtu; + resp.active_mtu = attr.active_mtu; + resp.gid_tbl_len = attr.gid_tbl_len; + resp.port_cap_flags = attr.port_cap_flags; + resp.max_msg_sz = attr.max_msg_sz; + resp.bad_pkey_cntr = attr.bad_pkey_cntr; + resp.qkey_viol_cntr = attr.qkey_viol_cntr; + resp.pkey_tbl_len = attr.pkey_tbl_len; + resp.lid = attr.lid; + resp.sm_lid = attr.sm_lid; + resp.lmc = attr.lmc; + resp.max_vl_num = attr.max_vl_num; + resp.sm_sl = attr.sm_sl; + resp.subnet_timeout = attr.subnet_timeout; + resp.init_type_reply = attr.init_type_reply; + resp.active_width = attr.active_width; + resp.active_speed = attr.active_speed; + resp.phys_state = attr.phys_state; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_gid cmd; + struct ib_uverbs_query_gid_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, + (union ib_gid *) resp.gid); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_pkey cmd; + struct ib_uverbs_query_pkey_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, + &resp.pkey); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_alloc_pd cmd; + struct ib_uverbs_alloc_pd_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->context = file->ucontext; + + pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto err; + } + + pd->device = file->device->ib_dev; + pd->uobject = uobj; + atomic_set(&pd->usecnt, 0); + +retry: + if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_pd; + } + + down(&ib_uverbs_idr_mutex); + ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); + up(&ib_uverbs_idr_mutex); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_pd; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + spin_unlock_irq(&file->ucontext->lock); + + memset(&resp, 0, sizeof resp); + resp.pd_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_pd_idr, uobj->id); + up(&ib_uverbs_idr_mutex); + +err_pd: + ib_dealloc_pd(pd); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_dealloc_pd cmd; + struct ib_pd *pd; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) + goto out; + + uobj = pd->uobject; + + ret = ib_dealloc_pd(pd); + if (ret) + goto out; + + idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + struct ib_umem_object *obj; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->uobject.context = file->ucontext; + + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. + */ + ret = ib_umem_get(file->device->ib_dev, &obj->umem, + (void *) (unsigned long) cmd.start, cmd.length, + !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); + if (ret) + goto err_free; + + obj->umem.virt_base = cmd.hca_va; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + if (!pd->device->reg_user_mr) { + ret = -ENOSYS; + goto err_up; + } + + mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto err_up; + } + + mr->device = pd->device; + mr->pd = pd; + mr->uobject = &obj->uobject; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + +retry: + if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_unreg; + } + + ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_unreg; + + resp.mr_handle = obj->uobject.id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&obj->uobject.list); + spin_unlock_irq(&file->ucontext->lock); + +err_unreg: + ib_dereg_mr(mr); + +err_up: + up(&ib_uverbs_idr_mutex); + + ib_umem_release(file->device->ib_dev, &obj->umem); + +err_free: + kfree(obj); + return ret; +} + +ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mr cmd; + struct ib_mr *mr; + struct ib_umem_object *memobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); + if (!mr || mr->uobject->context != file->ucontext) + goto out; + + memobj = container_of(mr->uobject, struct ib_umem_object, uobject); + + ret = ib_dereg_mr(mr); + if (ret) + goto out; + + idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&memobj->uobject.list); + spin_unlock_irq(&file->ucontext->lock); + + ib_umem_release(file->device->ib_dev, &memobj->umem); + kfree(memobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_cq *cq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if (cmd.event_handler >= file->device->num_comp) + return -EINVAL; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, + file->ucontext, &udata); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err; + } + + cq->device = file->device->ib_dev; + cq->uobject = uobj; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = file; + atomic_set(&cq->usecnt, 0); + +retry: + if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_cq; + } + + down(&ib_uverbs_idr_mutex); + ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id); + up(&ib_uverbs_idr_mutex); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_cq; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->cq_list); + spin_unlock_irq(&file->ucontext->lock); + + memset(&resp, 0, sizeof resp); + resp.cq_handle = uobj->id; + resp.cqe = cq->cqe; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_cq_idr, uobj->id); + up(&ib_uverbs_idr_mutex); + +err_cq: + ib_destroy_cq(cq); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_cq cmd; + struct ib_cq *cq; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) + goto out; + + uobj = cq->uobject; + + ret = ib_destroy_cq(cq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_srq *srq; + struct ib_qp *qp; + struct ib_qp_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); + rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); + srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL; + + if (!pd || pd->uobject->context != file->ucontext || + !scq || scq->uobject->context != file->ucontext || + !rcq || rcq->uobject->context != file->ucontext || + (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.send_cq = scq; + attr.recv_cq = rcq; + attr.srq = srq; + attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd.qp_type; + + attr.cap.max_send_wr = cmd.max_send_wr; + attr.cap.max_recv_wr = cmd.max_recv_wr; + attr.cap.max_send_sge = cmd.max_send_sge; + attr.cap.max_recv_sge = cmd.max_recv_sge; + attr.cap.max_inline_data = cmd.max_inline_data; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + qp = pd->device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_up; + } + + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->uobject = uobj; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + +retry: + if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.qp_handle = uobj->id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->qp_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + +err_destroy: + ib_destroy_qp(qp); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_qp cmd; + struct ib_qp *qp; + struct ib_qp_attr *attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->alt_pkey_index = cmd.pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + ret = ib_modify_qp(qp, attr, cmd.attr_mask); + if (ret) + goto out; + + ret = in_len; + +out: + up(&ib_uverbs_idr_mutex); + kfree(attr); + + return ret; +} + +ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_qp cmd; + struct ib_qp *qp; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + uobj = qp->uobject; + + ret = ib_destroy_qp(qp); + if (ret) + goto out; + + idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_attach_mcast cmd; + struct ib_qp *qp; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_detach_mcast cmd; + struct ib_qp *qp; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_srq *srq; + struct ib_srq_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_srq_event_handler; + attr.srq_context = file; + attr.attr.max_wr = cmd.max_wr; + attr.attr.max_sge = cmd.max_sge; + attr.attr.srq_limit = cmd.srq_limit; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + srq = pd->device->create_srq(pd, &attr, &udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err_up; + } + + srq->device = pd->device; + srq->pd = pd; + srq->uobject = uobj; + srq->event_handler = attr.event_handler; + srq->srq_context = attr.srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&srq->usecnt, 0); + + memset(&resp, 0, sizeof resp); + +retry: + if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.srq_handle = uobj->id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->srq_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + +err_destroy: + ib_destroy_srq(srq); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_srq cmd; + struct ib_srq *srq; + struct ib_srq_attr attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr.max_wr = cmd.max_wr; + attr.max_sge = cmd.max_sge; + attr.srq_limit = cmd.srq_limit; + + ret = ib_modify_srq(srq, &attr, cmd.attr_mask); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_srq cmd; + struct ib_srq *srq; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + uobj = srq->uobject; + + ret = ib_destroy_srq(srq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c new file mode 100644 index 000000000000..09caf5b1ef36 --- /dev/null +++ b/drivers/infiniband/core/uverbs_main.c @@ -0,0 +1,731 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $ + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/mount.h> + +#include <asm/uaccess.h> + +#include "uverbs.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand userspace verbs access"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ + +enum { + IB_UVERBS_MAJOR = 231, + IB_UVERBS_BASE_MINOR = 192, + IB_UVERBS_MAX_DEVICES = 32 +}; + +#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) + +DECLARE_MUTEX(ib_uverbs_idr_mutex); +DEFINE_IDR(ib_uverbs_pd_idr); +DEFINE_IDR(ib_uverbs_mr_idr); +DEFINE_IDR(ib_uverbs_mw_idr); +DEFINE_IDR(ib_uverbs_ah_idr); +DEFINE_IDR(ib_uverbs_cq_idr); +DEFINE_IDR(ib_uverbs_qp_idr); +DEFINE_IDR(ib_uverbs_srq_idr); + +static spinlock_t map_lock; +static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); + +static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) = { + [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, + [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, +}; + +static struct vfsmount *uverbs_event_mnt; + +static void ib_uverbs_add_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device); + +static int ib_dealloc_ucontext(struct ib_ucontext *context) +{ + struct ib_uobject *uobj, *tmp; + + if (!context) + return 0; + + down(&ib_uverbs_idr_mutex); + + /* XXX Free AHs */ + + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { + struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + idr_remove(&ib_uverbs_qp_idr, uobj->id); + ib_destroy_qp(qp); + list_del(&uobj->list); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { + struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + idr_remove(&ib_uverbs_cq_idr, uobj->id); + ib_destroy_cq(cq); + list_del(&uobj->list); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { + struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + idr_remove(&ib_uverbs_srq_idr, uobj->id); + ib_destroy_srq(srq); + list_del(&uobj->list); + kfree(uobj); + } + + /* XXX Free MWs */ + + list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { + struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); + struct ib_device *mrdev = mr->device; + struct ib_umem_object *memobj; + + idr_remove(&ib_uverbs_mr_idr, uobj->id); + ib_dereg_mr(mr); + + memobj = container_of(uobj, struct ib_umem_object, uobject); + ib_umem_release_on_close(mrdev, &memobj->umem); + + list_del(&uobj->list); + kfree(memobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { + struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id); + idr_remove(&ib_uverbs_pd_idr, uobj->id); + ib_dealloc_pd(pd); + list_del(&uobj->list); + kfree(uobj); + } + + up(&ib_uverbs_idr_mutex); + + return context->device->dealloc_ucontext(context); +} + +static void ib_uverbs_release_file(struct kref *ref) +{ + struct ib_uverbs_file *file = + container_of(ref, struct ib_uverbs_file, ref); + + module_put(file->device->ib_dev->owner); + kfree(file); +} + +static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_event_file *file = filp->private_data; + void *event; + int eventsz; + int ret = 0; + + spin_lock_irq(&file->lock); + + while (list_empty(&file->event_list) && file->fd >= 0) { + spin_unlock_irq(&file->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->event_list) || + file->fd < 0)) + return -ERESTARTSYS; + + spin_lock_irq(&file->lock); + } + + if (file->fd < 0) { + spin_unlock_irq(&file->lock); + return -ENODEV; + } + + if (file->is_async) { + event = list_entry(file->event_list.next, + struct ib_uverbs_async_event, list); + eventsz = sizeof (struct ib_uverbs_async_event_desc); + } else { + event = list_entry(file->event_list.next, + struct ib_uverbs_comp_event, list); + eventsz = sizeof (struct ib_uverbs_comp_event_desc); + } + + if (eventsz > count) { + ret = -EINVAL; + event = NULL; + } else + list_del(file->event_list.next); + + spin_unlock_irq(&file->lock); + + if (event) { + if (copy_to_user(buf, event, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + } + + kfree(event); + + return ret; +} + +static unsigned int ib_uverbs_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + unsigned int pollflags = 0; + struct ib_uverbs_event_file *file = filp->private_data; + + poll_wait(filp, &file->poll_wait, wait); + + spin_lock_irq(&file->lock); + if (file->fd < 0) + pollflags = POLLERR; + else if (!list_empty(&file->event_list)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&file->lock); + + return pollflags; +} + +static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +{ + struct list_head *entry, *tmp; + + spin_lock_irq(&file->lock); + if (file->fd != -1) { + file->fd = -1; + list_for_each_safe(entry, tmp, &file->event_list) + if (file->is_async) + kfree(list_entry(entry, struct ib_uverbs_async_event, list)); + else + kfree(list_entry(entry, struct ib_uverbs_comp_event, list)); + } + spin_unlock_irq(&file->lock); +} + +static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) +{ + struct ib_uverbs_event_file *file = filp->private_data; + + return fasync_helper(fd, filp, on, &file->async_queue); +} + +static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_event_file *file = filp->private_data; + + ib_uverbs_event_release(file); + ib_uverbs_event_fasync(-1, filp, 0); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + return 0; +} + +static struct file_operations uverbs_event_fops = { + /* + * No .owner field since we artificially create event files, + * so there is no increment to the module reference count in + * the open path. All event files come from a uverbs command + * file, which already takes a module reference, so this is OK. + */ + .read = ib_uverbs_event_read, + .poll = ib_uverbs_event_poll, + .release = ib_uverbs_event_close, + .fasync = ib_uverbs_event_fasync +}; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct ib_uverbs_file *file = cq_context; + struct ib_uverbs_comp_event *entry; + unsigned long flags; + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) + return; + + entry->desc.cq_handle = cq->uobject->user_handle; + + spin_lock_irqsave(&file->comp_file[0].lock, flags); + list_add_tail(&entry->list, &file->comp_file[0].event_list); + spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + + wake_up_interruptible(&file->comp_file[0].poll_wait); + kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); +} + +static void ib_uverbs_async_handler(struct ib_uverbs_file *file, + __u64 element, __u64 event) +{ + struct ib_uverbs_async_event *entry; + unsigned long flags; + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) + return; + + entry->desc.element = element; + entry->desc.event_type = event; + + spin_lock_irqsave(&file->async_file.lock, flags); + list_add_tail(&entry->list, &file->async_file.event_list); + spin_unlock_irqrestore(&file->async_file.lock, flags); + + wake_up_interruptible(&file->async_file.poll_wait); + kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); +} + +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.cq->uobject->user_handle, + event->event); +} + +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.qp->uobject->user_handle, + event->event); +} + +void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.srq->uobject->user_handle, + event->event); +} + +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_uverbs_file *file = + container_of(handler, struct ib_uverbs_file, event_handler); + + ib_uverbs_async_handler(file, event->element.port_num, event->event); +} + +static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, + struct ib_uverbs_file *uverbs_file) +{ + struct file *filp; + + spin_lock_init(&file->lock); + INIT_LIST_HEAD(&file->event_list); + init_waitqueue_head(&file->poll_wait); + file->uverbs_file = uverbs_file; + file->async_queue = NULL; + + file->fd = get_unused_fd(); + if (file->fd < 0) + return file->fd; + + filp = get_empty_filp(); + if (!filp) { + put_unused_fd(file->fd); + return -ENFILE; + } + + filp->f_op = &uverbs_event_fops; + filp->f_vfsmnt = mntget(uverbs_event_mnt); + filp->f_dentry = dget(uverbs_event_mnt->mnt_root); + filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_flags = O_RDONLY; + filp->f_mode = FMODE_READ; + filp->private_data = file; + + fd_install(file->fd, filp); + + return 0; +} + +static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_cmd_hdr hdr; + + if (count < sizeof hdr) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof hdr)) + return -EFAULT; + + if (hdr.in_words * 4 != count) + return -EINVAL; + + if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table)) + return -EINVAL; + + if (!file->ucontext && + hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, + hdr.in_words * 4, hdr.out_words * 4); +} + +static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ib_uverbs_file *file = filp->private_data; + + if (!file->ucontext) + return -ENODEV; + else + return file->device->ib_dev->mmap(file->ucontext, vma); +} + +static int ib_uverbs_open(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_device *dev = + container_of(inode->i_cdev, struct ib_uverbs_device, dev); + struct ib_uverbs_file *file; + int i = 0; + int ret; + + if (!try_module_get(dev->ib_dev->owner)) + return -ENODEV; + + file = kmalloc(sizeof *file + + (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), + GFP_KERNEL); + if (!file) + return -ENOMEM; + + file->device = dev; + kref_init(&file->ref); + + file->ucontext = NULL; + + ret = ib_uverbs_event_init(&file->async_file, file); + if (ret) + goto err; + + file->async_file.is_async = 1; + + kref_get(&file->ref); + + for (i = 0; i < dev->num_comp; ++i) { + ret = ib_uverbs_event_init(&file->comp_file[i], file); + if (ret) + goto err_async; + kref_get(&file->ref); + file->comp_file[i].is_async = 0; + } + + + filp->private_data = file; + + INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, + ib_uverbs_event_handler); + if (ib_register_event_handler(&file->event_handler)) + goto err_async; + + return 0; + +err_async: + while (i--) + ib_uverbs_event_release(&file->comp_file[i]); + + ib_uverbs_event_release(&file->async_file); + +err: + kref_put(&file->ref, ib_uverbs_release_file); + + return ret; +} + +static int ib_uverbs_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_file *file = filp->private_data; + int i; + + ib_unregister_event_handler(&file->event_handler); + ib_uverbs_event_release(&file->async_file); + ib_dealloc_ucontext(file->ucontext); + + for (i = 0; i < file->device->num_comp; ++i) + ib_uverbs_event_release(&file->comp_file[i]); + + kref_put(&file->ref, ib_uverbs_release_file); + + return 0; +} + +static struct file_operations uverbs_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .open = ib_uverbs_open, + .release = ib_uverbs_close +}; + +static struct file_operations uverbs_mmap_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .mmap = ib_uverbs_mmap, + .open = ib_uverbs_open, + .release = ib_uverbs_close +}; + +static struct ib_client uverbs_client = { + .name = "uverbs", + .add = ib_uverbs_add_one, + .remove = ib_uverbs_remove_one +}; + +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static void ib_uverbs_release_class_dev(struct class_device *class_dev) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct class uverbs_class = { + .name = "infiniband_verbs", + .release = ib_uverbs_release_class_dev +}; + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static void ib_uverbs_add_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev; + + if (!device->alloc_ucontext) + return; + + uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL); + if (!uverbs_dev) + return; + + memset(uverbs_dev, 0, sizeof *uverbs_dev); + + spin_lock(&map_lock); + uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { + spin_unlock(&map_lock); + goto err; + } + set_bit(uverbs_dev->devnum, dev_map); + spin_unlock(&map_lock); + + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp = 1; + + if (device->mmap) + cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); + else + cdev_init(&uverbs_dev->dev, &uverbs_fops); + uverbs_dev->dev.owner = THIS_MODULE; + kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + goto err; + + uverbs_dev->class_dev.class = &uverbs_class; + uverbs_dev->class_dev.dev = device->dma_device; + uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; + snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); + if (class_device_register(&uverbs_dev->class_dev)) + goto err_cdev; + + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) + goto err_class; + + ib_set_client_data(device, &uverbs_client, uverbs_dev); + + return; + +err_class: + class_device_unregister(&uverbs_dev->class_dev); + +err_cdev: + cdev_del(&uverbs_dev->dev); + clear_bit(uverbs_dev->devnum, dev_map); + +err: + kfree(uverbs_dev); + return; +} + +static void ib_uverbs_remove_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + + if (!uverbs_dev) + return; + + class_device_unregister(&uverbs_dev->class_dev); +} + +static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "infinibandevent:", NULL, + INFINIBANDEVENTFS_MAGIC); +} + +static struct file_system_type uverbs_event_fs = { + /* No owner field so module can be unloaded */ + .name = "infinibandeventfs", + .get_sb = uverbs_event_get_sb, + .kill_sb = kill_litter_super +}; + +static int __init ib_uverbs_init(void) +{ + int ret; + + spin_lock_init(&map_lock); + + ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register device number\n"); + goto out; + } + + ret = class_register(&uverbs_class); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); + goto out_chrdev; + } + + ret = class_create_file(&uverbs_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); + goto out_class; + } + + ret = register_filesystem(&uverbs_event_fs); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); + goto out_class; + } + + uverbs_event_mnt = kern_mount(&uverbs_event_fs); + if (IS_ERR(uverbs_event_mnt)) { + ret = PTR_ERR(uverbs_event_mnt); + printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); + goto out_fs; + } + + ret = ib_register_client(&uverbs_client); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register client\n"); + goto out_mnt; + } + + return 0; + +out_mnt: + mntput(uverbs_event_mnt); + +out_fs: + unregister_filesystem(&uverbs_event_fs); + +out_class: + class_unregister(&uverbs_class); + +out_chrdev: + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + +out: + return ret; +} + +static void __exit ib_uverbs_cleanup(void) +{ + ib_unregister_client(&uverbs_client); + mntput(uverbs_event_mnt); + unregister_filesystem(&uverbs_event_fs); + class_unregister(&uverbs_class); + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); +} + +module_init(ib_uverbs_init); +module_exit(ib_uverbs_cleanup); diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c new file mode 100644 index 000000000000..36a32c315668 --- /dev/null +++ b/drivers/infiniband/core/uverbs_mem.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $ + */ + +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +#include "uverbs.h" + +struct ib_umem_account_work { + struct work_struct work; + struct mm_struct *mm; + unsigned long diff; +}; + + +static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) +{ + struct ib_umem_chunk *chunk, *tmp; + int i; + + list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { + dma_unmap_sg(dev->dma_device, chunk->page_list, + chunk->nents, DMA_BIDIRECTIONAL); + for (i = 0; i < chunk->nents; ++i) { + if (umem->writable && dirty) + set_page_dirty_lock(chunk->page_list[i].page); + put_page(chunk->page_list[i].page); + } + + kfree(chunk); + } +} + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write) +{ + struct page **page_list; + struct ib_umem_chunk *chunk; + unsigned long locked; + unsigned long lock_limit; + unsigned long cur_base; + unsigned long npages; + int ret = 0; + int off; + int i; + + if (!can_do_mlock()) + return -EPERM; + + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + mem->user_base = (unsigned long) addr; + mem->length = size; + mem->offset = (unsigned long) addr & ~PAGE_MASK; + mem->page_size = PAGE_SIZE; + mem->writable = write; + + INIT_LIST_HEAD(&mem->chunk_list); + + npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + locked = npages + current->mm->locked_vm; + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto out; + } + + cur_base = (unsigned long) addr & PAGE_MASK; + + while (npages) { + ret = get_user_pages(current, current->mm, cur_base, + min_t(int, npages, + PAGE_SIZE / sizeof (struct page *)), + 1, !write, page_list, NULL); + + if (ret < 0) + goto out; + + cur_base += ret * PAGE_SIZE; + npages -= ret; + + off = 0; + + while (ret) { + chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * + min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), + GFP_KERNEL); + if (!chunk) { + ret = -ENOMEM; + goto out; + } + + chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); + for (i = 0; i < chunk->nents; ++i) { + chunk->page_list[i].page = page_list[i + off]; + chunk->page_list[i].offset = 0; + chunk->page_list[i].length = PAGE_SIZE; + } + + chunk->nmap = dma_map_sg(dev->dma_device, + &chunk->page_list[0], + chunk->nents, + DMA_BIDIRECTIONAL); + if (chunk->nmap <= 0) { + for (i = 0; i < chunk->nents; ++i) + put_page(chunk->page_list[i].page); + kfree(chunk); + + ret = -ENOMEM; + goto out; + } + + ret -= chunk->nents; + off += chunk->nents; + list_add_tail(&chunk->list, &mem->chunk_list); + } + + ret = 0; + } + +out: + if (ret < 0) + __ib_umem_release(dev, mem, 0); + else + current->mm->locked_vm = locked; + + up_write(¤t->mm->mmap_sem); + free_page((unsigned long) page_list); + + return ret; +} + +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) +{ + __ib_umem_release(dev, umem, 1); + + down_write(¤t->mm->mmap_sem); + current->mm->locked_vm -= + PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + up_write(¤t->mm->mmap_sem); +} + +static void ib_umem_account(void *work_ptr) +{ + struct ib_umem_account_work *work = work_ptr; + + down_write(&work->mm->mmap_sem); + work->mm->locked_vm -= work->diff; + up_write(&work->mm->mmap_sem); + mmput(work->mm); + kfree(work); +} + +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) +{ + struct ib_umem_account_work *work; + struct mm_struct *mm; + + __ib_umem_release(dev, umem, 1); + + mm = get_task_mm(current); + if (!mm) + return; + + /* + * We may be called with the mm's mmap_sem already held. This + * can happen when a userspace munmap() is the call that drops + * the last reference to our file and calls our release + * method. If there are memory regions to destroy, we'll end + * up here and not be able to take the mmap_sem. Therefore we + * defer the vm_locked accounting to the system workqueue. + */ + + work = kmalloc(sizeof *work, GFP_KERNEL); + if (!work) + return; + + INIT_WORK(&work->work, ib_umem_account, work); + work->mm = mm; + work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + + schedule_work(&work->work); +} diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7c08ed0cd7dd..5081d903e561 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -4,6 +4,8 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,7 +41,8 @@ #include <linux/errno.h> #include <linux/err.h> -#include <ib_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> /* Protection domains */ @@ -47,10 +50,11 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) { struct ib_pd *pd; - pd = device->alloc_pd(device); + pd = device->alloc_pd(device, NULL, NULL); if (!IS_ERR(pd)) { - pd->device = device; + pd->device = device; + pd->uobject = NULL; atomic_set(&pd->usecnt, 0); } @@ -76,8 +80,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) ah = pd->device->create_ah(pd, ah_attr); if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; + ah->device = pd->device; + ah->pd = pd; + ah->uobject = NULL; atomic_inc(&pd->usecnt); } @@ -85,6 +90,40 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num) +{ + struct ib_ah_attr ah_attr; + u32 flow_class; + u16 gid_index; + int ret; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = wc->slid; + ah_attr.sl = wc->sl; + ah_attr.src_path_bits = wc->dlid_path_bits; + ah_attr.port_num = port_num; + + if (wc->wc_flags & IB_WC_GRH) { + ah_attr.ah_flags = IB_AH_GRH; + ah_attr.grh.dgid = grh->dgid; + + ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num, + &gid_index); + if (ret) + return ERR_PTR(ret); + + ah_attr.grh.sgid_index = (u8) gid_index; + flow_class = be32_to_cpu(grh->version_tclass_flow); + ah_attr.grh.flow_label = flow_class & 0xFFFFF; + ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; + ah_attr.grh.hop_limit = grh->hop_limit; + } + + return ib_create_ah(pd, &ah_attr); +} +EXPORT_SYMBOL(ib_create_ah_from_wc); + int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { return ah->device->modify_ah ? @@ -115,6 +154,66 @@ int ib_destroy_ah(struct ib_ah *ah) } EXPORT_SYMBOL(ib_destroy_ah); +/* Shared receive queues */ + +struct ib_srq *ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr) +{ + struct ib_srq *srq; + + if (!pd->device->create_srq) + return ERR_PTR(-ENOSYS); + + srq = pd->device->create_srq(pd, srq_init_attr, NULL); + + if (!IS_ERR(srq)) { + srq->device = pd->device; + srq->pd = pd; + srq->uobject = NULL; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&srq->usecnt, 0); + } + + return srq; +} +EXPORT_SYMBOL(ib_create_srq); + +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask) +{ + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); +} +EXPORT_SYMBOL(ib_modify_srq); + +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr) +{ + return srq->device->query_srq ? + srq->device->query_srq(srq, srq_attr) : -ENOSYS; +} +EXPORT_SYMBOL(ib_query_srq); + +int ib_destroy_srq(struct ib_srq *srq) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&srq->usecnt)) + return -EBUSY; + + pd = srq->pd; + + ret = srq->device->destroy_srq(srq); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_srq); + /* Queue pairs */ struct ib_qp *ib_create_qp(struct ib_pd *pd, @@ -122,7 +221,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, { struct ib_qp *qp; - qp = pd->device->create_qp(pd, qp_init_attr); + qp = pd->device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { qp->device = pd->device; @@ -130,6 +229,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->send_cq = qp_init_attr->send_cq; qp->recv_cq = qp_init_attr->recv_cq; qp->srq = qp_init_attr->srq; + qp->uobject = NULL; qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; qp->qp_type = qp_init_attr->qp_type; @@ -197,10 +297,11 @@ struct ib_cq *ib_create_cq(struct ib_device *device, { struct ib_cq *cq; - cq = device->create_cq(device, cqe); + cq = device->create_cq(device, cqe, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; + cq->uobject = NULL; cq->comp_handler = comp_handler; cq->event_handler = event_handler; cq->cq_context = cq_context; @@ -245,8 +346,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) mr = pd->device->get_dma_mr(pd, mr_access_flags); if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); } @@ -267,8 +369,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, mr_access_flags, iova_start); if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); } @@ -344,8 +447,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd) mw = pd->device->alloc_mw(pd); if (!IS_ERR(mw)) { - mw->device = pd->device; - mw->pd = pd; + mw->device = pd->device; + mw->pd = pd; + mw->uobject = NULL; atomic_inc(&pd->usecnt); } diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index 5dcbd43073e2..c44f7bae5424 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -1,5 +1,3 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - ifdef CONFIG_INFINIBAND_MTHCA_DEBUG EXTRA_CFLAGS += -DDEBUG endif @@ -9,4 +7,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index b1db48dd91d6..9ba3211cef7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -177,3 +177,119 @@ void mthca_array_cleanup(struct mthca_array *array, int nent) kfree(array->page_list); } + +/* + * Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ + +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr) +{ + int err = -ENOMEM; + int npages, shift; + u64 *dma_list = NULL; + dma_addr_t t; + int i; + + if (size <= max_direct) { + *is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + pci_unmap_addr_set(&buf->direct, mapping, t); + + memset(buf->direct.buf, 0, size); + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + *is_direct = 0; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + shift = PAGE_SHIFT; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + return -ENOMEM; + + buf->page_list = kmalloc(npages * sizeof *buf->page_list, + GFP_KERNEL); + if (!buf->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + buf->page_list[i].buf = NULL; + + for (i = 0; i < npages; ++i) { + buf->page_list[i].buf = + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + dma_list[i] = t; + pci_unmap_addr_set(&buf->page_list[i], mapping, t); + + memset(buf->page_list[i].buf, 0, PAGE_SIZE); + } + } + + err = mthca_mr_alloc_phys(dev, pd->pd_num, + dma_list, shift, npages, + 0, size, + MTHCA_MPT_FLAG_LOCAL_READ | + (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0), + mr); + if (err) + goto err_free; + + kfree(dma_list); + + return 0; + +err_free: + mthca_buf_free(dev, size, buf, *is_direct, NULL); + +err_out: + kfree(dma_list); + + return err; +} + +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr) +{ + int i; + + if (mr) + mthca_free_mr(dev, mr); + + if (is_direct) + dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + pci_unmap_addr(&buf->direct, mapping)); + else { + for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + pci_unmap_addr(&buf->page_list[i], + mapping)); + kfree(buf->page_list); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 085baf393ca4..889e85096736 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,22 +35,22 @@ #include <linux/init.h> -#include <ib_verbs.h> -#include <ib_cache.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> #include "mthca_dev.h" struct mthca_av { - u32 port_pd; - u8 reserved1; - u8 g_slid; - u16 dlid; - u8 reserved2; - u8 gid_index; - u8 msg_sr; - u8 hop_limit; - u32 sl_tclass_flowlabel; - u32 dgid[4]; + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 msg_sr; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + __be32 dgid[4]; }; int mthca_create_ah(struct mthca_dev *dev, @@ -127,7 +128,7 @@ on_hca_fail: av, (unsigned long) ah->avdma); for (j = 0; j < 8; ++j) printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) av)[j])); + j * 4, be32_to_cpu(((__be32 *) av)[j])); } if (ah->type == MTHCA_AH_ON_HCA) { @@ -168,7 +169,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; header->lrh.destination_lid = ah->av->dlid; - header->lrh.source_lid = ah->av->g_slid & 0x7f; + header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f); if (ah->av->g_slid & 0x80) { header->grh_present = 1; header->grh.traffic_class = diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index cd9ed958d92f..cc758a2d2bc6 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,7 +37,7 @@ #include <linux/pci.h> #include <linux/errno.h> #include <asm/io.h> -#include <ib_mad.h> +#include <rdma/ib_mad.h> #include "mthca_dev.h" #include "mthca_config_reg.h" @@ -108,6 +109,7 @@ enum { CMD_SW2HW_SRQ = 0x35, CMD_HW2SW_SRQ = 0x36, CMD_QUERY_SRQ = 0x37, + CMD_ARM_SRQ = 0x40, /* QP/EE commands */ CMD_RST2INIT_QPEE = 0x19, @@ -219,20 +221,20 @@ static int mthca_cmd_post(struct mthca_dev *dev, * (and some architectures such as ia64 implement memcpy_toio * in terms of writeb). */ - __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); - __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); - __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4); - __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); - __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); - __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); + __raw_writel((__force u32) cpu_to_be32(in_modifier), dev->hcr + 2 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); + __raw_writel((__force u32) cpu_to_be32(token << 16), dev->hcr + 5 * 4); /* __raw_writel may not order writes. */ wmb(); - __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) | - (event ? (1 << HCA_E_BIT) : 0) | - (op_modifier << HCR_OPMOD_SHIFT) | - op), dev->hcr + 6 * 4); + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (event ? (1 << HCA_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), dev->hcr + 6 * 4); out: up(&dev->cmd.hcr_sem); @@ -273,12 +275,14 @@ static int mthca_cmd_poll(struct mthca_dev *dev, goto out; } - if (out_is_imm) { - memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64)); - be64_to_cpus(out_param); - } + if (out_is_imm) + *out_param = + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); - *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; out: up(&dev->cmd.poll_sem); @@ -431,6 +435,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev, timeout, status); } +int mthca_cmd_init(struct mthca_dev *dev) +{ + sema_init(&dev->cmd.hcr_sem, 1); + sema_init(&dev->cmd.poll_sem, 1); + dev->cmd.use_events = 0; + + dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE, + MTHCA_HCR_SIZE); + if (!dev->hcr) { + mthca_err(dev, "Couldn't map command register."); + return -ENOMEM; + } + + dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev, + MTHCA_MAILBOX_SIZE, + MTHCA_MAILBOX_SIZE, 0); + if (!dev->cmd.pool) { + iounmap(dev->hcr); + return -ENOMEM; + } + + return 0; +} + +void mthca_cmd_cleanup(struct mthca_dev *dev) +{ + pci_pool_destroy(dev->cmd.pool); + iounmap(dev->hcr); +} + /* * Switch to using events to issue FW commands (should be called after * event queue to command events has been initialized). @@ -489,6 +523,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev) up(&dev->cmd.poll_sem); } +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask) +{ + struct mthca_mailbox *mailbox; + + mailbox = kmalloc(sizeof *mailbox, gfp_mask); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) +{ + if (!mailbox) + return; + + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) { u64 out; @@ -513,20 +574,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status) static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, u64 virt, u8 *status) { - u32 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; struct mthca_icm_iter iter; + __be64 *pages; int lg; int nent = 0; int i; int err = 0; int ts = 0, tc = 0; - inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma); - if (!inbox) - return -ENOMEM; - - memset(inbox, 0, PAGE_SIZE); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE); + pages = mailbox->buf; for (mthca_icm_first(icm, &iter); !mthca_icm_last(&iter); @@ -546,19 +607,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) { if (virt != -1) { - *((__be64 *) (inbox + nent * 4)) = - cpu_to_be64(virt); + pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; } - *((__be64 *) (inbox + nent * 4 + 2)) = - cpu_to_be64((mthca_icm_addr(&iter) + - (i << lg)) | (lg - 12)); + pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) + + (i << lg)) | (lg - 12)); ts += 1 << (lg - 10); ++tc; - if (nent == PAGE_SIZE / 16) { - err = mthca_cmd(dev, indma, nent, 0, op, + if (nent == MTHCA_MAILBOX_SIZE / 16) { + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); if (err || *status) goto out; @@ -568,7 +627,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } if (nent) - err = mthca_cmd(dev, indma, nent, 0, op, + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); switch (op) { @@ -585,7 +644,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } out: - pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -606,8 +665,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; int err = 0; u8 lg; @@ -625,12 +684,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) #define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 #define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 - outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma); - if (!outbox) { - return -ENOMEM; - } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW, CMD_TIME_CLASS_A, status); if (err) @@ -681,15 +740,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) } out: - pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u8 info; u32 *outbox; - dma_addr_t outdma; int err = 0; #define ENABLE_LAM_OUT_SIZE 0x100 @@ -700,11 +759,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) #define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) #define ENABLE_LAM_INFO_ECC_MASK 0x3 - outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM, CMD_TIME_CLASS_C, status); if (err) @@ -733,7 +793,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) (unsigned long long) dev->ddr_end); out: - pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -744,9 +804,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status) int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u8 info; u32 *outbox; - dma_addr_t outdma; int err = 0; #define QUERY_DDR_OUT_SIZE 0x100 @@ -757,11 +817,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) #define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) #define QUERY_DDR_INFO_ECC_MASK 0x3 - outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR, CMD_TIME_CLASS_A, status); if (err) @@ -787,15 +848,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) (unsigned long long) dev->ddr_end); out: - pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, struct mthca_dev_lim *dev_lim, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; u8 field; u16 size; int err; @@ -860,11 +921,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, #define QUERY_DEV_LIM_LAMR_OFFSET 0x9f #define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 - outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM, CMD_TIME_CLASS_A, status); if (err) @@ -971,6 +1033,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz); + mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz); mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz); mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", @@ -1020,15 +1084,43 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, } out: - pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } +static void get_board_id(void *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + memset(board_id, 0, MTHCA_BOARD_ID_LEN); + + if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && + be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. + */ + for (i = 0; i < 4; ++i) + ((u32 *) board_id)[i] = + swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + } +} + int mthca_QUERY_ADAPTER(struct mthca_dev *dev, struct mthca_adapter *adapter, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; int err; #define QUERY_ADAPTER_OUT_SIZE 0x100 @@ -1036,24 +1128,29 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, #define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04 #define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 - outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER, CMD_TIME_CLASS_A, status); if (err) goto out; - MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); - MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); + MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); + MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); - MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + + get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, + adapter->board_id); out: - pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1061,8 +1158,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev, struct mthca_init_hca_param *param, u8 *status) { - u32 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; + __be32 *inbox; int err; #define INIT_HCA_IN_SIZE 0x200 @@ -1102,9 +1199,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev, #define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10) #define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18) - inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, INIT_HCA_IN_SIZE); @@ -1167,10 +1265,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev, MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); } - err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA, - HZ, status); + err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1178,8 +1275,8 @@ int mthca_INIT_IB(struct mthca_dev *dev, struct mthca_init_ib_param *param, int port, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; u32 flags; @@ -1188,10 +1285,8 @@ int mthca_INIT_IB(struct mthca_dev *dev, #define INIT_IB_FLAG_SIG (1 << 18) #define INIT_IB_FLAG_NG (1 << 17) #define INIT_IB_FLAG_G0 (1 << 16) -#define INIT_IB_FLAG_1X (1 << 8) -#define INIT_IB_FLAG_4X (1 << 9) -#define INIT_IB_FLAG_12X (1 << 11) #define INIT_IB_VL_SHIFT 4 +#define INIT_IB_PORT_WIDTH_SHIFT 8 #define INIT_IB_MTU_SHIFT 12 #define INIT_IB_MAX_GID_OFFSET 0x06 #define INIT_IB_MAX_PKEY_OFFSET 0x0a @@ -1199,19 +1294,19 @@ int mthca_INIT_IB(struct mthca_dev *dev, #define INIT_IB_NODE_GUID_OFFSET 0x18 #define INIT_IB_SI_GUID_OFFSET 0x20 - inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, INIT_IB_IN_SIZE); flags = 0; - flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0; - flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0; flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0; flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0; flags |= param->vl_cap << INIT_IB_VL_SHIFT; + flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT; flags |= param->mtu_cap << INIT_IB_MTU_SHIFT; MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET); @@ -1221,10 +1316,10 @@ int mthca_INIT_IB(struct mthca_dev *dev, MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET); MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); - err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB, + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB, CMD_TIME_CLASS_A, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1241,8 +1336,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status) int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, int port, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; u32 flags = 0; @@ -1253,9 +1348,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, #define SET_IB_CAP_MASK_OFFSET 0x04 #define SET_IB_SI_GUID_OFFSET 0x08 - inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, SET_IB_IN_SIZE); @@ -1266,10 +1362,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET); MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); - err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB, + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB, CMD_TIME_CLASS_B, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1280,20 +1376,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) { - u64 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; + __be64 *inbox; int err; - inbox = pci_alloc_consistent(dev->pdev, 16, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; inbox[0] = cpu_to_be64(virt); inbox[1] = cpu_to_be64(dma_addr); - err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status); + err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, + CMD_TIME_CLASS_B, status); - pci_free_consistent(dev->pdev, 16, inbox, indma); + mthca_free_mailbox(dev, mailbox); if (!err) mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", @@ -1338,69 +1436,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, return 0; } -int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mpt_entry, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT, - CMD_TIME_CLASS_B, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT, + CMD_TIME_CLASS_B, status); } -int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status) { - dma_addr_t outdma = 0; - int err; - - if (mpt_entry) { - outdma = pci_map_single(dev->pdev, mpt_entry, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - } - - err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry, - CMD_HW2SW_MPT, - CMD_TIME_CLASS_B, status); - - if (mpt_entry) - pci_unmap_single(dev->pdev, outdma, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, + !mailbox, CMD_HW2SW_MPT, + CMD_TIME_CLASS_B, status); } -int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int num_mtt, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mtt_entry, - (num_mtt + 2) * 8, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT, - CMD_TIME_CLASS_B, status); - - pci_unmap_single(dev->pdev, indma, - (num_mtt + 2) * 8, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT, + CMD_TIME_CLASS_B, status); } int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) @@ -1418,92 +1473,59 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); } -int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, eq_context, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ, + CMD_TIME_CLASS_A, status); } -int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, eq_context, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, eq_num, 0, - CMD_HW2SW_EQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0, + CMD_HW2SW_EQ, + CMD_TIME_CLASS_A, status); } -int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, cq_context, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ, + return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ, CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); - return err; } -int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status) { - dma_addr_t outdma = 0; - int err; + return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0, + CMD_HW2SW_CQ, + CMD_TIME_CLASS_A, status); +} - outdma = pci_map_single(dev->pdev, cq_context, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ, + CMD_TIME_CLASS_A, status); +} - err = mthca_cmd_box(dev, 0, outdma, cq_num, 0, - CMD_HW2SW_CQ, - CMD_TIME_CLASS_A, status); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0, + CMD_HW2SW_SRQ, + CMD_TIME_CLASS_A, status); +} - pci_unmap_single(dev->pdev, outdma, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) +{ + return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, + CMD_TIME_CLASS_B, status); } int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, void *qp_context, u32 optmask, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status) { static const u16 op[] = { @@ -1520,36 +1542,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE }; u8 op_mod = 0; - - dma_addr_t indma; + int my_mailbox = 0; int err; if (trans < 0 || trans >= ARRAY_SIZE(op)) return -EINVAL; if (trans == MTHCA_TRANS_ANY2RST) { - indma = 0; op_mod = 3; /* don't write outbox, any->reset */ /* For debugging */ - qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, - &indma); - op_mod = 2; /* write outbox, any->reset */ + if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (!IS_ERR(mailbox)) { + my_mailbox = 1; + op_mod = 2; /* write outbox, any->reset */ + } else + mailbox = NULL; + } } else { - indma = pci_map_single(dev->pdev, qp_context, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - if (0) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" opt param mask: %08x\n", be32_to_cpup(qp_context)); + printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) printk(" [%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + printk(" %08x", + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } @@ -1557,55 +1577,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, } if (trans == MTHCA_TRANS_ANY2RST) { - err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num, - op_mod, op[trans], CMD_TIME_CLASS_C, status); + err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + (!!is_ee << 24) | num, op_mod, + op[trans], CMD_TIME_CLASS_C, status); - if (0) { + if (0 && mailbox) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" %08x\n", be32_to_cpup(qp_context)); + printk(" %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + printk(" %08x", + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } } } else - err = mthca_cmd(dev, indma, (!!is_ee << 24) | num, + err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num, op_mod, op[trans], CMD_TIME_CLASS_C, status); - if (trans != MTHCA_TRANS_ANY2RST) - pci_unmap_single(dev->pdev, indma, - MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE); - else - pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, - qp_context, indma); + if (my_mailbox) + mthca_free_mailbox(dev, mailbox); + return err; } int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - void *qp_context, u8 *status) + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, qp_context, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0, - CMD_QUERY_QPEE, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0, + CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status); } int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, @@ -1635,11 +1639,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc* in_wc, struct ib_grh* in_grh, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status) { - void *box; - dma_addr_t dma; + struct mthca_mailbox *inmailbox, *outmailbox; + void *inbox; int err; u32 in_modifier = port; u8 op_modifier = 0; @@ -1653,11 +1657,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, #define MAD_IFC_PKEY_OFFSET 0x10e #define MAD_IFC_GRH_OFFSET 0x140 - box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma); - if (!box) - return -ENOMEM; + inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; - memcpy(box, in_mad, 256); + outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(outmailbox)) { + mthca_free_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); /* * Key check traps can't be generated unless we have in_wc to @@ -1671,97 +1682,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, if (in_wc) { u8 val; - memset(box + 256, 0, 256); + memset(inbox + 256, 0, 256); - MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); - MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); + MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); + MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); val = in_wc->sl << 4; - MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET); val = in_wc->dlid_path_bits | (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); - MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET); - MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET); - MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); + MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) - memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40); + memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); op_modifier |= 0x10; in_modifier |= in_wc->slid << 16; } - err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier, + err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, CMD_MAD_IFC, CMD_TIME_CLASS_C, status); if (!err && !*status) - memcpy(response_mad, box + 512, 256); + memcpy(response_mad, outmailbox->buf, 256); - pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma); + mthca_free_mailbox(dev, inmailbox); + mthca_free_mailbox(dev, outmailbox); return err; } -int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status) +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, mgm, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, index, 0, - CMD_READ_MGM, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, index, 0, + CMD_READ_MGM, CMD_TIME_CLASS_A, status); } -int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status) +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mgm, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM, + CMD_TIME_CLASS_A, status); } -int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, - u8 *status) +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status) { - dma_addr_t indma; u64 imm; int err; - indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH, + err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, CMD_TIME_CLASS_A, status); - *hash = imm; - pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE); + *hash = imm; return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index adf039b3c540..65f976a13e02 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,10 +36,9 @@ #ifndef MTHCA_CMD_H #define MTHCA_CMD_H -#include <ib_verbs.h> +#include <rdma/ib_verbs.h> -#define MTHCA_CMD_MAILBOX_ALIGN 16UL -#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1) +#define MTHCA_MAILBOX_SIZE 4096 enum { /* command completed successfully: */ @@ -112,6 +112,11 @@ enum { DEV_LIM_FLAG_UD_MULTI = 1 << 21, }; +struct mthca_mailbox { + dma_addr_t dma; + void *buf; +}; + struct mthca_dev_lim { int max_srq_sz; int max_qp_sz; @@ -179,10 +184,11 @@ struct mthca_dev_lim { }; struct mthca_adapter { - u32 vendor_id; - u32 device_id; - u32 revision_id; - u8 inta_pin; + u32 vendor_id; + u32 device_id; + u32 revision_id; + char board_id[MTHCA_BOARD_ID_LEN]; + u8 inta_pin; }; struct mthca_init_hca_param { @@ -214,8 +220,7 @@ struct mthca_init_hca_param { }; struct mthca_init_ib_param { - int enable_1x; - int enable_4x; + int port_width; int vl_cap; int mtu_cap; u16 gid_cap; @@ -235,11 +240,17 @@ struct mthca_set_ib_param { u32 cap_mask; }; +int mthca_cmd_init(struct mthca_dev *dev); +void mthca_cmd_cleanup(struct mthca_dev *dev); int mthca_cmd_use_events(struct mthca_dev *dev); void mthca_cmd_use_polling(struct mthca_dev *dev); void mthca_cmd_event(struct mthca_dev *dev, u16 token, u8 status, u64 out_param); +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask); +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); + int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); @@ -270,41 +281,44 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status); int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, u8 *status); -int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status); -int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status); -int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int num_mtt, u8 *status); int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status); int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, int eq_num, u8 *status); -int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status); -int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status); -int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); -int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status); int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, void *qp_context, u32 optmask, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status); int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - void *qp_context, u8 *status); + struct mthca_mailbox *mailbox, u8 *status); int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, u8 *status); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc* in_wc, struct ib_grh* in_grh, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status); -int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status); -int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status); -int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, - u8 *status); +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status); int mthca_NOP(struct mthca_dev *dev, u8 *status); -#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN)) - #endif /* MTHCA_CMD_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index b4bfbbfe2c3d..afa56bfaab2e 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 2bf347b84c31..8600b6c3e0c2 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,7 +39,7 @@ #include <linux/init.h> #include <linux/hardirq.h> -#include <ib_pack.h> +#include <rdma/ib_pack.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -53,21 +57,21 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. */ struct mthca_cq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 error_eqn; /* Tavor only */ - u32 comp_eqn; - u32 pd; - u32 lkey; - u32 last_notified_index; - u32 solicit_producer_index; - u32 consumer_index; - u32 producer_index; - u32 cqn; - u32 ci_db; /* Arbel only */ - u32 state_db; /* Arbel only */ - u32 reserved; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 error_eqn; /* Tavor only */ + __be32 comp_eqn; + __be32 pd; + __be32 lkey; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + __be32 cqn; + __be32 ci_db; /* Arbel only */ + __be32 state_db; /* Arbel only */ + u32 reserved; } __attribute__((packed)); #define MTHCA_CQ_STATUS_OK ( 0 << 28) @@ -106,31 +110,31 @@ enum { }; struct mthca_cqe { - u32 my_qpn; - u32 my_ee; - u32 rqpn; - u16 sl_g_mlpath; - u16 rlid; - u32 imm_etype_pkey_eec; - u32 byte_cnt; - u32 wqe; - u8 opcode; - u8 is_send; - u8 reserved; - u8 owner; + __be32 my_qpn; + __be32 my_ee; + __be32 rqpn; + __be16 sl_g_mlpath; + __be16 rlid; + __be32 imm_etype_pkey_eec; + __be32 byte_cnt; + __be32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; }; struct mthca_err_cqe { - u32 my_qpn; - u32 reserved1[3]; - u8 syndrome; - u8 reserved2; - u16 db_cnt; - u32 reserved3; - u32 wqe; - u8 opcode; - u8 reserved4[2]; - u8 owner; + __be32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 reserved2; + __be16 db_cnt; + u32 reserved3; + __be32 wqe; + u8 opcode; + u8 reserved4[2]; + u8 owner; }; #define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) @@ -171,6 +175,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; } +static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) +{ + __be32 *cqe = cqe_ptr; + + (void) cqe; /* avoid warning if mthca_dbg compiled away... */ + mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]), + be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]), + be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7])); +} + /* * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index * should be correct before calling update_cons_index(). @@ -178,7 +193,7 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, int incr) { - u32 doorbell[2]; + __be32 doorbell[2]; if (mthca_is_memfree(dev)) { *cq->set_ci_db = cpu_to_be32(cq->cons_index); @@ -209,7 +224,8 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq) { struct mthca_cq *cq; struct mthca_cqe *cqe; @@ -250,8 +266,11 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) */ while (prod_index > cq->cons_index) { cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); - if (cqe->my_qpn == cpu_to_be32(qpn)) + if (cqe->my_qpn == cpu_to_be32(qpn)) { + if (srq) + mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; + } else if (nfreed) memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & cq->ibcq.cqe), @@ -278,18 +297,14 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, { int err; int dbd; - u32 new_wqe; - - if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { - int j; - - mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n", - cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), - be32_to_cpu(cqe->wqe)); - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); + __be32 new_wqe; + + if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { + mthca_dbg(dev, "local QP operation err " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", + be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe), + cq->cqn, cq->cons_index); + dump_cqe(dev, cqe); } /* @@ -356,6 +371,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, break; } + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. + */ + if (mthca_is_memfree(dev)) + return 0; + err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); if (err) return err; @@ -377,15 +399,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, return 0; } -static void dump_cqe(struct mthca_cqe *cqe) -{ - int j; - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); -} - static inline int mthca_poll_one(struct mthca_dev *dev, struct mthca_cq *cq, struct mthca_qp **cur_qp, @@ -414,8 +427,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe)); - - dump_cqe(cqe); + dump_cqe(dev, cqe); } is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == @@ -447,23 +459,27 @@ static inline int mthca_poll_one(struct mthca_dev *dev, >> wq->wqe_shift); entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibqp.srq) { + struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); + u32 wqe = be32_to_cpu(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe); } else { wq = &(*cur_qp)->rq; wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; entry->wr_id = (*cur_qp)->wrid[wqe_index]; } - if (wq->last_comp < wqe_index) - wq->tail += wqe_index - wq->last_comp; - else - wq->tail += wqe_index + wq->max - wq->last_comp; - - wq->last_comp = wqe_index; + if (wq) { + if (wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; - if (0) - mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n", - is_send ? "Send" : "Receive", - (*cur_qp)->qpn, wqe_index, wq->max); + wq->last_comp = wqe_index; + } if (is_error) { err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, @@ -581,13 +597,13 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); - doorbell[1] = 0xffffffff; + doorbell[1] = (__force __be32) 0xffffffff; mthca_write64(doorbell, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, @@ -599,9 +615,9 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) { struct mthca_cq *cq = to_mcq(ibcq); - u32 doorbell[2]; + __be32 doorbell[2]; u32 sn; - u32 ci; + __be32 ci; sn = cq->arm_sn & 3; ci = cpu_to_be32(cq->cons_index); @@ -634,119 +650,16 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) { - int i; - int size; - - if (cq->is_direct) - pci_free_consistent(dev->pdev, - (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, - cq->queue.direct.buf, - pci_unmap_addr(&cq->queue.direct, - mapping)); - else { - size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE; - for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) - if (cq->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - cq->queue.page_list[i].buf, - pci_unmap_addr(&cq->queue.page_list[i], - mapping)); - - kfree(cq->queue.page_list); - } -} - -static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size, - struct mthca_cq *cq) -{ - int err = -ENOMEM; - int npages, shift; - u64 *dma_list = NULL; - dma_addr_t t; - int i; - - if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) { - cq->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - cq->queue.direct.buf = pci_alloc_consistent(dev->pdev, - size, &t); - if (!cq->queue.direct.buf) - return -ENOMEM; - - pci_unmap_addr_set(&cq->queue.direct, mapping, t); - - memset(cq->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - cq->is_direct = 0; - npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - shift = PAGE_SHIFT; - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - return -ENOMEM; - - cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list, - GFP_KERNEL); - if (!cq->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) - cq->queue.page_list[i].buf = NULL; - - for (i = 0; i < npages; ++i) { - cq->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); - if (!cq->queue.page_list[i].buf) - goto err_free; - - dma_list[i] = t; - pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t); - - memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE); - } - } - - err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, - dma_list, shift, npages, - 0, size, - MTHCA_MPT_FLAG_LOCAL_WRITE | - MTHCA_MPT_FLAG_LOCAL_READ, - &cq->mr); - if (err) - goto err_free; - - kfree(dma_list); - - return 0; - -err_free: - mthca_free_cq_buf(dev, cq); - -err_out: - kfree(dma_list); - - return err; + mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, + &cq->queue, cq->is_direct, &cq->mr); } int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq) { int size = nent * MTHCA_CQ_ENTRY_SIZE; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_cq_context *cq_context; int err = -ENOMEM; u8 status; @@ -754,45 +667,51 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, might_sleep(); - cq->ibcq.cqe = nent - 1; + cq->ibcq.cqe = nent - 1; + cq->is_kernel = !ctx; cq->cqn = mthca_alloc(&dev->cq_table.alloc); if (cq->cqn == -1) return -ENOMEM; if (mthca_is_memfree(dev)) { - cq->arm_sn = 1; - err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); if (err) goto err_out; - err = -ENOMEM; + if (cq->is_kernel) { + cq->arm_sn = 1; - cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, - cq->cqn, &cq->set_ci_db); - if (cq->set_ci_db_index < 0) - goto err_out_icm; + err = -ENOMEM; - cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, - cq->cqn, &cq->arm_db); - if (cq->arm_db_index < 0) - goto err_out_ci; + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; + + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } } - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_mailbox; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_arm; - cq_context = MAILBOX_ALIGN(mailbox); + cq_context = mailbox->buf; - err = mthca_alloc_cq_buf(dev, size, cq); - if (err) - goto err_out_mailbox; + if (cq->is_kernel) { + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE, + &cq->queue, &cq->is_direct, + &dev->driver_pd, 1, &cq->mr); + if (err) + goto err_out_mailbox; - for (i = 0; i < nent; ++i) - set_cqe_hw(get_cqe(cq, i)); + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe(cq, i)); + } spin_lock_init(&cq->lock); atomic_set(&cq->refcount, 1); @@ -802,12 +721,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); - cq_context->start = cpu_to_be64(0); - cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | - dev->driver_uar.index); + cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); - cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); + cq_context->pd = cpu_to_be32(pdn); cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); cq_context->cqn = cpu_to_be32(cq->cqn); @@ -816,7 +737,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->state_db = cpu_to_be32(cq->arm_db_index); } - err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status); + err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); if (err) { mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); goto err_out_free_mr; @@ -840,22 +761,23 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq->cons_index = 0; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_free_mr: - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); + if (cq->is_kernel) + mthca_free_cq_buf(dev, cq); err_out_mailbox: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); - if (mthca_is_memfree(dev)) +err_out_arm: + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); err_out_ci: - if (mthca_is_memfree(dev)) + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); err_out_icm: @@ -870,32 +792,31 @@ err_out: void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq) { - void *mailbox; + struct mthca_mailbox *mailbox; int err; u8 status; might_sleep(); - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { mthca_warn(dev, "No memory for mailbox to free CQ.\n"); return; } - err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status); + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); if (err) mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); else if (status) - mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); if (0) { - u32 *ctx = MAILBOX_ALIGN(mailbox); + __be32 *ctx = mailbox->buf; int j; printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", - cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); + cq->cqn, cq->cons_index, + cq->is_kernel ? !!next_cqe_sw(cq) : 0); for (j = 0; j < 16; ++j) printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); } @@ -913,17 +834,17 @@ void mthca_free_cq(struct mthca_dev *dev, atomic_dec(&cq->refcount); wait_event(cq->wait, !atomic_read(&cq->refcount)); - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); - - if (mthca_is_memfree(dev)) { - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); - mthca_table_put(dev, dev->cq_table.table, cq->cqn); + if (cq->is_kernel) { + mthca_free_cq_buf(dev, cq); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } } + mthca_table_put(dev, dev->cq_table.table, cq->cqn); mthca_free(&dev->cq_table.alloc, cq->cqn); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } int __devinit mthca_init_cq_table(struct mthca_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index e3d79e267dc9..7bff5a8425f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -46,8 +50,8 @@ #define DRV_NAME "ib_mthca" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.06-pre" -#define DRV_RELDATE "November 8, 2004" +#define DRV_VERSION "0.06" +#define DRV_RELDATE "June 23, 2005" enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, @@ -65,6 +69,10 @@ enum { }; enum { + MTHCA_BOARD_ID_LEN = 64 +}; + +enum { MTHCA_EQ_CONTEXT_SIZE = 0x40, MTHCA_CQ_CONTEXT_SIZE = 0x40, MTHCA_QP_CONTEXT_SIZE = 0x200, @@ -98,6 +106,7 @@ enum { }; struct mthca_cmd { + struct pci_pool *pool; int use_events; struct semaphore hcr_sem; struct semaphore poll_sem; @@ -139,6 +148,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u8 port_width_cap; }; struct mthca_alloc { @@ -208,6 +218,13 @@ struct mthca_cq_table { struct mthca_icm_table *table; }; +struct mthca_srq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array srq; + struct mthca_icm_table *table; +}; + struct mthca_qp_table { struct mthca_alloc alloc; u32 rdb_base; @@ -243,6 +260,7 @@ struct mthca_dev { unsigned long device_cap_flags; u32 rev_id; + char board_id[MTHCA_BOARD_ID_LEN]; /* firmware info */ u64 fw_ver; @@ -288,6 +306,7 @@ struct mthca_dev { struct mthca_mr_table mr_table; struct mthca_eq_table eq_table; struct mthca_cq_table cq_table; + struct mthca_srq_table srq_table; struct mthca_qp_table qp_table; struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; @@ -328,14 +347,13 @@ extern void __buggy_use_of_MTHCA_PUT(void); #define MTHCA_PUT(dest, source, offset) \ do { \ - __typeof__(source) *__p = \ - (__typeof__(source) *) ((char *) (dest) + (offset)); \ + void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ - case 1: *__p = (source); break; \ - case 2: *__p = cpu_to_be16(source); break; \ - case 4: *__p = cpu_to_be32(source); break; \ - case 8: *__p = cpu_to_be64(source); break; \ - default: __buggy_use_of_MTHCA_PUT(); \ + case 1: *(u8 *) __d = (source); break; \ + case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ + case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ + case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ + default: __buggy_use_of_MTHCA_PUT(); \ } \ } while (0) @@ -351,12 +369,18 @@ int mthca_array_set(struct mthca_array *array, int index, void *value); void mthca_array_clear(struct mthca_array *array, int index); int mthca_array_init(struct mthca_array *array, int nent); void mthca_array_cleanup(struct mthca_array *array, int nent); +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr); +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr); int mthca_init_uar_table(struct mthca_dev *dev); int mthca_init_pd_table(struct mthca_dev *dev); int mthca_init_mr_table(struct mthca_dev *dev); int mthca_init_eq_table(struct mthca_dev *dev); int mthca_init_cq_table(struct mthca_dev *dev); +int mthca_init_srq_table(struct mthca_dev *dev); int mthca_init_qp_table(struct mthca_dev *dev); int mthca_init_av_table(struct mthca_dev *dev); int mthca_init_mcg_table(struct mthca_dev *dev); @@ -366,6 +390,7 @@ void mthca_cleanup_pd_table(struct mthca_dev *dev); void mthca_cleanup_mr_table(struct mthca_dev *dev); void mthca_cleanup_eq_table(struct mthca_dev *dev); void mthca_cleanup_cq_table(struct mthca_dev *dev); +void mthca_cleanup_srq_table(struct mthca_dev *dev); void mthca_cleanup_qp_table(struct mthca_dev *dev); void mthca_cleanup_av_table(struct mthca_dev *dev); void mthca_cleanup_mcg_table(struct mthca_dev *dev); @@ -376,9 +401,15 @@ void mthca_unregister_device(struct mthca_dev *dev); int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len); +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr); int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_mr *mr); int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, @@ -405,11 +436,24 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq); void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq); void mthca_cq_event(struct mthca_dev *dev, u32 cqn); -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn); +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq); + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq); +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type); +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); @@ -423,19 +467,21 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, u32 *new_wqe); + int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp); diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index 821039a49049..dd9a44d170c9 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -56,13 +58,13 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writeq((__force u64) val, dest); } -static inline void mthca_write64(u32 val[2], void __iomem *dest, +static inline void mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { __raw_writeq(*(u64 *) val, dest); } -static inline void mthca_write_db_rec(u32 val[2], u32 *db) +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) { *(u64 *) db = *(u64 *) val; } @@ -85,18 +87,18 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writel(((__force u32 *) &val)[1], dest + 4); } -static inline void mthca_write64(u32 val[2], void __iomem *dest, +static inline void mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { unsigned long flags; spin_lock_irqsave(doorbell_lock, flags); - __raw_writel(val[0], dest); - __raw_writel(val[1], dest + 4); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); spin_unlock_irqrestore(doorbell_lock, flags); } -static inline void mthca_write_db_rec(u32 val[2], u32 *db) +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) { db[0] = val[0]; wmb(); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index f46d615d396f..18f0981eb0c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -51,18 +52,18 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. */ struct mthca_eq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 tavor_pd; /* reserved for Arbel */ - u8 reserved1[3]; - u8 intr; - u32 arbel_pd; /* lost_count for Tavor */ - u32 lkey; - u32 reserved2[2]; - u32 consumer_index; - u32 producer_index; - u32 reserved3[4]; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 tavor_pd; /* reserved for Arbel */ + u8 reserved1[3]; + u8 intr; + __be32 arbel_pd; /* lost_count for Tavor */ + __be32 lkey; + u32 reserved2[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved3[4]; } __attribute__((packed)); #define MTHCA_EQ_STATUS_OK ( 0 << 28) @@ -127,28 +128,28 @@ struct mthca_eqe { union { u32 raw[6]; struct { - u32 cqn; + __be32 cqn; } __attribute__((packed)) comp; struct { - u16 reserved1; - u16 token; - u32 reserved2; - u8 reserved3[3]; - u8 status; - u64 out_param; + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; } __attribute__((packed)) cmd; struct { - u32 qpn; + __be32 qpn; } __attribute__((packed)) qp; struct { - u32 cqn; - u32 reserved1; - u8 reserved2[3]; - u8 syndrome; + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; } __attribute__((packed)) cq_err; struct { - u32 reserved1[2]; - u32 port; + u32 reserved1[2]; + __be32 port; } __attribute__((packed)) port_change; } event; u8 reserved3[3]; @@ -167,7 +168,7 @@ static inline u64 async_mask(struct mthca_dev *dev) static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn); doorbell[1] = cpu_to_be32(ci & (eq->nent - 1)); @@ -190,8 +191,8 @@ static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u { /* See comment in tavor_set_eq_ci() above. */ wmb(); - __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base + - eq->eqn * 8); + __raw_writel((__force u32) cpu_to_be32(ci), + dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } @@ -206,7 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); doorbell[1] = 0; @@ -224,7 +225,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) { if (!mthca_is_memfree(dev)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); doorbell[1] = cpu_to_be32(cqn); @@ -469,7 +470,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, PAGE_SIZE; u64 *dma_list = NULL; dma_addr_t t; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_eq_context *eq_context; int err = -ENOMEM; int i; @@ -494,17 +495,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, if (!dma_list) goto err_out_free; - mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) goto err_out_free; - eq_context = MAILBOX_ALIGN(mailbox); + eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = pci_alloc_consistent(dev->pdev, - PAGE_SIZE, &t); + eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); if (!eq->page_list[i].buf) - goto err_out_free; + goto err_out_free_pages; dma_list[i] = t; pci_unmap_addr_set(&eq->page_list[i], mapping, t); @@ -517,7 +517,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq->eqn = mthca_alloc(&dev->eq_table.alloc); if (eq->eqn == -1) - goto err_out_free; + goto err_out_free_pages; err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, dma_list, PAGE_SHIFT, npages, @@ -548,7 +548,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq_context->intr = intr; eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); - err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status); + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); if (err) { mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); goto err_out_free_mr; @@ -561,7 +561,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, } kfree(dma_list); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); eq->eqn_mask = swab32(1 << eq->eqn); eq->cons_index = 0; @@ -579,17 +579,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, err_out_free_eq: mthca_free(&dev->eq_table.alloc, eq->eqn); - err_out_free: + err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - eq->page_list[i].buf, - pci_unmap_addr(&eq->page_list[i], - mapping)); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + pci_unmap_addr(&eq->page_list[i], + mapping)); + + mthca_free_mailbox(dev, mailbox); + err_out_free: kfree(eq->page_list); kfree(dma_list); - kfree(mailbox); err_out: return err; @@ -598,25 +600,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, static void mthca_free_eq(struct mthca_dev *dev, struct mthca_eq *eq) { - void *mailbox = NULL; + struct mthca_mailbox *mailbox; int err; u8 status; int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / PAGE_SIZE; int i; - mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) return; - err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox), - eq->eqn, &status); + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); if (err) mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); if (status) - mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status); dev->eq_table.arm_mask &= ~eq->eqn_mask; @@ -625,7 +624,7 @@ static void mthca_free_eq(struct mthca_dev *dev, for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4)); + printk(" %08x", be32_to_cpup(mailbox->buf + i * 4)); if ((i + 1) % 4 == 0) printk("\n"); } @@ -638,7 +637,7 @@ static void mthca_free_eq(struct mthca_dev *dev, pci_unmap_addr(&eq->page_list[i], mapping)); kfree(eq->page_list); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } static void mthca_free_irqs(struct mthca_dev *dev) @@ -709,8 +708,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, dev->clr_base); @@ -721,8 +719,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) dev->fw.arbel.eq_set_ci_base, MTHCA_EQ_SET_CI_SIZE, &dev->eq_regs.arbel.eq_set_ci_base)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, dev->eq_regs.arbel.eq_arm); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7df223642015..9804174f7f3c 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,9 +34,9 @@ * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <ib_verbs.h> -#include <ib_mad.h> -#include <ib_smi.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -192,7 +194,7 @@ int mthca_process_mad(struct ib_device *ibdev, { int err; u8 status; - u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE; + u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index d40590356df8..3241d6c9dc11 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,7 +35,6 @@ */ #include <linux/config.h> -#include <linux/version.h> #include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> @@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); #endif /* CONFIG_PCI_MSI */ static const char mthca_version[] __devinitdata = - "ib_mthca: Mellanox InfiniBand HCA driver v" + DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static struct mthca_profile default_profile = { @@ -170,6 +171,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_mrws = dev_lim->reserved_mrws; mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; + mdev->limits.port_width_cap = dev_lim->max_port_width; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. @@ -211,7 +213,6 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - struct mthca_adapter adapter; err = mthca_SYS_EN(mdev, &status); if (err) { @@ -252,6 +253,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.uarc_size = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if (err < 0) @@ -269,26 +272,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) goto err_disable; } - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); - if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_close; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; - goto err_close; - } - - mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; - return 0; -err_close: - mthca_CLOSE_HCA(mdev, 0, &status); - err_disable: mthca_SYS_DIS(mdev, &status); @@ -441,15 +426,29 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev, } mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, - dev_lim->cqc_entry_sz, - mdev->limits.num_cqs, - mdev->limits.reserved_cqs, 0); + dev_lim->cqc_entry_sz, + mdev->limits.num_cqs, + mdev->limits.reserved_cqs, 0); if (!mdev->cq_table.table) { mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_rdb; } + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) { + mdev->srq_table.table = + mthca_alloc_icm_table(mdev, init_hca->srqc_base, + dev_lim->srq_entry_sz, + mdev->limits.num_srqs, + mdev->limits.reserved_srqs, 0); + if (!mdev->srq_table.table) { + mthca_err(mdev, "Failed to map SRQ context memory, " + "aborting.\n"); + err = -ENOMEM; + goto err_unmap_cq; + } + } + /* * It's not strictly required, but for simplicity just map the * whole multicast group table now. The table isn't very big @@ -465,11 +464,15 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev, if (!mdev->mcg_table.table) { mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); err = -ENOMEM; - goto err_unmap_cq; + goto err_unmap_srq; } return 0; +err_unmap_srq: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + err_unmap_cq: mthca_free_icm_table(mdev, mdev->cq_table.table); @@ -505,7 +508,6 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - struct mthca_adapter adapter; u64 icm_size; u8 status; int err; @@ -550,6 +552,8 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.num_udav = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if ((int) icm_size < 0) { @@ -573,24 +577,11 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) goto err_free_icm; } - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); - if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_free_icm; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; - goto err_free_icm; - } - - mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; - return 0; err_free_icm: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); mthca_free_icm_table(mdev, mdev->cq_table.table); mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); @@ -613,12 +604,70 @@ err_disable: return err; } +static void mthca_close_hca(struct mthca_dev *mdev) +{ + u8 status; + + mthca_CLOSE_HCA(mdev, 0, &status); + + if (mthca_is_memfree(mdev)) { + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + + mthca_UNMAP_FA(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + } else + mthca_SYS_DIS(mdev, &status); +} + static int __devinit mthca_init_hca(struct mthca_dev *mdev) { + u8 status; + int err; + struct mthca_adapter adapter; + if (mthca_is_memfree(mdev)) - return mthca_init_arbel(mdev); + err = mthca_init_arbel(mdev); else - return mthca_init_tavor(mdev); + err = mthca_init_tavor(mdev); + + if (err) + return err; + + err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); + if (err) { + mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); + goto err_close; + } + if (status) { + mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_close; + } + + mdev->eq_table.inta_pin = adapter.inta_pin; + mdev->rev_id = adapter.revision_id; + memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); + + return 0; + +err_close: + mthca_close_hca(mdev); + return err; } static int __devinit mthca_setup_hca(struct mthca_dev *dev) @@ -664,7 +713,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev) goto err_pd_table_free; } - err = mthca_pd_alloc(dev, &dev->driver_pd); + err = mthca_pd_alloc(dev, 1, &dev->driver_pd); if (err) { mthca_err(dev, "Failed to create driver PD, " "aborting.\n"); @@ -708,11 +757,18 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev) goto err_cmd_poll; } + err = mthca_init_srq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "shared receive queue table, aborting.\n"); + goto err_cq_table_free; + } + err = mthca_init_qp_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "queue pair table, aborting.\n"); - goto err_cq_table_free; + goto err_srq_table_free; } err = mthca_init_av_table(dev); @@ -737,6 +793,9 @@ err_av_table_free: err_qp_table_free: mthca_cleanup_qp_table(dev); +err_srq_table_free: + mthca_cleanup_srq_table(dev); + err_cq_table_free: mthca_cleanup_cq_table(dev); @@ -843,33 +902,6 @@ static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev) return 0; } -static void mthca_close_hca(struct mthca_dev *mdev) -{ - u8 status; - - mthca_CLOSE_HCA(mdev, 0, &status); - - if (mthca_is_memfree(mdev)) { - mthca_free_icm_table(mdev, mdev->cq_table.table); - mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); - mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); - mthca_free_icm_table(mdev, mdev->qp_table.qp_table); - mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); - mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); - mthca_unmap_eq_icm(mdev); - - mthca_UNMAP_ICM_AUX(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); - - mthca_UNMAP_FA(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); - - if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) - mthca_DISABLE_LAM(mdev, &status); - } else - mthca_SYS_DIS(mdev, &status); -} - /* Types of supported HCA */ enum { TAVOR, /* MT23108 */ @@ -886,9 +918,9 @@ static struct { int is_memfree; int is_pcie; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 2), .is_memfree = 0, .is_pcie = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 6, 2), .is_memfree = 0, .is_pcie = 1 }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 0, 1), .is_memfree = 1, .is_pcie = 1 }, + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } }; @@ -927,13 +959,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, */ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || pci_resource_len(pdev, 0) != 1 << 20) { - dev_err(&pdev->dev, "Missing DCS, aborting."); + dev_err(&pdev->dev, "Missing DCS, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) || pci_resource_len(pdev, 2) != 1 << 23) { - dev_err(&pdev->dev, "Missing UAR, aborting."); + dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } @@ -1004,25 +1036,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, !pci_enable_msi(pdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI; - sema_init(&mdev->cmd.hcr_sem, 1); - sema_init(&mdev->cmd.poll_sem, 1); - mdev->cmd.use_events = 0; - - mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE); - if (!mdev->hcr) { - mthca_err(mdev, "Couldn't map command register, " - "aborting.\n"); - err = -ENOMEM; + if (mthca_cmd_init(mdev)) { + mthca_err(mdev, "Failed to init command interface, aborting.\n"); goto err_free_dev; } err = mthca_tune_pci(mdev); if (err) - goto err_iounmap; + goto err_cmd; err = mthca_init_hca(mdev); if (err) - goto err_iounmap; + goto err_cmd; if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n", @@ -1057,6 +1082,7 @@ err_cleanup: mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); @@ -1070,8 +1096,8 @@ err_cleanup: err_close: mthca_close_hca(mdev); -err_iounmap: - iounmap(mdev->hcr); +err_cmd: + mthca_cmd_cleanup(mdev); err_free_dev: if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) @@ -1106,6 +1132,7 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); @@ -1118,10 +1145,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) iounmap(mdev->kar); mthca_uar_free(mdev, &mdev->driver_uar); mthca_cleanup_uar_table(mdev); - mthca_close_hca(mdev); - - iounmap(mdev->hcr); + mthca_cmd_cleanup(mdev); if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); @@ -1163,7 +1188,7 @@ static struct pci_device_id mthca_pci_table[] = { MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { - .name = "ib_mthca", + .name = DRV_NAME, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 70a6553a588e..a2707605f4c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -42,10 +42,10 @@ enum { }; struct mthca_mgm { - u32 next_gid_index; - u32 reserved[3]; - u8 gid[16]; - u32 qp[MTHCA_QP_PER_MGM]; + __be32 next_gid_index; + u32 reserved[3]; + u8 gid[16]; + __be32 qp[MTHCA_QP_PER_MGM]; }; static const u8 zero_gid[16]; /* automatically initialized to 0 */ @@ -66,22 +66,23 @@ static const u8 zero_gid[16]; /* automatically initialized to 0 */ * entry in hash chain and *mgm holds end of hash chain. */ static int find_mgm(struct mthca_dev *dev, - u8 *gid, struct mthca_mgm *mgm, + u8 *gid, struct mthca_mailbox *mgm_mailbox, u16 *hash, int *prev, int *index) { - void *mailbox; + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; u8 status; - mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) return -ENOMEM; - mgid = MAILBOX_ALIGN(mailbox); + mgid = mailbox->buf; memcpy(mgid, gid, 16); - err = mthca_MGID_HASH(dev, mgid, hash, &status); + err = mthca_MGID_HASH(dev, mailbox, hash, &status); if (err) goto out; if (status) { @@ -93,17 +94,21 @@ static int find_mgm(struct mthca_dev *dev, if (0) mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" "%04x:%04x:%04x:%04x is %04x\n", - be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]), - be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]), - be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]), - be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]), + be16_to_cpu(((__be16 *) gid)[0]), + be16_to_cpu(((__be16 *) gid)[1]), + be16_to_cpu(((__be16 *) gid)[2]), + be16_to_cpu(((__be16 *) gid)[3]), + be16_to_cpu(((__be16 *) gid)[4]), + be16_to_cpu(((__be16 *) gid)[5]), + be16_to_cpu(((__be16 *) gid)[6]), + be16_to_cpu(((__be16 *) gid)[7]), *hash); *index = *hash; *prev = -1; do { - err = mthca_READ_MGM(dev, *index, mgm, &status); + err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status); if (err) goto out; if (status) { @@ -129,14 +134,14 @@ static int find_mgm(struct mthca_dev *dev, *index = -1; out: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mthca_dev *dev = to_mdev(ibqp->device); - void *mailbox; + struct mthca_mailbox *mailbox; struct mthca_mgm *mgm; u16 hash; int index, prev; @@ -145,15 +150,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; u8 status; - mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - mgm = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; if (down_interruptible(&dev->mcg_table.sem)) return -EINTR; - err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; @@ -170,7 +175,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_READ_MGM(dev, index, mgm, &status); + err = mthca_READ_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -195,7 +200,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -206,7 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (!link) goto out; - err = mthca_READ_MGM(dev, prev, mgm, &status); + err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -217,7 +222,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->next_gid_index = cpu_to_be32(index << 5); - err = mthca_WRITE_MGM(dev, prev, mgm, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -227,14 +232,14 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) out: up(&dev->mcg_table.sem); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mthca_dev *dev = to_mdev(ibqp->device); - void *mailbox; + struct mthca_mailbox *mailbox; struct mthca_mgm *mgm; u16 hash; int prev, index; @@ -242,29 +247,29 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; u8 status; - mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - mgm = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; if (down_interruptible(&dev->mcg_table.sem)) return -EINTR; - err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; if (index == -1) { mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " "not found\n", - be16_to_cpu(((u16 *) gid->raw)[0]), - be16_to_cpu(((u16 *) gid->raw)[1]), - be16_to_cpu(((u16 *) gid->raw)[2]), - be16_to_cpu(((u16 *) gid->raw)[3]), - be16_to_cpu(((u16 *) gid->raw)[4]), - be16_to_cpu(((u16 *) gid->raw)[5]), - be16_to_cpu(((u16 *) gid->raw)[6]), - be16_to_cpu(((u16 *) gid->raw)[7])); + be16_to_cpu(((__be16 *) gid->raw)[0]), + be16_to_cpu(((__be16 *) gid->raw)[1]), + be16_to_cpu(((__be16 *) gid->raw)[2]), + be16_to_cpu(((__be16 *) gid->raw)[3]), + be16_to_cpu(((__be16 *) gid->raw)[4]), + be16_to_cpu(((__be16 *) gid->raw)[5]), + be16_to_cpu(((__be16 *) gid->raw)[6]), + be16_to_cpu(((__be16 *) gid->raw)[7])); err = -EINVAL; goto out; } @@ -285,7 +290,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->qp[loc] = mgm->qp[i - 1]; mgm->qp[i - 1] = 0; - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -304,7 +309,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (be32_to_cpu(mgm->next_gid_index) >> 5) { err = mthca_READ_MGM(dev, be32_to_cpu(mgm->next_gid_index) >> 5, - mgm, &status); + mailbox, &status); if (err) goto out; if (status) { @@ -316,7 +321,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else memset(mgm->gid, 0, 16); - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -327,7 +332,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else { /* Remove entry from AMGM */ index = be32_to_cpu(mgm->next_gid_index) >> 5; - err = mthca_READ_MGM(dev, prev, mgm, &status); + err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -338,7 +343,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->next_gid_index = cpu_to_be32(index << 5); - err = mthca_WRITE_MGM(dev, prev, mgm, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -350,7 +355,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) out: up(&dev->mcg_table.sem); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 637b30e35592..1827400f189b 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -47,6 +49,15 @@ enum { MTHCA_TABLE_CHUNK_SIZE = 1 << 18 }; +struct mthca_user_db_table { + struct semaphore mutex; + struct { + u64 uvirt; + struct scatterlist mem; + int refcount; + } page[0]; +}; + void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) { struct mthca_icm_chunk *chunk, *tmp; @@ -179,9 +190,14 @@ out: void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) { - int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + int i; u8 status; + if (!mthca_is_memfree(dev)) + return; + + i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + down(&table->mutex); if (--table->icm[i]->refcount == 0) { @@ -256,6 +272,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, { int i; + if (!mthca_is_memfree(dev)) + return; + for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) mthca_table_put(dev, table, i); } @@ -267,6 +286,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, { struct mthca_icm_table *table; int num_icm; + unsigned chunk_size; int i; u8 status; @@ -287,7 +307,11 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, table->icm[i] = NULL; for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { - table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + chunk_size = MTHCA_TABLE_CHUNK_SIZE; + if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size) + chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE; + + table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN); if (!table->icm[i]) @@ -336,14 +360,134 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) kfree(table); } -static u64 mthca_uarc_virt(struct mthca_dev *dev, int page) +static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) { return dev->uar_table.uarc_base + - dev->driver_uar.index * dev->uar_table.uarc_size + + uar->index * dev->uar_table.uarc_size + page * 4096; } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr) +{ + int ret = 0; + u8 status; + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + if (index < 0 || index > dev->uar_table.uarc_size / 8) + return -EINVAL; + + down(&db_tab->mutex); + + i = index / MTHCA_DB_REC_PER_PAGE; + + if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || + (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || + (uaddr & 4095)) { + ret = -EINVAL; + goto out; + } + + if (db_tab->page[i].refcount) { + ++db_tab->page[i].refcount; + goto out; + } + + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, + &db_tab->page[i].mem.page, NULL); + if (ret < 0) + goto out; + + db_tab->page[i].mem.length = 4096; + db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; + + ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + if (ret < 0) { + put_page(db_tab->page[i].mem.page); + goto out; + } + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i), &status); + if (!ret && status) + ret = -EINVAL; + if (ret) { + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + goto out; + } + + db_tab->page[i].uvirt = uaddr; + db_tab->page[i].refcount = 1; + +out: + up(&db_tab->mutex); + return ret; +} + +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) +{ + if (!mthca_is_memfree(dev)) + return; + + /* + * To make our bookkeeping simpler, we don't unmap DB + * pages until we clean up the whole db table. + */ + + down(&db_tab->mutex); + + --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; + + up(&db_tab->mutex); +} + +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) +{ + struct mthca_user_db_table *db_tab; + int npages; + int i; + + if (!mthca_is_memfree(dev)) + return NULL; + + npages = dev->uar_table.uarc_size / 4096; + db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); + if (!db_tab) + return ERR_PTR(-ENOMEM); + + init_MUTEX(&db_tab->mutex); + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; + } + + return db_tab; +} + +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + } + } +} + +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) { int group; int start, end, dir; @@ -399,7 +543,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) } memset(page->db_rec, 0, 4096); - ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status); + ret = mthca_MAP_ICM_page(dev, page->mapping, + mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { @@ -425,7 +570,7 @@ found: page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5)); - *db = (u32 *) &page->db_rec[j]; + *db = (__be32 *) &page->db_rec[j]; out: up(&dev->db_tab->mutex); @@ -453,7 +598,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index) if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && i >= dev->db_tab->max_group1 - 1) { - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, page->db_rec, page->mapping); @@ -522,7 +667,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev) if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) mthca_warn(dev, "Kernel UARC page %d not empty\n", i); - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, dev->db_tab->page[i].db_rec, diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index fe7be2a6bc4a..bafa51544aa3 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -136,7 +138,7 @@ enum { struct mthca_db_page { DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); - u64 *db_rec; + __be64 *db_rec; dma_addr_t mapping; }; @@ -148,7 +150,7 @@ struct mthca_db_table { struct semaphore mutex; }; -enum { +enum mthca_db_type { MTHCA_DB_TYPE_INVALID = 0x0, MTHCA_DB_TYPE_CQ_SET_CI = 0x1, MTHCA_DB_TYPE_CQ_ARM = 0x2, @@ -158,9 +160,20 @@ enum { MTHCA_DB_TYPE_GROUP_SEP = 0x7 }; +struct mthca_user_db_table; +struct mthca_uar; + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr); +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index); +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev); +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab); + int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8960fc2306be..1f97a44477f5 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -40,22 +41,28 @@ #include "mthca_cmd.h" #include "mthca_memfree.h" +struct mthca_mtt { + struct mthca_buddy *buddy; + int order; + u32 first_seg; +}; + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ struct mthca_mpt_entry { - u32 flags; - u32 page_size; - u32 key; - u32 pd; - u64 start; - u64 length; - u32 lkey; - u32 window_count; - u32 window_count_limit; - u64 mtt_seg; - u32 mtt_sz; /* Arbel only */ - u32 reserved[2]; + __be32 flags; + __be32 page_size; + __be32 key; + __be32 pd; + __be64 start; + __be64 length; + __be32 lkey; + __be32 window_count; + __be32 window_count_limit; + __be64 mtt_seg; + __be32 mtt_sz; /* Arbel only */ + u32 reserved[2]; } __attribute__((packed)); #define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) @@ -173,8 +180,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits); } -static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, - struct mthca_buddy *buddy) +static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, + struct mthca_buddy *buddy) { u32 seg = mthca_buddy_alloc(buddy, order); @@ -191,14 +198,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, return seg; } -static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order, - struct mthca_buddy* buddy) +static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, + struct mthca_buddy *buddy) { - mthca_buddy_free(buddy, seg, order); + struct mthca_mtt *mtt; + int i; - if (mthca_is_memfree(dev)) - mthca_table_put_range(dev, dev->mr_table.mtt_table, seg, - seg + (1 << order) - 1); + if (size <= 0) + return ERR_PTR(-EINVAL); + + mtt = kmalloc(sizeof *mtt, GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->buddy = buddy; + mtt->order = 0; + for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + ++mtt->order; + + mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); + if (mtt->first_seg == -1) { + kfree(mtt); + return ERR_PTR(-ENOMEM); + } + + return mtt; +} + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size) +{ + return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy); +} + +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt) +{ + if (!mtt) + return; + + mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order); + + mthca_table_put_range(dev, dev->mr_table.mtt_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + + kfree(mtt); +} + +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + struct mthca_mailbox *mailbox; + __be64 *mtt_entry; + int err = 0; + u8 status; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mtt_entry = mailbox->buf; + + while (list_len > 0) { + mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + start_index * 8); + mtt_entry[1] = 0; + for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) + mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | + MTHCA_MTT_FLAG_PRESENT); + + /* + * If we have an odd number of entries to write, add + * one more dummy entry for firmware efficiency. + */ + if (i & 1) + mtt_entry[i + 2] = 0; + + err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status); + if (err) { + mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); + goto out; + } + if (status) { + mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", + status); + err = -EINVAL; + goto out; + } + + list_len -= i; + start_index += i; + buffer_list += i; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; } static inline u32 tavor_hw_index_to_key(u32 ind) @@ -237,18 +332,20 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) return tavor_key_to_hw_index(key); } -int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_mr *mr) +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_mpt_entry *mpt_entry; u32 key; + int i; int err; u8 status; might_sleep(); - mr->order = -1; + WARN_ON(buffer_size_shift >= 32); + key = mthca_alloc(&dev->mr_table.mpt_alloc); if (key == -1) return -ENOMEM; @@ -260,201 +357,109 @@ int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, goto err_out_mpt_free; } - mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { - err = -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_table; } - mpt_entry = MAILBOX_ALIGN(mailbox); + mpt_entry = mailbox->buf; mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | - MTHCA_MPT_FLAG_PHYSICAL | MTHCA_MPT_FLAG_REGION | access); - mpt_entry->page_size = 0; + if (!mr->mtt) + mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL); + + mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); mpt_entry->key = cpu_to_be32(key); mpt_entry->pd = cpu_to_be32(pd); - mpt_entry->start = 0; - mpt_entry->length = ~0ULL; + mpt_entry->start = cpu_to_be64(iova); + mpt_entry->length = cpu_to_be64(total_size); memset(&mpt_entry->lkey, 0, sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); - err = mthca_SW2HW_MPT(dev, mpt_entry, + if (mr->mtt) + mpt_entry->mtt_seg = + cpu_to_be64(dev->mr_table.mtt_base + + mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + + if (0) { + mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + err = mthca_SW2HW_MPT(dev, mailbox, key & (dev->limits.num_mpts - 1), &status); if (err) { mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - goto err_out_table; + goto err_out_mailbox; } else if (status) { mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", status); err = -EINVAL; - goto err_out_table; + goto err_out_mailbox; } - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: mthca_free(&dev->mr_table.mpt_alloc, key); - kfree(mailbox); return err; } +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr) +{ + mr->mtt = NULL; + return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr); +} + int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, u64 *buffer_list, int buffer_size_shift, int list_len, u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { - void *mailbox; - u64 *mtt_entry; - struct mthca_mpt_entry *mpt_entry; - u32 key; - int err = -ENOMEM; - u8 status; - int i; - - might_sleep(); - WARN_ON(buffer_size_shift >= 32); - - key = mthca_alloc(&dev->mr_table.mpt_alloc); - if (key == -1) - return -ENOMEM; - mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); - - if (mthca_is_memfree(dev)) { - err = mthca_table_get(dev, dev->mr_table.mpt_table, key); - if (err) - goto err_out_mpt_free; - } - - for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; - i < list_len; - i <<= 1, ++mr->order) - ; /* nothing */ - - mr->first_seg = mthca_alloc_mtt(dev, mr->order, - &dev->mr_table.mtt_buddy); - if (mr->first_seg == -1) - goto err_out_table; - - /* - * If list_len is odd, we add one more dummy entry for - * firmware efficiency. - */ - mailbox = kmalloc(max(sizeof *mpt_entry, - (size_t) 8 * (list_len + (list_len & 1) + 2)) + - MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_free_mtt; + int err; - mtt_entry = MAILBOX_ALIGN(mailbox); + mr->mtt = mthca_alloc_mtt(dev, list_len); + if (IS_ERR(mr->mtt)) + return PTR_ERR(mr->mtt); - mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mr->first_seg * MTHCA_MTT_SEG_SIZE); - mtt_entry[1] = 0; - for (i = 0; i < list_len; ++i) - mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | - MTHCA_MTT_FLAG_PRESENT); - if (list_len & 1) { - mtt_entry[i + 2] = 0; - ++list_len; - } - - if (0) { - mthca_dbg(dev, "Dumping MPT entry\n"); - for (i = 0; i < list_len + 2; ++i) - printk(KERN_ERR "[%2d] %016llx\n", - i, (unsigned long long) be64_to_cpu(mtt_entry[i])); - } - - err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status); + err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len); if (err) { - mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); - goto err_out_mailbox_free; - } - if (status) { - mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_mailbox_free; - } - - mpt_entry = MAILBOX_ALIGN(mailbox); - - mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | - MTHCA_MPT_FLAG_MIO | - MTHCA_MPT_FLAG_REGION | - access); - - mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); - mpt_entry->key = cpu_to_be32(key); - mpt_entry->pd = cpu_to_be32(pd); - mpt_entry->start = cpu_to_be64(iova); - mpt_entry->length = cpu_to_be64(total_size); - memset(&mpt_entry->lkey, 0, - sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); - mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->first_seg * MTHCA_MTT_SEG_SIZE); - - if (0) { - mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); - for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { - if (i % 4 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); - if ((i + 1) % 4 == 0) - printk("\n"); - } + mthca_free_mtt(dev, mr->mtt); + return err; } - err = mthca_SW2HW_MPT(dev, mpt_entry, - key & (dev->limits.num_mpts - 1), - &status); + err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova, + total_size, access, mr); if (err) - mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - else if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); - err = -EINVAL; - } - - kfree(mailbox); - return err; - -err_out_mailbox_free: - kfree(mailbox); - -err_out_free_mtt: - mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy); - -err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_free_mtt(dev, mr->mtt); -err_out_mpt_free: - mthca_free(&dev->mr_table.mpt_alloc, key); return err; } /* Free mr or fmr */ -static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order, - u32 first_seg, struct mthca_buddy *buddy) +static void mthca_free_region(struct mthca_dev *dev, u32 lkey) { - if (order >= 0) - mthca_free_mtt(dev, first_seg, order, buddy); - - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, - arbel_key_to_hw_index(lkey)); + mthca_table_put(dev, dev->mr_table.mpt_table, + key_to_hw_index(dev, lkey)); mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); } @@ -476,15 +481,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", status); - mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg, - &dev->mr_table.mtt_buddy); + mthca_free_region(dev, mr->ibmr.lkey); + mthca_free_mtt(dev, mr->mtt); } int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_fmr *mr) { struct mthca_mpt_entry *mpt_entry; - void *mailbox; + struct mthca_mailbox *mailbox; u64 mtt_seg; u32 key, idx; u8 status; @@ -522,31 +527,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + sizeof *(mr->mem.tavor.mpt) * idx; - for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; - i < list_len; - i <<= 1, ++mr->order) - ; /* nothing */ - - mr->first_seg = mthca_alloc_mtt(dev, mr->order, - dev->mr_table.fmr_mtt_buddy); - if (mr->first_seg == -1) + mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); + if (IS_ERR(mr->mtt)) goto err_out_table; - mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, - mr->first_seg); + mr->mtt->first_seg); BUG_ON(!mr->mem.arbel.mtts); } else mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; - mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) goto err_out_free_mtt; - mpt_entry = MAILBOX_ALIGN(mailbox); + mpt_entry = mailbox->buf; mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | @@ -565,13 +563,13 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); if ((i + 1) % 4 == 0) printk("\n"); } } - err = mthca_SW2HW_MPT(dev, mpt_entry, + err = mthca_SW2HW_MPT(dev, mailbox, key & (dev->limits.num_mpts - 1), &status); if (err) { @@ -585,19 +583,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_mailbox_free; } - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_mailbox_free: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); err_out_free_mtt: - mthca_free_mtt(dev, mr->first_seg, mr->order, - dev->mr_table.fmr_mtt_buddy); + mthca_free_mtt(dev, mr->mtt); err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); @@ -609,8 +605,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) if (fmr->maps) return -EBUSY; - mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg, - dev->mr_table.fmr_mtt_buddy); + mthca_free_region(dev, fmr->ibmr.lkey); + mthca_free_mtt(dev, fmr->mtt); + return 0; } @@ -673,7 +670,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); mpt_entry.start = cpu_to_be64(iova); - writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key); + __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, offsetof(struct mthca_mpt_entry, window_count) - offsetof(struct mthca_mpt_entry, start)); @@ -826,7 +823,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) if (dev->limits.reserved_mtts) { i = fls(dev->limits.reserved_mtts - 1); - if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) { + if (mthca_alloc_mtt_range(dev, i, + dev->mr_table.fmr_mtt_buddy) == -1) { mthca_warn(dev, "MTT table of order %d is too small.\n", dev->mr_table.fmr_mtt_buddy->max_order); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index ea66847e4ea3..3dbf06a6e6f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,23 +39,27 @@ #include "mthca_dev.h" -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) { - int err; + int err = 0; might_sleep(); + pd->privileged = privileged; + atomic_set(&pd->sqp_count, 0); pd->pd_num = mthca_alloc(&dev->pd_table.alloc); if (pd->pd_num == -1) return -ENOMEM; - err = mthca_mr_alloc_notrans(dev, pd->pd_num, - MTHCA_MPT_FLAG_LOCAL_READ | - MTHCA_MPT_FLAG_LOCAL_WRITE, - &pd->ntmr); - if (err) - mthca_free(&dev->pd_table.alloc, pd->pd_num); + if (privileged) { + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + } return err; } @@ -61,7 +67,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) { might_sleep(); - mthca_free_mr(dev, &pd->ntmr); + if (pd->privileged) + mthca_free_mr(dev, &pd->ntmr); mthca_free(&dev->pd_table.alloc, pd->pd_num); } diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 4fedc32d5871..0576056b34f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -101,6 +102,7 @@ u64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_UARC].size = request->uarc_size; profile[MTHCA_RES_QP].num = request->num_qp; + profile[MTHCA_RES_SRQ].num = request->num_srq; profile[MTHCA_RES_EQP].num = request->num_qp; profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp; profile[MTHCA_RES_CQ].num = request->num_cq; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index 17aef3357661..94641808f97f 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -41,6 +42,7 @@ struct mthca_profile { int num_qp; int rdb_per_qp; + int num_srq; int num_cq; int num_mcg; int num_mpt; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 159f4e6c312d..1c1c2e230871 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,10 +36,13 @@ * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $ */ -#include <ib_smi.h> +#include <rdma/ib_smi.h> +#include <linux/mm.h> #include "mthca_dev.h" #include "mthca_cmd.h" +#include "mthca_user.h" +#include "mthca_memfree.h" static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) @@ -52,7 +59,7 @@ static int mthca_query_device(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; - memset(props, 0, sizeof props); + memset(props, 0, sizeof *props); props->fw_ver = mdev->fw_ver; @@ -74,10 +81,10 @@ static int mthca_query_device(struct ib_device *ibdev, } props->device_cap_flags = mdev->device_cap_flags; - props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) & + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30)); - props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32)); + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); memcpy(&props->node_guid, out_mad->data + 12, 8); @@ -113,6 +120,8 @@ static int mthca_query_port(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; + memset(props, 0, sizeof *props); + memset(in_mad, 0, sizeof *in_mad); in_mad->base_version = 1; in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; @@ -131,16 +140,17 @@ static int mthca_query_port(struct ib_device *ibdev, goto out; } - props->lid = be16_to_cpup((u16 *) (out_mad->data + 16)); + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; - props->sm_lid = be16_to_cpup((u16 *) (out_mad->data + 18)); + props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; - props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20)); + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; - props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; @@ -216,7 +226,7 @@ static int mthca_query_pkey(struct ib_device *ibdev, goto out; } - *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]); + *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); @@ -283,7 +293,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, return err; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) +static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mthca_alloc_ucontext_resp uresp; + struct mthca_ucontext *context; + int err; + + memset(&uresp, 0, sizeof uresp); + + uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; + if (mthca_is_memfree(to_mdev(ibdev))) + uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; + else + uresp.uarc_size = 0; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); + if (IS_ERR(context->db_tab)) { + err = PTR_ERR(context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(err); + } + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; +} + +static int mthca_dealloc_ucontext(struct ib_ucontext *context) +{ + mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab); + mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); + kfree(to_mucontext(context)); + + return 0; +} + +static int mthca_mmap_uar(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) { struct mthca_pd *pd; int err; @@ -292,12 +373,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) if (!pd) return ERR_PTR(-ENOMEM); - err = mthca_pd_alloc(to_mdev(ibdev), pd); + err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); if (err) { kfree(pd); return ERR_PTR(err); } + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { + mthca_pd_free(to_mdev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + return &pd->ibpd; } @@ -336,9 +425,82 @@ static int mthca_ah_destroy(struct ib_ah *ah) return 0; } +static struct ib_srq *mthca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mthca_create_srq ucmd; + struct mthca_ucontext *context = NULL; + struct mthca_srq *srq; + int err; + + srq = kmalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index, + ucmd.db_page); + + if (err) + goto err_free; + + srq->mr.ibmr.lkey = ucmd.lkey; + srq->db_index = ucmd.db_index; + } + + err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + &init_attr->attr, srq); + + if (err && pd->uobject) + mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index); + + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { + mthca_free_srq(to_mdev(pd->device), srq); + err = -EFAULT; + goto err_free; + } + + return &srq->ibsrq; + +err_free: + kfree(srq); + + return ERR_PTR(err); +} + +static int mthca_destroy_srq(struct ib_srq *srq) +{ + struct mthca_ucontext *context; + + if (srq->uobject) { + context = to_mucontext(srq->uobject->context); + + mthca_unmap_user_db(to_mdev(srq->device), &context->uar, + context->db_tab, to_msrq(srq)->db_index); + } + + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + kfree(srq); + + return 0; +} + static struct ib_qp *mthca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { + struct mthca_create_qp ucmd; struct mthca_qp *qp; int err; @@ -347,41 +509,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: { + struct mthca_ucontext *context; + qp = kmalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.sq_db_index, ucmd.sq_db_page); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.rq_db_index, ucmd.rq_db_page); + if (err) { + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + kfree(qp); + return ERR_PTR(err); + } + + qp->mr.ibmr.lkey = ucmd.lkey; + qp->sq.db_index = ucmd.sq_db_index; + qp->rq.db_index = ucmd.rq_db_index; + } err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - qp); + &init_attr->cap, qp); + + if (err && pd->uobject) { + context = to_mucontext(pd->uobject->context); + + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.rq_db_index); + } + qp->ibqp.qp_num = qp->qpn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { + /* Don't allow userspace to create special QPs */ + if (pd->uobject) + return ERR_PTR(-EINVAL); + qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; - qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), - init_attr->sq_sig_type, + init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, to_msqp(qp)); break; @@ -396,42 +599,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return ERR_PTR(err); } - init_attr->cap.max_inline_data = 0; + init_attr->cap.max_inline_data = 0; + init_attr->cap.max_send_wr = qp->sq.max; + init_attr->cap.max_recv_wr = qp->rq.max; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_recv_sge = qp->rq.max_gs; return &qp->ibqp; } static int mthca_destroy_qp(struct ib_qp *qp) { + if (qp->uobject) { + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->sq.db_index); + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->rq.db_index); + } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); kfree(qp); return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + struct ib_udata *udata) { + struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.set_db_index, ucmd.set_db_page); + if (err) + return ERR_PTR(err); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.arm_db_index, ucmd.arm_db_page); + if (err) + goto err_unmap_set; + } + cq = kmalloc(sizeof *cq, GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + if (!cq) { + err = -ENOMEM; + goto err_unmap_arm; + } + + if (context) { + cq->mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; + } for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = mthca_init_cq(to_mdev(ibdev), nent, cq); - if (err) { - kfree(cq); - cq = ERR_PTR(err); + err = mthca_init_cq(to_mdev(ibdev), nent, + context ? to_mucontext(context) : NULL, + context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + cq); + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + mthca_free_cq(to_mdev(ibdev), cq); + goto err_free; } return &cq->ibcq; + +err_free: + kfree(cq); + +err_unmap_arm: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.arm_db_index); + +err_unmap_set: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.set_db_index); + + return ERR_PTR(err); } static int mthca_destroy_cq(struct ib_cq *cq) { + if (cq->uobject) { + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->set_ci_db_index); + } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); kfree(cq); @@ -558,6 +834,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, convert_access(acc), mr); if (err) { + kfree(page_list); kfree(mr); return ERR_PTR(err); } @@ -566,6 +843,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, return &mr->ibmr; } +static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, + int acc, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(pd->device); + struct ib_umem_chunk *chunk; + struct mthca_mr *mr; + u64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + + shift = ffs(region->page_size) - 1; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + n = 0; + list_for_each_entry(chunk, ®ion->chunk_list, list) + n += chunk->nents; + + mr->mtt = mthca_alloc_mtt(dev, n); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err; + } + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_mtt; + } + + i = n = 0; + + list_for_each_entry(chunk, ®ion->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(&chunk->page_list[j]) + + region->page_size * k; + /* + * Be friendly to WRITE_MTT command + * and leave two empty slots for the + * index and reserved fields of the + * mailbox. + */ + if (i == PAGE_SIZE / sizeof (u64) - 2) { + err = mthca_write_mtt(dev, mr->mtt, + n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; + } + } + } + + if (i) + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); +mtt_done: + free_page((unsigned long) pages); + if (err) + goto err_mtt; + + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, + region->length, convert_access(acc), mr); + + if (err) + goto err_mtt; + + return &mr->ibmr; + +err_mtt: + mthca_free_mtt(dev, mr->mtt); + +err: + kfree(mr); + return ERR_PTR(err); +} + static int mthca_dereg_mr(struct ib_mr *mr) { struct mthca_mr *mmr = to_mmr(mr); @@ -674,14 +1032,22 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) } } +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); + return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); +} + static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct class_device_attribute *mthca_class_attributes[] = { &class_device_attr_hw_rev, &class_device_attr_fw_ver, - &class_device_attr_hca_type + &class_device_attr_hca_type, + &class_device_attr_board_id }; int mthca_register_device(struct mthca_dev *dev) @@ -690,6 +1056,8 @@ int mthca_register_device(struct mthca_dev *dev) int i; strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -699,10 +1067,24 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.modify_port = mthca_modify_port; dev->ib_dev.query_pkey = mthca_query_pkey; dev->ib_dev.query_gid = mthca_query_gid; + dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; + dev->ib_dev.mmap = mthca_mmap_uar; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; dev->ib_dev.destroy_ah = mthca_ah_destroy; + + if (dev->mthca_flags & MTHCA_FLAG_SRQ) { + dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.destroy_srq = mthca_destroy_srq; + + if (mthca_is_memfree(dev)) + dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + else + dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + } + dev->ib_dev.create_qp = mthca_create_qp; dev->ib_dev.modify_qp = mthca_modify_qp; dev->ib_dev.destroy_qp = mthca_destroy_qp; @@ -711,6 +1093,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; if (dev->mthca_flags & MTHCA_FLAG_FMR) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 619710f95a87..bcd4b01a339c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,8 +37,8 @@ #ifndef MTHCA_PROVIDER_H #define MTHCA_PROVIDER_H -#include <ib_verbs.h> -#include <ib_pack.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> #define MTHCA_MPT_FLAG_ATOMIC (1 << 14) #define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) @@ -49,23 +51,36 @@ struct mthca_buf_list { DECLARE_PCI_UNMAP_ADDR(mapping) }; +union mthca_buf { + struct mthca_buf_list direct; + struct mthca_buf_list *page_list; +}; + struct mthca_uar { unsigned long pfn; int index; }; +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; +}; + +struct mthca_mtt; + struct mthca_mr { - struct ib_mr ibmr; - int order; - u32 first_seg; + struct ib_mr ibmr; + struct mthca_mtt *mtt; }; struct mthca_fmr { - struct ib_fmr ibmr; + struct ib_fmr ibmr; struct ib_fmr_attr attr; - int order; - u32 first_seg; - int maps; + struct mthca_mtt *mtt; + int maps; union { struct { struct mthca_mpt_entry __iomem *mpt; @@ -83,6 +98,7 @@ struct mthca_pd { u32 pd_num; atomic_t sqp_count; struct mthca_mr ntmr; + int privileged; }; struct mthca_eq { @@ -167,22 +183,43 @@ struct mthca_cq { int cqn; u32 cons_index; int is_direct; + int is_kernel; /* Next fields are Arbel only */ int set_ci_db_index; - u32 *set_ci_db; + __be32 *set_ci_db; int arm_db_index; - u32 *arm_db; + __be32 *arm_db; int arm_sn; - union { - struct mthca_buf_list direct; - struct mthca_buf_list *page_list; - } queue; + union mthca_buf queue; struct mthca_mr mr; wait_queue_head_t wait; }; +struct mthca_srq { + struct ib_srq ibsrq; + spinlock_t lock; + atomic_t refcount; + int srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + u16 counter; /* Arbel only */ + int db_index; /* Arbel only */ + __be32 *db; /* Arbel only */ + void *last; + + int is_direct; + u64 *wrid; + union mthca_buf queue; + struct mthca_mr mr; + + wait_queue_head_t wait; +}; + struct mthca_wq { spinlock_t lock; int max; @@ -195,7 +232,7 @@ struct mthca_wq { int wqe_shift; int db_index; /* Arbel only */ - u32 *db; + __be32 *db; }; struct mthca_qp { @@ -216,10 +253,7 @@ struct mthca_qp { int send_wqe_offset; u64 *wrid; - union { - struct mthca_buf_list direct; - struct mthca_buf_list *page_list; - } queue; + union mthca_buf queue; wait_queue_head_t wait; }; @@ -236,6 +270,11 @@ struct mthca_sqp { dma_addr_t header_dma; }; +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) { return container_of(ibmr, struct mthca_fmr, ibmr); @@ -261,6 +300,11 @@ static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) return container_of(ibcq, struct mthca_cq, ibcq); } +static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mthca_srq, ibsrq); +} + static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mthca_qp, ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index ca73bab11a02..0164b84d4ec6 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1,5 +1,8 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,19 +37,22 @@ #include <linux/init.h> -#include <ib_verbs.h> -#include <ib_cache.h> -#include <ib_pack.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_pack.h> #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" +#include "mthca_wqe.h" enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, MTHCA_ACK_REQ_FREQ = 10, MTHCA_FLIGHT_LIMIT = 9, - MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ + MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */ + MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */ + MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */ }; enum { @@ -92,62 +98,62 @@ enum { }; struct mthca_qp_path { - u32 port_pkey; - u8 rnr_retry; - u8 g_mylmc; - u16 rlid; - u8 ackto; - u8 mgid_index; - u8 static_rate; - u8 hop_limit; - u32 sl_tclass_flowlabel; - u8 rgid[16]; + __be32 port_pkey; + u8 rnr_retry; + u8 g_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 rgid[16]; } __attribute__((packed)); struct mthca_qp_context { - u32 flags; - u32 tavor_sched_queue; /* Reserved on Arbel */ - u8 mtu_msgmax; - u8 rq_size_stride; /* Reserved on Tavor */ - u8 sq_size_stride; /* Reserved on Tavor */ - u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ - u32 usr_page; - u32 local_qpn; - u32 remote_qpn; - u32 reserved1[2]; + __be32 flags; + __be32 tavor_sched_queue; /* Reserved on Arbel */ + u8 mtu_msgmax; + u8 rq_size_stride; /* Reserved on Tavor */ + u8 sq_size_stride; /* Reserved on Tavor */ + u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ + __be32 usr_page; + __be32 local_qpn; + __be32 remote_qpn; + u32 reserved1[2]; struct mthca_qp_path pri_path; struct mthca_qp_path alt_path; - u32 rdd; - u32 pd; - u32 wqe_base; - u32 wqe_lkey; - u32 params1; - u32 reserved2; - u32 next_send_psn; - u32 cqn_snd; - u32 snd_wqe_base_l; /* Next send WQE on Tavor */ - u32 snd_db_index; /* (debugging only entries) */ - u32 last_acked_psn; - u32 ssn; - u32 params2; - u32 rnr_nextrecvpsn; - u32 ra_buff_indx; - u32 cqn_rcv; - u32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ - u32 rcv_db_index; /* (debugging only entries) */ - u32 qkey; - u32 srqn; - u32 rmsn; - u16 rq_wqe_counter; /* reserved on Tavor */ - u16 sq_wqe_counter; /* reserved on Tavor */ - u32 reserved3[18]; + __be32 rdd; + __be32 pd; + __be32 wqe_base; + __be32 wqe_lkey; + __be32 params1; + __be32 reserved2; + __be32 next_send_psn; + __be32 cqn_snd; + __be32 snd_wqe_base_l; /* Next send WQE on Tavor */ + __be32 snd_db_index; /* (debugging only entries) */ + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 ra_buff_indx; + __be32 cqn_rcv; + __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ + __be32 rcv_db_index; /* (debugging only entries) */ + __be32 qkey; + __be32 srqn; + __be32 rmsn; + __be16 rq_wqe_counter; /* reserved on Tavor */ + __be16 sq_wqe_counter; /* reserved on Tavor */ + u32 reserved3[18]; } __attribute__((packed)); struct mthca_qp_param { - u32 opt_param_mask; - u32 reserved1; + __be32 opt_param_mask; + u32 reserved1; struct mthca_qp_context context; - u32 reserved2[62]; + u32 reserved2[62]; } __attribute__((packed)); enum { @@ -170,80 +176,6 @@ enum { MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16 }; -enum { - MTHCA_NEXT_DBD = 1 << 7, - MTHCA_NEXT_FENCE = 1 << 6, - MTHCA_NEXT_CQ_UPDATE = 1 << 3, - MTHCA_NEXT_EVENT_GEN = 1 << 2, - MTHCA_NEXT_SOLICIT = 1 << 1, - - MTHCA_MLX_VL15 = 1 << 17, - MTHCA_MLX_SLR = 1 << 16 -}; - -enum { - MTHCA_INVAL_LKEY = 0x100 -}; - -struct mthca_next_seg { - u32 nda_op; /* [31:6] next WQE [4:0] next opcode */ - u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ - u32 flags; /* [3] CQ [2] Event [1] Solicit */ - u32 imm; /* immediate data */ -}; - -struct mthca_tavor_ud_seg { - u32 reserved1; - u32 lkey; - u64 av_addr; - u32 reserved2[4]; - u32 dqpn; - u32 qkey; - u32 reserved3[2]; -}; - -struct mthca_arbel_ud_seg { - u32 av[8]; - u32 dqpn; - u32 qkey; - u32 reserved[2]; -}; - -struct mthca_bind_seg { - u32 flags; /* [31] Atomic [30] rem write [29] rem read */ - u32 reserved; - u32 new_rkey; - u32 lkey; - u64 addr; - u64 length; -}; - -struct mthca_raddr_seg { - u64 raddr; - u32 rkey; - u32 reserved; -}; - -struct mthca_atomic_seg { - u64 swap_add; - u64 compare; -}; - -struct mthca_data_seg { - u32 byte_count; - u32 lkey; - u64 addr; -}; - -struct mthca_mlx_seg { - u32 nda_op; - u32 nds; - u32 flags; /* [17] VL15 [16] SLR [14:12] static rate - [11:8] SL [3] C [2] E */ - u16 rlid; - u16 vcrc; -}; - static const u8 mthca_opcode[] = { [IB_WR_SEND] = MTHCA_OPCODE_SEND, [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM, @@ -357,6 +289,9 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -378,6 +313,9 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -388,6 +326,11 @@ static const struct { [IB_QPS_RTR] = { .trans = MTHCA_TRANS_INIT2RTR, .req_param = { + [UC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -398,6 +341,9 @@ static const struct { .opt_param = { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [UC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), @@ -413,6 +359,8 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, + [UC] = (IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -423,6 +371,11 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -442,6 +395,9 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | @@ -462,6 +418,10 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -476,6 +436,14 @@ static const struct { .opt_param = { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [UC] = (IB_QP_AV | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | @@ -501,6 +469,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | @@ -533,12 +502,11 @@ static void init_port(struct mthca_dev *dev, int port) memset(¶m, 0, sizeof param); - param.enable_1x = 1; - param.enable_4x = 1; - param.vl_cap = dev->limits.vl_cap; - param.mtu_cap = dev->limits.mtu_cap; - param.gid_cap = dev->limits.gid_table_len; - param.pkey_cap = dev->limits.pkey_table_len; + param.port_width = dev->limits.port_width_cap; + param.vl_cap = dev->limits.vl_cap; + param.mtu_cap = dev->limits.mtu_cap; + param.gid_cap = dev->limits.gid_table_len; + param.pkey_cap = dev->limits.pkey_table_len; err = mthca_INIT_IB(dev, ¶m, port, &status); if (err) @@ -552,7 +520,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; u32 req_param, opt_param; @@ -609,10 +577,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } - mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - qp_param = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + qp_param = mailbox->buf; qp_context = &qp_param->context; memset(qp_param, 0, sizeof *qp_param); @@ -644,15 +612,22 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31; if (mthca_is_memfree(dev)) { - qp_context->rq_size_stride = - ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4); - qp_context->sq_size_stride = - ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4); + if (qp->rq.max) + qp_context->rq_size_stride = long_log2(qp->rq.max) << 3; + qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.max) + qp_context->sq_size_stride = long_log2(qp->sq.max) << 3; + qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; } /* leave arbel_sched_queue as 0 */ - qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); + if (qp->ibqp.uobject) + qp_context->usr_page = + cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + else + qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); qp_context->local_qpn = cpu_to_be32(qp->qpn); if (attr_mask & IB_QP_DEST_QPN) { qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); @@ -683,7 +658,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (attr_mask & IB_QP_AV) { qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); - qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3; + qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate; if (attr->ah_attr.ah_flags & IB_AH_GRH) { qp_context->pri_path.g_mylmc |= 1 << 7; qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; @@ -724,9 +699,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ? - ffs(attr->max_dest_rd_atomic) - 1 : 0, + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ? + ffs(attr->max_rd_atomic) - 1 : 0, 7) << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); } @@ -764,10 +739,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp->atomic_rd_en = attr->qp_access_flags; } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { u8 rra_max; - if (qp->resp_depth && !attr->max_rd_atomic) { + if (qp->resp_depth && !attr->max_dest_rd_atomic) { /* * Lowering our responder resources to zero. * Turn off RDMA/atomics as responder. @@ -778,7 +753,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) MTHCA_QP_OPTPAR_RAE); } - if (!qp->resp_depth && attr->max_rd_atomic) { + if (!qp->resp_depth && attr->max_dest_rd_atomic) { /* * Increasing our responder resources from * zero. Turn on RDMA/atomics as appropriate. @@ -799,7 +774,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } for (rra_max = 0; - 1 << rra_max < attr->max_rd_atomic && + 1 << rra_max < attr->max_dest_rd_atomic && rra_max < dev->qp_table.rdb_shift; ++rra_max) ; /* nothing */ @@ -807,11 +782,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->params2 |= cpu_to_be32(rra_max << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); - qp->resp_depth = attr->max_rd_atomic; + qp->resp_depth = attr->max_dest_rd_atomic; } qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); + if (ibqp->srq) + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC); + if (attr_mask & IB_QP_MIN_RNR_TIMER) { qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); @@ -834,8 +812,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY); } + if (ibqp->srq) + qp_context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibqp->srq)->srqn); + err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, - qp->qpn, 0, qp_param, 0, &status); + qp->qpn, 0, mailbox, 0, &status); if (status) { mthca_warn(dev, "modify QP %d returned status %02x.\n", state_table[cur_state][new_state].trans, status); @@ -845,7 +827,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (!err) qp->state = new_state; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); if (is_sqp(dev, qp)) store_attrs(to_msqp(qp), attr, attr_mask); @@ -881,10 +863,6 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { int size; - int i; - int npages, shift; - dma_addr_t t; - u64 *dma_list = NULL; int err = -ENOMEM; size = sizeof (struct mthca_next_seg) + @@ -917,6 +895,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 1 << qp->sq.wqe_shift); + + /* + * If this is a userspace QP, we don't actually have to + * allocate anything. All we need is to calculate the WQE + * sizes and the send_wqe_offset, so we're done now. + */ + if (pd->ibpd.uobject) + return 0; + size = PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)); @@ -925,100 +912,31 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, if (!qp->wrid) goto err_out; - if (size <= MTHCA_MAX_DIRECT_QP_SIZE) { - qp->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - if (0) - mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", - size, shift); - - qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t); - if (!qp->queue.direct.buf) - goto err_out; - - pci_unmap_addr_set(&qp->queue.direct, mapping, t); - - memset(qp->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - qp->is_direct = 0; - npages = size / PAGE_SIZE; - shift = PAGE_SHIFT; - - if (0) - mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages); - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out; - - qp->queue.page_list = kmalloc(npages * - sizeof *qp->queue.page_list, - GFP_KERNEL); - if (!qp->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) { - qp->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); - if (!qp->queue.page_list[i].buf) - goto err_out_free; - - memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE); - - pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t); - dma_list[i] = t; - } - } - - err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, - npages, 0, size, - MTHCA_MPT_FLAG_LOCAL_READ, - &qp->mr); + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE, + &qp->queue, &qp->is_direct, pd, 0, &qp->mr); if (err) - goto err_out_free; + goto err_out; - kfree(dma_list); return 0; - err_out_free: - if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else - for (i = 0; i < npages; ++i) { - if (qp->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - - } - - err_out: +err_out: kfree(qp->wrid); - kfree(dma_list); return err; } -static int mthca_alloc_memfree(struct mthca_dev *dev, +static void mthca_free_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { - int ret = 0; + mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)), + &qp->queue, qp->is_direct, &qp->mr); + kfree(qp->wrid); +} + +static int mthca_map_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret; if (mthca_is_memfree(dev)) { ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); @@ -1029,35 +947,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev, if (ret) goto err_qpc; - ret = mthca_table_get(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - if (ret) - goto err_eqpc; + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; - qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, - qp->qpn, &qp->rq.db); - if (qp->rq.db_index < 0) { - ret = -ENOMEM; - goto err_rdb; - } - - qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, - qp->qpn, &qp->sq.db); - if (qp->sq.db_index < 0) { - ret = -ENOMEM; - goto err_rq_db; - } } return 0; -err_rq_db: - mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - -err_rdb: - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - err_eqpc: mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); @@ -1067,16 +965,41 @@ err_qpc: return ret; } +static void mthca_unmap_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_table_put(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); +} + +static int mthca_alloc_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret = 0; + + if (mthca_is_memfree(dev)) { + qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, + qp->qpn, &qp->rq.db); + if (qp->rq.db_index < 0) + return ret; + + qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, + qp->qpn, &qp->sq.db); + if (qp->sq.db_index < 0) + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + } + + return ret; +} + static void mthca_free_memfree(struct mthca_dev *dev, struct mthca_qp *qp) { if (mthca_is_memfree(dev)) { mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); - mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); } } @@ -1108,13 +1031,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, mthca_wq_init(&qp->sq); mthca_wq_init(&qp->rq); - ret = mthca_alloc_memfree(dev, qp); + ret = mthca_map_memfree(dev, qp); if (ret) return ret; ret = mthca_alloc_wqe_buf(dev, pd, qp); if (ret) { - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); + return ret; + } + + /* + * If this is a userspace QP, we're done now. The doorbells + * will be allocated and buffers will be initialized in + * userspace. + */ + if (pd->ibpd.uobject) + return 0; + + ret = mthca_alloc_memfree(dev, qp); + if (ret) { + mthca_free_wqe_buf(dev, qp); + mthca_unmap_memfree(dev, qp); return ret; } @@ -1147,22 +1085,39 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, return 0; } -static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp) +static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, + struct mthca_qp *qp) { - int i; - - if (!mthca_is_memfree(dev)) - return; + /* Sanity check QP size before proceeding */ + if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || + cap->max_send_sge > 64 || cap->max_recv_sge > 64) + return -EINVAL; - for (i = 0; 1 << i < qp->rq.max; ++i) - ; /* nothing */ + if (mthca_is_memfree(dev)) { + qp->rq.max = cap->max_recv_wr ? + roundup_pow_of_two(cap->max_recv_wr) : 0; + qp->sq.max = cap->max_send_wr ? + roundup_pow_of_two(cap->max_send_wr) : 0; + } else { + qp->rq.max = cap->max_recv_wr; + qp->sq.max = cap->max_send_wr; + } - qp->rq.max = 1 << i; + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = max_t(int, cap->max_send_sge, + ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, + MTHCA_INLINE_CHUNK_SIZE) / + sizeof (struct mthca_data_seg)); - for (i = 0; 1 << i < qp->sq.max; ++i) - ; /* nothing */ + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || + qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) + return -EINVAL; - qp->sq.max = 1 << i; + return 0; } int mthca_alloc_qp(struct mthca_dev *dev, @@ -1171,11 +1126,14 @@ int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp) { int err; - mthca_align_qp_size(dev, qp); + err = mthca_set_qp_size(dev, cap, qp); + if (err) + return err; switch (type) { case IB_QPT_RC: qp->transport = RC; break; @@ -1208,14 +1166,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp) { - int err = 0; u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + int err; - mthca_align_qp_size(dev, &sqp->qp); + err = mthca_set_qp_size(dev, cap, &sqp->qp); + if (err) + return err; sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, @@ -1274,8 +1235,6 @@ void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp) { u8 status; - int size; - int i; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; @@ -1305,31 +1264,23 @@ void mthca_free_qp(struct mthca_dev *dev, if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - - mthca_free_mr(dev, &qp->mr); - - size = PAGE_ALIGN(qp->send_wqe_offset + - (qp->sq.max << qp->sq.wqe_shift)); + /* + * If this is a userspace QP, the buffers, MR, CQs and so on + * will be cleaned up in userspace, so all we have to do is + * unref the mem-free tables and free the QPN in our table. + */ + if (!qp->ibqp.uobject) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); - if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else { - for (i = 0; i < size / PAGE_SIZE; ++i) { - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - } + mthca_free_memfree(dev, qp); + mthca_free_wqe_buf(dev, qp); } - kfree(qp->wrid); - - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); @@ -1349,6 +1300,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, { int header_size; int err; + u16 pkey; ib_ud_header_init(256, /* assume a MAD */ sqp->ud_header.grh_present, @@ -1359,8 +1311,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == 0xffff ? - MTHCA_MLX_SLR : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; @@ -1380,18 +1332,16 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, } sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == 0xffff) - sqp->ud_header.lrh.source_lid = 0xffff; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->port, - sqp->pkey_index, - &sqp->ud_header.bth.pkey); + sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->port, - wr->wr.ud.pkey_index, - &sqp->ud_header.bth.pkey); - cpu_to_be16s(&sqp->ud_header.bth.pkey); + wr->wr.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? @@ -1529,6 +1479,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cpu_to_be64(wr->wr.rdma.remote_addr); + ((struct mthca_raddr_seg *) wqe)->rkey = + cpu_to_be32(wr->wr.rdma.rkey); + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + case UD: ((struct mthca_tavor_ud_seg *) wqe)->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); @@ -1614,7 +1584,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) + qp->send_wqe_offset) | f0 | op0); @@ -1715,7 +1685,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); @@ -1814,9 +1784,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, sizeof (struct mthca_atomic_seg); break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cpu_to_be64(wr->wr.rdma.remote_addr); + ((struct mthca_raddr_seg *) wqe)->rkey = + cpu_to_be32(wr->wr.rdma.rkey); + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - case IB_WR_RDMA_READ: ((struct mthca_raddr_seg *) wqe)->raddr = cpu_to_be64(wr->wr.rdma.remote_addr); ((struct mthca_raddr_seg *) wqe)->rkey = @@ -1916,7 +1906,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((nreq << 24) | ((qp->sq.head & 0xffff) << 8) | @@ -2026,19 +2016,25 @@ out: } int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, u32 *new_wqe) + int index, int *dbd, __be32 *new_wqe) { struct mthca_next_seg *next; + /* + * For SRQs, all WQEs generate a CQE, so we're always at the + * end of the doorbell chain. + */ + if (qp->ibqp.srq) { + *new_wqe = 0; + return 0; + } + if (is_send) next = get_send_wqe(qp, index); else next = get_recv_wqe(qp, index); - if (mthca_is_memfree(dev)) - *dbd = 1; - else - *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); + *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); if (next->ee_nds & cpu_to_be32(0x3f)) *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) | (next->ee_nds & cpu_to_be32(0x3f)); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c new file mode 100644 index 000000000000..75cd2d84ef12 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ + */ + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + +enum { + MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE +}; + +struct mthca_tavor_srq_context { + __be64 wqe_base_ds; /* low 6 bits is descriptor size */ + __be32 state_pd; + __be32 lkey; + __be32 uar; + __be32 wqe_cnt; + u32 reserved[2]; +}; + +struct mthca_arbel_srq_context { + __be32 state_logsize_srqn; + __be32 lkey; + __be32 db_index; + __be32 logstride_usrpage; + __be64 wqe_base; + __be32 eq_pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved1; + __be16 wqe_counter; + u32 reserved2[3]; +}; + +static void *get_wqe(struct mthca_srq *srq, int n) +{ + if (srq->is_direct) + return srq->queue.direct.buf + (n << srq->wqe_shift); + else + return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf + + ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); +} + +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use an offset of 4 + * because in the Tavor case, posting a WQE may overwrite the first + * four bytes of the previous WQE. The offset avoids corrupting our + * free list if the WQE has already completed and been put on the free + * list when we post the next WQE. + */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) (wqe + 4); +} + +static void mthca_tavor_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_tavor_srq_context *context) +{ + memset(context, 0, sizeof *context); + + context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); + context->state_pd = cpu_to_be32(pd->pd_num); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + + if (pd->ibpd.uobject) + context->uar = + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->uar = cpu_to_be32(dev->driver_uar.index); +} + +static void mthca_arbel_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_arbel_srq_context *context) +{ + int logsize; + + memset(context, 0, sizeof *context); + + logsize = long_log2(srq->max) + srq->wqe_shift; + context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + context->db_index = cpu_to_be32(srq->db_index); + context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); + if (pd->ibpd.uobject) + context->logstride_usrpage |= + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); + context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); +} + +static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) +{ + mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue, + srq->is_direct, &srq->mr); + kfree(srq->wrid); +} + +static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, + struct mthca_srq *srq) +{ + struct mthca_data_seg *scatter; + void *wqe; + int err; + int i; + + if (pd->ibpd.uobject) + return 0; + + srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) + return -ENOMEM; + + err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, + MTHCA_MAX_DIRECT_SRQ_SIZE, + &srq->queue, &srq->is_direct, pd, 1, &srq->mr); + if (err) { + kfree(srq->wrid); + return err; + } + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. In addition, set the + * scatter list L_Keys to the sentry value of 0x100. + */ + for (i = 0; i < srq->max; ++i) { + wqe = get_wqe(srq, i); + + *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1; + + for (scatter = wqe + sizeof (struct mthca_next_seg); + (void *) scatter < wqe + (1 << srq->wqe_shift); + ++scatter) + scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + } + + return 0; +} + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + u8 status; + int ds; + int err; + + /* Sanity check SRQ size before proceeding */ + if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + return -EINVAL; + + srq->max = attr->max_wr; + srq->max_gs = attr->max_sge; + srq->last = NULL; + srq->counter = 0; + + if (mthca_is_memfree(dev)) + srq->max = roundup_pow_of_two(srq->max + 1); + + ds = min(64UL, + roundup_pow_of_two(sizeof (struct mthca_next_seg) + + srq->max_gs * sizeof (struct mthca_data_seg))); + srq->wqe_shift = long_log2(ds); + + srq->srqn = mthca_alloc(&dev->srq_table.alloc); + if (srq->srqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->srq_table.table, srq->srqn); + if (err) + goto err_out; + + if (!pd->ibpd.uobject) { + srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, + srq->srqn, &srq->db); + if (srq->db_index < 0) { + err = -ENOMEM; + goto err_out_icm; + } + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_db; + } + + err = mthca_alloc_srq_buf(dev, pd, srq); + if (err) + goto err_out_mailbox; + + spin_lock_init(&srq->lock); + atomic_set(&srq->refcount, 1); + init_waitqueue_head(&srq->wait); + + if (mthca_is_memfree(dev)) + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + else + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); + + if (err) { + mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); + goto err_out_free_buf; + } + if (status) { + mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", + status); + err = -EINVAL; + goto err_out_free_buf; + } + + spin_lock_irq(&dev->srq_table.lock); + if (mthca_array_set(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1), + srq)) { + spin_unlock_irq(&dev->srq_table.lock); + goto err_out_free_srq; + } + spin_unlock_irq(&dev->srq_table.lock); + + mthca_free_mailbox(dev, mailbox); + + srq->first_free = 0; + srq->last_free = srq->max - 1; + + return 0; + +err_out_free_srq: + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + +err_out_free_buf: + if (!pd->ibpd.uobject) + mthca_free_srq_buf(dev, srq); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_db: + if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + +err_out_icm: + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + +err_out: + mthca_free(&dev->srq_table.alloc, srq->srqn); + + return err; +} + +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + mthca_warn(dev, "No memory for mailbox to free SRQ.\n"); + return; + } + + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + + spin_lock_irq(&dev->srq_table.lock); + mthca_array_clear(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1)); + spin_unlock_irq(&dev->srq_table.lock); + + atomic_dec(&srq->refcount); + wait_event(srq->wait, !atomic_read(&srq->refcount)); + + if (!srq->ibsrq.uobject) { + mthca_free_srq_buf(dev, srq); + if (mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + } + + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + mthca_free(&dev->srq_table.alloc, srq->srqn); + mthca_free_mailbox(dev, mailbox); +} + +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type) +{ + struct mthca_srq *srq; + struct ib_event event; + + spin_lock(&dev->srq_table.lock); + srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); + if (srq) + atomic_inc(&srq->refcount); + spin_unlock(&dev->srq_table.lock); + + if (!srq) { + mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + if (!srq->ibsrq.event_handler) + goto out; + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.srq = &srq->ibsrq; + srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); + +out: + if (atomic_dec_and_test(&srq->refcount)) + wake_up(&srq->wait); +} + +/* + * This function must be called with IRQs disabled. + */ +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) +{ + int ind; + + ind = wqe_addr >> srq->wqe_shift; + + spin_lock(&srq->lock); + + if (likely(srq->first_free >= 0)) + *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; + else + srq->first_free = ind; + + *wqe_to_link(get_wqe(srq, ind)) = -1; + srq->last_free = ind; + + spin_unlock(&srq->lock); +} + +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + unsigned long flags; + int err = 0; + int first_ind; + int ind; + int next_ind; + int nreq; + int i; + void *wqe; + void *prev_wqe; + + spin_lock_irqsave(&srq->lock, flags); + + first_ind = srq->first_free; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + ind = srq->first_free; + + if (ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + return nreq; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + prev_wqe = srq->last; + srq->last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + srq->last = prev_wqe; + return nreq; + } + + for (i = 0; i < wr->num_sge; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32(wr->sg_list[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cpu_to_be32(wr->sg_list[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cpu_to_be64(wr->sg_list[i].addr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + if (likely(prev_wqe)) { + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32((ind << srq->wqe_shift) | 1); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD); + } + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + return nreq; + + if (likely(nreq)) { + __be32 doorbell[2]; + + doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); + doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); + + /* + * Make sure that descriptors are written before + * doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + spin_unlock_irqrestore(&srq->lock, flags); + return err; +} + +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + unsigned long flags; + int err = 0; + int ind; + int next_ind; + int nreq; + int i; + void *wqe; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + ind = srq->first_free; + + if (ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + return nreq; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + ((struct mthca_next_seg *) wqe)->nda_op = + cpu_to_be32((next_ind << srq->wqe_shift) | 1); + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + return nreq; + } + + for (i = 0; i < wr->num_sge; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32(wr->sg_list[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cpu_to_be32(wr->sg_list[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cpu_to_be64(wr->sg_list[i].addr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + if (likely(nreq)) { + srq->counter += nreq; + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + wmb(); + *srq->db = cpu_to_be32(srq->counter); + } + + spin_unlock_irqrestore(&srq->lock, flags); + return err; +} + +int __devinit mthca_init_srq_table(struct mthca_dev *dev) +{ + int err; + + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return 0; + + spin_lock_init(&dev->srq_table.lock); + + err = mthca_alloc_init(&dev->srq_table.alloc, + dev->limits.num_srqs, + dev->limits.num_srqs - 1, + dev->limits.reserved_srqs); + if (err) + return err; + + err = mthca_array_init(&dev->srq_table.srq, + dev->limits.num_srqs); + if (err) + mthca_alloc_cleanup(&dev->srq_table.alloc); + + return err; +} + +void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) +{ + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return; + + mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs); + mthca_alloc_cleanup(&dev->srq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h new file mode 100644 index 000000000000..41613ec8a04e --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MTHCA_USER_H +#define MTHCA_USER_H + +#include <linux/types.h> + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mthca_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct mthca_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mthca_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct mthca_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mthca_create_srq { + __u32 lkey; + __u32 db_index; + __u64 db_page; +}; + +struct mthca_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mthca_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* MTHCA_USER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h new file mode 100644 index 000000000000..1f4c0ff28f79 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $ + */ + +#ifndef MTHCA_WQE_H +#define MTHCA_WQE_H + +#include <linux/types.h> + +enum { + MTHCA_NEXT_DBD = 1 << 7, + MTHCA_NEXT_FENCE = 1 << 6, + MTHCA_NEXT_CQ_UPDATE = 1 << 3, + MTHCA_NEXT_EVENT_GEN = 1 << 2, + MTHCA_NEXT_SOLICIT = 1 << 1, + + MTHCA_MLX_VL15 = 1 << 17, + MTHCA_MLX_SLR = 1 << 16 +}; + +enum { + MTHCA_INVAL_LKEY = 0x100 +}; + +struct mthca_next_seg { + __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */ + __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ + __be32 flags; /* [3] CQ [2] Event [1] Solicit */ + __be32 imm; /* immediate data */ +}; + +struct mthca_tavor_ud_seg { + u32 reserved1; + __be32 lkey; + __be64 av_addr; + u32 reserved2[4]; + __be32 dqpn; + __be32 qkey; + u32 reserved3[2]; +}; + +struct mthca_arbel_ud_seg { + __be32 av[8]; + __be32 dqpn; + __be32 qkey; + u32 reserved[2]; +}; + +struct mthca_bind_seg { + __be32 flags; /* [31] Atomic [30] rem write [29] rem read */ + u32 reserved; + __be32 new_rkey; + __be32 lkey; + __be64 addr; + __be64 length; +}; + +struct mthca_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mthca_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mthca_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mthca_mlx_seg { + __be32 nda_op; + __be32 nds; + __be32 flags; /* [17] VL15 [16] SLR [14:12] static rate + [11:8] SL [3] C [2] E */ + __be16 rlid; + __be16 vcrc; +}; + +#endif /* MTHCA_WQE_H */ diff --git a/drivers/infiniband/include/ib_cache.h b/drivers/infiniband/include/ib_cache.h deleted file mode 100644 index 44ef6bb9b9df..000000000000 --- a/drivers/infiniband/include/ib_cache.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $ - */ - -#ifndef _IB_CACHE_H -#define _IB_CACHE_H - -#include <ib_verbs.h> - -/** - * ib_get_cached_gid - Returns a cached GID table entry - * @device: The device to query. - * @port_num: The port number of the device to query. - * @index: The index into the cached GID table to query. - * @gid: The GID value found at the specified index. - * - * ib_get_cached_gid() fetches the specified GID table entry stored in - * the local software cache. - */ -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid); - -/** - * ib_find_cached_gid - Returns the port number and GID table index where - * a specified GID value occurs. - * @device: The device to query. - * @gid: The GID value to search for. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. - * - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. - */ -int ib_find_cached_gid(struct ib_device *device, - union ib_gid *gid, - u8 *port_num, - u16 *index); - -/** - * ib_get_cached_pkey - Returns a cached PKey table entry - * @device: The device to query. - * @port_num: The port number of the device to query. - * @index: The index into the cached PKey table to query. - * @pkey: The PKey value found at the specified index. - * - * ib_get_cached_pkey() fetches the specified PKey table entry stored in - * the local software cache. - */ -int ib_get_cached_pkey(struct ib_device *device_handle, - u8 port_num, - int index, - u16 *pkey); - -/** - * ib_find_cached_pkey - Returns the PKey table index where a specified - * PKey value occurs. - * @device: The device to query. - * @port_num: The port number of the device to search for the PKey. - * @pkey: The PKey value to search for. - * @index: The index into the cached PKey table where the PKey was found. - * - * ib_find_cached_pkey() searches the specified PKey table in - * the local software cache. - */ -int ib_find_cached_pkey(struct ib_device *device, - u8 port_num, - u16 pkey, - u16 *index); - -#endif /* _IB_CACHE_H */ diff --git a/drivers/infiniband/include/ib_fmr_pool.h b/drivers/infiniband/include/ib_fmr_pool.h deleted file mode 100644 index e8769657cbbb..000000000000 --- a/drivers/infiniband/include/ib_fmr_pool.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_fmr_pool.h 1349 2004-12-16 21:09:43Z roland $ - */ - -#if !defined(IB_FMR_POOL_H) -#define IB_FMR_POOL_H - -#include <ib_verbs.h> - -struct ib_fmr_pool; - -/** - * struct ib_fmr_pool_param - Parameters for creating FMR pool - * @max_pages_per_fmr:Maximum number of pages per map request. - * @access:Access flags for FMRs in pool. - * @pool_size:Number of FMRs to allocate for pool. - * @dirty_watermark:Flush is triggered when @dirty_watermark dirty - * FMRs are present. - * @flush_function:Callback called when unmapped FMRs are flushed and - * more FMRs are possibly available for mapping - * @flush_arg:Context passed to user's flush function. - * @cache:If set, FMRs may be reused after unmapping for identical map - * requests. - */ -struct ib_fmr_pool_param { - int max_pages_per_fmr; - enum ib_access_flags access; - int pool_size; - int dirty_watermark; - void (*flush_function)(struct ib_fmr_pool *pool, - void * arg); - void *flush_arg; - unsigned cache:1; -}; - -struct ib_pool_fmr { - struct ib_fmr *fmr; - struct ib_fmr_pool *pool; - struct list_head list; - struct hlist_node cache_node; - int ref_count; - int remap_count; - u64 io_virtual_address; - int page_list_len; - u64 page_list[0]; -}; - -struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, - struct ib_fmr_pool_param *params); - -int ib_destroy_fmr_pool(struct ib_fmr_pool *pool); - -int ib_flush_fmr_pool(struct ib_fmr_pool *pool); - -struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, - u64 *page_list, - int list_len, - u64 *io_virtual_address); - -int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); - -#endif /* IB_FMR_POOL_H */ diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h deleted file mode 100644 index 4a6bf6763a97..000000000000 --- a/drivers/infiniband/include/ib_mad.h +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_mad.h 1389 2004-12-27 22:56:47Z roland $ - */ - -#if !defined( IB_MAD_H ) -#define IB_MAD_H - -#include <ib_verbs.h> - -/* Management base version */ -#define IB_MGMT_BASE_VERSION 1 - -/* Management classes */ -#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 -#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 -#define IB_MGMT_CLASS_SUBN_ADM 0x03 -#define IB_MGMT_CLASS_PERF_MGMT 0x04 -#define IB_MGMT_CLASS_BM 0x05 -#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 -#define IB_MGMT_CLASS_CM 0x07 -#define IB_MGMT_CLASS_SNMP 0x08 -#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 -#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F - -/* Management methods */ -#define IB_MGMT_METHOD_GET 0x01 -#define IB_MGMT_METHOD_SET 0x02 -#define IB_MGMT_METHOD_GET_RESP 0x81 -#define IB_MGMT_METHOD_SEND 0x03 -#define IB_MGMT_METHOD_TRAP 0x05 -#define IB_MGMT_METHOD_REPORT 0x06 -#define IB_MGMT_METHOD_REPORT_RESP 0x86 -#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 - -#define IB_MGMT_METHOD_RESP 0x80 - -#define IB_MGMT_MAX_METHODS 128 - -#define IB_QP0 0 -#define IB_QP1 __constant_htonl(1) -#define IB_QP1_QKEY 0x80010000 - -struct ib_grh { - u32 version_tclass_flow; - u16 paylen; - u8 next_hdr; - u8 hop_limit; - union ib_gid sgid; - union ib_gid dgid; -} __attribute__ ((packed)); - -struct ib_mad_hdr { - u8 base_version; - u8 mgmt_class; - u8 class_version; - u8 method; - u16 status; - u16 class_specific; - u64 tid; - u16 attr_id; - u16 resv; - u32 attr_mod; -} __attribute__ ((packed)); - -struct ib_rmpp_hdr { - u8 rmpp_version; - u8 rmpp_type; - u8 rmpp_rtime_flags; - u8 rmpp_status; - u32 seg_num; - u32 paylen_newwin; -} __attribute__ ((packed)); - -struct ib_mad { - struct ib_mad_hdr mad_hdr; - u8 data[232]; -} __attribute__ ((packed)); - -struct ib_rmpp_mad { - struct ib_mad_hdr mad_hdr; - struct ib_rmpp_hdr rmpp_hdr; - u8 data[220]; -} __attribute__ ((packed)); - -struct ib_vendor_mad { - struct ib_mad_hdr mad_hdr; - struct ib_rmpp_hdr rmpp_hdr; - u8 reserved; - u8 oui[3]; - u8 data[216]; -} __attribute__ ((packed)); - -struct ib_mad_agent; -struct ib_mad_send_wc; -struct ib_mad_recv_wc; - -/** - * ib_mad_send_handler - callback handler for a sent MAD. - * @mad_agent: MAD agent that sent the MAD. - * @mad_send_wc: Send work completion information on the sent MAD. - */ -typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, - struct ib_mad_send_wc *mad_send_wc); - -/** - * ib_mad_snoop_handler - Callback handler for snooping sent MADs. - * @mad_agent: MAD agent that snooped the MAD. - * @send_wr: Work request information on the sent MAD. - * @mad_send_wc: Work completion information on the sent MAD. Valid - * only for snooping that occurs on a send completion. - * - * Clients snooping MADs should not modify data referenced by the @send_wr - * or @mad_send_wc. - */ -typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_mad_send_wc *mad_send_wc); - -/** - * ib_mad_recv_handler - callback handler for a received MAD. - * @mad_agent: MAD agent requesting the received MAD. - * @mad_recv_wc: Received work completion information on the received MAD. - * - * MADs received in response to a send request operation will be handed to - * the user after the send operation completes. All data buffers given - * to registered agents through this routine are owned by the receiving - * client, except for snooping agents. Clients snooping MADs should not - * modify the data referenced by @mad_recv_wc. - */ -typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, - struct ib_mad_recv_wc *mad_recv_wc); - -/** - * ib_mad_agent - Used to track MAD registration with the access layer. - * @device: Reference to device registration is on. - * @qp: Reference to QP used for sending and receiving MADs. - * @recv_handler: Callback handler for a received MAD. - * @send_handler: Callback handler for a sent MAD. - * @snoop_handler: Callback handler for snooped sent MADs. - * @context: User-specified context associated with this registration. - * @hi_tid: Access layer assigned transaction ID for this client. - * Unsolicited MADs sent by this client will have the upper 32-bits - * of their TID set to this value. - * @port_num: Port number on which QP is registered - */ -struct ib_mad_agent { - struct ib_device *device; - struct ib_qp *qp; - ib_mad_recv_handler recv_handler; - ib_mad_send_handler send_handler; - ib_mad_snoop_handler snoop_handler; - void *context; - u32 hi_tid; - u8 port_num; -}; - -/** - * ib_mad_send_wc - MAD send completion information. - * @wr_id: Work request identifier associated with the send MAD request. - * @status: Completion status. - * @vendor_err: Optional vendor error information returned with a failed - * request. - */ -struct ib_mad_send_wc { - u64 wr_id; - enum ib_wc_status status; - u32 vendor_err; -}; - -/** - * ib_mad_recv_buf - received MAD buffer information. - * @list: Reference to next data buffer for a received RMPP MAD. - * @grh: References a data buffer containing the global route header. - * The data refereced by this buffer is only valid if the GRH is - * valid. - * @mad: References the start of the received MAD. - */ -struct ib_mad_recv_buf { - struct list_head list; - struct ib_grh *grh; - struct ib_mad *mad; -}; - -/** - * ib_mad_recv_wc - received MAD information. - * @wc: Completion information for the received data. - * @recv_buf: Specifies the location of the received data buffer(s). - * @mad_len: The length of the received MAD, without duplicated headers. - * - * For received response, the wr_id field of the wc is set to the wr_id - * for the corresponding send request. - */ -struct ib_mad_recv_wc { - struct ib_wc *wc; - struct ib_mad_recv_buf recv_buf; - int mad_len; -}; - -/** - * ib_mad_reg_req - MAD registration request - * @mgmt_class: Indicates which management class of MADs should be receive - * by the caller. This field is only required if the user wishes to - * receive unsolicited MADs, otherwise it should be 0. - * @mgmt_class_version: Indicates which version of MADs for the given - * management class to receive. - * @oui: Indicates IEEE OUI when mgmt_class is a vendor class - * in the range from 0x30 to 0x4f. Otherwise not used. - * @method_mask: The caller will receive unsolicited MADs for any method - * where @method_mask = 1. - */ -struct ib_mad_reg_req { - u8 mgmt_class; - u8 mgmt_class_version; - u8 oui[3]; - DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); -}; - -/** - * ib_register_mad_agent - Register to send/receive MADs. - * @device: The device to register with. - * @port_num: The port on the specified device to use. - * @qp_type: Specifies which QP to access. Must be either - * IB_QPT_SMI or IB_QPT_GSI. - * @mad_reg_req: Specifies which unsolicited MADs should be received - * by the caller. This parameter may be NULL if the caller only - * wishes to receive solicited responses. - * @rmpp_version: If set, indicates that the client will send - * and receive MADs that contain the RMPP header for the given version. - * If set to 0, indicates that RMPP is not used by this client. - * @send_handler: The completion callback routine invoked after a send - * request has completed. - * @recv_handler: The completion callback routine invoked for a received - * MAD. - * @context: User specified context associated with the registration. - */ -struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - struct ib_mad_reg_req *mad_reg_req, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context); - -enum ib_mad_snoop_flags { - /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ - /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ - IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), - /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ - IB_MAD_SNOOP_RECVS = (1<<4) - /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ - /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ -}; - -/** - * ib_register_mad_snoop - Register to snoop sent and received MADs. - * @device: The device to register with. - * @port_num: The port on the specified device to use. - * @qp_type: Specifies which QP traffic to snoop. Must be either - * IB_QPT_SMI or IB_QPT_GSI. - * @mad_snoop_flags: Specifies information where snooping occurs. - * @send_handler: The callback routine invoked for a snooped send. - * @recv_handler: The callback routine invoked for a snooped receive. - * @context: User specified context associated with the registration. - */ -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context); - -/** - * ib_unregister_mad_agent - Unregisters a client from using MAD services. - * @mad_agent: Corresponding MAD registration request to deregister. - * - * After invoking this routine, MAD services are no longer usable by the - * client on the associated QP. - */ -int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); - -/** - * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated - * with the registered client. - * @mad_agent: Specifies the associated registration to post the send to. - * @send_wr: Specifies the information needed to send the MAD(s). - * @bad_send_wr: Specifies the MAD on which an error was encountered. - * - * Sent MADs are not guaranteed to complete in the order that they were posted. - */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); - -/** - * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. - * @mad_recv_wc: Work completion information for a received MAD. - * @buf: User-provided data buffer to receive the coalesced buffers. The - * referenced buffer should be at least the size of the mad_len specified - * by @mad_recv_wc. - * - * This call copies a chain of received RMPP MADs into a single data buffer, - * removing duplicated headers. - */ -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, - void *buf); - -/** - * ib_free_recv_mad - Returns data buffers used to receive a MAD to the - * access layer. - * @mad_recv_wc: Work completion information for a received MAD. - * - * Clients receiving MADs through their ib_mad_recv_handler must call this - * routine to return the work completion buffers to the access layer. - */ -void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); - -/** - * ib_cancel_mad - Cancels an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to cancel. - * - * MADs will be returned to the user through the corresponding - * ib_mad_send_handler. - */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - u64 wr_id); - -/** - * ib_redirect_mad_qp - Registers a QP for MAD services. - * @qp: Reference to a QP that requires MAD services. - * @rmpp_version: If set, indicates that the client will send - * and receive MADs that contain the RMPP header for the given version. - * If set to 0, indicates that RMPP is not used by this client. - * @send_handler: The completion callback routine invoked after a send - * request has completed. - * @recv_handler: The completion callback routine invoked for a received - * MAD. - * @context: User specified context associated with the registration. - * - * Use of this call allows clients to use MAD services, such as RMPP, - * on user-owned QPs. After calling this routine, users may send - * MADs on the specified QP by calling ib_mad_post_send. - */ -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context); - -/** - * ib_process_mad_wc - Processes a work completion associated with a - * MAD sent or received on a redirected QP. - * @mad_agent: Specifies the registered MAD service using the redirected QP. - * @wc: References a work completion associated with a sent or received - * MAD segment. - * - * This routine is used to complete or continue processing on a MAD request. - * If the work completion is associated with a send operation, calling - * this routine is required to continue an RMPP transfer or to wait for a - * corresponding response, if it is a request. If the work completion is - * associated with a receive operation, calling this routine is required to - * process an inbound or outbound RMPP transfer, or to match a response MAD - * with its corresponding request. - */ -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc); - -#endif /* IB_MAD_H */ diff --git a/drivers/infiniband/include/ib_pack.h b/drivers/infiniband/include/ib_pack.h deleted file mode 100644 index fe480f3e8654..000000000000 --- a/drivers/infiniband/include/ib_pack.h +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ - */ - -#ifndef IB_PACK_H -#define IB_PACK_H - -#include <ib_verbs.h> - -enum { - IB_LRH_BYTES = 8, - IB_GRH_BYTES = 40, - IB_BTH_BYTES = 12, - IB_DETH_BYTES = 8 -}; - -struct ib_field { - size_t struct_offset_bytes; - size_t struct_size_bytes; - int offset_words; - int offset_bits; - int size_bits; - char *field_name; -}; - -#define RESERVED \ - .field_name = "reserved" - -/* - * This macro cleans up the definitions of constants for BTH opcodes. - * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, - * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives - * the correct value. - * - * In short, user code should use the constants defined using the - * macro rather than worrying about adding together other constants. -*/ -#define IB_OPCODE(transport, op) \ - IB_OPCODE_ ## transport ## _ ## op = \ - IB_OPCODE_ ## transport + IB_OPCODE_ ## op - -enum { - /* transport types -- just used to define real constants */ - IB_OPCODE_RC = 0x00, - IB_OPCODE_UC = 0x20, - IB_OPCODE_RD = 0x40, - IB_OPCODE_UD = 0x60, - - /* operations -- just used to define real constants */ - IB_OPCODE_SEND_FIRST = 0x00, - IB_OPCODE_SEND_MIDDLE = 0x01, - IB_OPCODE_SEND_LAST = 0x02, - IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, - IB_OPCODE_SEND_ONLY = 0x04, - IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, - IB_OPCODE_RDMA_WRITE_FIRST = 0x06, - IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, - IB_OPCODE_RDMA_WRITE_LAST = 0x08, - IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, - IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, - IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, - IB_OPCODE_RDMA_READ_REQUEST = 0x0c, - IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, - IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, - IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, - IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, - IB_OPCODE_ACKNOWLEDGE = 0x11, - IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, - IB_OPCODE_COMPARE_SWAP = 0x13, - IB_OPCODE_FETCH_ADD = 0x14, - - /* real constants follow -- see comment about above IB_OPCODE() - macro for more details */ - - /* RC */ - IB_OPCODE(RC, SEND_FIRST), - IB_OPCODE(RC, SEND_MIDDLE), - IB_OPCODE(RC, SEND_LAST), - IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), - IB_OPCODE(RC, SEND_ONLY), - IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), - IB_OPCODE(RC, RDMA_WRITE_FIRST), - IB_OPCODE(RC, RDMA_WRITE_MIDDLE), - IB_OPCODE(RC, RDMA_WRITE_LAST), - IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), - IB_OPCODE(RC, RDMA_WRITE_ONLY), - IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), - IB_OPCODE(RC, RDMA_READ_REQUEST), - IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), - IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), - IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), - IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), - IB_OPCODE(RC, ACKNOWLEDGE), - IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), - IB_OPCODE(RC, COMPARE_SWAP), - IB_OPCODE(RC, FETCH_ADD), - - /* UC */ - IB_OPCODE(UC, SEND_FIRST), - IB_OPCODE(UC, SEND_MIDDLE), - IB_OPCODE(UC, SEND_LAST), - IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), - IB_OPCODE(UC, SEND_ONLY), - IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), - IB_OPCODE(UC, RDMA_WRITE_FIRST), - IB_OPCODE(UC, RDMA_WRITE_MIDDLE), - IB_OPCODE(UC, RDMA_WRITE_LAST), - IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), - IB_OPCODE(UC, RDMA_WRITE_ONLY), - IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), - - /* RD */ - IB_OPCODE(RD, SEND_FIRST), - IB_OPCODE(RD, SEND_MIDDLE), - IB_OPCODE(RD, SEND_LAST), - IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), - IB_OPCODE(RD, SEND_ONLY), - IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), - IB_OPCODE(RD, RDMA_WRITE_FIRST), - IB_OPCODE(RD, RDMA_WRITE_MIDDLE), - IB_OPCODE(RD, RDMA_WRITE_LAST), - IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), - IB_OPCODE(RD, RDMA_WRITE_ONLY), - IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), - IB_OPCODE(RD, RDMA_READ_REQUEST), - IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), - IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), - IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), - IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), - IB_OPCODE(RD, ACKNOWLEDGE), - IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), - IB_OPCODE(RD, COMPARE_SWAP), - IB_OPCODE(RD, FETCH_ADD), - - /* UD */ - IB_OPCODE(UD, SEND_ONLY), - IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) -}; - -enum { - IB_LNH_RAW = 0, - IB_LNH_IP = 1, - IB_LNH_IBA_LOCAL = 2, - IB_LNH_IBA_GLOBAL = 3 -}; - -struct ib_unpacked_lrh { - u8 virtual_lane; - u8 link_version; - u8 service_level; - u8 link_next_header; - __be16 destination_lid; - __be16 packet_length; - __be16 source_lid; -}; - -struct ib_unpacked_grh { - u8 ip_version; - u8 traffic_class; - __be32 flow_label; - __be16 payload_length; - u8 next_header; - u8 hop_limit; - union ib_gid source_gid; - union ib_gid destination_gid; -}; - -struct ib_unpacked_bth { - u8 opcode; - u8 solicited_event; - u8 mig_req; - u8 pad_count; - u8 transport_header_version; - __be16 pkey; - __be32 destination_qpn; - u8 ack_req; - __be32 psn; -}; - -struct ib_unpacked_deth { - __be32 qkey; - __be32 source_qpn; -}; - -struct ib_ud_header { - struct ib_unpacked_lrh lrh; - int grh_present; - struct ib_unpacked_grh grh; - struct ib_unpacked_bth bth; - struct ib_unpacked_deth deth; - int immediate_present; - __be32 immediate_data; -}; - -void ib_pack(const struct ib_field *desc, - int desc_len, - void *structure, - void *buf); - -void ib_unpack(const struct ib_field *desc, - int desc_len, - void *buf, - void *structure); - -void ib_ud_header_init(int payload_bytes, - int grh_present, - struct ib_ud_header *header); - -int ib_ud_header_pack(struct ib_ud_header *header, - void *buf); - -int ib_ud_header_unpack(void *buf, - struct ib_ud_header *header); - -#endif /* IB_PACK_H */ diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h deleted file mode 100644 index 00222285eb9a..000000000000 --- a/drivers/infiniband/include/ib_sa.h +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_sa.h 1389 2004-12-27 22:56:47Z roland $ - */ - -#ifndef IB_SA_H -#define IB_SA_H - -#include <linux/compiler.h> - -#include <ib_verbs.h> -#include <ib_mad.h> - -enum { - IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ - - IB_SA_METHOD_DELETE = 0x15 -}; - -enum ib_sa_selector { - IB_SA_GTE = 0, - IB_SA_LTE = 1, - IB_SA_EQ = 2, - /* - * The meaning of "best" depends on the attribute: for - * example, for MTU best will return the largest available - * MTU, while for packet life time, best will return the - * smallest available life time. - */ - IB_SA_BEST = 3 -}; - -enum ib_sa_rate { - IB_SA_RATE_2_5_GBPS = 2, - IB_SA_RATE_5_GBPS = 5, - IB_SA_RATE_10_GBPS = 3, - IB_SA_RATE_20_GBPS = 6, - IB_SA_RATE_30_GBPS = 4, - IB_SA_RATE_40_GBPS = 7, - IB_SA_RATE_60_GBPS = 8, - IB_SA_RATE_80_GBPS = 9, - IB_SA_RATE_120_GBPS = 10 -}; - -static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate) -{ - switch (rate) { - case IB_SA_RATE_2_5_GBPS: return 1; - case IB_SA_RATE_5_GBPS: return 2; - case IB_SA_RATE_10_GBPS: return 4; - case IB_SA_RATE_20_GBPS: return 8; - case IB_SA_RATE_30_GBPS: return 12; - case IB_SA_RATE_40_GBPS: return 16; - case IB_SA_RATE_60_GBPS: return 24; - case IB_SA_RATE_80_GBPS: return 32; - case IB_SA_RATE_120_GBPS: return 48; - default: return -1; - } -} - -typedef u64 __bitwise ib_sa_comp_mask; - -#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n)) - -/* - * Structures for SA records are named "struct ib_sa_xxx_rec." No - * attempt is made to pack structures to match the physical layout of - * SA records in SA MADs; all packing and unpacking is handled by the - * SA query code. - * - * For a record with structure ib_sa_xxx_rec, the naming convention - * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we - * never use different abbreviations or otherwise change the spelling - * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). - * - * Reserved rows are indicated with comments to help maintainability. - */ - -/* reserved: 0 */ -/* reserved: 1 */ -#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) -#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) -#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) -#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) -#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) -/* reserved: 7 */ -#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) -#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) -#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) -#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) -#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) -#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) -/* reserved: 14 */ -#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) -#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) -#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) -#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) -#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) -#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) -#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) -#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) - -struct ib_sa_path_rec { - /* reserved */ - /* reserved */ - union ib_gid dgid; - union ib_gid sgid; - u16 dlid; - u16 slid; - int raw_traffic; - /* reserved */ - u32 flow_label; - u8 hop_limit; - u8 traffic_class; - int reversible; - u8 numb_path; - u16 pkey; - /* reserved */ - u8 sl; - u8 mtu_selector; - u8 mtu; - u8 rate_selector; - u8 rate; - u8 packet_life_time_selector; - u8 packet_life_time; - u8 preference; -}; - -#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) -#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) -#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) -#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) -#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) -#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) -#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) -#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) -#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) -#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9) -#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) -#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11) -#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) -#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) -#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) -#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) -#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) -#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) - -struct ib_sa_mcmember_rec { - union ib_gid mgid; - union ib_gid port_gid; - u32 qkey; - u16 mlid; - u8 mtu_selector; - u8 mtu; - u8 traffic_class; - u16 pkey; - u8 rate_selector; - u8 rate; - u8 packet_life_time_selector; - u8 packet_life_time; - u8 sl; - u32 flow_label; - u8 hop_limit; - u8 scope; - u8 join_state; - int proxy_join; -}; - -struct ib_sa_query; - -void ib_sa_cancel_query(int id, struct ib_sa_query *query); - -int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, int gfp_mask, - void (*callback)(int status, - struct ib_sa_path_rec *resp, - void *context), - void *context, - struct ib_sa_query **query); - -int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, - u8 method, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, int gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **query); - -/** - * ib_sa_mcmember_rec_set - Start an MCMember set query - * @device:device to send query on - * @port_num: port number to send query on - * @rec:MCMember Record to send in query - * @comp_mask:component mask to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when query completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query - * - * Send an MCMember Set query to the SA (eg to join a multicast - * group). The callback function will be called when the query - * completes (or fails); status is 0 for a successful response, -EINTR - * if the query is canceled, -ETIMEDOUT is the query timed out, or - * -EIO if an error occurred sending the query. The resp parameter of - * the callback is only valid if status is 0. - * - * If the return value of ib_sa_mcmember_rec_set() is negative, it is - * an error code. Otherwise it is a query ID that can be used to - * cancel the query. - */ -static inline int -ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, int gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **query) -{ - return ib_sa_mcmember_rec_query(device, port_num, - IB_MGMT_METHOD_SET, - rec, comp_mask, - timeout_ms, gfp_mask, callback, - context, query); -} - -/** - * ib_sa_mcmember_rec_delete - Start an MCMember delete query - * @device:device to send query on - * @port_num: port number to send query on - * @rec:MCMember Record to send in query - * @comp_mask:component mask to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when query completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query - * - * Send an MCMember Delete query to the SA (eg to leave a multicast - * group). The callback function will be called when the query - * completes (or fails); status is 0 for a successful response, -EINTR - * if the query is canceled, -ETIMEDOUT is the query timed out, or - * -EIO if an error occurred sending the query. The resp parameter of - * the callback is only valid if status is 0. - * - * If the return value of ib_sa_mcmember_rec_delete() is negative, it - * is an error code. Otherwise it is a query ID that can be used to - * cancel the query. - */ -static inline int -ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, int gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **query) -{ - return ib_sa_mcmember_rec_query(device, port_num, - IB_SA_METHOD_DELETE, - rec, comp_mask, - timeout_ms, gfp_mask, callback, - context, query); -} - - -#endif /* IB_SA_H */ diff --git a/drivers/infiniband/include/ib_smi.h b/drivers/infiniband/include/ib_smi.h deleted file mode 100644 index ca8216514963..000000000000 --- a/drivers/infiniband/include/ib_smi.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $ - */ - -#if !defined( IB_SMI_H ) -#define IB_SMI_H - -#include <ib_mad.h> - -#define IB_LID_PERMISSIVE 0xFFFF - -#define IB_SMP_DATA_SIZE 64 -#define IB_SMP_MAX_PATH_HOPS 64 - -struct ib_smp { - u8 base_version; - u8 mgmt_class; - u8 class_version; - u8 method; - u16 status; - u8 hop_ptr; - u8 hop_cnt; - u64 tid; - u16 attr_id; - u16 resv; - u32 attr_mod; - u64 mkey; - u16 dr_slid; - u16 dr_dlid; - u8 reserved[28]; - u8 data[IB_SMP_DATA_SIZE]; - u8 initial_path[IB_SMP_MAX_PATH_HOPS]; - u8 return_path[IB_SMP_MAX_PATH_HOPS]; -} __attribute__ ((packed)); - -#define IB_SMP_DIRECTION __constant_htons(0x8000) - -/* Subnet management attributes */ -#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002) -#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010) -#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011) -#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012) -#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014) -#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015) -#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016) -#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017) -#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018) -#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019) -#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A) -#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B) -#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020) -#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030) -#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) -#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) - -static inline u8 -ib_get_smp_direction(struct ib_smp *smp) -{ - return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); -} - -#endif /* IB_SMI_H */ diff --git a/drivers/infiniband/include/ib_user_mad.h b/drivers/infiniband/include/ib_user_mad.h deleted file mode 100644 index 06ad4a6075fa..000000000000 --- a/drivers/infiniband/include/ib_user_mad.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_user_mad.h 1389 2004-12-27 22:56:47Z roland $ - */ - -#ifndef IB_USER_MAD_H -#define IB_USER_MAD_H - -#include <linux/types.h> -#include <linux/ioctl.h> - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define IB_USER_MAD_ABI_VERSION 2 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - */ - -/** - * ib_user_mad - MAD packet - * @data - Contents of MAD - * @id - ID of agent MAD received with/to be sent with - * @status - 0 on successful receive, ETIMEDOUT if no response - * received (transaction ID in data[] will be set to TID of original - * request) (ignored on send) - * @timeout_ms - Milliseconds to wait for response (unset on receive) - * @qpn - Remote QP number received from/to be sent to - * @qkey - Remote Q_Key to be sent with (unset on receive) - * @lid - Remote lid received from/to be sent to - * @sl - Service level received with/to be sent with - * @path_bits - Local path bits received with/to be sent with - * @grh_present - If set, GRH was received/should be sent - * @gid_index - Local GID index to send with (unset on receive) - * @hop_limit - Hop limit in GRH - * @traffic_class - Traffic class in GRH - * @gid - Remote GID in GRH - * @flow_label - Flow label in GRH - * - * All multi-byte quantities are stored in network (big endian) byte order. - */ -struct ib_user_mad { - __u8 data[256]; - __u32 id; - __u32 status; - __u32 timeout_ms; - __u32 qpn; - __u32 qkey; - __u16 lid; - __u8 sl; - __u8 path_bits; - __u8 grh_present; - __u8 gid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 gid[16]; - __u32 flow_label; -}; - -/** - * ib_user_mad_reg_req - MAD registration request - * @id - Set by the kernel; used to identify agent in future requests. - * @qpn - Queue pair number; must be 0 or 1. - * @method_mask - The caller will receive unsolicited MADs for any method - * where @method_mask = 1. - * @mgmt_class - Indicates which management class of MADs should be receive - * by the caller. This field is only required if the user wishes to - * receive unsolicited MADs, otherwise it should be 0. - * @mgmt_class_version - Indicates which version of MADs for the given - * management class to receive. - * @oui: Indicates IEEE OUI when mgmt_class is a vendor class - * in the range from 0x30 to 0x4f. Otherwise not used. - */ -struct ib_user_mad_reg_req { - __u32 id; - __u32 method_mask[4]; - __u8 qpn; - __u8 mgmt_class; - __u8 mgmt_class_version; - __u8 oui[3]; -}; - -#define IB_IOCTL_MAGIC 0x1b - -#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ - struct ib_user_mad_reg_req) - -#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) - -#endif /* IB_USER_MAD_H */ diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h deleted file mode 100644 index cf01f044a223..000000000000 --- a/drivers/infiniband/include/ib_verbs.h +++ /dev/null @@ -1,1252 +0,0 @@ -/* - * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ - */ - -#if !defined(IB_VERBS_H) -#define IB_VERBS_H - -#include <linux/types.h> -#include <linux/device.h> -#include <asm/atomic.h> - -union ib_gid { - u8 raw[16]; - struct { - u64 subnet_prefix; - u64 interface_id; - } global; -}; - -enum ib_node_type { - IB_NODE_CA = 1, - IB_NODE_SWITCH, - IB_NODE_ROUTER -}; - -enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = 1, - IB_DEVICE_BAD_PKEY_CNTR = (1<<1), - IB_DEVICE_BAD_QKEY_CNTR = (1<<2), - IB_DEVICE_RAW_MULTI = (1<<3), - IB_DEVICE_AUTO_PATH_MIG = (1<<4), - IB_DEVICE_CHANGE_PHY_PORT = (1<<5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), - IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), - IB_DEVICE_SHUTDOWN_PORT = (1<<8), - IB_DEVICE_INIT_TYPE = (1<<9), - IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), - IB_DEVICE_SYS_IMAGE_GUID = (1<<11), - IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), - IB_DEVICE_SRQ_RESIZE = (1<<13), - IB_DEVICE_N_NOTIFY_CQ = (1<<14), -}; - -enum ib_atomic_cap { - IB_ATOMIC_NONE, - IB_ATOMIC_HCA, - IB_ATOMIC_GLOB -}; - -struct ib_device_attr { - u64 fw_ver; - u64 node_guid; - u64 sys_image_guid; - u64 max_mr_size; - u64 page_size_cap; - u32 vendor_id; - u32 vendor_part_id; - u32 hw_ver; - int max_qp; - int max_qp_wr; - int device_cap_flags; - int max_sge; - int max_sge_rd; - int max_cq; - int max_cqe; - int max_mr; - int max_pd; - int max_qp_rd_atom; - int max_ee_rd_atom; - int max_res_rd_atom; - int max_qp_init_rd_atom; - int max_ee_init_rd_atom; - enum ib_atomic_cap atomic_cap; - int max_ee; - int max_rdd; - int max_mw; - int max_raw_ipv6_qp; - int max_raw_ethy_qp; - int max_mcast_grp; - int max_mcast_qp_attach; - int max_total_mcast_qp_attach; - int max_ah; - int max_fmr; - int max_map_per_fmr; - int max_srq; - int max_srq_wr; - int max_srq_sge; - u16 max_pkeys; - u8 local_ca_ack_delay; -}; - -enum ib_mtu { - IB_MTU_256 = 1, - IB_MTU_512 = 2, - IB_MTU_1024 = 3, - IB_MTU_2048 = 4, - IB_MTU_4096 = 5 -}; - -static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) -{ - switch (mtu) { - case IB_MTU_256: return 256; - case IB_MTU_512: return 512; - case IB_MTU_1024: return 1024; - case IB_MTU_2048: return 2048; - case IB_MTU_4096: return 4096; - default: return -1; - } -} - -enum ib_port_state { - IB_PORT_NOP = 0, - IB_PORT_DOWN = 1, - IB_PORT_INIT = 2, - IB_PORT_ARMED = 3, - IB_PORT_ACTIVE = 4, - IB_PORT_ACTIVE_DEFER = 5 -}; - -enum ib_port_cap_flags { - IB_PORT_SM = 1 << 1, - IB_PORT_NOTICE_SUP = 1 << 2, - IB_PORT_TRAP_SUP = 1 << 3, - IB_PORT_OPT_IPD_SUP = 1 << 4, - IB_PORT_AUTO_MIGR_SUP = 1 << 5, - IB_PORT_SL_MAP_SUP = 1 << 6, - IB_PORT_MKEY_NVRAM = 1 << 7, - IB_PORT_PKEY_NVRAM = 1 << 8, - IB_PORT_LED_INFO_SUP = 1 << 9, - IB_PORT_SM_DISABLED = 1 << 10, - IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, - IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, - IB_PORT_CM_SUP = 1 << 16, - IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, - IB_PORT_REINIT_SUP = 1 << 18, - IB_PORT_DEVICE_MGMT_SUP = 1 << 19, - IB_PORT_VENDOR_CLASS_SUP = 1 << 20, - IB_PORT_DR_NOTICE_SUP = 1 << 21, - IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, - IB_PORT_BOOT_MGMT_SUP = 1 << 23, - IB_PORT_LINK_LATENCY_SUP = 1 << 24, - IB_PORT_CLIENT_REG_SUP = 1 << 25 -}; - -enum ib_port_width { - IB_WIDTH_1X = 1, - IB_WIDTH_4X = 2, - IB_WIDTH_8X = 4, - IB_WIDTH_12X = 8 -}; - -static inline int ib_width_enum_to_int(enum ib_port_width width) -{ - switch (width) { - case IB_WIDTH_1X: return 1; - case IB_WIDTH_4X: return 4; - case IB_WIDTH_8X: return 8; - case IB_WIDTH_12X: return 12; - default: return -1; - } -} - -struct ib_port_attr { - enum ib_port_state state; - enum ib_mtu max_mtu; - enum ib_mtu active_mtu; - int gid_tbl_len; - u32 port_cap_flags; - u32 max_msg_sz; - u32 bad_pkey_cntr; - u32 qkey_viol_cntr; - u16 pkey_tbl_len; - u16 lid; - u16 sm_lid; - u8 lmc; - u8 max_vl_num; - u8 sm_sl; - u8 subnet_timeout; - u8 init_type_reply; - u8 active_width; - u8 active_speed; - u8 phys_state; -}; - -enum ib_device_modify_flags { - IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 -}; - -struct ib_device_modify { - u64 sys_image_guid; -}; - -enum ib_port_modify_flags { - IB_PORT_SHUTDOWN = 1, - IB_PORT_INIT_TYPE = (1<<2), - IB_PORT_RESET_QKEY_CNTR = (1<<3) -}; - -struct ib_port_modify { - u32 set_port_cap_mask; - u32 clr_port_cap_mask; - u8 init_type; -}; - -enum ib_event_type { - IB_EVENT_CQ_ERR, - IB_EVENT_QP_FATAL, - IB_EVENT_QP_REQ_ERR, - IB_EVENT_QP_ACCESS_ERR, - IB_EVENT_COMM_EST, - IB_EVENT_SQ_DRAINED, - IB_EVENT_PATH_MIG, - IB_EVENT_PATH_MIG_ERR, - IB_EVENT_DEVICE_FATAL, - IB_EVENT_PORT_ACTIVE, - IB_EVENT_PORT_ERR, - IB_EVENT_LID_CHANGE, - IB_EVENT_PKEY_CHANGE, - IB_EVENT_SM_CHANGE -}; - -struct ib_event { - struct ib_device *device; - union { - struct ib_cq *cq; - struct ib_qp *qp; - u8 port_num; - } element; - enum ib_event_type event; -}; - -struct ib_event_handler { - struct ib_device *device; - void (*handler)(struct ib_event_handler *, struct ib_event *); - struct list_head list; -}; - -#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ - do { \ - (_ptr)->device = _device; \ - (_ptr)->handler = _handler; \ - INIT_LIST_HEAD(&(_ptr)->list); \ - } while (0) - -struct ib_global_route { - union ib_gid dgid; - u32 flow_label; - u8 sgid_index; - u8 hop_limit; - u8 traffic_class; -}; - -enum { - IB_MULTICAST_QPN = 0xffffff -}; - -enum ib_ah_flags { - IB_AH_GRH = 1 -}; - -struct ib_ah_attr { - struct ib_global_route grh; - u16 dlid; - u8 sl; - u8 src_path_bits; - u8 static_rate; - u8 ah_flags; - u8 port_num; -}; - -enum ib_wc_status { - IB_WC_SUCCESS, - IB_WC_LOC_LEN_ERR, - IB_WC_LOC_QP_OP_ERR, - IB_WC_LOC_EEC_OP_ERR, - IB_WC_LOC_PROT_ERR, - IB_WC_WR_FLUSH_ERR, - IB_WC_MW_BIND_ERR, - IB_WC_BAD_RESP_ERR, - IB_WC_LOC_ACCESS_ERR, - IB_WC_REM_INV_REQ_ERR, - IB_WC_REM_ACCESS_ERR, - IB_WC_REM_OP_ERR, - IB_WC_RETRY_EXC_ERR, - IB_WC_RNR_RETRY_EXC_ERR, - IB_WC_LOC_RDD_VIOL_ERR, - IB_WC_REM_INV_RD_REQ_ERR, - IB_WC_REM_ABORT_ERR, - IB_WC_INV_EECN_ERR, - IB_WC_INV_EEC_STATE_ERR, - IB_WC_FATAL_ERR, - IB_WC_RESP_TIMEOUT_ERR, - IB_WC_GENERAL_ERR -}; - -enum ib_wc_opcode { - IB_WC_SEND, - IB_WC_RDMA_WRITE, - IB_WC_RDMA_READ, - IB_WC_COMP_SWAP, - IB_WC_FETCH_ADD, - IB_WC_BIND_MW, -/* - * Set value of IB_WC_RECV so consumers can test if a completion is a - * receive by testing (opcode & IB_WC_RECV). - */ - IB_WC_RECV = 1 << 7, - IB_WC_RECV_RDMA_WITH_IMM -}; - -enum ib_wc_flags { - IB_WC_GRH = 1, - IB_WC_WITH_IMM = (1<<1) -}; - -struct ib_wc { - u64 wr_id; - enum ib_wc_status status; - enum ib_wc_opcode opcode; - u32 vendor_err; - u32 byte_len; - __be32 imm_data; - u32 qp_num; - u32 src_qp; - int wc_flags; - u16 pkey_index; - u16 slid; - u8 sl; - u8 dlid_path_bits; - u8 port_num; /* valid only for DR SMPs on switches */ -}; - -enum ib_cq_notify { - IB_CQ_SOLICITED, - IB_CQ_NEXT_COMP -}; - -struct ib_qp_cap { - u32 max_send_wr; - u32 max_recv_wr; - u32 max_send_sge; - u32 max_recv_sge; - u32 max_inline_data; -}; - -enum ib_sig_type { - IB_SIGNAL_ALL_WR, - IB_SIGNAL_REQ_WR -}; - -enum ib_qp_type { - /* - * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries - * here (and in that order) since the MAD layer uses them as - * indices into a 2-entry table. - */ - IB_QPT_SMI, - IB_QPT_GSI, - - IB_QPT_RC, - IB_QPT_UC, - IB_QPT_UD, - IB_QPT_RAW_IPV6, - IB_QPT_RAW_ETY -}; - -struct ib_qp_init_attr { - void (*event_handler)(struct ib_event *, void *); - void *qp_context; - struct ib_cq *send_cq; - struct ib_cq *recv_cq; - struct ib_srq *srq; - struct ib_qp_cap cap; - enum ib_sig_type sq_sig_type; - enum ib_qp_type qp_type; - u8 port_num; /* special QP types only */ -}; - -enum ib_rnr_timeout { - IB_RNR_TIMER_655_36 = 0, - IB_RNR_TIMER_000_01 = 1, - IB_RNR_TIMER_000_02 = 2, - IB_RNR_TIMER_000_03 = 3, - IB_RNR_TIMER_000_04 = 4, - IB_RNR_TIMER_000_06 = 5, - IB_RNR_TIMER_000_08 = 6, - IB_RNR_TIMER_000_12 = 7, - IB_RNR_TIMER_000_16 = 8, - IB_RNR_TIMER_000_24 = 9, - IB_RNR_TIMER_000_32 = 10, - IB_RNR_TIMER_000_48 = 11, - IB_RNR_TIMER_000_64 = 12, - IB_RNR_TIMER_000_96 = 13, - IB_RNR_TIMER_001_28 = 14, - IB_RNR_TIMER_001_92 = 15, - IB_RNR_TIMER_002_56 = 16, - IB_RNR_TIMER_003_84 = 17, - IB_RNR_TIMER_005_12 = 18, - IB_RNR_TIMER_007_68 = 19, - IB_RNR_TIMER_010_24 = 20, - IB_RNR_TIMER_015_36 = 21, - IB_RNR_TIMER_020_48 = 22, - IB_RNR_TIMER_030_72 = 23, - IB_RNR_TIMER_040_96 = 24, - IB_RNR_TIMER_061_44 = 25, - IB_RNR_TIMER_081_92 = 26, - IB_RNR_TIMER_122_88 = 27, - IB_RNR_TIMER_163_84 = 28, - IB_RNR_TIMER_245_76 = 29, - IB_RNR_TIMER_327_68 = 30, - IB_RNR_TIMER_491_52 = 31 -}; - -enum ib_qp_attr_mask { - IB_QP_STATE = 1, - IB_QP_CUR_STATE = (1<<1), - IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), - IB_QP_ACCESS_FLAGS = (1<<3), - IB_QP_PKEY_INDEX = (1<<4), - IB_QP_PORT = (1<<5), - IB_QP_QKEY = (1<<6), - IB_QP_AV = (1<<7), - IB_QP_PATH_MTU = (1<<8), - IB_QP_TIMEOUT = (1<<9), - IB_QP_RETRY_CNT = (1<<10), - IB_QP_RNR_RETRY = (1<<11), - IB_QP_RQ_PSN = (1<<12), - IB_QP_MAX_QP_RD_ATOMIC = (1<<13), - IB_QP_ALT_PATH = (1<<14), - IB_QP_MIN_RNR_TIMER = (1<<15), - IB_QP_SQ_PSN = (1<<16), - IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), - IB_QP_PATH_MIG_STATE = (1<<18), - IB_QP_CAP = (1<<19), - IB_QP_DEST_QPN = (1<<20) -}; - -enum ib_qp_state { - IB_QPS_RESET, - IB_QPS_INIT, - IB_QPS_RTR, - IB_QPS_RTS, - IB_QPS_SQD, - IB_QPS_SQE, - IB_QPS_ERR -}; - -enum ib_mig_state { - IB_MIG_MIGRATED, - IB_MIG_REARM, - IB_MIG_ARMED -}; - -struct ib_qp_attr { - enum ib_qp_state qp_state; - enum ib_qp_state cur_qp_state; - enum ib_mtu path_mtu; - enum ib_mig_state path_mig_state; - u32 qkey; - u32 rq_psn; - u32 sq_psn; - u32 dest_qp_num; - int qp_access_flags; - struct ib_qp_cap cap; - struct ib_ah_attr ah_attr; - struct ib_ah_attr alt_ah_attr; - u16 pkey_index; - u16 alt_pkey_index; - u8 en_sqd_async_notify; - u8 sq_draining; - u8 max_rd_atomic; - u8 max_dest_rd_atomic; - u8 min_rnr_timer; - u8 port_num; - u8 timeout; - u8 retry_cnt; - u8 rnr_retry; - u8 alt_port_num; - u8 alt_timeout; -}; - -enum ib_wr_opcode { - IB_WR_RDMA_WRITE, - IB_WR_RDMA_WRITE_WITH_IMM, - IB_WR_SEND, - IB_WR_SEND_WITH_IMM, - IB_WR_RDMA_READ, - IB_WR_ATOMIC_CMP_AND_SWP, - IB_WR_ATOMIC_FETCH_AND_ADD -}; - -enum ib_send_flags { - IB_SEND_FENCE = 1, - IB_SEND_SIGNALED = (1<<1), - IB_SEND_SOLICITED = (1<<2), - IB_SEND_INLINE = (1<<3) -}; - -struct ib_sge { - u64 addr; - u32 length; - u32 lkey; -}; - -struct ib_send_wr { - struct ib_send_wr *next; - u64 wr_id; - struct ib_sge *sg_list; - int num_sge; - enum ib_wr_opcode opcode; - int send_flags; - u32 imm_data; - union { - struct { - u64 remote_addr; - u32 rkey; - } rdma; - struct { - u64 remote_addr; - u64 compare_add; - u64 swap; - u32 rkey; - } atomic; - struct { - struct ib_ah *ah; - struct ib_mad_hdr *mad_hdr; - u32 remote_qpn; - u32 remote_qkey; - int timeout_ms; /* valid for MADs only */ - u16 pkey_index; /* valid for GSI only */ - u8 port_num; /* valid for DR SMPs on switch only */ - } ud; - } wr; -}; - -struct ib_recv_wr { - struct ib_recv_wr *next; - u64 wr_id; - struct ib_sge *sg_list; - int num_sge; -}; - -enum ib_access_flags { - IB_ACCESS_LOCAL_WRITE = 1, - IB_ACCESS_REMOTE_WRITE = (1<<1), - IB_ACCESS_REMOTE_READ = (1<<2), - IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4) -}; - -struct ib_phys_buf { - u64 addr; - u64 size; -}; - -struct ib_mr_attr { - struct ib_pd *pd; - u64 device_virt_addr; - u64 size; - int mr_access_flags; - u32 lkey; - u32 rkey; -}; - -enum ib_mr_rereg_flags { - IB_MR_REREG_TRANS = 1, - IB_MR_REREG_PD = (1<<1), - IB_MR_REREG_ACCESS = (1<<2) -}; - -struct ib_mw_bind { - struct ib_mr *mr; - u64 wr_id; - u64 addr; - u32 length; - int send_flags; - int mw_access_flags; -}; - -struct ib_fmr_attr { - int max_pages; - int max_maps; - u8 page_size; -}; - -struct ib_pd { - struct ib_device *device; - atomic_t usecnt; /* count all resources */ -}; - -struct ib_ah { - struct ib_device *device; - struct ib_pd *pd; -}; - -typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); - -struct ib_cq { - struct ib_device *device; - ib_comp_handler comp_handler; - void (*event_handler)(struct ib_event *, void *); - void * cq_context; - int cqe; - atomic_t usecnt; /* count number of work queues */ -}; - -struct ib_srq { - struct ib_device *device; - struct ib_pd *pd; - void *srq_context; - atomic_t usecnt; -}; - -struct ib_qp { - struct ib_device *device; - struct ib_pd *pd; - struct ib_cq *send_cq; - struct ib_cq *recv_cq; - struct ib_srq *srq; - void (*event_handler)(struct ib_event *, void *); - void *qp_context; - u32 qp_num; - enum ib_qp_type qp_type; -}; - -struct ib_mr { - struct ib_device *device; - struct ib_pd *pd; - u32 lkey; - u32 rkey; - atomic_t usecnt; /* count number of MWs */ -}; - -struct ib_mw { - struct ib_device *device; - struct ib_pd *pd; - u32 rkey; -}; - -struct ib_fmr { - struct ib_device *device; - struct ib_pd *pd; - struct list_head list; - u32 lkey; - u32 rkey; -}; - -struct ib_mad; -struct ib_grh; - -enum ib_process_mad_flags { - IB_MAD_IGNORE_MKEY = 1, - IB_MAD_IGNORE_BKEY = 2, - IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY -}; - -enum ib_mad_result { - IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ - IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ - IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ - IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ -}; - -#define IB_DEVICE_NAME_MAX 64 - -struct ib_cache { - rwlock_t lock; - struct ib_event_handler event_handler; - struct ib_pkey_cache **pkey_cache; - struct ib_gid_cache **gid_cache; -}; - -struct ib_device { - struct device *dma_device; - - char name[IB_DEVICE_NAME_MAX]; - - struct list_head event_handler_list; - spinlock_t event_handler_lock; - - struct list_head core_list; - struct list_head client_data_list; - spinlock_t client_data_lock; - - struct ib_cache cache; - - u32 flags; - - int (*query_device)(struct ib_device *device, - struct ib_device_attr *device_attr); - int (*query_port)(struct ib_device *device, - u8 port_num, - struct ib_port_attr *port_attr); - int (*query_gid)(struct ib_device *device, - u8 port_num, int index, - union ib_gid *gid); - int (*query_pkey)(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey); - int (*modify_device)(struct ib_device *device, - int device_modify_mask, - struct ib_device_modify *device_modify); - int (*modify_port)(struct ib_device *device, - u8 port_num, int port_modify_mask, - struct ib_port_modify *port_modify); - struct ib_pd * (*alloc_pd)(struct ib_device *device); - int (*dealloc_pd)(struct ib_pd *pd); - struct ib_ah * (*create_ah)(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); - int (*modify_ah)(struct ib_ah *ah, - struct ib_ah_attr *ah_attr); - int (*query_ah)(struct ib_ah *ah, - struct ib_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah); - struct ib_qp * (*create_qp)(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); - int (*modify_qp)(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask); - int (*query_qp)(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr); - int (*destroy_qp)(struct ib_qp *qp); - int (*post_send)(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); - int (*post_recv)(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, - int cqe); - int (*destroy_cq)(struct ib_cq *cq); - int (*resize_cq)(struct ib_cq *cq, int *cqe); - int (*poll_cq)(struct ib_cq *cq, int num_entries, - struct ib_wc *wc); - int (*peek_cq)(struct ib_cq *cq, int wc_cnt); - int (*req_notify_cq)(struct ib_cq *cq, - enum ib_cq_notify cq_notify); - int (*req_ncomp_notif)(struct ib_cq *cq, - int wc_cnt); - struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, - int mr_access_flags); - struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); - int (*query_mr)(struct ib_mr *mr, - struct ib_mr_attr *mr_attr); - int (*dereg_mr)(struct ib_mr *mr); - int (*rereg_phys_mr)(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); - struct ib_mw * (*alloc_mw)(struct ib_pd *pd); - int (*bind_mw)(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind); - int (*dealloc_mw)(struct ib_mw *mw); - struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - int (*map_phys_fmr)(struct ib_fmr *fmr, - u64 *page_list, int list_len, - u64 iova); - int (*unmap_fmr)(struct list_head *fmr_list); - int (*dealloc_fmr)(struct ib_fmr *fmr); - int (*attach_mcast)(struct ib_qp *qp, - union ib_gid *gid, - u16 lid); - int (*detach_mcast)(struct ib_qp *qp, - union ib_gid *gid, - u16 lid); - int (*process_mad)(struct ib_device *device, - int process_mad_flags, - u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); - - struct class_device class_dev; - struct kobject ports_parent; - struct list_head port_list; - - enum { - IB_DEV_UNINITIALIZED, - IB_DEV_REGISTERED, - IB_DEV_UNREGISTERED - } reg_state; - - u8 node_type; - u8 phys_port_cnt; -}; - -struct ib_client { - char *name; - void (*add) (struct ib_device *); - void (*remove)(struct ib_device *); - - struct list_head list; -}; - -struct ib_device *ib_alloc_device(size_t size); -void ib_dealloc_device(struct ib_device *device); - -int ib_register_device (struct ib_device *device); -void ib_unregister_device(struct ib_device *device); - -int ib_register_client (struct ib_client *client); -void ib_unregister_client(struct ib_client *client); - -void *ib_get_client_data(struct ib_device *device, struct ib_client *client); -void ib_set_client_data(struct ib_device *device, struct ib_client *client, - void *data); - -int ib_register_event_handler (struct ib_event_handler *event_handler); -int ib_unregister_event_handler(struct ib_event_handler *event_handler); -void ib_dispatch_event(struct ib_event *event); - -int ib_query_device(struct ib_device *device, - struct ib_device_attr *device_attr); - -int ib_query_port(struct ib_device *device, - u8 port_num, struct ib_port_attr *port_attr); - -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid); - -int ib_query_pkey(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey); - -int ib_modify_device(struct ib_device *device, - int device_modify_mask, - struct ib_device_modify *device_modify); - -int ib_modify_port(struct ib_device *device, - u8 port_num, int port_modify_mask, - struct ib_port_modify *port_modify); - -/** - * ib_alloc_pd - Allocates an unused protection domain. - * @device: The device on which to allocate the protection domain. - * - * A protection domain object provides an association between QPs, shared - * receive queues, address handles, memory regions, and memory windows. - */ -struct ib_pd *ib_alloc_pd(struct ib_device *device); - -/** - * ib_dealloc_pd - Deallocates a protection domain. - * @pd: The protection domain to deallocate. - */ -int ib_dealloc_pd(struct ib_pd *pd); - -/** - * ib_create_ah - Creates an address handle for the given address vector. - * @pd: The protection domain associated with the address handle. - * @ah_attr: The attributes of the address vector. - * - * The address handle is used to reference a local or global destination - * in all UD QP post sends. - */ -struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); - -/** - * ib_modify_ah - Modifies the address vector associated with an address - * handle. - * @ah: The address handle to modify. - * @ah_attr: The new address vector attributes to associate with the - * address handle. - */ -int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -/** - * ib_query_ah - Queries the address vector associated with an address - * handle. - * @ah: The address handle to query. - * @ah_attr: The address vector attributes associated with the address - * handle. - */ -int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -/** - * ib_destroy_ah - Destroys an address handle. - * @ah: The address handle to destroy. - */ -int ib_destroy_ah(struct ib_ah *ah); - -/** - * ib_create_qp - Creates a QP associated with the specified protection - * domain. - * @pd: The protection domain associated with the QP. - * @qp_init_attr: A list of initial attributes required to create the QP. - */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); - -/** - * ib_modify_qp - Modifies the attributes for the specified QP and then - * transitions the QP to the given state. - * @qp: The QP to modify. - * @qp_attr: On input, specifies the QP attributes to modify. On output, - * the current values of selected QP attributes are returned. - * @qp_attr_mask: A bit-mask used to specify which attributes of the QP - * are being modified. - */ -int ib_modify_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask); - -/** - * ib_query_qp - Returns the attribute list and current values for the - * specified QP. - * @qp: The QP to query. - * @qp_attr: The attributes of the specified QP. - * @qp_attr_mask: A bit-mask used to select specific attributes to query. - * @qp_init_attr: Additional attributes of the selected QP. - * - * The qp_attr_mask may be used to limit the query to gathering only the - * selected attributes. - */ -int ib_query_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr); - -/** - * ib_destroy_qp - Destroys the specified QP. - * @qp: The QP to destroy. - */ -int ib_destroy_qp(struct ib_qp *qp); - -/** - * ib_post_send - Posts a list of work requests to the send queue of - * the specified QP. - * @qp: The QP to post the work request on. - * @send_wr: A list of work requests to post on the send queue. - * @bad_send_wr: On an immediate failure, this parameter will reference - * the work request that failed to be posted on the QP. - */ -static inline int ib_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) -{ - return qp->device->post_send(qp, send_wr, bad_send_wr); -} - -/** - * ib_post_recv - Posts a list of work requests to the receive queue of - * the specified QP. - * @qp: The QP to post the work request on. - * @recv_wr: A list of work requests to post on the receive queue. - * @bad_recv_wr: On an immediate failure, this parameter will reference - * the work request that failed to be posted on the QP. - */ -static inline int ib_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - return qp->device->post_recv(qp, recv_wr, bad_recv_wr); -} - -/** - * ib_create_cq - Creates a CQ on the specified device. - * @device: The device on which to create the CQ. - * @comp_handler: A user-specified callback that is invoked when a - * completion event occurs on the CQ. - * @event_handler: A user-specified callback that is invoked when an - * asynchronous event not associated with a completion occurs on the CQ. - * @cq_context: Context associated with the CQ returned to the user via - * the associated completion and event handlers. - * @cqe: The minimum size of the CQ. - * - * Users can examine the cq structure to determine the actual CQ size. - */ -struct ib_cq *ib_create_cq(struct ib_device *device, - ib_comp_handler comp_handler, - void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe); - -/** - * ib_resize_cq - Modifies the capacity of the CQ. - * @cq: The CQ to resize. - * @cqe: The minimum size of the CQ. - * - * Users can examine the cq structure to determine the actual CQ size. - */ -int ib_resize_cq(struct ib_cq *cq, int cqe); - -/** - * ib_destroy_cq - Destroys the specified CQ. - * @cq: The CQ to destroy. - */ -int ib_destroy_cq(struct ib_cq *cq); - -/** - * ib_poll_cq - poll a CQ for completion(s) - * @cq:the CQ being polled - * @num_entries:maximum number of completions to return - * @wc:array of at least @num_entries &struct ib_wc where completions - * will be returned - * - * Poll a CQ for (possibly multiple) completions. If the return value - * is < 0, an error occurred. If the return value is >= 0, it is the - * number of completions returned. If the return value is - * non-negative and < num_entries, then the CQ was emptied. - */ -static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, - struct ib_wc *wc) -{ - return cq->device->poll_cq(cq, num_entries, wc); -} - -/** - * ib_peek_cq - Returns the number of unreaped completions currently - * on the specified CQ. - * @cq: The CQ to peek. - * @wc_cnt: A minimum number of unreaped completions to check for. - * - * If the number of unreaped completions is greater than or equal to wc_cnt, - * this function returns wc_cnt, otherwise, it returns the actual number of - * unreaped completions. - */ -int ib_peek_cq(struct ib_cq *cq, int wc_cnt); - -/** - * ib_req_notify_cq - Request completion notification on a CQ. - * @cq: The CQ to generate an event for. - * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will - * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP, - * notification will occur on the next completion. - */ -static inline int ib_req_notify_cq(struct ib_cq *cq, - enum ib_cq_notify cq_notify) -{ - return cq->device->req_notify_cq(cq, cq_notify); -} - -/** - * ib_req_ncomp_notif - Request completion notification when there are - * at least the specified number of unreaped completions on the CQ. - * @cq: The CQ to generate an event for. - * @wc_cnt: The number of unreaped completions that should be on the - * CQ before an event is generated. - */ -static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -{ - return cq->device->req_ncomp_notif ? - cq->device->req_ncomp_notif(cq, wc_cnt) : - -ENOSYS; -} - -/** - * ib_get_dma_mr - Returns a memory region for system memory that is - * usable for DMA. - * @pd: The protection domain associated with the memory region. - * @mr_access_flags: Specifies the memory access rights. - */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - -/** - * ib_reg_phys_mr - Prepares a virtually addressed memory region for use - * by an HCA. - * @pd: The protection domain associated assigned to the registered region. - * @phys_buf_array: Specifies a list of physical buffers to use in the - * memory region. - * @num_phys_buf: Specifies the size of the phys_buf_array. - * @mr_access_flags: Specifies the memory access rights. - * @iova_start: The offset of the region's starting I/O virtual address. - */ -struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); - -/** - * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. - * Conceptually, this call performs the functions deregister memory region - * followed by register physical memory region. Where possible, - * resources are reused instead of deallocated and reallocated. - * @mr: The memory region to modify. - * @mr_rereg_mask: A bit-mask used to indicate which of the following - * properties of the memory region are being modified. - * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies - * the new protection domain to associated with the memory region, - * otherwise, this parameter is ignored. - * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this - * field specifies a list of physical buffers to use in the new - * translation, otherwise, this parameter is ignored. - * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this - * field specifies the size of the phys_buf_array, otherwise, this - * parameter is ignored. - * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this - * field specifies the new memory access rights, otherwise, this - * parameter is ignored. - * @iova_start: The offset of the region's starting I/O virtual address. - */ -int ib_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); - -/** - * ib_query_mr - Retrieves information about a specific memory region. - * @mr: The memory region to retrieve information about. - * @mr_attr: The attributes of the specified memory region. - */ -int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - -/** - * ib_dereg_mr - Deregisters a memory region and removes it from the - * HCA translation table. - * @mr: The memory region to deregister. - */ -int ib_dereg_mr(struct ib_mr *mr); - -/** - * ib_alloc_mw - Allocates a memory window. - * @pd: The protection domain associated with the memory window. - */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd); - -/** - * ib_bind_mw - Posts a work request to the send queue of the specified - * QP, which binds the memory window to the given address range and - * remote access attributes. - * @qp: QP to post the bind work request on. - * @mw: The memory window to bind. - * @mw_bind: Specifies information about the memory window, including - * its address range, remote access rights, and associated memory region. - */ -static inline int ib_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* XXX reference counting in corresponding MR? */ - return mw->device->bind_mw ? - mw->device->bind_mw(qp, mw, mw_bind) : - -ENOSYS; -} - -/** - * ib_dealloc_mw - Deallocates a memory window. - * @mw: The memory window to deallocate. - */ -int ib_dealloc_mw(struct ib_mw *mw); - -/** - * ib_alloc_fmr - Allocates a unmapped fast memory region. - * @pd: The protection domain associated with the unmapped region. - * @mr_access_flags: Specifies the memory access rights. - * @fmr_attr: Attributes of the unmapped region. - * - * A fast memory region must be mapped before it can be used as part of - * a work request. - */ -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -/** - * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. - * @fmr: The fast memory region to associate with the pages. - * @page_list: An array of physical pages to map to the fast memory region. - * @list_len: The number of pages in page_list. - * @iova: The I/O virtual address to use with the mapped region. - */ -static inline int ib_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, - u64 iova) -{ - return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); -} - -/** - * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. - * @fmr_list: A linked list of fast memory regions to unmap. - */ -int ib_unmap_fmr(struct list_head *fmr_list); - -/** - * ib_dealloc_fmr - Deallocates a fast memory region. - * @fmr: The fast memory region to deallocate. - */ -int ib_dealloc_fmr(struct ib_fmr *fmr); - -/** - * ib_attach_mcast - Attaches the specified QP to a multicast group. - * @qp: QP to attach to the multicast group. The QP must be type - * IB_QPT_UD. - * @gid: Multicast group GID. - * @lid: Multicast group LID in host byte order. - * - * In order to send and receive multicast packets, subnet - * administration must have created the multicast group and configured - * the fabric appropriately. The port associated with the specified - * QP must also be a member of the multicast group. - */ -int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -/** - * ib_detach_mcast - Detaches the specified QP from a multicast group. - * @qp: QP to detach from the multicast group. - * @gid: Multicast group GID. - * @lid: Multicast group LID in host byte order. - */ -int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -#endif /* IB_VERBS_H */ diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 394bc08abc6f..8935e74ae3f8 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -1,5 +1,3 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o ib_ipoib-y := ipoib_main.o \ diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 04c98f54e9c4..bea960b8191f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -49,9 +51,9 @@ #include <asm/atomic.h> #include <asm/semaphore.h> -#include <ib_verbs.h> -#include <ib_pack.h> -#include <ib_sa.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_sa.h> /* constants */ @@ -88,8 +90,8 @@ enum { /* structs */ struct ipoib_header { - u16 proto; - u16 reserved; + __be16 proto; + u16 reserved; }; struct ipoib_pseudoheader { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index a84e5fe0f193..38b150f775e7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -97,7 +97,7 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) for (n = 0, i = 0; i < sizeof mgid / 2; ++i) { n += sprintf(gid_buf + n, "%x", - be16_to_cpu(((u16 *)mgid.raw)[i])); + be16_to_cpu(((__be16 *) mgid.raw)[i])); if (i < sizeof mgid / 2 - 1) gid_buf[n++] = ':'; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 8238766746b2..ef0e3894863c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1,5 +1,8 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,7 +38,7 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> -#include <ib_cache.h> +#include <rdma/ib_cache.h> #include "ipoib.h" @@ -81,7 +84,7 @@ void ipoib_free_ah(struct kref *kref) unsigned long flags; - if (ah->last_send <= priv->tx_tail) { + if ((int) priv->tx_tail - (int) ah->last_send >= 0) { ipoib_dbg(priv, "Freeing ah %p\n", ah->ah); ib_destroy_ah(ah->ah); kfree(ah); @@ -355,7 +358,7 @@ static void __ipoib_reap_ah(struct net_device *dev) spin_lock_irq(&priv->lock); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) - if (ah->last_send <= priv->tx_tail) { + if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); list_add_tail(&ah->list, &remove_list); } @@ -486,7 +489,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) * assume the HW is wedged and just free up * all our pending work requests. */ - while (priv->tx_tail < priv->tx_head) { + while ((int) priv->tx_tail - (int) priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & (IPOIB_TX_RING_SIZE - 1)]; dma_unmap_single(priv->ca->dma_device, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6f60abbaebd5..0e8ac138e355 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,7 +36,6 @@ #include "ipoib.h" -#include <linux/version.h> #include <linux/module.h> #include <linux/init.h> @@ -600,14 +601,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); } else { - /* unicast GID -- should be ARP reply */ + /* unicast GID -- should be ARP or RARP reply */ - if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) { + if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && + (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " IPOIB_GID_FMT "\n", skb->dst ? "neigh" : "dst", - be16_to_cpup((u16 *) skb->data), - be32_to_cpup((u32 *) phdr->hwaddr), + be16_to_cpup((__be16 *) skb->data), + be32_to_cpup((__be32 *) phdr->hwaddr), IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); dev_kfree_skb_any(skb); ++priv->stats.tx_dropped; @@ -670,7 +672,7 @@ static void ipoib_set_mcast_list(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - schedule_work(&priv->restart_task); + queue_work(ipoib_workqueue, &priv->restart_task); } static void ipoib_neigh_destructor(struct neighbour *n) @@ -779,15 +781,11 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); - if (priv->rx_ring) { - kfree(priv->rx_ring); - priv->rx_ring = NULL; - } + kfree(priv->rx_ring); + kfree(priv->tx_ring); - if (priv->tx_ring) { - kfree(priv->tx_ring); - priv->tx_ring = NULL; - } + priv->rx_ring = NULL; + priv->tx_ring = NULL; } static void ipoib_setup(struct net_device *dev) @@ -885,6 +883,12 @@ static ssize_t create_child(struct class_device *cdev, if (pkey < 0 || pkey > 0xffff) return -EINVAL; + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + pkey |= 0x8000; + ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), pkey); @@ -937,6 +941,12 @@ static struct net_device *ipoib_add_port(const char *format, goto alloc_mem_failed; } + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; + priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 70208c3d21e2..aca7aea18a69 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -357,7 +359,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; - rec.pkey = be16_to_cpu(priv->pkey); + rec.pkey = cpu_to_be16(priv->pkey); ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | @@ -457,7 +459,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; - rec.pkey = be16_to_cpu(priv->pkey); + rec.pkey = cpu_to_be16(priv->pkey); comp_mask = IB_SA_MCMEMBER_REC_MGID | @@ -646,7 +648,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; - rec.pkey = be16_to_cpu(priv->pkey); + rec.pkey = cpu_to_be16(priv->pkey); /* Remove ourselves from the multicast group */ ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 4933edf062c2..79f59d0563ed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,7 +33,7 @@ * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <ib_cache.h> +#include <rdma/ib_cache.h> #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 94b8ea812fef..332d730e60c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -32,7 +32,6 @@ * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/version.h> #include <linux/module.h> #include <linux/init.h> |