Merge branches 'amso1100', 'cma', 'cxgb3', 'cxgb4', 'fdr', 'ipath', 'ipoib', 'misc...
authorRoland Dreier <roland@purestorage.com>
Tue, 1 Nov 2011 16:37:08 +0000 (09:37 -0700)
committerRoland Dreier <roland@purestorage.com>
Tue, 1 Nov 2011 16:37:08 +0000 (09:37 -0700)
74 files changed:
drivers/infiniband/core/cm.c
drivers/infiniband/core/cm_msgs.h
drivers/infiniband/core/cma.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/amso1100/c2_ae.c
drivers/infiniband/hw/amso1100/c2_intr.c
drivers/infiniband/hw/amso1100/c2_provider.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_ev.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.h
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/ev.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/ehca/ehca_eq.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_srq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_driver.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_qsfp.c
drivers/infiniband/hw/qib/qib_qsfp.h
drivers/infiniband/hw/qib/qib_rc.c
drivers/infiniband/hw/qib/qib_ruc.c
drivers/infiniband/hw/qib/qib_srq.c
drivers/infiniband/hw/qib/qib_sysfs.c
drivers/infiniband/hw/qib/qib_uc.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_fs.c
drivers/net/mlx4/eq.c
drivers/net/mlx4/fw.c
drivers/net/mlx4/fw.h
drivers/net/mlx4/main.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/mr.c
drivers/net/mlx4/pd.c
drivers/net/mlx4/port.c
drivers/net/mlx4/qp.c
drivers/net/mlx4/srq.c
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h
include/rdma/iw_cm.h
include/rdma/rdma_cm.h
include/rdma/rdma_user_cm.h

index fc0f2bd..4104ea2 100644 (file)
@@ -889,6 +889,8 @@ retest:
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
+               if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+                       break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
@@ -1008,7 +1010,6 @@ static void cm_format_req(struct cm_req_msg *req_msg,
        req_msg->service_id = param->service_id;
        req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
-       cm_req_set_resp_res(req_msg, param->responder_resources);
        cm_req_set_init_depth(req_msg, param->initiator_depth);
        cm_req_set_remote_resp_timeout(req_msg,
                                       param->remote_cm_response_timeout);
@@ -1017,12 +1018,16 @@ static void cm_format_req(struct cm_req_msg *req_msg,
        cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
        cm_req_set_local_resp_timeout(req_msg,
                                      param->local_cm_response_timeout);
-       cm_req_set_retry_count(req_msg, param->retry_count);
        req_msg->pkey = param->primary_path->pkey;
        cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
-       cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
        cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
-       cm_req_set_srq(req_msg, param->srq);
+
+       if (param->qp_type != IB_QPT_XRC_INI) {
+               cm_req_set_resp_res(req_msg, param->responder_resources);
+               cm_req_set_retry_count(req_msg, param->retry_count);
+               cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
+               cm_req_set_srq(req_msg, param->srq);
+       }
 
        if (pri_path->hop_limit <= 1) {
                req_msg->primary_local_lid = pri_path->slid;
@@ -1080,7 +1085,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
        if (!param->primary_path)
                return -EINVAL;
 
-       if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+       if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
+           param->qp_type != IB_QPT_XRC_INI)
                return -EINVAL;
 
        if (param->private_data &&
@@ -1601,18 +1607,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
        cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
        rep_msg->local_comm_id = cm_id_priv->id.local_id;
        rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
-       cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
        cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
        rep_msg->resp_resources = param->responder_resources;
-       rep_msg->initiator_depth = param->initiator_depth;
        cm_rep_set_target_ack_delay(rep_msg,
                                    cm_id_priv->av.port->cm_dev->ack_delay);
        cm_rep_set_failover(rep_msg, param->failover_accepted);
-       cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
        cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
-       cm_rep_set_srq(rep_msg, param->srq);
        rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
 
+       if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
+               rep_msg->initiator_depth = param->initiator_depth;
+               cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+               cm_rep_set_srq(rep_msg, param->srq);
+               cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+       } else {
+               cm_rep_set_srq(rep_msg, 1);
+               cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+       }
+
        if (param->private_data && param->private_data_len)
                memcpy(rep_msg->private_data, param->private_data,
                       param->private_data_len);
@@ -1660,7 +1672,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
        cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
-       cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+       cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
 
 out:   spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
@@ -1731,7 +1743,7 @@ error:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 }
 EXPORT_SYMBOL(ib_send_cm_rtu);
 
-static void cm_format_rep_event(struct cm_work *work)
+static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
 {
        struct cm_rep_msg *rep_msg;
        struct ib_cm_rep_event_param *param;
@@ -1740,7 +1752,7 @@ static void cm_format_rep_event(struct cm_work *work)
        param = &work->cm_event.param.rep_rcvd;
        param->remote_ca_guid = rep_msg->local_ca_guid;
        param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
-       param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+       param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
        param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
        param->responder_resources = rep_msg->initiator_depth;
        param->initiator_depth = rep_msg->resp_resources;
@@ -1808,7 +1820,7 @@ static int cm_rep_handler(struct cm_work *work)
                return -EINVAL;
        }
 
-       cm_format_rep_event(work);
+       cm_format_rep_event(work, cm_id_priv->qp_type);
 
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id_priv->id.state) {
@@ -1823,7 +1835,7 @@ static int cm_rep_handler(struct cm_work *work)
 
        cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
        cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
-       cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+       cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
 
        spin_lock(&cm.lock);
        /* Check for duplicate REP. */
@@ -1850,7 +1862,7 @@ static int cm_rep_handler(struct cm_work *work)
 
        cm_id_priv->id.state = IB_CM_REP_RCVD;
        cm_id_priv->id.remote_id = rep_msg->local_comm_id;
-       cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+       cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
        cm_id_priv->initiator_depth = rep_msg->resp_resources;
        cm_id_priv->responder_resources = rep_msg->initiator_depth;
        cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
@@ -3492,7 +3504,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
                qp_attr->path_mtu = cm_id_priv->path_mtu;
                qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
                qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
-               if (cm_id_priv->qp_type == IB_QPT_RC) {
+               if (cm_id_priv->qp_type == IB_QPT_RC ||
+                   cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
                        *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
                                         IB_QP_MIN_RNR_TIMER;
                        qp_attr->max_dest_rd_atomic =
@@ -3537,15 +3550,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
                if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
                        *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
                        qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
-                       if (cm_id_priv->qp_type == IB_QPT_RC) {
-                               *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
-                                                IB_QP_RNR_RETRY |
+                       switch (cm_id_priv->qp_type) {
+                       case IB_QPT_RC:
+                       case IB_QPT_XRC_INI:
+                               *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
                                                 IB_QP_MAX_QP_RD_ATOMIC;
-                               qp_attr->timeout = cm_id_priv->av.timeout;
                                qp_attr->retry_cnt = cm_id_priv->retry_count;
                                qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
-                               qp_attr->max_rd_atomic =
-                                       cm_id_priv->initiator_depth;
+                               qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+                               /* fall through */
+                       case IB_QPT_XRC_TGT:
+                               *qp_attr_mask |= IB_QP_TIMEOUT;
+                               qp_attr->timeout = cm_id_priv->av.timeout;
+                               break;
+                       default:
+                               break;
                        }
                        if (cm_id_priv->alt_av.ah_attr.dlid) {
                                *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
index 7e63c08..505db2a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2011 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  *
@@ -86,7 +86,7 @@ struct cm_req_msg {
        __be16 pkey;
        /* path MTU:4, RDC exists:1, RNR retry count:3. */
        u8 offset50;
-       /* max CM Retries:4, SRQ:1, rsvd:3 */
+       /* max CM Retries:4, SRQ:1, extended transport type:3 */
        u8 offset51;
 
        __be16 primary_local_lid;
@@ -175,6 +175,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
        switch(transport_type) {
        case 0: return IB_QPT_RC;
        case 1: return IB_QPT_UC;
+       case 3:
+               switch (req_msg->offset51 & 0x7) {
+               case 1: return IB_QPT_XRC_TGT;
+               default: return 0;
+               }
        default: return 0;
        }
 }
@@ -188,6 +193,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
                                                  req_msg->offset40) &
                                                   0xFFFFFFF9) | 0x2);
                break;
+       case IB_QPT_XRC_INI:
+               req_msg->offset40 = cpu_to_be32((be32_to_cpu(
+                                                req_msg->offset40) &
+                                                  0xFFFFFFF9) | 0x6);
+               req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+               break;
        default:
                req_msg->offset40 = cpu_to_be32(be32_to_cpu(
                                                 req_msg->offset40) &
@@ -527,6 +538,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
                            (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
 }
 
+static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
+{
+       return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
+}
+
+static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
+{
+       rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
+                           (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
+}
+
+static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
+{
+       return (qp_type == IB_QPT_XRC_INI) ?
+               cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
+}
+
 static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
 {
        return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
index ca4c5dc..872b184 100644 (file)
@@ -81,6 +81,7 @@ static DEFINE_IDR(sdp_ps);
 static DEFINE_IDR(tcp_ps);
 static DEFINE_IDR(udp_ps);
 static DEFINE_IDR(ipoib_ps);
+static DEFINE_IDR(ib_ps);
 
 struct cma_device {
        struct list_head        list;
@@ -1179,6 +1180,15 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
        event->param.conn.qp_num = req_data->remote_qpn;
 }
 
+static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+{
+       return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
+                (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
+               ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
+                (id->qp_type == IB_QPT_UD)) ||
+               (!id->qp_type));
+}
+
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
        struct rdma_id_private *listen_id, *conn_id;
@@ -1186,13 +1196,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        int offset, ret;
 
        listen_id = cm_id->context;
+       if (!cma_check_req_qp_type(&listen_id->id, ib_event))
+               return -EINVAL;
+
        if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
                return -ECONNABORTED;
 
        memset(&event, 0, sizeof event);
        offset = cma_user_data_offset(listen_id->id.ps);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
-       if (listen_id->id.qp_type == IB_QPT_UD) {
+       if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
                conn_id = cma_new_udp_id(&listen_id->id, ib_event);
                event.param.ud.private_data = ib_event->private_data + offset;
                event.param.ud.private_data_len =
@@ -1328,6 +1341,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
                switch (iw_event->status) {
                case 0:
                        event.event = RDMA_CM_EVENT_ESTABLISHED;
+                       event.param.conn.initiator_depth = iw_event->ird;
+                       event.param.conn.responder_resources = iw_event->ord;
                        break;
                case -ECONNRESET:
                case -ECONNREFUSED:
@@ -1343,6 +1358,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
                break;
        case IW_CM_EVENT_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
+               event.param.conn.initiator_depth = iw_event->ird;
+               event.param.conn.responder_resources = iw_event->ord;
                break;
        default:
                BUG_ON(1);
@@ -1433,8 +1450,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        event.param.conn.private_data = iw_event->private_data;
        event.param.conn.private_data_len = iw_event->private_data_len;
-       event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
-       event.param.conn.responder_resources = attr.max_qp_rd_atom;
+       event.param.conn.initiator_depth = iw_event->ird;
+       event.param.conn.responder_resources = iw_event->ord;
 
        /*
         * Protect against the user destroying conn_id from another thread
@@ -2234,6 +2251,9 @@ static int cma_get_port(struct rdma_id_private *id_priv)
        case RDMA_PS_IPOIB:
                ps = &ipoib_ps;
                break;
+       case RDMA_PS_IB:
+               ps = &ib_ps;
+               break;
        default:
                return -EPROTONOSUPPORT;
        }
@@ -2569,7 +2589,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
        req.service_id = cma_get_service_id(id_priv->id.ps,
                                            (struct sockaddr *) &route->addr.dst_addr);
        req.qp_num = id_priv->qp_num;
-       req.qp_type = IB_QPT_RC;
+       req.qp_type = id_priv->id.qp_type;
        req.starting_psn = id_priv->seq_num;
        req.responder_resources = conn_param->responder_resources;
        req.initiator_depth = conn_param->initiator_depth;
@@ -2616,14 +2636,16 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
        if (ret)
                goto out;
 
-       iw_param.ord = conn_param->initiator_depth;
-       iw_param.ird = conn_param->responder_resources;
-       iw_param.private_data = conn_param->private_data;
-       iw_param.private_data_len = conn_param->private_data_len;
-       if (id_priv->id.qp)
+       if (conn_param) {
+               iw_param.ord = conn_param->initiator_depth;
+               iw_param.ird = conn_param->responder_resources;
+               iw_param.private_data = conn_param->private_data;
+               iw_param.private_data_len = conn_param->private_data_len;
+               iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
+       } else {
+               memset(&iw_param, 0, sizeof iw_param);
                iw_param.qpn = id_priv->qp_num;
-       else
-               iw_param.qpn = conn_param->qp_num;
+       }
        ret = iw_cm_connect(cm_id, &iw_param);
 out:
        if (ret) {
@@ -2765,14 +2787,20 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
        switch (rdma_node_get_transport(id->device->node_type)) {
        case RDMA_TRANSPORT_IB:
-               if (id->qp_type == IB_QPT_UD)
-                       ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
-                                               conn_param->private_data,
-                                               conn_param->private_data_len);
-               else if (conn_param)
-                       ret = cma_accept_ib(id_priv, conn_param);
-               else
-                       ret = cma_rep_recv(id_priv);
+               if (id->qp_type == IB_QPT_UD) {
+                       if (conn_param)
+                               ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+                                                       conn_param->private_data,
+                                                       conn_param->private_data_len);
+                       else
+                               ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+                                                       NULL, 0);
+               } else {
+                       if (conn_param)
+                               ret = cma_accept_ib(id_priv, conn_param);
+                       else
+                               ret = cma_rep_recv(id_priv);
+               }
                break;
        case RDMA_TRANSPORT_IWARP:
                ret = cma_accept_iw(id_priv, conn_param);
@@ -3460,6 +3488,7 @@ static void __exit cma_cleanup(void)
        idr_destroy(&tcp_ps);
        idr_destroy(&udp_ps);
        idr_destroy(&ipoib_ps);
+       idr_destroy(&ib_ps);
 }
 
 module_init(cma_init);
index b4d8672..0563892 100644 (file)
@@ -1596,6 +1596,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
                                        mad->mad_hdr.class_version].class;
                        if (!class)
                                goto out;
+                       if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
+                           IB_MGMT_MAX_METHODS)
+                               goto out;
                        method = class->method_table[convert_mgmt_class(
                                                        mad->mad_hdr.mgmt_class)];
                        if (method)
index 9ab5df7..2b59b72 100644 (file)
@@ -185,17 +185,35 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
        if (ret)
                return ret;
 
+       rate = (25 * attr.active_speed) / 10;
+
        switch (attr.active_speed) {
-       case 2: speed = " DDR"; break;
-       case 4: speed = " QDR"; break;
+       case 2:
+               speed = " DDR";
+               break;
+       case 4:
+               speed = " QDR";
+               break;
+       case 8:
+               speed = " FDR10";
+               rate = 10;
+               break;
+       case 16:
+               speed = " FDR";
+               rate = 14;
+               break;
+       case 32:
+               speed = " EDR";
+               rate = 25;
+               break;
        }
 
-       rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+       rate *= ib_width_enum_to_int(attr.active_width);
        if (rate < 0)
                return -EINVAL;
 
        return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
-                      rate / 10, rate % 10 ? ".5" : "",
+                      rate, (attr.active_speed == 1) ? ".5" : "",
                       ib_width_enum_to_int(attr.active_width), speed);
 }
 
index 08f948d..b8a0b4a 100644 (file)
@@ -1122,7 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;
 
-       if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
+       if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
                return -EINVAL;
 
        if (hdr.in + sizeof(hdr) > len)
index 71be5ee..b69307f 100644 (file)
@@ -276,7 +276,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
        ucma_set_event_context(ctx, event, uevent);
        uevent->resp.event = event->event;
        uevent->resp.status = event->status;
-       if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
+       if (cm_id->qp_type == IB_QPT_UD)
                ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
        else
                ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -377,6 +377,9 @@ static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_
        case RDMA_PS_IPOIB:
                *qp_type = IB_QPT_UD;
                return 0;
+       case RDMA_PS_IB:
+               *qp_type = cmd->qp_type;
+               return 0;
        default:
                return -EINVAL;
        }
@@ -1270,7 +1273,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;
 
-       if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
+       if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
                return -EINVAL;
 
        if (hdr.in + sizeof(hdr) > len)
index 8d261b6..07db229 100644 (file)
@@ -458,8 +458,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
                goto err;
        }
 
-       if (packet->mad.hdr.id < 0 ||
-           packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
+       if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
                ret = -EINVAL;
                goto err;
        }
@@ -703,7 +702,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
        mutex_lock(&file->port->file_mutex);
        mutex_lock(&file->mutex);
 
-       if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+       if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
                ret = -EINVAL;
                goto out;
        }
index a078e56..5bcb2af 100644 (file)
@@ -76,6 +76,8 @@ struct ib_uverbs_device {
        struct ib_device                       *ib_dev;
        int                                     devnum;
        struct cdev                             cdev;
+       struct rb_root                          xrcd_tree;
+       struct mutex                            xrcd_tree_mutex;
 };
 
 struct ib_uverbs_event_file {
@@ -120,6 +122,16 @@ struct ib_uevent_object {
        u32                     events_reported;
 };
 
+struct ib_uxrcd_object {
+       struct ib_uobject       uobject;
+       atomic_t                refcnt;
+};
+
+struct ib_usrq_object {
+       struct ib_uevent_object uevent;
+       struct ib_uxrcd_object *uxrcd;
+};
+
 struct ib_uqp_object {
        struct ib_uevent_object uevent;
        struct list_head        mcast_list;
@@ -142,6 +154,7 @@ extern struct idr ib_uverbs_ah_idr;
 extern struct idr ib_uverbs_cq_idr;
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
+extern struct idr ib_uverbs_xrcd_idr;
 
 void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
 
@@ -161,6 +174,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
 #define IB_UVERBS_DECLARE_CMD(name)                                    \
        ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,           \
@@ -181,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq);
 IB_UVERBS_DECLARE_CMD(req_notify_cq);
 IB_UVERBS_DECLARE_CMD(destroy_cq);
 IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(open_qp);
 IB_UVERBS_DECLARE_CMD(query_qp);
 IB_UVERBS_DECLARE_CMD(modify_qp);
 IB_UVERBS_DECLARE_CMD(destroy_qp);
@@ -195,5 +210,8 @@ IB_UVERBS_DECLARE_CMD(create_srq);
 IB_UVERBS_DECLARE_CMD(modify_srq);
 IB_UVERBS_DECLARE_CMD(query_srq);
 IB_UVERBS_DECLARE_CMD(destroy_srq);
+IB_UVERBS_DECLARE_CMD(create_xsrq);
+IB_UVERBS_DECLARE_CMD(open_xrcd);
+IB_UVERBS_DECLARE_CMD(close_xrcd);
 
 #endif /* UVERBS_H */
index c426992..254f164 100644 (file)
@@ -47,6 +47,7 @@ static struct lock_class_key cq_lock_key;
 static struct lock_class_key qp_lock_key;
 static struct lock_class_key ah_lock_key;
 static struct lock_class_key srq_lock_key;
+static struct lock_class_key xrcd_lock_key;
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)                      \
        do {                                                            \
@@ -255,6 +256,18 @@ static void put_srq_read(struct ib_srq *srq)
        put_uobj_read(srq->uobject);
 }
 
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
+                                    struct ib_uobject **uobj)
+{
+       *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
+       return *uobj ? (*uobj)->object : NULL;
+}
+
+static void put_xrcd_read(struct ib_uobject *uobj)
+{
+       put_uobj_read(uobj);
+}
+
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
                              const char __user *buf,
                              int in_len, int out_len)
@@ -298,6 +311,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        INIT_LIST_HEAD(&ucontext->qp_list);
        INIT_LIST_HEAD(&ucontext->srq_list);
        INIT_LIST_HEAD(&ucontext->ah_list);
+       INIT_LIST_HEAD(&ucontext->xrcd_list);
        ucontext->closing = 0;
 
        resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -579,6 +593,310 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
        return in_len;
 }
 
+struct xrcd_table_entry {
+       struct rb_node  node;
+       struct ib_xrcd *xrcd;
+       struct inode   *inode;
+};
+
+static int xrcd_table_insert(struct ib_uverbs_device *dev,
+                           struct inode *inode,
+                           struct ib_xrcd *xrcd)
+{
+       struct xrcd_table_entry *entry, *scan;
+       struct rb_node **p = &dev->xrcd_tree.rb_node;
+       struct rb_node *parent = NULL;
+
+       entry = kmalloc(sizeof *entry, GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->xrcd  = xrcd;
+       entry->inode = inode;
+
+       while (*p) {
+               parent = *p;
+               scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+               if (inode < scan->inode) {
+                       p = &(*p)->rb_left;
+               } else if (inode > scan->inode) {
+                       p = &(*p)->rb_right;
+               } else {
+                       kfree(entry);
+                       return -EEXIST;
+               }
+       }
+
+       rb_link_node(&entry->node, parent, p);
+       rb_insert_color(&entry->node, &dev->xrcd_tree);
+       igrab(inode);
+       return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
+                                                 struct inode *inode)
+{
+       struct xrcd_table_entry *entry;
+       struct rb_node *p = dev->xrcd_tree.rb_node;
+
+       while (p) {
+               entry = rb_entry(p, struct xrcd_table_entry, node);
+
+               if (inode < entry->inode)
+                       p = p->rb_left;
+               else if (inode > entry->inode)
+                       p = p->rb_right;
+               else
+                       return entry;
+       }
+
+       return NULL;
+}
+
+static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
+{
+       struct xrcd_table_entry *entry;
+
+       entry = xrcd_table_search(dev, inode);
+       if (!entry)
+               return NULL;
+
+       return entry->xrcd;
+}
+
+static void xrcd_table_delete(struct ib_uverbs_device *dev,
+                             struct inode *inode)
+{
+       struct xrcd_table_entry *entry;
+
+       entry = xrcd_table_search(dev, inode);
+       if (entry) {
+               iput(inode);
+               rb_erase(&entry->node, &dev->xrcd_tree);
+               kfree(entry);
+       }
+}
+
+ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
+                           const char __user *buf, int in_len,
+                           int out_len)
+{
+       struct ib_uverbs_open_xrcd      cmd;
+       struct ib_uverbs_open_xrcd_resp resp;
+       struct ib_udata                 udata;
+       struct ib_uxrcd_object         *obj;
+       struct ib_xrcd                 *xrcd = NULL;
+       struct file                    *f = NULL;
+       struct inode                   *inode = NULL;
+       int                             ret = 0;
+       int                             new_xrcd = 0;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       INIT_UDATA(&udata, buf + sizeof cmd,
+                  (unsigned long) cmd.response + sizeof resp,
+                  in_len - sizeof cmd, out_len - sizeof resp);
+
+       mutex_lock(&file->device->xrcd_tree_mutex);
+
+       if (cmd.fd != -1) {
+               /* look up the struct file backing the user-supplied fd */
+               f = fget(cmd.fd);
+               if (!f) {
+                       ret = -EBADF;
+                       goto err_tree_mutex_unlock;
+               }
+
+               inode = f->f_dentry->d_inode;
+               if (!inode) {
+                       ret = -EBADF;
+                       goto err_tree_mutex_unlock;
+               }
+
+               xrcd = find_xrcd(file->device, inode);
+               if (!xrcd && !(cmd.oflags & O_CREAT)) {
+                       /* no XRCD bound to this inode yet; O_CREAT required to make one */
+                       ret = -EAGAIN;
+                       goto err_tree_mutex_unlock;
+               }
+
+               if (xrcd && cmd.oflags & O_EXCL) {
+                       ret = -EINVAL;
+                       goto err_tree_mutex_unlock;
+               }
+       }
+
+       obj = kmalloc(sizeof *obj, GFP_KERNEL);
+       if (!obj) {
+               ret = -ENOMEM;
+               goto err_tree_mutex_unlock;
+       }
+
+       init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+
+       down_write(&obj->uobject.mutex);
+
+       if (!xrcd) {
+               xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+                                                       file->ucontext, &udata);
+               if (IS_ERR(xrcd)) {
+                       ret = PTR_ERR(xrcd);
+                       goto err;
+               }
+
+               xrcd->inode   = inode;
+               xrcd->device  = file->device->ib_dev;
+               atomic_set(&xrcd->usecnt, 0);
+               mutex_init(&xrcd->tgt_qp_mutex);
+               INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+               new_xrcd = 1;
+       }
+
+       atomic_set(&obj->refcnt, 0);
+       obj->uobject.object = xrcd;
+       ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+       if (ret)
+               goto err_idr;
+
+       memset(&resp, 0, sizeof resp);
+       resp.xrcd_handle = obj->uobject.id;
+
+       if (inode) {
+               if (new_xrcd) {
+                       /* create new inode/xrcd table entry */
+                       ret = xrcd_table_insert(file->device, inode, xrcd);
+                       if (ret)
+                               goto err_insert_xrcd;
+               }
+               atomic_inc(&xrcd->usecnt);
+       }
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp)) {
+               ret = -EFAULT;
+               goto err_copy;
+       }
+
+       if (f)
+               fput(f);
+
+       mutex_lock(&file->mutex);
+       list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
+       mutex_unlock(&file->mutex);
+
+       obj->uobject.live = 1;
+       up_write(&obj->uobject.mutex);
+
+       mutex_unlock(&file->device->xrcd_tree_mutex);
+       return in_len;
+
+err_copy:
+       if (inode) {
+               if (new_xrcd)
+                       xrcd_table_delete(file->device, inode);
+               atomic_dec(&xrcd->usecnt);
+       }
+
+err_insert_xrcd:
+       idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+
+err_idr:
+       ib_dealloc_xrcd(xrcd);
+
+err:
+       put_uobj_write(&obj->uobject);
+
+err_tree_mutex_unlock:
+       if (f)
+               fput(f);
+
+       mutex_unlock(&file->device->xrcd_tree_mutex);
+
+       return ret;
+}
+
+ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_close_xrcd cmd;
+       struct ib_uobject           *uobj;
+       struct ib_xrcd              *xrcd = NULL;
+       struct inode                *inode = NULL;
+       struct ib_uxrcd_object      *obj;
+       int                         live;
+       int                         ret = 0;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       mutex_lock(&file->device->xrcd_tree_mutex);
+       uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
+       if (!uobj) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       xrcd  = uobj->object;
+       inode = xrcd->inode;
+       obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
+       if (atomic_read(&obj->refcnt)) {
+               put_uobj_write(uobj);
+               ret = -EBUSY;
+               goto out;
+       }
+
+       if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
+               ret = ib_dealloc_xrcd(uobj->object);
+               if (!ret)
+                       uobj->live = 0;
+       }
+
+       live = uobj->live;
+       if (inode && ret)
+               atomic_inc(&xrcd->usecnt);
+
+       put_uobj_write(uobj);
+
+       if (ret)
+               goto out;
+
+       if (inode && !live)
+               xrcd_table_delete(file->device, inode);
+
+       idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+       mutex_lock(&file->mutex);
+       list_del(&uobj->list);
+       mutex_unlock(&file->mutex);
+
+       put_uobj(uobj);
+       ret = in_len;
+
+out:
+       mutex_unlock(&file->device->xrcd_tree_mutex);
+       return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+                           struct ib_xrcd *xrcd)
+{
+       struct inode *inode;
+
+       inode = xrcd->inode;
+       if (inode && !atomic_dec_and_test(&xrcd->usecnt))
+               return;
+
+       ib_dealloc_xrcd(xrcd);
+
+       if (inode)
+               xrcd_table_delete(dev, inode);
+}
+
 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
                         const char __user *buf, int in_len,
                         int out_len)
@@ -1052,9 +1370,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
        struct ib_uverbs_create_qp_resp resp;
        struct ib_udata                 udata;
        struct ib_uqp_object           *obj;
-       struct ib_pd                   *pd;
-       struct ib_cq                   *scq, *rcq;
-       struct ib_srq                  *srq;
+       struct ib_device               *device;
+       struct ib_pd                   *pd = NULL;
+       struct ib_xrcd                 *xrcd = NULL;
+       struct ib_uobject              *uninitialized_var(xrcd_uobj);
+       struct ib_cq                   *scq = NULL, *rcq = NULL;
+       struct ib_srq                  *srq = NULL;
        struct ib_qp                   *qp;
        struct ib_qp_init_attr          attr;
        int ret;
@@ -1076,15 +1397,39 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
        init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
        down_write(&obj->uevent.uobject.mutex);
 
-       srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
-       pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-       scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
-       rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-               scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+       if (cmd.qp_type == IB_QPT_XRC_TGT) {
+               xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+               if (!xrcd) {
+                       ret = -EINVAL;
+                       goto err_put;
+               }
+               device = xrcd->device;
+       } else {
+               pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+               scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
+               if (!pd || !scq) {
+                       ret = -EINVAL;
+                       goto err_put;
+               }
 
-       if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
-               ret = -EINVAL;
-               goto err_put;
+               if (cmd.qp_type == IB_QPT_XRC_INI) {
+                       cmd.max_recv_wr = cmd.max_recv_sge = 0;
+               } else {
+                       if (cmd.is_srq) {
+                               srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+                               if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+                                       ret = -EINVAL;
+                                       goto err_put;
+                               }
+                       }
+                       rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ?
+                              scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+                       if (!rcq) {
+                               ret = -EINVAL;
+                               goto err_put;
+                       }
+               }
+               device = pd->device;
        }
 
        attr.event_handler = ib_uverbs_qp_event_handler;
@@ -1092,6 +1437,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
        attr.send_cq       = scq;
        attr.recv_cq       = rcq;
        attr.srq           = srq;
+       attr.xrcd          = xrcd;
        attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
        attr.qp_type       = cmd.qp_type;
        attr.create_flags  = 0;
@@ -1106,26 +1452,34 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
        INIT_LIST_HEAD(&obj->uevent.event_list);
        INIT_LIST_HEAD(&obj->mcast_list);
 
-       qp = pd->device->create_qp(pd, &attr, &udata);
+       if (cmd.qp_type == IB_QPT_XRC_TGT)
+               qp = ib_create_qp(pd, &attr);
+       else
+               qp = device->create_qp(pd, &attr, &udata);
+
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
                goto err_put;
        }
 
-       qp->device        = pd->device;
-       qp->pd            = pd;
-       qp->send_cq       = attr.send_cq;
-       qp->recv_cq       = attr.recv_cq;
-       qp->srq           = attr.srq;
-       qp->uobject       = &obj->uevent.uobject;
-       qp->event_handler = attr.event_handler;
-       qp->qp_context    = attr.qp_context;
-       qp->qp_type       = attr.qp_type;
-       atomic_inc(&pd->usecnt);
-       atomic_inc(&attr.send_cq->usecnt);
-       atomic_inc(&attr.recv_cq->usecnt);
-       if (attr.srq)
-               atomic_inc(&attr.srq->usecnt);
+       if (cmd.qp_type != IB_QPT_XRC_TGT) {
+               qp->real_qp       = qp;
+               qp->device        = device;
+               qp->pd            = pd;
+               qp->send_cq       = attr.send_cq;
+               qp->recv_cq       = attr.recv_cq;
+               qp->srq           = attr.srq;
+               qp->event_handler = attr.event_handler;
+               qp->qp_context    = attr.qp_context;
+               qp->qp_type       = attr.qp_type;
+               atomic_inc(&pd->usecnt);
+               atomic_inc(&attr.send_cq->usecnt);
+               if (attr.recv_cq)
+                       atomic_inc(&attr.recv_cq->usecnt);
+               if (attr.srq)
+                       atomic_inc(&attr.srq->usecnt);
+       }
+       qp->uobject = &obj->uevent.uobject;
 
        obj->uevent.uobject.object = qp;
        ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -1147,9 +1501,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
                goto err_copy;
        }
 
-       put_pd_read(pd);
-       put_cq_read(scq);
-       if (rcq != scq)
+       if (xrcd)
+               put_xrcd_read(xrcd_uobj);
+       if (pd)
+               put_pd_read(pd);
+       if (scq)
+               put_cq_read(scq);
+       if (rcq && rcq != scq)
                put_cq_read(rcq);
        if (srq)
                put_srq_read(srq);
@@ -1171,6 +1529,8 @@ err_destroy:
        ib_destroy_qp(qp);
 
 err_put:
+       if (xrcd)
+               put_xrcd_read(xrcd_uobj);
        if (pd)
                put_pd_read(pd);
        if (scq)
@@ -1184,6 +1544,98 @@ err_put:
        return ret;
 }
 
+ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
+                         const char __user *buf, int in_len, int out_len)
+{
+       struct ib_uverbs_open_qp        cmd;
+       struct ib_uverbs_create_qp_resp resp;
+       struct ib_udata                 udata;
+       struct ib_uqp_object           *obj;
+       struct ib_xrcd                 *xrcd;
+       struct ib_uobject              *uninitialized_var(xrcd_uobj);
+       struct ib_qp                   *qp;
+       struct ib_qp_open_attr          attr;
+       int ret;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       INIT_UDATA(&udata, buf + sizeof cmd,
+                  (unsigned long) cmd.response + sizeof resp,
+                  in_len - sizeof cmd, out_len - sizeof resp);
+
+       obj = kmalloc(sizeof *obj, GFP_KERNEL);
+       if (!obj)
+               return -ENOMEM;
+
+       init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+       down_write(&obj->uevent.uobject.mutex);
+
+       xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+       if (!xrcd) {
+               ret = -EINVAL;
+               goto err_put;
+       }
+
+       attr.event_handler = ib_uverbs_qp_event_handler;
+       attr.qp_context    = file;
+       attr.qp_num        = cmd.qpn;
+       attr.qp_type       = cmd.qp_type;
+
+       obj->uevent.events_reported = 0;
+       INIT_LIST_HEAD(&obj->uevent.event_list);
+       INIT_LIST_HEAD(&obj->mcast_list);
+
+       qp = ib_open_qp(xrcd, &attr);
+       if (IS_ERR(qp)) {
+               ret = PTR_ERR(qp);
+               goto err_put;
+       }
+
+       qp->uobject = &obj->uevent.uobject;
+
+       obj->uevent.uobject.object = qp;
+       ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+       if (ret)
+               goto err_destroy;
+
+       memset(&resp, 0, sizeof resp);
+       resp.qpn       = qp->qp_num;
+       resp.qp_handle = obj->uevent.uobject.id;
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp)) {
+               ret = -EFAULT;
+               goto err_remove;
+       }
+
+       put_xrcd_read(xrcd_uobj);
+
+       mutex_lock(&file->mutex);
+       list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
+       mutex_unlock(&file->mutex);
+
+       obj->uevent.uobject.live = 1;
+
+       up_write(&obj->uevent.uobject.mutex);
+
+       return in_len;
+
+err_remove:
+       idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+
+err_destroy:
+       ib_destroy_qp(qp);
+
+err_put:
+       put_xrcd_read(xrcd_uobj);
+       put_uobj_write(&obj->uevent.uobject);
+       return ret;
+}
+
 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
                           const char __user *buf, int in_len,
                           int out_len)
@@ -1284,6 +1736,20 @@ out:
        return ret ? ret : in_len;
 }
 
+/* Strip attribute-mask bits that are ignored for the given QP type */
+static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
+{
+       switch (qp_type) {
+       case IB_QPT_XRC_INI:
+               return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
+       case IB_QPT_XRC_TGT:
+               return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
+                               IB_QP_RNR_RETRY);
+       default:
+               return mask;
+       }
+}
+
 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
                            const char __user *buf, int in_len,
                            int out_len)
@@ -1356,7 +1822,12 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
        attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
        attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
 
-       ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
+       if (qp->real_qp == qp) {
+               ret = qp->device->modify_qp(qp, attr,
+                       modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+       } else {
+               ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+       }
 
        put_qp_read(qp);
 
@@ -1553,7 +2024,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
        }
 
        resp.bad_wr = 0;
-       ret = qp->device->post_send(qp, wr, &bad_wr);
+       ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
        if (ret)
                for (next = wr; next; next = next->next) {
                        ++resp.bad_wr;
@@ -1691,7 +2162,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
                goto out;
 
        resp.bad_wr = 0;
-       ret = qp->device->post_recv(qp, wr, &bad_wr);
+       ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
 
        put_qp_read(qp);
 
@@ -1975,107 +2446,199 @@ out_put:
        return ret ? ret : in_len;
 }
 
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+                        struct ib_uverbs_create_xsrq *cmd,
+                        struct ib_udata *udata)
 {
-       struct ib_uverbs_create_srq      cmd;
        struct ib_uverbs_create_srq_resp resp;
-       struct ib_udata                  udata;
-       struct ib_uevent_object         *obj;
+       struct ib_usrq_object           *obj;
        struct ib_pd                    *pd;
        struct ib_srq                   *srq;
+       struct ib_uobject               *uninitialized_var(xrcd_uobj);
        struct ib_srq_init_attr          attr;
        int ret;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       INIT_UDATA(&udata, buf + sizeof cmd,
-                  (unsigned long) cmd.response + sizeof resp,
-                  in_len - sizeof cmd, out_len - sizeof resp);
-
        obj = kmalloc(sizeof *obj, GFP_KERNEL);
        if (!obj)
                return -ENOMEM;
 
-       init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
-       down_write(&obj->uobject.mutex);
+       init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key);
+       down_write(&obj->uevent.uobject.mutex);
 
-       pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+       pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
        if (!pd) {
                ret = -EINVAL;
                goto err;
        }
 
+       if (cmd->srq_type == IB_SRQT_XRC) {
+               attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+               if (!attr.ext.xrc.cq) {
+                       ret = -EINVAL;
+                       goto err_put_pd;
+               }
+
+               attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
+               if (!attr.ext.xrc.xrcd) {
+                       ret = -EINVAL;
+                       goto err_put_cq;
+               }
+
+               obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+               atomic_inc(&obj->uxrcd->refcnt);
+       }
+
        attr.event_handler  = ib_uverbs_srq_event_handler;
        attr.srq_context    = file;
-       attr.attr.max_wr    = cmd.max_wr;
-       attr.attr.max_sge   = cmd.max_sge;
-       attr.attr.srq_limit = cmd.srq_limit;
+       attr.srq_type       = cmd->srq_type;
+       attr.attr.max_wr    = cmd->max_wr;
+       attr.attr.max_sge   = cmd->max_sge;
+       attr.attr.srq_limit = cmd->srq_limit;
 
-       obj->events_reported     = 0;
-       INIT_LIST_HEAD(&obj->event_list);
+       obj->uevent.events_reported = 0;
+       INIT_LIST_HEAD(&obj->uevent.event_list);
 
-       srq = pd->device->create_srq(pd, &attr, &udata);
+       srq = pd->device->create_srq(pd, &attr, udata);
        if (IS_ERR(srq)) {
                ret = PTR_ERR(srq);
                goto err_put;
        }
 
-       srq->device        = pd->device;
-       srq->pd            = pd;
-       srq->uobject       = &obj->uobject;
+       srq->device        = pd->device;
+       srq->pd            = pd;
+       srq->srq_type      = cmd->srq_type;
+       srq->uobject       = &obj->uevent.uobject;
        srq->event_handler = attr.event_handler;
        srq->srq_context   = attr.srq_context;
+
+       if (cmd->srq_type == IB_SRQT_XRC) {
+               srq->ext.xrc.cq   = attr.ext.xrc.cq;
+               srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
+               atomic_inc(&attr.ext.xrc.cq->usecnt);
+               atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+       }
+
        atomic_inc(&pd->usecnt);
        atomic_set(&srq->usecnt, 0);
 
-       obj->uobject.object = srq;
-       ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+       obj->uevent.uobject.object = srq;
+       ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
        if (ret)
                goto err_destroy;
 
        memset(&resp, 0, sizeof resp);
-       resp.srq_handle = obj->uobject.id;
+       resp.srq_handle = obj->uevent.uobject.id;
        resp.max_wr     = attr.attr.max_wr;
        resp.max_sge    = attr.attr.max_sge;
+       if (cmd->srq_type == IB_SRQT_XRC)
+               resp.srqn = srq->ext.xrc.srq_num;
 
-       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+       if (copy_to_user((void __user *) (unsigned long) cmd->response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
                goto err_copy;
        }
 
+       if (cmd->srq_type == IB_SRQT_XRC) {
+               put_uobj_read(xrcd_uobj);
+               put_cq_read(attr.ext.xrc.cq);
+       }
        put_pd_read(pd);
 
        mutex_lock(&file->mutex);
-       list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+       list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
        mutex_unlock(&file->mutex);
 
-       obj->uobject.live = 1;
+       obj->uevent.uobject.live = 1;
 
-       up_write(&obj->uobject.mutex);
+       up_write(&obj->uevent.uobject.mutex);
 
-       return in_len;
+       return 0;
 
 err_copy:
-       idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+       idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 
 err_destroy:
        ib_destroy_srq(srq);
 
 err_put:
+       if (cmd->srq_type == IB_SRQT_XRC) {
+               atomic_dec(&obj->uxrcd->refcnt);
+               put_uobj_read(xrcd_uobj);
+       }
+
+err_put_cq:
+       if (cmd->srq_type == IB_SRQT_XRC)
+               put_cq_read(attr.ext.xrc.cq);
+
+err_put_pd:
        put_pd_read(pd);
 
 err:
-       put_uobj_write(&obj->uobject);
+       put_uobj_write(&obj->uevent.uobject);
        return ret;
 }
 
+ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_create_srq      cmd;
+       struct ib_uverbs_create_xsrq     xcmd;
+       struct ib_uverbs_create_srq_resp resp;
+       struct ib_udata                  udata;
+       int ret;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       xcmd.response    = cmd.response;
+       xcmd.user_handle = cmd.user_handle;
+       xcmd.srq_type    = IB_SRQT_BASIC;
+       xcmd.pd_handle   = cmd.pd_handle;
+       xcmd.max_wr      = cmd.max_wr;
+       xcmd.max_sge     = cmd.max_sge;
+       xcmd.srq_limit   = cmd.srq_limit;
+
+       INIT_UDATA(&udata, buf + sizeof cmd,
+                  (unsigned long) cmd.response + sizeof resp,
+                  in_len - sizeof cmd, out_len - sizeof resp);
+
+       ret = __uverbs_create_xsrq(file, &xcmd, &udata);
+       if (ret)
+               return ret;
+
+       return in_len;
+}
+
+ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
+                             const char __user *buf, int in_len, int out_len)
+{
+       struct ib_uverbs_create_xsrq     cmd;
+       struct ib_uverbs_create_srq_resp resp;
+       struct ib_udata                  udata;
+       int ret;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       INIT_UDATA(&udata, buf + sizeof cmd,
+                  (unsigned long) cmd.response + sizeof resp,
+                  in_len - sizeof cmd, out_len - sizeof resp);
+
+       ret = __uverbs_create_xsrq(file, &cmd, &udata);
+       if (ret)
+               return ret;
+
+       return in_len;
+}
+
 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
                             const char __user *buf, int in_len,
                             int out_len)
index 56898b6..8796367 100644 (file)
@@ -72,6 +72,7 @@ DEFINE_IDR(ib_uverbs_ah_idr);
 DEFINE_IDR(ib_uverbs_cq_idr);
 DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
+DEFINE_IDR(ib_uverbs_xrcd_idr);
 
 static DEFINE_SPINLOCK(map_lock);
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -107,6 +108,10 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
        [IB_USER_VERBS_CMD_MODIFY_SRQ]          = ib_uverbs_modify_srq,
        [IB_USER_VERBS_CMD_QUERY_SRQ]           = ib_uverbs_query_srq,
        [IB_USER_VERBS_CMD_DESTROY_SRQ]         = ib_uverbs_destroy_srq,
+       [IB_USER_VERBS_CMD_OPEN_XRCD]           = ib_uverbs_open_xrcd,
+       [IB_USER_VERBS_CMD_CLOSE_XRCD]          = ib_uverbs_close_xrcd,
+       [IB_USER_VERBS_CMD_CREATE_XSRQ]         = ib_uverbs_create_xsrq,
+       [IB_USER_VERBS_CMD_OPEN_QP]             = ib_uverbs_open_qp
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -202,8 +207,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                        container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
                idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-               ib_uverbs_detach_umcast(qp, uqp);
-               ib_destroy_qp(qp);
+               if (qp != qp->real_qp) {
+                       ib_close_qp(qp);
+               } else {
+                       ib_uverbs_detach_umcast(qp, uqp);
+                       ib_destroy_qp(qp);
+               }
                ib_uverbs_release_uevent(file, &uqp->uevent);
                kfree(uqp);
        }
@@ -241,6 +250,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                kfree(uobj);
        }
 
+       mutex_lock(&file->device->xrcd_tree_mutex);
+       list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
+               struct ib_xrcd *xrcd = uobj->object;
+               struct ib_uxrcd_object *uxrcd =
+                       container_of(uobj, struct ib_uxrcd_object, uobject);
+
+               idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+               ib_uverbs_dealloc_xrcd(file->device, xrcd);
+               kfree(uxrcd);
+       }
+       mutex_unlock(&file->device->xrcd_tree_mutex);
+
        list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
                struct ib_pd *pd = uobj->object;
 
@@ -557,8 +578,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (hdr.in_words * 4 != count)
                return -EINVAL;
 
-       if (hdr.command < 0                             ||
-           hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+       if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
            !uverbs_cmd_table[hdr.command])
                return -EINVAL;
 
@@ -741,6 +761,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
 
        kref_init(&uverbs_dev->ref);
        init_completion(&uverbs_dev->comp);
+       uverbs_dev->xrcd_tree = RB_ROOT;
+       mutex_init(&uverbs_dev->xrcd_tree_mutex);
 
        spin_lock(&map_lock);
        devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
index af7a8b0..4251750 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
@@ -77,6 +78,31 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+int ib_rate_to_mbps(enum ib_rate rate)
+{
+       switch (rate) {
+       case IB_RATE_2_5_GBPS: return 2500;
+       case IB_RATE_5_GBPS:   return 5000;
+       case IB_RATE_10_GBPS:  return 10000;
+       case IB_RATE_20_GBPS:  return 20000;
+       case IB_RATE_30_GBPS:  return 30000;
+       case IB_RATE_40_GBPS:  return 40000;
+       case IB_RATE_60_GBPS:  return 60000;
+       case IB_RATE_80_GBPS:  return 80000;
+       case IB_RATE_120_GBPS: return 120000;
+       case IB_RATE_14_GBPS:  return 14062;
+       case IB_RATE_56_GBPS:  return 56250;
+       case IB_RATE_112_GBPS: return 112500;
+       case IB_RATE_168_GBPS: return 168750;
+       case IB_RATE_25_GBPS:  return 25781;
+       case IB_RATE_100_GBPS: return 103125;
+       case IB_RATE_200_GBPS: return 206250;
+       case IB_RATE_300_GBPS: return 309375;
+       default:               return -1;
+       }
+}
+EXPORT_SYMBOL(ib_rate_to_mbps);
+
 enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type)
 {
@@ -250,6 +276,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
                srq->uobject       = NULL;
                srq->event_handler = srq_init_attr->event_handler;
                srq->srq_context   = srq_init_attr->srq_context;
+               srq->srq_type      = srq_init_attr->srq_type;
+               if (srq->srq_type == IB_SRQT_XRC) {
+                       srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
+                       srq->ext.xrc.cq   = srq_init_attr->ext.xrc.cq;
+                       atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+                       atomic_inc(&srq->ext.xrc.cq->usecnt);
+               }
                atomic_inc(&pd->usecnt);
                atomic_set(&srq->usecnt, 0);
        }
@@ -279,16 +312,29 @@ EXPORT_SYMBOL(ib_query_srq);
 int ib_destroy_srq(struct ib_srq *srq)
 {
        struct ib_pd *pd;
+       enum ib_srq_type srq_type;
+       struct ib_xrcd *uninitialized_var(xrcd);
+       struct ib_cq *uninitialized_var(cq);
        int ret;
 
        if (atomic_read(&srq->usecnt))
                return -EBUSY;
 
        pd = srq->pd;
+       srq_type = srq->srq_type;
+       if (srq_type == IB_SRQT_XRC) {
+               xrcd = srq->ext.xrc.xrcd;
+               cq = srq->ext.xrc.cq;
+       }
 
        ret = srq->device->destroy_srq(srq);
-       if (!ret)
+       if (!ret) {
                atomic_dec(&pd->usecnt);
+               if (srq_type == IB_SRQT_XRC) {
+                       atomic_dec(&xrcd->usecnt);
+                       atomic_dec(&cq->usecnt);
+               }
+       }
 
        return ret;
 }
@@ -296,28 +342,123 @@ EXPORT_SYMBOL(ib_destroy_srq);
 
 /* Queue pairs */
 
+static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
+{
+       struct ib_qp *qp = context;
+
+       list_for_each_entry(event->element.qp, &qp->open_list, open_list)
+               event->element.qp->event_handler(event, event->element.qp->qp_context);
+}
+
+static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
+{
+       mutex_lock(&xrcd->tgt_qp_mutex);
+       list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
+       mutex_unlock(&xrcd->tgt_qp_mutex);
+}
+
+static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
+                                 void (*event_handler)(struct ib_event *, void *),
+                                 void *qp_context)
+{
+       struct ib_qp *qp;
+       unsigned long flags;
+
+       qp = kzalloc(sizeof *qp, GFP_KERNEL);
+       if (!qp)
+               return ERR_PTR(-ENOMEM);
+
+       qp->real_qp = real_qp;
+       atomic_inc(&real_qp->usecnt);
+       qp->device = real_qp->device;
+       qp->event_handler = event_handler;
+       qp->qp_context = qp_context;
+       qp->qp_num = real_qp->qp_num;
+       qp->qp_type = real_qp->qp_type;
+
+       spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+       list_add(&qp->open_list, &real_qp->open_list);
+       spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+       return qp;
+}
+
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+                        struct ib_qp_open_attr *qp_open_attr)
+{
+       struct ib_qp *qp, *real_qp;
+
+       if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
+               return ERR_PTR(-EINVAL);
+
+       qp = ERR_PTR(-EINVAL);
+       mutex_lock(&xrcd->tgt_qp_mutex);
+       list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
+               if (real_qp->qp_num == qp_open_attr->qp_num) {
+                       qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+                                         qp_open_attr->qp_context);
+                       break;
+               }
+       }
+       mutex_unlock(&xrcd->tgt_qp_mutex);
+       return qp;
+}
+EXPORT_SYMBOL(ib_open_qp);
+
 struct ib_qp *ib_create_qp(struct ib_pd *pd,
                           struct ib_qp_init_attr *qp_init_attr)
 {
-       struct ib_qp *qp;
+       struct ib_qp *qp, *real_qp;
+       struct ib_device *device;
 
-       qp = pd->device->create_qp(pd, qp_init_attr, NULL);
+       device = pd ? pd->device : qp_init_attr->xrcd->device;
+       qp = device->create_qp(pd, qp_init_attr, NULL);
 
        if (!IS_ERR(qp)) {
-               qp->device        = pd->device;
-               qp->pd            = pd;
-               qp->send_cq       = qp_init_attr->send_cq;
-               qp->recv_cq       = qp_init_attr->recv_cq;
-               qp->srq           = qp_init_attr->srq;
-               qp->uobject       = NULL;
-               qp->event_handler = qp_init_attr->event_handler;
-               qp->qp_context    = qp_init_attr->qp_context;
-               qp->qp_type       = qp_init_attr->qp_type;
-               atomic_inc(&pd->usecnt);
-               atomic_inc(&qp_init_attr->send_cq->usecnt);
-               atomic_inc(&qp_init_attr->recv_cq->usecnt);
-               if (qp_init_attr->srq)
-                       atomic_inc(&qp_init_attr->srq->usecnt);
+               qp->device     = device;
+               qp->real_qp    = qp;
+               qp->uobject    = NULL;
+               qp->qp_type    = qp_init_attr->qp_type;
+
+               if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
+                       qp->event_handler = __ib_shared_qp_event_handler;
+                       qp->qp_context = qp;
+                       qp->pd = NULL;
+                       qp->send_cq = qp->recv_cq = NULL;
+                       qp->srq = NULL;
+                       qp->xrcd = qp_init_attr->xrcd;
+                       atomic_inc(&qp_init_attr->xrcd->usecnt);
+                       INIT_LIST_HEAD(&qp->open_list);
+                       atomic_set(&qp->usecnt, 0);
+
+                       real_qp = qp;
+                       qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
+                                         qp_init_attr->qp_context);
+                       if (!IS_ERR(qp))
+                               __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+                       else
+                               real_qp->device->destroy_qp(real_qp);
+               } else {
+                       qp->event_handler = qp_init_attr->event_handler;
+                       qp->qp_context = qp_init_attr->qp_context;
+                       if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
+                               qp->recv_cq = NULL;
+                               qp->srq = NULL;
+                       } else {
+                               qp->recv_cq = qp_init_attr->recv_cq;
+                               atomic_inc(&qp_init_attr->recv_cq->usecnt);
+                               qp->srq = qp_init_attr->srq;
+                               if (qp->srq)
+                                       atomic_inc(&qp_init_attr->srq->usecnt);
+                       }
+
+                       qp->pd      = pd;
+                       qp->send_cq = qp_init_attr->send_cq;
+                       qp->xrcd    = NULL;
+
+                       atomic_inc(&pd->usecnt);
+                       atomic_inc(&qp_init_attr->send_cq->usecnt);
+               }
        }
 
        return qp;
@@ -326,8 +467,8 @@ EXPORT_SYMBOL(ib_create_qp);
 
 static const struct {
        int                     valid;
-       enum ib_qp_attr_mask    req_param[IB_QPT_RAW_ETHERTYPE + 1];
-       enum ib_qp_attr_mask    opt_param[IB_QPT_RAW_ETHERTYPE + 1];
+       enum ib_qp_attr_mask    req_param[IB_QPT_MAX];
+       enum ib_qp_attr_mask    opt_param[IB_QPT_MAX];
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = {
                [IB_QPS_RESET] = { .valid = 1 },
@@ -343,6 +484,12 @@ static const struct {
                                [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
                                                IB_QP_PORT                      |
                                                IB_QP_ACCESS_FLAGS),
+                               [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX            |
+                                               IB_QP_PORT                      |
+                                               IB_QP_ACCESS_FLAGS),
+                               [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX            |
+                                               IB_QP_PORT                      |
+                                               IB_QP_ACCESS_FLAGS),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
@@ -365,6 +512,12 @@ static const struct {
                                [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
                                                IB_QP_PORT                      |
                                                IB_QP_ACCESS_FLAGS),
+                               [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX            |
+                                               IB_QP_PORT                      |
+                                               IB_QP_ACCESS_FLAGS),
+                               [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX            |
+                                               IB_QP_PORT                      |
+                                               IB_QP_ACCESS_FLAGS),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
@@ -384,6 +537,16 @@ static const struct {
                                                IB_QP_RQ_PSN                    |
                                                IB_QP_MAX_DEST_RD_ATOMIC        |
                                                IB_QP_MIN_RNR_TIMER),
+                               [IB_QPT_XRC_INI] = (IB_QP_AV                    |
+                                               IB_QP_PATH_MTU                  |
+                                               IB_QP_DEST_QPN                  |
+                                               IB_QP_RQ_PSN),
+                               [IB_QPT_XRC_TGT] = (IB_QP_AV                    |
+                                               IB_QP_PATH_MTU                  |
+                                               IB_QP_DEST_QPN                  |
+                                               IB_QP_RQ_PSN                    |
+                                               IB_QP_MAX_DEST_RD_ATOMIC        |
+                                               IB_QP_MIN_RNR_TIMER),
                        },
                        .opt_param = {
                                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX               |
@@ -394,6 +557,12 @@ static const struct {
                                 [IB_QPT_RC]  = (IB_QP_ALT_PATH                 |
                                                 IB_QP_ACCESS_FLAGS             |
                                                 IB_QP_PKEY_INDEX),
+                                [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH             |
+                                                IB_QP_ACCESS_FLAGS             |
+                                                IB_QP_PKEY_INDEX),
+                                [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH             |
+                                                IB_QP_ACCESS_FLAGS             |
+                                                IB_QP_PKEY_INDEX),
                                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX               |
                                                 IB_QP_QKEY),
                                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX               |
@@ -414,6 +583,13 @@ static const struct {
                                                IB_QP_RNR_RETRY                 |
                                                IB_QP_SQ_PSN                    |
                                                IB_QP_MAX_QP_RD_ATOMIC),
+                               [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT               |
+                                               IB_QP_RETRY_CNT                 |
+                                               IB_QP_RNR_RETRY                 |
+                                               IB_QP_SQ_PSN                    |
+                                               IB_QP_MAX_QP_RD_ATOMIC),
+                               [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT               |
+                                               IB_QP_SQ_PSN),
                                [IB_QPT_SMI] = IB_QP_SQ_PSN,
                                [IB_QPT_GSI] = IB_QP_SQ_PSN,
                        },
@@ -429,6 +605,15 @@ static const struct {
                                                 IB_QP_ACCESS_FLAGS             |
                                                 IB_QP_MIN_RNR_TIMER            |
                                                 IB_QP_PATH_MIG_STATE),
+                                [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE            |
+                                                IB_QP_ALT_PATH                 |
+                                                IB_QP_ACCESS_FLAGS             |
+                                                IB_QP_PATH_MIG_STATE),
+                                [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE            |
+                                                IB_QP_ALT_PATH                 |
+                                                IB_QP_ACCESS_FLAGS             |
+                                                IB_QP_MIN_RNR_TIMER            |
+                                                IB_QP_PATH_MIG_STATE),
                                 [IB_QPT_SMI] = (IB_QP_CUR_STATE                |
                                                 IB_QP_QKEY),
                                 [IB_QPT_GSI] = (IB_QP_CUR_STATE                |
@@ -453,6 +638,15 @@ static const struct {
                                                IB_QP_ALT_PATH                  |
                                                IB_QP_PATH_MIG_STATE            |
                                                IB_QP_MIN_RNR_TIMER),
+                               [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE             |
+                                               IB_QP_ACCESS_FLAGS              |
+                                               IB_QP_ALT_PATH                  |
+                                               IB_QP_PATH_MIG_STATE),
+                               [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE             |
+                                               IB_QP_ACCESS_FLAGS              |
+                                               IB_QP_ALT_PATH                  |
+                                               IB_QP_PATH_MIG_STATE            |
+                                               IB_QP_MIN_RNR_TIMER),
                                [IB_QPT_SMI] = (IB_QP_CUR_STATE                 |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_CUR_STATE                 |
@@ -465,6 +659,8 @@ static const struct {
                                [IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
+                               [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+                               [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
                                [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
                        }
@@ -487,6 +683,15 @@ static const struct {
                                                IB_QP_ACCESS_FLAGS              |
                                                IB_QP_MIN_RNR_TIMER             |
                                                IB_QP_PATH_MIG_STATE),
+                               [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE             |
+                                               IB_QP_ALT_PATH                  |
+                                               IB_QP_ACCESS_FLAGS              |
+                                               IB_QP_PATH_MIG_STATE),
+                               [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE             |
+                                               IB_QP_ALT_PATH                  |
+                                               IB_QP_ACCESS_FLAGS              |
+                                               IB_QP_MIN_RNR_TIMER             |
+                                               IB_QP_PATH_MIG_STATE),
                                [IB_QPT_SMI] = (IB_QP_CUR_STATE                 |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_CUR_STATE                 |
@@ -515,6 +720,25 @@ static const struct {
                                                IB_QP_PKEY_INDEX                |
                                                IB_QP_MIN_RNR_TIMER             |
                                                IB_QP_PATH_MIG_STATE),
+                               [IB_QPT_XRC_INI] = (IB_QP_PORT                  |
+                                               IB_QP_AV                        |
+                                               IB_QP_TIMEOUT                   |
+                                               IB_QP_RETRY_CNT                 |
+                                               IB_QP_RNR_RETRY                 |
+                                               IB_QP_MAX_QP_RD_ATOMIC          |
+                                               IB_QP_ALT_PATH                  |
+                                               IB_QP_ACCESS_FLAGS              |
+                                               IB_QP_PKEY_INDEX                |
+                                               IB_QP_PATH_MIG_STATE),
+                               [IB_QPT_XRC_TGT] = (IB_QP_PORT                  |
+                                               IB_QP_AV                        |
+                                               IB_QP_TIMEOUT                   |
+                                               IB_QP_MAX_DEST_RD_ATOMIC        |
+                                               IB_QP_ALT_PATH                  |
+                                               IB_QP_ACCESS_FLAGS              |
+                                               IB_QP_PKEY_INDEX                |
+                                               IB_QP_MIN_RNR_TIMER             |
+                                               IB_QP_PATH_MIG_STATE),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
@@ -579,7 +803,7 @@ int ib_modify_qp(struct ib_qp *qp,
                 struct ib_qp_attr *qp_attr,
                 int qp_attr_mask)
 {
-       return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
+       return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
@@ -589,11 +813,59 @@ int ib_query_qp(struct ib_qp *qp,
                struct ib_qp_init_attr *qp_init_attr)
 {
        return qp->device->query_qp ?
-               qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
+               qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
                -ENOSYS;
 }
 EXPORT_SYMBOL(ib_query_qp);
 
+int ib_close_qp(struct ib_qp *qp)
+{
+       struct ib_qp *real_qp;
+       unsigned long flags;
+
+       real_qp = qp->real_qp;
+       if (real_qp == qp)
+               return -EINVAL;
+
+       spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+       list_del(&qp->open_list);
+       spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+       atomic_dec(&real_qp->usecnt);
+       kfree(qp);
+
+       return 0;
+}
+EXPORT_SYMBOL(ib_close_qp);
+
+static int __ib_destroy_shared_qp(struct ib_qp *qp)
+{
+       struct ib_xrcd *xrcd;
+       struct ib_qp *real_qp;
+       int ret;
+
+       real_qp = qp->real_qp;
+       xrcd = real_qp->xrcd;
+
+       mutex_lock(&xrcd->tgt_qp_mutex);
+       ib_close_qp(qp);
+       if (atomic_read(&real_qp->usecnt) == 0)
+               list_del(&real_qp->xrcd_list);
+       else
+               real_qp = NULL;
+       mutex_unlock(&xrcd->tgt_qp_mutex);
+
+       if (real_qp) {
+               ret = ib_destroy_qp(real_qp);
+               if (!ret)
+                       atomic_dec(&xrcd->usecnt);
+               else
+                       __ib_insert_xrcd_qp(xrcd, real_qp);
+       }
+
+       return 0;
+}
+
 int ib_destroy_qp(struct ib_qp *qp)
 {
        struct ib_pd *pd;
@@ -601,16 +873,25 @@ int ib_destroy_qp(struct ib_qp *qp)
        struct ib_srq *srq;
        int ret;
 
-       pd  = qp->pd;
-       scq = qp->send_cq;
-       rcq = qp->recv_cq;
-       srq = qp->srq;
+       if (atomic_read(&qp->usecnt))
+               return -EBUSY;
+
+       if (qp->real_qp != qp)
+               return __ib_destroy_shared_qp(qp);
+
+       pd   = qp->pd;
+       scq  = qp->send_cq;
+       rcq  = qp->recv_cq;
+       srq  = qp->srq;
 
        ret = qp->device->destroy_qp(qp);
        if (!ret) {
-               atomic_dec(&pd->usecnt);
-               atomic_dec(&scq->usecnt);
-               atomic_dec(&rcq->usecnt);
+               if (pd)
+                       atomic_dec(&pd->usecnt);
+               if (scq)
+                       atomic_dec(&scq->usecnt);
+               if (rcq)
+                       atomic_dec(&rcq->usecnt);
                if (srq)
                        atomic_dec(&srq->usecnt);
        }
@@ -920,3 +1201,42 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
        return qp->device->detach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_detach_mcast);
+
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+{
+       struct ib_xrcd *xrcd;
+
+       if (!device->alloc_xrcd)
+               return ERR_PTR(-ENOSYS);
+
+       xrcd = device->alloc_xrcd(device, NULL, NULL);
+       if (!IS_ERR(xrcd)) {
+               xrcd->device = device;
+               xrcd->inode = NULL;
+               atomic_set(&xrcd->usecnt, 0);
+               mutex_init(&xrcd->tgt_qp_mutex);
+               INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+       }
+
+       return xrcd;
+}
+EXPORT_SYMBOL(ib_alloc_xrcd);
+
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+       struct ib_qp *qp;
+       int ret;
+
+       if (atomic_read(&xrcd->usecnt))
+               return -EBUSY;
+
+       while (!list_empty(&xrcd->tgt_qp_list)) {
+               qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
+               ret = ib_destroy_qp(qp);
+               if (ret)
+                       return ret;
+       }
+
+       return xrcd->device->dealloc_xrcd(xrcd);
+}
+EXPORT_SYMBOL(ib_dealloc_xrcd);
index 24f9e3a..32d34e8 100644 (file)
@@ -288,6 +288,11 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
                cm_event.private_data_len =
                        be32_to_cpu(req->private_data_length);
                cm_event.private_data = req->private_data;
+               /*
+                * Until ird/ord negotiation via MPAv2 support is added, send
+                * max supported values
+                */
+               cm_event.ird = cm_event.ord = 128;
 
                if (cm_id->event_handler)
                        cm_id->event_handler(cm_id, &cm_event);
index 0ebe4e8..8951db4 100644 (file)
@@ -183,6 +183,11 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
        case IW_CM_EVENT_ESTABLISHED:
                c2_set_qp_state(req->qp,
                                C2_QP_STATE_RTS);
+               /*
+                * Until ird/ord negotiation via MPAv2 support is added, send
+                * max supported values
+                */
+               cm_event.ird = cm_event.ord = 128;
        case IW_CM_EVENT_CLOSE:
 
                /*
index f101bb7..12f923d 100644 (file)
@@ -753,10 +753,7 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
        memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
 
        /* Print out the MAC address */
-       pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
-               netdev->name,
-               netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
-               netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+       pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
 
 #if 0
        /* Disable network packets */
index 6cd642a..de6d077 100644 (file)
@@ -753,6 +753,11 @@ static void connect_request_upcall(struct iwch_ep *ep)
        event.private_data_len = ep->plen;
        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        event.provider_data = ep;
+       /*
+        * Until ird/ord negotiation via MPAv2 support is added, send max
+        * supported values
+        */
+       event.ird = event.ord = 8;
        if (state_read(&ep->parent_ep->com) != DEAD) {
                get_ep(&ep->com);
                ep->parent_ep->com.cm_id->event_handler(
@@ -770,6 +775,11 @@ static void established_upcall(struct iwch_ep *ep)
        PDBG("%s ep %p\n", __func__, ep);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_ESTABLISHED;
+       /*
+        * Until ird/ord negotiation via MPAv2 support is added, send max
+        * supported values
+        */
+       event.ird = event.ord = 8;
        if (ep->com.cm_id) {
                PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
index 71e0d84..abcc9e7 100644 (file)
@@ -46,6 +46,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
        struct ib_event event;
        struct iwch_qp_attributes attrs;
        struct iwch_qp *qhp;
+       unsigned long flag;
 
        spin_lock(&rnicp->lock);
        qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -94,7 +95,9 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
        if (qhp->ibqp.event_handler)
                (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
 
+       spin_lock_irqsave(&chp->comp_handler_lock, flag);
        (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+       spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
 
        if (atomic_dec_and_test(&qhp->refcnt))
                wake_up(&qhp->wait);
@@ -107,6 +110,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
        struct iwch_cq *chp;
        struct iwch_qp *qhp;
        u32 cqid = RSPQ_CQID(rsp_msg);
+       unsigned long flag;
 
        rnicp = (struct iwch_dev *) rdev_p->ulp;
        spin_lock(&rnicp->lock);
@@ -170,7 +174,9 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
                 */
                if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
                        dst_confirm(qhp->ep->dst);
+               spin_lock_irqsave(&chp->comp_handler_lock, flag);
                (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+               spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
                break;
 
        case TPT_ERR_STAG:
index c7d9411..37c224f 100644 (file)
@@ -190,6 +190,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
        chp->rhp = rhp;
        chp->ibcq.cqe = 1 << chp->cq.size_log2;
        spin_lock_init(&chp->lock);
+       spin_lock_init(&chp->comp_handler_lock);
        atomic_set(&chp->refcnt, 1);
        init_waitqueue_head(&chp->wait);
        if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
index 9a342c9..87c14b0 100644 (file)
@@ -103,6 +103,7 @@ struct iwch_cq {
        struct iwch_dev *rhp;
        struct t3_cq cq;
        spinlock_t lock;
+       spinlock_t comp_handler_lock;
        atomic_t refcnt;
        wait_queue_head_t wait;
        u32 __user *user_rptr_addr;
index ecd313f..bea5839 100644 (file)
@@ -822,8 +822,11 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
        flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&rchp->lock, *flag);
-       if (flushed)
+       if (flushed) {
+               spin_lock_irqsave(&rchp->comp_handler_lock, *flag);
                (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+               spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag);
+       }
 
        /* locking hierarchy: cq lock first, then qp lock. */
        spin_lock_irqsave(&schp->lock, *flag);
@@ -833,8 +836,11 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
        flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&schp->lock, *flag);
-       if (flushed)
+       if (flushed) {
+               spin_lock_irqsave(&schp->comp_handler_lock, *flag);
                (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+               spin_unlock_irqrestore(&schp->comp_handler_lock, *flag);
+       }
 
        /* deref */
        if (atomic_dec_and_test(&qhp->refcnt))
@@ -853,11 +859,15 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
        if (qhp->ibqp.uobject) {
                cxio_set_wq_in_error(&qhp->wq);
                cxio_set_cq_in_error(&rchp->cq);
+               spin_lock_irqsave(&rchp->comp_handler_lock, *flag);
                (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+               spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag);
                if (schp != rchp) {
                        cxio_set_cq_in_error(&schp->cq);
+                       spin_lock_irqsave(&schp->comp_handler_lock, *flag);
                        (*schp->ibcq.comp_handler)(&schp->ibcq,
                                                   schp->ibcq.cq_context);
+                       spin_unlock_irqrestore(&schp->comp_handler_lock, *flag);
                }
                return;
        }
index 77f769d..b36cdac 100644 (file)
@@ -103,7 +103,8 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 static int mpa_rev = 1;
 module_param(mpa_rev, int, 0644);
 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
-                "1 is spec compliant. (default=1)");
+               "1 is RFC 5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
+               " compliant (default=1)");
 
 static int markers_enabled;
 module_param(markers_enabled, int, 0644);
@@ -497,17 +498,21 @@ static int send_connect(struct c4iw_ep *ep)
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
+static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+               u8 mpa_rev_to_use)
 {
        int mpalen, wrlen;
        struct fw_ofld_tx_data_wr *req;
        struct mpa_message *mpa;
+       struct mpa_v2_conn_params mpa_v2_params;
 
        PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
        BUG_ON(skb_cloned(skb));
 
        mpalen = sizeof(*mpa) + ep->plen;
+       if (mpa_rev_to_use == 2)
+               mpalen += sizeof(struct mpa_v2_conn_params);
        wrlen = roundup(mpalen + sizeof *req, 16);
        skb = get_skb(skb, wrlen, GFP_KERNEL);
        if (!skb) {
@@ -533,12 +538,39 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
        mpa = (struct mpa_message *)(req + 1);
        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
        mpa->flags = (crc_enabled ? MPA_CRC : 0) |
-                    (markers_enabled ? MPA_MARKERS : 0);
+                    (markers_enabled ? MPA_MARKERS : 0) |
+                    (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
        mpa->private_data_size = htons(ep->plen);
-       mpa->revision = mpa_rev;
+       mpa->revision = mpa_rev_to_use;
+       if (mpa_rev_to_use == 1)
+               ep->tried_with_mpa_v1 = 1;
+
+       if (mpa_rev_to_use == 2) {
+               mpa->private_data_size +=
+                       htons(sizeof(struct mpa_v2_conn_params));
+               mpa_v2_params.ird = htons((u16)ep->ird);
+               mpa_v2_params.ord = htons((u16)ep->ord);
+
+               if (peer2peer) {
+                       mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+                       if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+                               mpa_v2_params.ord |=
+                                       htons(MPA_V2_RDMA_WRITE_RTR);
+                       else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+                               mpa_v2_params.ord |=
+                                       htons(MPA_V2_RDMA_READ_RTR);
+               }
+               memcpy(mpa->private_data, &mpa_v2_params,
+                      sizeof(struct mpa_v2_conn_params));
 
-       if (ep->plen)
-               memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+               if (ep->plen)
+                       memcpy(mpa->private_data +
+                              sizeof(struct mpa_v2_conn_params),
+                              ep->mpa_pkt + sizeof(*mpa), ep->plen);
+       } else
+               if (ep->plen)
+                       memcpy(mpa->private_data,
+                                       ep->mpa_pkt + sizeof(*mpa), ep->plen);
 
        /*
         * Reference the mpa skb.  This ensures the data area
@@ -562,10 +594,13 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
        struct fw_ofld_tx_data_wr *req;
        struct mpa_message *mpa;
        struct sk_buff *skb;
+       struct mpa_v2_conn_params mpa_v2_params;
 
        PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
        mpalen = sizeof(*mpa) + plen;
+       if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+               mpalen += sizeof(struct mpa_v2_conn_params);
        wrlen = roundup(mpalen + sizeof *req, 16);
 
        skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -595,8 +630,29 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
        mpa->flags = MPA_REJECT;
        mpa->revision = mpa_rev;
        mpa->private_data_size = htons(plen);
-       if (plen)
-               memcpy(mpa->private_data, pdata, plen);
+
+       if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+               mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+               mpa->private_data_size +=
+                       htons(sizeof(struct mpa_v2_conn_params));
+               mpa_v2_params.ird = htons(((u16)ep->ird) |
+                                         (peer2peer ? MPA_V2_PEER2PEER_MODEL :
+                                          0));
+               mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
+                                         (p2p_type ==
+                                          FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
+                                          MPA_V2_RDMA_WRITE_RTR : p2p_type ==
+                                          FW_RI_INIT_P2PTYPE_READ_REQ ?
+                                          MPA_V2_RDMA_READ_RTR : 0) : 0));
+               memcpy(mpa->private_data, &mpa_v2_params,
+                      sizeof(struct mpa_v2_conn_params));
+
+               if (ep->plen)
+                       memcpy(mpa->private_data +
+                              sizeof(struct mpa_v2_conn_params), pdata, plen);
+       } else
+               if (plen)
+                       memcpy(mpa->private_data, pdata, plen);
 
        /*
         * Reference the mpa skb again.  This ensures the data area
@@ -617,10 +673,13 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
        struct fw_ofld_tx_data_wr *req;
        struct mpa_message *mpa;
        struct sk_buff *skb;
+       struct mpa_v2_conn_params mpa_v2_params;
 
        PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
        mpalen = sizeof(*mpa) + plen;
+       if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+               mpalen += sizeof(struct mpa_v2_conn_params);
        wrlen = roundup(mpalen + sizeof *req, 16);
 
        skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -649,10 +708,36 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
        mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
                     (markers_enabled ? MPA_MARKERS : 0);
-       mpa->revision = mpa_rev;
+       mpa->revision = ep->mpa_attr.version;
        mpa->private_data_size = htons(plen);
-       if (plen)
-               memcpy(mpa->private_data, pdata, plen);
+
+       if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+               mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+               mpa->private_data_size +=
+                       htons(sizeof(struct mpa_v2_conn_params));
+               mpa_v2_params.ird = htons((u16)ep->ird);
+               mpa_v2_params.ord = htons((u16)ep->ord);
+               if (peer2peer && (ep->mpa_attr.p2p_type !=
+                                       FW_RI_INIT_P2PTYPE_DISABLED)) {
+                       mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+
+                       if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+                               mpa_v2_params.ord |=
+                                       htons(MPA_V2_RDMA_WRITE_RTR);
+                       else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+                               mpa_v2_params.ord |=
+                                       htons(MPA_V2_RDMA_READ_RTR);
+               }
+
+               memcpy(mpa->private_data, &mpa_v2_params,
+                      sizeof(struct mpa_v2_conn_params));
+
+               if (ep->plen)
+                       memcpy(mpa->private_data +
+                              sizeof(struct mpa_v2_conn_params), pdata, plen);
+       } else
+               if (plen)
+                       memcpy(mpa->private_data, pdata, plen);
 
        /*
         * Reference the mpa skb.  This ensures the data area
@@ -695,7 +780,10 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
 
        /* start MPA negotiation */
        send_flowc(ep, NULL);
-       send_mpa_req(ep, skb);
+       if (ep->retry_with_mpa_v1)
+               send_mpa_req(ep, skb, 1);
+       else
+               send_mpa_req(ep, skb, mpa_rev);
 
        return 0;
 }
@@ -769,8 +857,19 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
        event.remote_addr = ep->com.remote_addr;
 
        if ((status == 0) || (status == -ECONNREFUSED)) {
-               event.private_data_len = ep->plen;
-               event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+               if (!ep->tried_with_mpa_v1) {
+                       /* this means MPA_v2 is used */
+                       event.private_data_len = ep->plen -
+                               sizeof(struct mpa_v2_conn_params);
+                       event.private_data = ep->mpa_pkt +
+                               sizeof(struct mpa_message) +
+                               sizeof(struct mpa_v2_conn_params);
+               } else {
+                       /* this means MPA_v1 is used */
+                       event.private_data_len = ep->plen;
+                       event.private_data = ep->mpa_pkt +
+                               sizeof(struct mpa_message);
+               }
        }
 
        PDBG("%s ep %p tid %u status %d\n", __func__, ep,
@@ -793,9 +892,22 @@ static void connect_request_upcall(struct c4iw_ep *ep)
        event.event = IW_CM_EVENT_CONNECT_REQUEST;
        event.local_addr = ep->com.local_addr;
        event.remote_addr = ep->com.remote_addr;
-       event.private_data_len = ep->plen;
-       event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        event.provider_data = ep;
+       if (!ep->tried_with_mpa_v1) {
+               /* this means MPA_v2 is used */
+               event.ord = ep->ord;
+               event.ird = ep->ird;
+               event.private_data_len = ep->plen -
+                       sizeof(struct mpa_v2_conn_params);
+               event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
+                       sizeof(struct mpa_v2_conn_params);
+       } else {
+               /* this means MPA_v1 is used. Send max supported */
+               event.ord = c4iw_max_read_depth;
+               event.ird = c4iw_max_read_depth;
+               event.private_data_len = ep->plen;
+               event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+       }
        if (state_read(&ep->parent_ep->com) != DEAD) {
                c4iw_get_ep(&ep->com);
                ep->parent_ep->com.cm_id->event_handler(
@@ -813,6 +925,8 @@ static void established_upcall(struct c4iw_ep *ep)
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_ESTABLISHED;
+       event.ird = ep->ird;
+       event.ord = ep->ord;
        if (ep->com.cm_id) {
                PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -848,7 +962,10 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
 static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 {
        struct mpa_message *mpa;
+       struct mpa_v2_conn_params *mpa_v2_params;
        u16 plen;
+       u16 resp_ird, resp_ord;
+       u8 rtr_mismatch = 0, insuff_ird = 0;
        struct c4iw_qp_attributes attrs;
        enum c4iw_qp_attr_mask mask;
        int err;
@@ -888,7 +1005,9 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
        mpa = (struct mpa_message *) ep->mpa_pkt;
 
        /* Validate MPA header. */
-       if (mpa->revision != mpa_rev) {
+       if (mpa->revision > mpa_rev) {
+               printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+                      " Received = %d\n", __func__, mpa_rev, mpa->revision);
                err = -EPROTO;
                goto err;
        }
@@ -938,13 +1057,66 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-       ep->mpa_attr.version = mpa_rev;
-       ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-                                           FW_RI_INIT_P2PTYPE_DISABLED;
+       ep->mpa_attr.version = mpa->revision;
+       ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+       if (mpa->revision == 2) {
+               ep->mpa_attr.enhanced_rdma_conn =
+                       mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+               if (ep->mpa_attr.enhanced_rdma_conn) {
+                       mpa_v2_params = (struct mpa_v2_conn_params *)
+                               (ep->mpa_pkt + sizeof(*mpa));
+                       resp_ird = ntohs(mpa_v2_params->ird) &
+                               MPA_V2_IRD_ORD_MASK;
+                       resp_ord = ntohs(mpa_v2_params->ord) &
+                               MPA_V2_IRD_ORD_MASK;
+
+                       /*
+                        * This is a double-check. Ideally, below checks are
+                        * not required since ird/ord stuff has been taken
+                        * care of in c4iw_accept_cr
+                        */
+                       if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+                               err = -ENOMEM;
+                               ep->ird = resp_ord;
+                               ep->ord = resp_ird;
+                               insuff_ird = 1;
+                       }
+
+                       if (ntohs(mpa_v2_params->ird) &
+                                       MPA_V2_PEER2PEER_MODEL) {
+                               if (ntohs(mpa_v2_params->ord) &
+                                               MPA_V2_RDMA_WRITE_RTR)
+                                       ep->mpa_attr.p2p_type =
+                                               FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+                               else if (ntohs(mpa_v2_params->ord) &
+                                               MPA_V2_RDMA_READ_RTR)
+                                       ep->mpa_attr.p2p_type =
+                                               FW_RI_INIT_P2PTYPE_READ_REQ;
+                       }
+               }
+       } else if (mpa->revision == 1)
+               if (peer2peer)
+                       ep->mpa_attr.p2p_type = p2p_type;
+
        PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
-            "xmit_marker_enabled=%d, version=%d\n", __func__,
-            ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
-            ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+            "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
+            "%d\n", __func__, ep->mpa_attr.crc_enabled,
+            ep->mpa_attr.recv_marker_enabled,
+            ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+            ep->mpa_attr.p2p_type, p2p_type);
+
+       /*
+        * If responder's RTR does not match with that of initiator, assign
+        * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
+        * generated when moving QP to RTS state.
+        * A TERM message will be sent after QP has moved to RTS state
+        */
+       if ((ep->mpa_attr.version == 2) &&
+                       (ep->mpa_attr.p2p_type != p2p_type)) {
+               ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+               rtr_mismatch = 1;
+       }
 
        attrs.mpa_attr = ep->mpa_attr;
        attrs.max_ird = ep->ird;
@@ -961,6 +1133,39 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
                             ep->com.qp, mask, &attrs, 1);
        if (err)
                goto err;
+
+       /*
+        * If responder's RTR requirement did not match with what initiator
+        * supports, generate TERM message
+        */
+       if (rtr_mismatch) {
+               printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+               attrs.layer_etype = LAYER_MPA | DDP_LLP;
+               attrs.ecode = MPA_NOMATCH_RTR;
+               attrs.next_state = C4IW_QP_STATE_TERMINATE;
+               err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /*
+        * Generate TERM if initiator IRD is not sufficient for responder
+        * provided ORD. Currently, we do the same behaviour even when
+        * responder provided IRD is also not sufficient as regards to
+        * initiator ORD.
+        */
+       if (insuff_ird) {
+               printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
+                               __func__);
+               attrs.layer_etype = LAYER_MPA | DDP_LLP;
+               attrs.ecode = MPA_INSUFF_IRD;
+               attrs.next_state = C4IW_QP_STATE_TERMINATE;
+               err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+               err = -ENOMEM;
+               goto out;
+       }
        goto out;
 err:
        state_set(&ep->com, ABORTING);
@@ -973,6 +1178,7 @@ out:
 static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 {
        struct mpa_message *mpa;
+       struct mpa_v2_conn_params *mpa_v2_params;
        u16 plen;
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@@ -1013,7 +1219,9 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
        /*
         * Validate MPA Header.
         */
-       if (mpa->revision != mpa_rev) {
+       if (mpa->revision > mpa_rev) {
+               printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+                      " Received = %d\n", __func__, mpa_rev, mpa->revision);
                abort_connection(ep, skb, GFP_KERNEL);
                return;
        }
@@ -1056,9 +1264,37 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-       ep->mpa_attr.version = mpa_rev;
-       ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-                                           FW_RI_INIT_P2PTYPE_DISABLED;
+       ep->mpa_attr.version = mpa->revision;
+       if (mpa->revision == 1)
+               ep->tried_with_mpa_v1 = 1;
+       ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+       if (mpa->revision == 2) {
+               ep->mpa_attr.enhanced_rdma_conn =
+                       mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+               if (ep->mpa_attr.enhanced_rdma_conn) {
+                       mpa_v2_params = (struct mpa_v2_conn_params *)
+                               (ep->mpa_pkt + sizeof(*mpa));
+                       ep->ird = ntohs(mpa_v2_params->ird) &
+                               MPA_V2_IRD_ORD_MASK;
+                       ep->ord = ntohs(mpa_v2_params->ord) &
+                               MPA_V2_IRD_ORD_MASK;
+                       if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
+                               if (peer2peer) {
+                                       if (ntohs(mpa_v2_params->ord) &
+                                                       MPA_V2_RDMA_WRITE_RTR)
+                                               ep->mpa_attr.p2p_type =
+                                               FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+                                       else if (ntohs(mpa_v2_params->ord) &
+                                                       MPA_V2_RDMA_READ_RTR)
+                                               ep->mpa_attr.p2p_type =
+                                               FW_RI_INIT_P2PTYPE_READ_REQ;
+                               }
+               }
+       } else if (mpa->revision == 1)
+               if (peer2peer)
+                       ep->mpa_attr.p2p_type = p2p_type;
+
        PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
             "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
             ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
@@ -1550,6 +1786,112 @@ static int is_neg_adv_abort(unsigned int status)
               status == CPL_ERR_PERSIST_NEG_ADVICE;
 }
 
+static int c4iw_reconnect(struct c4iw_ep *ep)
+{
+       int err = 0;
+       struct rtable *rt;
+       struct net_device *pdev;
+       struct neighbour *neigh;
+       int step;
+
+       PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+       init_timer(&ep->timer);
+
+       /*
+        * Allocate an active TID to initiate a TCP connection.
+        */
+       ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
+       if (ep->atid == -1) {
+               printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+               err = -ENOMEM;
+               goto fail2;
+       }
+
+       /* find a route */
+       rt = find_route(ep->com.dev,
+                       ep->com.cm_id->local_addr.sin_addr.s_addr,
+                       ep->com.cm_id->remote_addr.sin_addr.s_addr,
+                       ep->com.cm_id->local_addr.sin_port,
+                       ep->com.cm_id->remote_addr.sin_port, 0);
+       if (!rt) {
+               printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+               err = -EHOSTUNREACH;
+               goto fail3;
+       }
+       ep->dst = &rt->dst;
+
+       neigh = dst_get_neighbour(ep->dst);
+
+       /* get a l2t entry */
+       if (neigh->dev->flags & IFF_LOOPBACK) {
+               PDBG("%s LOOPBACK\n", __func__);
+               pdev = ip_dev_find(&init_net,
+                                  ep->com.cm_id->remote_addr.sin_addr.s_addr);
+               ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+                                       neigh, pdev, 0);
+               ep->mtu = pdev->mtu;
+               ep->tx_chan = cxgb4_port_chan(pdev);
+               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+               step = ep->com.dev->rdev.lldi.ntxq /
+                       ep->com.dev->rdev.lldi.nchan;
+               ep->txq_idx = cxgb4_port_idx(pdev) * step;
+               step = ep->com.dev->rdev.lldi.nrxq /
+                       ep->com.dev->rdev.lldi.nchan;
+               ep->ctrlq_idx = cxgb4_port_idx(pdev);
+               ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+                       cxgb4_port_idx(pdev) * step];
+               dev_put(pdev);
+       } else {
+               ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+                                       neigh, neigh->dev, 0);
+               ep->mtu = dst_mtu(ep->dst);
+               ep->tx_chan = cxgb4_port_chan(neigh->dev);
+               ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
+               step = ep->com.dev->rdev.lldi.ntxq /
+                       ep->com.dev->rdev.lldi.nchan;
+               ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
+               ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
+               step = ep->com.dev->rdev.lldi.nrxq /
+                       ep->com.dev->rdev.lldi.nchan;
+               ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+                       cxgb4_port_idx(neigh->dev) * step];
+       }
+       if (!ep->l2t) {
+               printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+               err = -ENOMEM;
+               goto fail4;
+       }
+
+       PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+            __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+            ep->l2t->idx);
+
+       state_set(&ep->com, CONNECTING);
+       ep->tos = 0;
+
+       /* send connect request to rnic */
+       err = send_connect(ep);
+       if (!err)
+               goto out;
+
+       cxgb4_l2t_release(ep->l2t);
+fail4:
+       dst_release(ep->dst);
+fail3:
+       cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+fail2:
+       /*
+        * remember to send notification to upper layer.
+        * We are in here so the upper layer is not aware that this is
+        * re-connect attempt and so, upper layer is still waiting for
+        * response of 1st connect request.
+        */
+       connect_reply_upcall(ep, -ECONNRESET);
+       c4iw_put_ep(&ep->com);
+out:
+       return err;
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
        struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -1573,8 +1915,11 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 
        /*
         * Wake up any threads in rdma_init() or rdma_fini().
+        * However, this is not needed if com state is just
+        * MPA_REQ_SENT
         */
-       c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+       if (ep->com.state != MPA_REQ_SENT)
+               c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 
        mutex_lock(&ep->com.mutex);
        switch (ep->com.state) {
@@ -1585,7 +1930,21 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
                break;
        case MPA_REQ_SENT:
                stop_ep_timer(ep);
-               connect_reply_upcall(ep, -ECONNRESET);
+               if (mpa_rev == 2 && ep->tried_with_mpa_v1)
+                       connect_reply_upcall(ep, -ECONNRESET);
+               else {
+                       /*
+                        * we just don't send notification upwards because we
+                        * want to retry with mpa_v1 without upper layers even
+                        * knowing it.
+                        *
+                        * do some housekeeping so as to re-initiate the
+                        * connection
+                        */
+                       PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
+                            mpa_rev);
+                       ep->retry_with_mpa_v1 = 1;
+               }
                break;
        case MPA_REP_SENT:
                break;
@@ -1621,7 +1980,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
        dst_confirm(ep->dst);
        if (ep->com.state != ABORTING) {
                __state_set(&ep->com, DEAD);
-               release = 1;
+               /* we don't release if we want to retry with mpa_v1 */
+               if (!ep->retry_with_mpa_v1)
+                       release = 1;
        }
        mutex_unlock(&ep->com.mutex);
 
@@ -1641,6 +2002,15 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 out:
        if (release)
                release_ep_resources(ep);
+
+       /* retry with mpa-v1 */
+       if (ep && ep->retry_with_mpa_v1) {
+               cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+               dst_release(ep->dst);
+               cxgb4_l2t_release(ep->l2t);
+               c4iw_reconnect(ep);
+       }
+
        return 0;
 }
 
@@ -1792,18 +2162,40 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                goto err;
        }
 
-       cm_id->add_ref(cm_id);
-       ep->com.cm_id = cm_id;
-       ep->com.qp = qp;
+       if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+               if (conn_param->ord > ep->ird) {
+                       ep->ird = conn_param->ird;
+                       ep->ord = conn_param->ord;
+                       send_mpa_reject(ep, conn_param->private_data,
+                                       conn_param->private_data_len);
+                       abort_connection(ep, NULL, GFP_KERNEL);
+                       err = -ENOMEM;
+                       goto err;
+               }
+               if (conn_param->ird > ep->ord) {
+                       if (!ep->ord)
+                               conn_param->ird = 1;
+                       else {
+                               abort_connection(ep, NULL, GFP_KERNEL);
+                               err = -ENOMEM;
+                               goto err;
+                       }
+               }
 
+       }
        ep->ird = conn_param->ird;
        ep->ord = conn_param->ord;
 
-       if (peer2peer && ep->ird == 0)
-               ep->ird = 1;
+       if (ep->mpa_attr.version != 2)
+               if (peer2peer && ep->ird == 0)
+                       ep->ird = 1;
 
        PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
+       cm_id->add_ref(cm_id);
+       ep->com.cm_id = cm_id;
+       ep->com.qp = qp;
+
        /* bind QP to EP and move to RTS */
        attrs.mpa_attr = ep->mpa_attr;
        attrs.max_ird = ep->ird;
@@ -1944,6 +2336,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                       ep->com.dev->rdev.lldi.nchan;
                ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
                              cxgb4_port_idx(neigh->dev) * step];
+               ep->retry_with_mpa_v1 = 0;
+               ep->tried_with_mpa_v1 = 0;
        }
        if (!ep->l2t) {
                printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
@@ -2323,8 +2717,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
 
        /*
         * Wake up any threads in rdma_init() or rdma_fini().
+        * However, this is not needed if com state is just
+        * MPA_REQ_SENT
         */
-       c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+       if (ep->com.state != MPA_REQ_SENT)
+               c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
        sched(dev, skb);
        return 0;
 }
index 1720dc7..f35a935 100644 (file)
@@ -185,7 +185,7 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
                                 V_CQE_OPCODE(FW_RI_SEND) |
                                 V_CQE_TYPE(0) |
                                 V_CQE_SWCQE(1) |
-                                V_CQE_QPID(wq->rq.qid));
+                                V_CQE_QPID(wq->sq.qid));
        cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
        cq->sw_queue[cq->sw_pidx] = cqe;
        t4_swcq_produce(cq);
@@ -818,6 +818,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
        chp->cq.size--;                         /* status page */
        chp->ibcq.cqe = entries - 2;
        spin_lock_init(&chp->lock);
+       spin_lock_init(&chp->comp_handler_lock);
        atomic_set(&chp->refcnt, 1);
        init_waitqueue_head(&chp->wait);
        ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
index 40a13cc..6d0df6e 100644 (file)
@@ -376,10 +376,8 @@ struct uld_ctx {
        struct c4iw_dev *dev;
 };
 
-static void c4iw_remove(struct uld_ctx *ctx)
+static void c4iw_dealloc(struct uld_ctx *ctx)
 {
-       PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
-       c4iw_unregister_device(ctx->dev);
        c4iw_rdev_close(&ctx->dev->rdev);
        idr_destroy(&ctx->dev->cqidr);
        idr_destroy(&ctx->dev->qpidr);
@@ -389,11 +387,30 @@ static void c4iw_remove(struct uld_ctx *ctx)
        ctx->dev = NULL;
 }
 
+static void c4iw_remove(struct uld_ctx *ctx)
+{
+       PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
+       c4iw_unregister_device(ctx->dev);
+       c4iw_dealloc(ctx);
+}
+
+static int rdma_supported(const struct cxgb4_lld_info *infop)
+{
+       return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
+              infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
+              infop->vr->cq.size > 0 && infop->vr->ocq.size > 0;
+}
+
 static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 {
        struct c4iw_dev *devp;
        int ret;
 
+       if (!rdma_supported(infop)) {
+               printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
+                      pci_name(infop->pdev));
+               return ERR_PTR(-ENOSYS);
+       }
        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
        if (!devp) {
                printk(KERN_ERR MOD "Cannot allocate ib device\n");
@@ -414,7 +431,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 
        ret = c4iw_rdev_open(&devp->rdev);
        if (ret) {
-               mutex_unlock(&dev_mutex);
                printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
                ib_dealloc_device(&devp->ibdev);
                return ERR_PTR(ret);
@@ -519,15 +535,24 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
        case CXGB4_STATE_UP:
                printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
                if (!ctx->dev) {
-                       int ret = 0;
+                       int ret;
 
                        ctx->dev = c4iw_alloc(&ctx->lldi);
-                       if (!IS_ERR(ctx->dev))
-                               ret = c4iw_register_device(ctx->dev);
-                       if (IS_ERR(ctx->dev) || ret)
+                       if (IS_ERR(ctx->dev)) {
+                               printk(KERN_ERR MOD
+                                      "%s: initialization failed: %ld\n",
+                                      pci_name(ctx->lldi.pdev),
+                                      PTR_ERR(ctx->dev));
+                               ctx->dev = NULL;
+                               break;
+                       }
+                       ret = c4iw_register_device(ctx->dev);
+                       if (ret) {
                                printk(KERN_ERR MOD
                                       "%s: RDMA registration failed: %d\n",
                                       pci_name(ctx->lldi.pdev), ret);
+                               c4iw_dealloc(ctx);
+                       }
                }
                break;
        case CXGB4_STATE_DOWN:
index c13041a..397cb36 100644 (file)
@@ -42,6 +42,7 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
 {
        struct ib_event event;
        struct c4iw_qp_attributes attrs;
+       unsigned long flag;
 
        if ((qhp->attr.state == C4IW_QP_STATE_ERROR) ||
            (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) {
@@ -72,7 +73,9 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
        if (qhp->ibqp.event_handler)
                (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
 
+       spin_lock_irqsave(&chp->comp_handler_lock, flag);
        (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+       spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
 }
 
 void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -183,11 +186,14 @@ out:
 int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
 {
        struct c4iw_cq *chp;
+       unsigned long flag;
 
        chp = get_chp(dev, qid);
-       if (chp)
+       if (chp) {
+               spin_lock_irqsave(&chp->comp_handler_lock, flag);
                (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
-       else
+               spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+       } else
                PDBG("%s unknown cqid 0x%x\n", __func__, qid);
        return 0;
 }
index 4f04537..1357c5b 100644 (file)
@@ -309,6 +309,7 @@ struct c4iw_cq {
        struct c4iw_dev *rhp;
        struct t4_cq cq;
        spinlock_t lock;
+       spinlock_t comp_handler_lock;
        atomic_t refcnt;
        wait_queue_head_t wait;
 };
@@ -323,6 +324,7 @@ struct c4iw_mpa_attributes {
        u8 recv_marker_enabled;
        u8 xmit_marker_enabled;
        u8 crc_enabled;
+       u8 enhanced_rdma_conn;
        u8 version;
        u8 p2p_type;
 };
@@ -349,6 +351,8 @@ struct c4iw_qp_attributes {
        u8 is_terminate_local;
        struct c4iw_mpa_attributes mpa_attr;
        struct c4iw_ep *llp_stream_handle;
+       u8 layer_etype;
+       u8 ecode;
 };
 
 struct c4iw_qp {
@@ -501,11 +505,18 @@ enum c4iw_mmid_state {
 #define MPA_KEY_REP "MPA ID Rep Frame"
 
 #define MPA_MAX_PRIVATE_DATA   256
+#define MPA_ENHANCED_RDMA_CONN 0x10
 #define MPA_REJECT             0x20
 #define MPA_CRC                        0x40
 #define MPA_MARKERS            0x80
 #define MPA_FLAGS_MASK         0xE0
 
+#define MPA_V2_PEER2PEER_MODEL          0x8000
+#define MPA_V2_ZERO_LEN_FPDU_RTR        0x4000
+#define MPA_V2_RDMA_WRITE_RTR           0x8000
+#define MPA_V2_RDMA_READ_RTR            0x4000
+#define MPA_V2_IRD_ORD_MASK             0x3FFF
+
 #define c4iw_put_ep(ep) { \
        PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
             ep, atomic_read(&((ep)->kref.refcount))); \
@@ -528,6 +539,11 @@ struct mpa_message {
        u8 private_data[0];
 };
 
+struct mpa_v2_conn_params {
+       __be16 ird;
+       __be16 ord;
+};
+
 struct terminate_message {
        u8 layer_etype;
        u8 ecode;
@@ -580,7 +596,10 @@ enum c4iw_ddp_ecodes {
 
 enum c4iw_mpa_ecodes {
        MPA_CRC_ERR             = 0x02,
-       MPA_MARKER_ERR          = 0x03
+       MPA_MARKER_ERR          = 0x03,
+       MPA_LOCAL_CATA          = 0x05,
+       MPA_INSUFF_IRD          = 0x06,
+       MPA_NOMATCH_RTR         = 0x07,
 };
 
 enum c4iw_ep_state {
@@ -651,6 +670,8 @@ struct c4iw_ep {
        u16 txq_idx;
        u16 ctrlq_idx;
        u8 tos;
+       u8 retry_with_mpa_v1;
+       u8 tried_with_mpa_v1;
 };
 
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
index a41578e..d6ccc7e 100644 (file)
@@ -917,7 +917,11 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
        wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
        wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
        term = (struct terminate_message *)wqe->u.terminate.termmsg;
-       build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
+       if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
+               term->layer_etype = qhp->attr.layer_etype;
+               term->ecode = qhp->attr.ecode;
+       } else
+               build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
        c4iw_ofld_send(&qhp->rhp->rdev, skb);
 }
 
@@ -941,8 +945,11 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
        flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&rchp->lock, flag);
-       if (flushed)
+       if (flushed) {
+               spin_lock_irqsave(&rchp->comp_handler_lock, flag);
                (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+               spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+       }
 
        /* locking hierarchy: cq lock first, then qp lock. */
        spin_lock_irqsave(&schp->lock, flag);
@@ -952,13 +959,17 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
        flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&schp->lock, flag);
-       if (flushed)
+       if (flushed) {
+               spin_lock_irqsave(&schp->comp_handler_lock, flag);
                (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+               spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+       }
 }
 
 static void flush_qp(struct c4iw_qp *qhp)
 {
        struct c4iw_cq *rchp, *schp;
+       unsigned long flag;
 
        rchp = get_chp(qhp->rhp, qhp->attr.rcq);
        schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -966,8 +977,16 @@ static void flush_qp(struct c4iw_qp *qhp)
        if (qhp->ibqp.uobject) {
                t4_set_wq_in_error(&qhp->wq);
                t4_set_cq_in_error(&rchp->cq);
-               if (schp != rchp)
+               spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+               (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+               spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+               if (schp != rchp) {
                        t4_set_cq_in_error(&schp->cq);
+                       spin_lock_irqsave(&schp->comp_handler_lock, flag);
+                       (*schp->ibcq.comp_handler)(&schp->ibcq,
+                                       schp->ibcq.cq_context);
+                       spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+               }
                return;
        }
        __flush_qp(qhp, rchp, schp);
@@ -1012,6 +1031,7 @@ out:
 
 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
 {
+       PDBG("%s p2p_type = %d\n", __func__, p2p_type);
        memset(&init->u, 0, sizeof init->u);
        switch (p2p_type) {
        case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1206,12 +1226,16 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                                disconnect = 1;
                                c4iw_get_ep(&qhp->ep->com);
                        }
+                       if (qhp->ibqp.uobject)
+                               t4_set_wq_in_error(&qhp->wq);
                        ret = rdma_fini(rhp, qhp, ep);
                        if (ret)
                                goto err;
                        break;
                case C4IW_QP_STATE_TERMINATE:
                        set_state(qhp, C4IW_QP_STATE_TERMINATE);
+                       qhp->attr.layer_etype = attrs->layer_etype;
+                       qhp->attr.ecode = attrs->ecode;
                        if (qhp->ibqp.uobject)
                                t4_set_wq_in_error(&qhp->wq);
                        ep = qhp->ep;
@@ -1222,6 +1246,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                        break;
                case C4IW_QP_STATE_ERROR:
                        set_state(qhp, C4IW_QP_STATE_ERROR);
+                       if (qhp->ibqp.uobject)
+                               t4_set_wq_in_error(&qhp->wq);
                        if (!internal) {
                                abort = 1;
                                disconnect = 1;
@@ -1334,7 +1360,10 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
        rhp = qhp->rhp;
 
        attrs.next_state = C4IW_QP_STATE_ERROR;
-       c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+       if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
+               c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+       else
+               c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
        wait_event(qhp->wait, !qhp->ep);
 
        remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
index d9b1bb4..818d721 100644 (file)
@@ -125,7 +125,7 @@ int ehca_create_eq(struct ehca_shca *shca,
                tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
 
                ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-                                         IRQF_DISABLED, "ehca_eq",
+                                         0, "ehca_eq",
                                          (void *)shca);
                if (ret < 0)
                        ehca_err(ib_dev, "Can't map interrupt handler.");
@@ -133,7 +133,7 @@ int ehca_create_eq(struct ehca_shca *shca,
                tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
 
                ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-                                         IRQF_DISABLED, "ehca_neq",
+                                         0, "ehca_neq",
                                          (void *)shca);
                if (ret < 0)
                        ehca_err(ib_dev, "Can't map interrupt handler.");
index 32fb342..964f855 100644 (file)
@@ -977,6 +977,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
        struct hcp_modify_qp_control_block *mqpcb;
        u64 hret, update_mask;
 
+       if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+               return ERR_PTR(-ENOSYS);
+
        /* For common attributes, internal_create_qp() takes its info
         * out of qp_init_attr, so copy all common attrs there.
         */
index 7c1eebe..824a4d5 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/slab.h>
+#include <linux/stat.h>
 #include <linux/vmalloc.h>
 
 #include "ipath_kernel.h"
index 386e2c7..2627198 100644 (file)
@@ -107,6 +107,11 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
        u32 sz;
        struct ib_srq *ret;
 
+       if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+               ret = ERR_PTR(-ENOSYS);
+               goto done;
+       }
+
        if (srq_init_attr->attr.max_wr == 0) {
                ret = ERR_PTR(-EINVAL);
                goto done;
index fa643f4..77f3dbc 100644 (file)
@@ -128,6 +128,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
            (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
            (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
                props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+       if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
+               props->device_cap_flags |= IB_DEVICE_XRC;
 
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
@@ -181,8 +183,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
 
 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
                              struct ib_port_attr *props,
+                             struct ib_smp *in_mad,
                              struct ib_smp *out_mad)
 {
+       int ext_active_speed;
+       int err;
+
        props->lid              = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc              = out_mad->data[34] & 0x7;
        props->sm_lid           = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -203,6 +209,39 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
        props->max_vl_num       = out_mad->data[37] >> 4;
        props->init_type_reply  = out_mad->data[41] >> 4;
 
+       /* Check if extended speeds (EDR/FDR/...) are supported */
+       if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+               ext_active_speed = out_mad->data[62] >> 4;
+
+               switch (ext_active_speed) {
+               case 1:
+                       props->active_speed = 16; /* FDR */
+                       break;
+               case 2:
+                       props->active_speed = 32; /* EDR */
+                       break;
+               }
+       }
+
+       /* If reported active speed is QDR, check if is FDR-10 */
+       if (props->active_speed == 4) {
+               if (to_mdev(ibdev)->dev->caps.ext_port_cap[port] &
+                   MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
+                       init_query_mad(in_mad);
+                       in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
+                       in_mad->attr_mod = cpu_to_be32(port);
+
+                       err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port,
+                                          NULL, NULL, in_mad, out_mad);
+                       if (err)
+                               return err;
+
+                       /* Checking LinkSpeedActive for FDR-10 */
+                       if (out_mad->data[15] & 0x1)
+                               props->active_speed = 8;
+               }
+       }
+
        return 0;
 }
 
@@ -227,7 +266,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
        props->pkey_tbl_len     = 1;
        props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
-       props->max_mtu          = IB_MTU_2048;
+       props->max_mtu          = IB_MTU_4096;
        props->subnet_timeout   = 0;
        props->max_vl_num       = out_mad->data[37] >> 4;
        props->init_type_reply  = 0;
@@ -274,7 +313,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
                goto out;
 
        err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
-               ib_link_query_port(ibdev, port, props, out_mad) :
+               ib_link_query_port(ibdev, port, props, in_mad, out_mad) :
                eth_link_query_port(ibdev, port, props, out_mad);
 
 out:
@@ -566,6 +605,57 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
        return 0;
 }
 
+static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
+                                         struct ib_ucontext *context,
+                                         struct ib_udata *udata)
+{
+       struct mlx4_ib_xrcd *xrcd;
+       int err;
+
+       if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+               return ERR_PTR(-ENOSYS);
+
+       xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
+       if (!xrcd)
+               return ERR_PTR(-ENOMEM);
+
+       err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+       if (err)
+               goto err1;
+
+       xrcd->pd = ib_alloc_pd(ibdev);
+       if (IS_ERR(xrcd->pd)) {
+               err = PTR_ERR(xrcd->pd);
+               goto err2;
+       }
+
+       xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+       if (IS_ERR(xrcd->cq)) {
+               err = PTR_ERR(xrcd->cq);
+               goto err3;
+       }
+
+       return &xrcd->ibxrcd;
+
+err3:
+       ib_dealloc_pd(xrcd->pd);
+err2:
+       mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
+err1:
+       kfree(xrcd);
+       return ERR_PTR(err);
+}
+
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+       ib_destroy_cq(to_mxrcd(xrcd)->cq);
+       ib_dealloc_pd(to_mxrcd(xrcd)->pd);
+       mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+       kfree(xrcd);
+
+       return 0;
+}
+
 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
 {
        struct mlx4_ib_qp *mqp = to_mqp(ibqp);
@@ -1044,7 +1134,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
+               (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
+               (1ull << IB_USER_VERBS_CMD_OPEN_QP);
 
        ibdev->ib_dev.query_device      = mlx4_ib_query_device;
        ibdev->ib_dev.query_port        = mlx4_ib_query_port;
@@ -1093,6 +1185,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.unmap_fmr         = mlx4_ib_unmap_fmr;
        ibdev->ib_dev.dealloc_fmr       = mlx4_ib_fmr_dealloc;
 
+       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
+               ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
+               ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+               ibdev->ib_dev.uverbs_cmd_mask |=
+                       (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+                       (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+       }
+
        spin_lock_init(&iboe->lock);
 
        if (init_node_data(ibdev))
index e4bf2cf..ed80345 100644 (file)
@@ -56,6 +56,13 @@ struct mlx4_ib_pd {
        u32                     pdn;
 };
 
+struct mlx4_ib_xrcd {
+       struct ib_xrcd          ibxrcd;
+       u32                     xrcdn;
+       struct ib_pd           *pd;
+       struct ib_cq           *cq;
+};
+
 struct mlx4_ib_cq_buf {
        struct mlx4_buf         buf;
        struct mlx4_mtt         mtt;
@@ -138,6 +145,7 @@ struct mlx4_ib_qp {
        struct mlx4_mtt         mtt;
        int                     buf_size;
        struct mutex            mutex;
+       u16                     xrcdn;
        u32                     flags;
        u8                      port;
        u8                      alt_port;
@@ -211,6 +219,11 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
        return container_of(ibpd, struct mlx4_ib_pd, ibpd);
 }
 
+static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
+{
+       return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd);
+}
+
 static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
 {
        return container_of(ibcq, struct mlx4_ib_cq, ibcq);
index 3a91d9d..a16f0c8 100644 (file)
@@ -302,15 +302,14 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
 }
 
 static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-                      int is_user, int has_srq, struct mlx4_ib_qp *qp)
+                      int is_user, int has_rq, struct mlx4_ib_qp *qp)
 {
        /* Sanity check RQ size before proceeding */
        if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
            cap->max_recv_sge > dev->dev->caps.max_rq_sg)
                return -EINVAL;
 
-       if (has_srq) {
-               /* QPs attached to an SRQ should have no RQ */
+       if (!has_rq) {
                if (cap->max_recv_wr)
                        return -EINVAL;
 
@@ -463,6 +462,14 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
        return 0;
 }
 
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+       if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
+               return 0;
+
+       return !attr->srq;
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
@@ -479,7 +486,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
-       err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
+       err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
        if (err)
                goto err;
 
@@ -513,7 +520,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               if (!init_attr->srq) {
+               if (qp_has_rq(init_attr)) {
                        err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
                                                  ucmd.db_addr, &qp->db);
                        if (err)
@@ -532,7 +539,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err;
 
-               if (!init_attr->srq) {
+               if (qp_has_rq(init_attr)) {
                        err = mlx4_db_alloc(dev->dev, &qp->db, 0);
                        if (err)
                                goto err;
@@ -575,6 +582,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        if (err)
                goto err_qpn;
 
+       if (init_attr->qp_type == IB_QPT_XRC_TGT)
+               qp->mqp.qpn |= (1 << 23);
+
        /*
         * Hardware wants QPN written in big-endian order (after
         * shifting) for send doorbell.  Precompute this value to save
@@ -592,9 +602,8 @@ err_qpn:
 
 err_wrid:
        if (pd->uobject) {
-               if (!init_attr->srq)
-                       mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
-                                             &qp->db);
+               if (qp_has_rq(init_attr))
+                       mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
        } else {
                kfree(qp->sq.wrid);
                kfree(qp->rq.wrid);
@@ -610,7 +619,7 @@ err_buf:
                mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
 
 err_db:
-       if (!pd->uobject && !init_attr->srq)
+       if (!pd->uobject && qp_has_rq(init_attr))
                mlx4_db_free(dev->dev, &qp->db);
 
 err:
@@ -671,6 +680,33 @@ static void del_gid_entries(struct mlx4_ib_qp *qp)
        }
 }
 
+static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
+{
+       if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
+               return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
+       else
+               return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx4_ib_qp *qp,
+                   struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
+{
+       switch (qp->ibqp.qp_type) {
+       case IB_QPT_XRC_TGT:
+               *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
+               *recv_cq = *send_cq;
+               break;
+       case IB_QPT_XRC_INI:
+               *send_cq = to_mcq(qp->ibqp.send_cq);
+               *recv_cq = *send_cq;
+               break;
+       default:
+               *send_cq = to_mcq(qp->ibqp.send_cq);
+               *recv_cq = to_mcq(qp->ibqp.recv_cq);
+               break;
+       }
+}
+
 static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                              int is_user)
 {
@@ -682,8 +718,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                        printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
                               qp->mqp.qpn);
 
-       send_cq = to_mcq(qp->ibqp.send_cq);
-       recv_cq = to_mcq(qp->ibqp.recv_cq);
+       get_cqs(qp, &send_cq, &recv_cq);
 
        mlx4_ib_lock_cqs(send_cq, recv_cq);
 
@@ -706,7 +741,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
        mlx4_mtt_cleanup(dev->dev, &qp->mtt);
 
        if (is_user) {
-               if (!qp->ibqp.srq)
+               if (qp->rq.wqe_cnt)
                        mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
                                              &qp->db);
                ib_umem_release(qp->umem);
@@ -714,7 +749,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                kfree(qp->sq.wrid);
                kfree(qp->rq.wrid);
                mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
-               if (!qp->ibqp.srq)
+               if (qp->rq.wqe_cnt)
                        mlx4_db_free(dev->dev, &qp->db);
        }
 
@@ -725,10 +760,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                struct ib_qp_init_attr *init_attr,
                                struct ib_udata *udata)
 {
-       struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_sqp *sqp;
        struct mlx4_ib_qp *qp;
        int err;
+       u16 xrcdn = 0;
 
        /*
         * We only support LSO and multicast loopback blocking, and
@@ -739,10 +774,20 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                return ERR_PTR(-EINVAL);
 
        if (init_attr->create_flags &&
-           (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+           (udata || init_attr->qp_type != IB_QPT_UD))
                return ERR_PTR(-EINVAL);
 
        switch (init_attr->qp_type) {
+       case IB_QPT_XRC_TGT:
+               pd = to_mxrcd(init_attr->xrcd)->pd;
+               xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+               init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
+               /* fall through */
+       case IB_QPT_XRC_INI:
+               if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+                       return ERR_PTR(-ENOSYS);
+               init_attr->recv_cq = init_attr->send_cq;
+               /* fall through */
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
@@ -751,13 +796,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                if (!qp)
                        return ERR_PTR(-ENOMEM);
 
-               err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
+               err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp);
                if (err) {
                        kfree(qp);
                        return ERR_PTR(err);
                }
 
                qp->ibqp.qp_num = qp->mqp.qpn;
+               qp->xrcdn = xrcdn;
 
                break;
        }
@@ -765,7 +811,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_GSI:
        {
                /* Userspace is not allowed to create special QPs: */
-               if (pd->uobject)
+               if (udata)
                        return ERR_PTR(-EINVAL);
 
                sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
@@ -774,8 +820,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
                qp = &sqp->qp;
 
-               err = create_qp_common(dev, pd, init_attr, udata,
-                                      dev->dev->caps.sqp_start +
+               err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
+                                      to_mdev(pd->device)->dev->caps.sqp_start +
                                       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
                                       init_attr->port_num - 1,
                                       qp);
@@ -801,11 +847,13 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
 {
        struct mlx4_ib_dev *dev = to_mdev(qp->device);
        struct mlx4_ib_qp *mqp = to_mqp(qp);
+       struct mlx4_ib_pd *pd;
 
        if (is_qp0(dev, mqp))
                mlx4_CLOSE_PORT(dev->dev, mqp->port);
 
-       destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+       pd = get_pd(mqp);
+       destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
 
        if (is_sqp(dev, mqp))
                kfree(to_msqp(mqp));
@@ -821,6 +869,8 @@ static int to_mlx4_st(enum ib_qp_type type)
        case IB_QPT_RC:         return MLX4_QP_ST_RC;
        case IB_QPT_UC:         return MLX4_QP_ST_UC;
        case IB_QPT_UD:         return MLX4_QP_ST_UD;
+       case IB_QPT_XRC_INI:
+       case IB_QPT_XRC_TGT:    return MLX4_QP_ST_XRC;
        case IB_QPT_SMI:
        case IB_QPT_GSI:        return MLX4_QP_ST_MLX;
        default:                return -1;
@@ -959,6 +1009,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 {
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
+       struct mlx4_ib_pd *pd;
+       struct mlx4_ib_cq *send_cq, *recv_cq;
        struct mlx4_qp_context *context;
        enum mlx4_qp_optpar optpar = 0;
        int sqd_event;
@@ -1014,8 +1066,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
        context->sq_size_stride |= qp->sq.wqe_shift - 4;
 
-       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
                context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
+               context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+       }
 
        if (qp->ibqp.uobject)
                context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
@@ -1079,8 +1133,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
        }
 
-       context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
-       context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+       pd = get_pd(qp);
+       get_cqs(qp, &send_cq, &recv_cq);
+       context->pd       = cpu_to_be32(pd->pdn);
+       context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
+       context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
+       context->params1  = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
 
        /* Set "fast registration enabled" for all kernel QPs */
        if (!qp->ibqp.uobject)
@@ -1106,8 +1164,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        if (attr_mask & IB_QP_SQ_PSN)
                context->next_send_psn = cpu_to_be32(attr->sq_psn);
 
-       context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
-
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
                if (attr->max_dest_rd_atomic)
                        context->params2 |=
@@ -1130,8 +1186,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        if (attr_mask & IB_QP_RQ_PSN)
                context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
 
-       context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
-
        if (attr_mask & IB_QP_QKEY) {
                context->qkey = cpu_to_be32(attr->qkey);
                optpar |= MLX4_QP_OPTPAR_Q_KEY;
@@ -1140,7 +1194,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        if (ibqp->srq)
                context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
 
-       if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+       if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
                context->db_rec_addr = cpu_to_be64(qp->db.dma);
 
        if (cur_state == IB_QPS_INIT &&
@@ -1225,17 +1279,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
         * entries and reinitialize the QP.
         */
        if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-               mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
+               mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
                                 ibqp->srq ? to_msrq(ibqp->srq): NULL);
-               if (ibqp->send_cq != ibqp->recv_cq)
-                       mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+               if (send_cq != recv_cq)
+                       mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
 
                qp->rq.head = 0;
                qp->rq.tail = 0;
                qp->sq.head = 0;
                qp->sq.tail = 0;
                qp->sq_next_wqe = 0;
-               if (!ibqp->srq)
+               if (qp->rq.wqe_cnt)
                        *qp->db.db  = 0;
        }
 
@@ -1547,14 +1601,13 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
 }
 
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-                            struct ib_send_wr *wr, __be16 *vlan)
+                            struct ib_send_wr *wr)
 {
        memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
        dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
        dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
        dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
        memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
-       *vlan = dseg->vlan;
 }
 
 static void set_mlx_icrc_seg(void *dseg)
@@ -1657,7 +1710,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        __be32 uninitialized_var(lso_hdr_sz);
        __be32 blh;
        int i;
-       __be16 vlan = cpu_to_be16(0xffff);
 
        spin_lock_irqsave(&qp->sq.lock, flags);
 
@@ -1761,7 +1813,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case IB_QPT_UD:
-                       set_datagram_seg(wqe, wr, &vlan);
+                       set_datagram_seg(wqe, wr);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
@@ -1824,11 +1876,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
                                    MLX4_WQE_CTRL_FENCE : 0) | size;
 
-               if (be16_to_cpu(vlan) < 0x1000) {
-                       ctrl->ins_vlan = 1 << 6;
-                       ctrl->vlan_tag = vlan;
-               }
-
                /*
                 * Make sure descriptor is fully written before
                 * setting ownership bit (because HW can start
index 818b7ec..39542f3 100644 (file)
@@ -76,6 +76,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
        struct mlx4_ib_srq *srq;
        struct mlx4_wqe_srq_next_seg *next;
        struct mlx4_wqe_data_seg *scatter;
+       u32 cqn;
+       u16 xrcdn;
        int desc_size;
        int buf_size;
        int err;
@@ -174,12 +176,18 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                }
        }
 
-       err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
+       cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
+               to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+       xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+               to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
+               (u16) dev->dev->caps.reserved_xrcds;
+       err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
                             srq->db.dma, &srq->msrq);
        if (err)
                goto err_wrid;
 
        srq->msrq.event = mlx4_ib_srq_event;
+       srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
 
        if (pd->uobject)
                if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
index 365fe0e..cb9a0b9 100644 (file)
@@ -438,6 +438,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
        struct mthca_srq *srq;
        int err;
 
+       if (init_attr->srq_type != IB_SRQT_BASIC)
+               return ERR_PTR(-ENOSYS);
+
        srq = kmalloc(sizeof *srq, GFP_KERNEL);
        if (!srq)
                return ERR_PTR(-ENOMEM);
index 401b7bb..dfce9ea 100644 (file)
@@ -77,26 +77,19 @@ atomic_t cm_nodes_destroyed;
 atomic_t cm_accel_dropped_pkts;
 atomic_t cm_resets_recvd;
 
-static inline int mini_cm_accelerated(struct nes_cm_core *,
-       struct nes_cm_node *);
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
-       struct nes_vnic *, struct nes_cm_info *);
+static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *);
 static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
-       struct nes_vnic *, u16, void *, struct nes_cm_info *);
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *);
 static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
-       struct nes_cm_node *);
-static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
-       struct nes_cm_node *);
-static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
-       struct sk_buff *);
+static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
 static int mini_cm_dealloc_core(struct nes_cm_core *);
 static int mini_cm_get(struct nes_cm_core *);
 static int mini_cm_set(struct nes_cm_core *, u32, u32);
 
-static void form_cm_frame(struct sk_buff *, struct nes_cm_node *,
-       void *, u32, void *, u32, u8);
+static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8);
 static int add_ref_cm_node(struct nes_cm_node *);
 static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
 
@@ -111,16 +104,14 @@ static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
 static int send_reset(struct nes_cm_node *, struct sk_buff *);
 static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
 static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
-static void process_packet(struct nes_cm_node *, struct sk_buff *,
-       struct nes_cm_core *);
+static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
 
 static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
 static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
 static void cleanup_retrans_entry(struct nes_cm_node *);
 static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *);
 static void free_retrans_entry(struct nes_cm_node *cm_node);
-static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-       struct sk_buff *skb, int optionsize, int passive);
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive);
 
 /* CM event handler functions */
 static void cm_event_connected(struct nes_cm_event *);
@@ -130,6 +121,12 @@ static void cm_event_mpa_req(struct nes_cm_event *);
 static void cm_event_mpa_reject(struct nes_cm_event *);
 static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node);
 
+/* MPA build functions */
+static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8);
+static void build_mpa_v2(struct nes_cm_node *, void *, u8);
+static void build_mpa_v1(struct nes_cm_node *, void *, u8);
+static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
+
 static void print_core(struct nes_cm_core *core);
 
 /* External CM API Interface */
@@ -172,8 +169,8 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
 /**
  * create_event
  */
-static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
-               enum nes_cm_event_type type)
+static struct nes_cm_event *create_event(struct nes_cm_node *  cm_node,
+                                        enum nes_cm_event_type type)
 {
        struct nes_cm_event *event;
 
@@ -195,10 +192,10 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
        event->cm_info.cm_id = cm_node->cm_id;
 
        nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
-               "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
-               cm_node, event, type, event->cm_info.loc_addr,
-               event->cm_info.loc_port, event->cm_info.rem_addr,
-               event->cm_info.rem_port);
+                 "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
+                 cm_node, event, type, event->cm_info.loc_addr,
+                 event->cm_info.loc_port, event->cm_info.rem_addr,
+                 event->cm_info.rem_port);
 
        nes_cm_post_event(event);
        return event;
@@ -210,14 +207,19 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
  */
 static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
 {
+       u8 start_addr = 0;
+       u8 *start_ptr = &start_addr;
+       u8 **start_buff = &start_ptr;
+       u16 buff_len = 0;
+
        if (!skb) {
                nes_debug(NES_DBG_CM, "skb set to NULL\n");
                return -1;
        }
 
        /* send an MPA Request frame */
-       form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
-                       cm_node->mpa_frame_size, SET_ACK);
+       cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST);
+       form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK);
 
        return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
 }
@@ -226,7 +228,11 @@ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
 
 static int send_mpa_reject(struct nes_cm_node *cm_node)
 {
-       struct sk_buff  *skb = NULL;
+       struct sk_buff *skb = NULL;
+       u8 start_addr = 0;
+       u8 *start_ptr = &start_addr;
+       u8 **start_buff = &start_ptr;
+       u16 buff_len = 0;
 
        skb = dev_alloc_skb(MAX_CM_BUFFER);
        if (!skb) {
@@ -235,8 +241,8 @@ static int send_mpa_reject(struct nes_cm_node *cm_node)
        }
 
        /* send an MPA reject frame */
-       form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
-                       cm_node->mpa_frame_size, SET_ACK | SET_FIN);
+       cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY);
+       form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN);
 
        cm_node->state = NES_CM_STATE_FIN_WAIT1;
        return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
@@ -248,24 +254,31 @@ static int send_mpa_reject(struct nes_cm_node *cm_node)
  * IETF MPA frame
  */
 static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
-               u32 len)
+                    u32 len)
 {
-       struct ietf_mpa_frame *mpa_frame;
+       struct ietf_mpa_v1 *mpa_frame;
+       struct ietf_mpa_v2 *mpa_v2_frame;
+       struct ietf_rtr_msg *rtr_msg;
+       int mpa_hdr_len;
+       int priv_data_len;
 
        *type = NES_MPA_REQUEST_ACCEPT;
 
        /* assume req frame is in tcp data payload */
-       if (len < sizeof(struct ietf_mpa_frame)) {
+       if (len < sizeof(struct ietf_mpa_v1)) {
                nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
                return -EINVAL;
        }
 
-       mpa_frame = (struct ietf_mpa_frame *)buffer;
-       cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+       /* points to the beginning of the frame, which could be MPA V1 or V2 */
+       mpa_frame = (struct ietf_mpa_v1 *)buffer;
+       mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+       priv_data_len = ntohs(mpa_frame->priv_data_len);
+
        /* make sure mpa private data len is less than 512 bytes */
-       if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
+       if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
                nes_debug(NES_DBG_CM, "The received Length of Private"
-                       " Data field exceeds 512 octets\n");
+                         " Data field exceeds 512 octets\n");
                return -EINVAL;
        }
        /*
@@ -273,11 +286,22 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
         * received MPA version and MPA key information
         *
         */
-       if (mpa_frame->rev != mpa_version) {
+       if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+               nes_debug(NES_DBG_CM, "The received mpa version"
+                         " is not supported\n");
+               return -EINVAL;
+       }
+       /*
+       * backwards compatibility only
+       */
+       if (mpa_frame->rev > cm_node->mpa_frame_rev) {
                nes_debug(NES_DBG_CM, "The received mpa version"
-                               " can not be interoperated\n");
+                       " can not be interoperated\n");
                return -EINVAL;
+       } else {
+               cm_node->mpa_frame_rev = mpa_frame->rev;
        }
+
        if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
                if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
                        nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
@@ -290,25 +314,75 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
                }
        }
 
-       if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
+
+       if (priv_data_len + mpa_hdr_len != len) {
                nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
-                               " complete (%x + %x != %x)\n",
-                               cm_node->mpa_frame_size,
-                               (u32)sizeof(struct ietf_mpa_frame), len);
+                       " complete (%x + %x != %x)\n",
+                       priv_data_len, mpa_hdr_len, len);
                return -EINVAL;
        }
        /* make sure it does not exceed the max size */
        if (len > MAX_CM_BUFFER) {
                nes_debug(NES_DBG_CM, "The received ietf buffer was too large"
-                               " (%x + %x != %x)\n",
-                               cm_node->mpa_frame_size,
-                               (u32)sizeof(struct ietf_mpa_frame), len);
+                       " (%x + %x != %x)\n",
+                       priv_data_len, mpa_hdr_len, len);
                return -EINVAL;
        }
 
+       cm_node->mpa_frame_size = priv_data_len;
+
+       switch (mpa_frame->rev) {
+       case IETF_MPA_V2: {
+               u16 ird_size;
+               u16 ord_size;
+               mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
+               mpa_hdr_len += IETF_RTR_MSG_SIZE;
+               cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE;
+               rtr_msg = &mpa_v2_frame->rtr_msg;
+
+               /* parse rtr message */
+               rtr_msg->ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+               rtr_msg->ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+               ird_size = rtr_msg->ctrl_ird & IETF_NO_IRD_ORD;
+               ord_size = rtr_msg->ctrl_ord & IETF_NO_IRD_ORD;
+
+               if (!(rtr_msg->ctrl_ird & IETF_PEER_TO_PEER)) {
+                       /* send reset */
+                       return -EINVAL;
+               }
+
+               if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+                       /* responder */
+                       if (cm_node->ord_size > ird_size)
+                               cm_node->ord_size = ird_size;
+               } else {
+                       /* initiator */
+                       if (cm_node->ord_size > ird_size)
+                               cm_node->ord_size = ird_size;
+
+                       if (cm_node->ird_size < ord_size) {
+                               /* no resources available */
+                               /* send terminate message */
+                               return -EINVAL;
+                       }
+               }
+
+               if (rtr_msg->ctrl_ord & IETF_RDMA0_READ) {
+                       cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+               } else if (rtr_msg->ctrl_ord & IETF_RDMA0_WRITE) {
+                       cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+               } else {        /* Not supported RDMA0 operation */
+                       return -EINVAL;
+               }
+               break;
+       }
+       case IETF_MPA_V1:
+       default:
+               break;
+       }
+
        /* copy entire MPA frame to our cm_node's frame */
-       memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame),
-                       cm_node->mpa_frame_size);
+       memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size);
 
        if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
                *type = NES_MPA_REQUEST_REJECT;
@@ -321,8 +395,8 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
  * node info to build.
  */
 static void form_cm_frame(struct sk_buff *skb,
-       struct nes_cm_node *cm_node, void *options, u32 optionsize,
-       void *data, u32 datasize, u8 flags)
+                         struct nes_cm_node *cm_node, void *options, u32 optionsize,
+                         void *data, u32 datasize, u8 flags)
 {
        struct tcphdr *tcph;
        struct iphdr *iph;
@@ -331,14 +405,14 @@ static void form_cm_frame(struct sk_buff *skb,
        u16 packetsize = sizeof(*iph);
 
        packetsize += sizeof(*tcph);
-       packetsize +=  optionsize + datasize;
+       packetsize += optionsize + datasize;
 
+       skb_trim(skb, 0);
        memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
 
-       skb->len = 0;
        buf = skb_put(skb, packetsize + ETH_HLEN);
 
-       ethh = (struct ethhdr *) buf;
+       ethh = (struct ethhdr *)buf;
        buf += ETH_HLEN;
 
        iph = (struct iphdr *)buf;
@@ -346,7 +420,7 @@ static void form_cm_frame(struct sk_buff *skb,
        tcph = (struct tcphdr *)buf;
        skb_reset_mac_header(skb);
        skb_set_network_header(skb, ETH_HLEN);
-       skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph));
+       skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph));
        buf += sizeof(*tcph);
 
        skb->ip_summed = CHECKSUM_PARTIAL;
@@ -359,14 +433,14 @@ static void form_cm_frame(struct sk_buff *skb,
        ethh->h_proto = htons(0x0800);
 
        iph->version = IPVERSION;
-       iph->ihl = 5;           /* 5 * 4Byte words, IP headr len */
+       iph->ihl = 5;           /* 5 * 4Byte words, IP headr len */
        iph->tos = 0;
        iph->tot_len = htons(packetsize);
        iph->id = htons(++cm_node->tcp_cntxt.loc_id);
 
        iph->frag_off = htons(0x4000);
        iph->ttl = 0x40;
-       iph->protocol = 0x06;   /* IPPROTO_TCP */
+       iph->protocol = 0x06;   /* IPPROTO_TCP */
 
        iph->saddr = htonl(cm_node->loc_addr);
        iph->daddr = htonl(cm_node->rem_addr);
@@ -379,14 +453,16 @@ static void form_cm_frame(struct sk_buff *skb,
                cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
                tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
                tcph->ack = 1;
-       } else
+       } else {
                tcph->ack_seq = 0;
+       }
 
        if (flags & SET_SYN) {
                cm_node->tcp_cntxt.loc_seq_num++;
                tcph->syn = 1;
-       } else
+       } else {
                cm_node->tcp_cntxt.loc_seq_num += datasize;
+       }
 
        if (flags & SET_FIN) {
                cm_node->tcp_cntxt.loc_seq_num++;
@@ -407,10 +483,8 @@ static void form_cm_frame(struct sk_buff *skb,
 
        skb_shinfo(skb)->nr_frags = 0;
        cm_packets_created++;
-
 }
 
-
 /**
  * print_core - dump a cm core
  */
@@ -422,7 +496,7 @@ static void print_core(struct nes_cm_core *core)
                return;
        nes_debug(NES_DBG_CM, "---------------------------------------------\n");
 
-       nes_debug(NES_DBG_CM, "State         : %u \n",  core->state);
+       nes_debug(NES_DBG_CM, "State         : %u \n", core->state);
 
        nes_debug(NES_DBG_CM, "Listen Nodes  : %u \n", atomic_read(&core->listen_node_cnt));
        nes_debug(NES_DBG_CM, "Active Nodes  : %u \n", atomic_read(&core->node_cnt));
@@ -432,6 +506,147 @@ static void print_core(struct nes_cm_core *core)
        nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
 }
 
+/**
+ * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
+ */
+static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff,
+                             u16 *buff_len, u8 *pci_mem, u8 mpa_key)
+{
+       int ret = 0;
+
+       *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0];
+
+       switch (cm_node->mpa_frame_rev) {
+       case IETF_MPA_V1:
+               *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg);
+               *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size;
+               build_mpa_v1(cm_node, *start_buff, mpa_key);
+               break;
+       case IETF_MPA_V2:
+               *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size;
+               build_mpa_v2(cm_node, *start_buff, mpa_key);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+       return ret;
+}
+
+/**
+ * build_mpa_v2 - build a MPA V2 frame
+ */
+static void build_mpa_v2(struct nes_cm_node *cm_node,
+                        void *start_addr, u8 mpa_key)
+{
+       struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
+       struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+
+       /* initialize the upper 5 bytes of the frame */
+       build_mpa_v1(cm_node, start_addr, mpa_key);
+       mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */
+       mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
+
+       /* initialize RTR msg */
+       rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+                           IETF_NO_IRD_ORD : cm_node->ird_size;
+       rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+                           IETF_NO_IRD_ORD : cm_node->ord_size;
+
+       rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
+       rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+
+       switch (mpa_key) {
+       case MPA_KEY_REQUEST:
+               rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+               rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+               break;
+       case MPA_KEY_REPLY:
+               switch (cm_node->send_rdma0_op) {
+               case SEND_RDMA_WRITE_ZERO:
+                       rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+                       break;
+               case SEND_RDMA_READ_ZERO:
+                       rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+                       break;
+               }
+       }
+       rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
+       rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+}
+
+/**
+ * build_mpa_v1 - build a MPA V1 frame
+ */
+static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key)
+{
+       struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
+
+       switch (mpa_key) {
+       case MPA_KEY_REQUEST:
+               memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+               break;
+       case MPA_KEY_REPLY:
+               memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+               break;
+       }
+       mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+       mpa_frame->rev = cm_node->mpa_frame_rev;
+       mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size);
+}
+
+static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr)
+{
+       u64 u64temp;
+       struct nes_qp *nesqp = *nesqp_addr;
+       struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
+
+       u64temp = (unsigned long)nesqp;
+       u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
+       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
+
+       wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
+       wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
+
+       switch (cm_node->send_rdma0_op) {
+       case SEND_RDMA_WRITE_ZERO:
+               nes_debug(NES_DBG_CM, "Sending first write.\n");
+               wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+                       cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
+               wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
+               wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
+               wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+               break;
+
+       case SEND_RDMA_READ_ZERO:
+       default:
+               if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) {
+                       printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n",
+                                __func__, __LINE__, cm_node->send_rdma0_op);
+                       WARN_ON(1);
+               }
+               nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
+               wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+                       cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
+               wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1;
+               wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0;
+               wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0;
+               wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1;
+               wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1;
+               break;
+       }
+
+       if (nesqp->sq_kmapped) {
+               nesqp->sq_kmapped = 0;
+               kunmap(nesqp->page);
+       }
+
+       /*use the reserved spot on the WQ for the extra first WQE*/
+       nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+                                                            NES_QPCONTEXT_ORDIRD_WRPDU |
+                                                            NES_QPCONTEXT_ORDIRD_ALSMM));
+       nesqp->skip_lsmm = 1;
+       nesqp->hwqp.sq_tail = 0;
+}
 
 /**
  * schedule_nes_timer
@@ -439,10 +654,10 @@ static void print_core(struct nes_cm_core *core)
  *                     rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node);
  */
 int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
-               enum nes_timer_type type, int send_retrans,
-               int close_when_complete)
+                      enum nes_timer_type type, int send_retrans,
+                      int close_when_complete)
 {
-       unsigned long  flags;
+       unsigned long flags;
        struct nes_cm_core *cm_core = cm_node->cm_core;
        struct nes_timer_entry *new_send;
        int ret = 0;
@@ -463,7 +678,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
        new_send->close_when_complete = close_when_complete;
 
        if (type == NES_TIMER_TYPE_CLOSE) {
-               new_send->timetosend += (HZ/10);
+               new_send->timetosend += (HZ / 10);
                if (cm_node->recv_entry) {
                        kfree(new_send);
                        WARN_ON(1);
@@ -484,7 +699,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
                ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
                if (ret != NETDEV_TX_OK) {
                        nes_debug(NES_DBG_CM, "Error sending packet %p "
-                               "(jiffies = %lu)\n", new_send, jiffies);
+                                 "(jiffies = %lu)\n", new_send, jiffies);
                        new_send->timetosend = jiffies;
                        ret = NETDEV_TX_OK;
                } else {
@@ -513,6 +728,7 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
        struct iw_cm_id *cm_id = cm_node->cm_id;
        enum nes_cm_node_state state = cm_node->state;
        cm_node->state = NES_CM_STATE_CLOSED;
+
        switch (state) {
        case NES_CM_STATE_SYN_RCVD:
        case NES_CM_STATE_CLOSING:
@@ -545,10 +761,10 @@ static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
                spin_lock_irqsave(&nesqp->lock, qplockflags);
                if (nesqp->cm_id) {
                        nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
-                               "refcount = %d: HIT A "
-                               "NES_TIMER_TYPE_CLOSE with something "
-                               "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
-                               atomic_read(&nesqp->refcount));
+                                 "refcount = %d: HIT A "
+                                 "NES_TIMER_TYPE_CLOSE with something "
+                                 "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+                                 atomic_read(&nesqp->refcount));
                        nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
                        nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
                        nesqp->ibqp_state = IB_QPS_ERR;
@@ -557,10 +773,10 @@ static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
                } else {
                        spin_unlock_irqrestore(&nesqp->lock, qplockflags);
                        nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
-                               "refcount = %d: HIT A "
-                               "NES_TIMER_TYPE_CLOSE with nothing "
-                               "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
-                               atomic_read(&nesqp->refcount));
+                                 "refcount = %d: HIT A "
+                                 "NES_TIMER_TYPE_CLOSE with nothing "
+                                 "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+                                 atomic_read(&nesqp->refcount));
                }
        } else if (rem_node) {
                /* TIME_WAIT state */
@@ -589,11 +805,12 @@ static void nes_cm_timer_tick(unsigned long pass)
        int ret = NETDEV_TX_OK;
 
        struct list_head timer_list;
+
        INIT_LIST_HEAD(&timer_list);
        spin_lock_irqsave(&cm_core->ht_lock, flags);
 
        list_for_each_safe(list_node, list_core_temp,
-                               &cm_core->connected_nodes) {
+                          &cm_core->connected_nodes) {
                cm_node = container_of(list_node, struct nes_cm_node, list);
                if ((cm_node->recv_entry) || (cm_node->send_entry)) {
                        add_ref_cm_node(cm_node);
@@ -604,18 +821,19 @@ static void nes_cm_timer_tick(unsigned long pass)
 
        list_for_each_safe(list_node, list_core_temp, &timer_list) {
                cm_node = container_of(list_node, struct nes_cm_node,
-                                       timer_entry);
+                                      timer_entry);
                recv_entry = cm_node->recv_entry;
 
                if (recv_entry) {
                        if (time_after(recv_entry->timetosend, jiffies)) {
                                if (nexttimeout > recv_entry->timetosend ||
-                                               !settimer) {
+                                   !settimer) {
                                        nexttimeout = recv_entry->timetosend;
                                        settimer = 1;
                                }
-                       } else
+                       } else {
                                handle_recv_entry(cm_node, 1);
+                       }
                }
 
                spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
@@ -626,8 +844,8 @@ static void nes_cm_timer_tick(unsigned long pass)
                        if (time_after(send_entry->timetosend, jiffies)) {
                                if (cm_node->state != NES_CM_STATE_TSA) {
                                        if ((nexttimeout >
-                                               send_entry->timetosend) ||
-                                               !settimer) {
+                                            send_entry->timetosend) ||
+                                           !settimer) {
                                                nexttimeout =
                                                        send_entry->timetosend;
                                                settimer = 1;
@@ -639,13 +857,13 @@ static void nes_cm_timer_tick(unsigned long pass)
                        }
 
                        if ((cm_node->state == NES_CM_STATE_TSA) ||
-                               (cm_node->state == NES_CM_STATE_CLOSED)) {
+                           (cm_node->state == NES_CM_STATE_CLOSED)) {
                                free_retrans_entry(cm_node);
                                break;
                        }
 
                        if (!send_entry->retranscount ||
-                               !send_entry->retrycount) {
+                           !send_entry->retrycount) {
                                cm_packets_dropped++;
                                free_retrans_entry(cm_node);
 
@@ -654,28 +872,28 @@ static void nes_cm_timer_tick(unsigned long pass)
                                nes_retrans_expired(cm_node);
                                cm_node->state = NES_CM_STATE_CLOSED;
                                spin_lock_irqsave(&cm_node->retrans_list_lock,
-                                       flags);
+                                                 flags);
                                break;
                        }
                        atomic_inc(&send_entry->skb->users);
                        cm_packets_retrans++;
                        nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
-                               "for node %p, jiffies = %lu, time to send = "
-                               "%lu, retranscount = %u, send_entry->seq_num = "
-                               "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
-                               "0x%08X\n", send_entry, cm_node, jiffies,
-                               send_entry->timetosend,
-                               send_entry->retranscount,
-                               send_entry->seq_num,
-                               cm_node->tcp_cntxt.rem_ack_num);
+                                 "for node %p, jiffies = %lu, time to send = "
+                                 "%lu, retranscount = %u, send_entry->seq_num = "
+                                 "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
+                                 "0x%08X\n", send_entry, cm_node, jiffies,
+                                 send_entry->timetosend,
+                                 send_entry->retranscount,
+                                 send_entry->seq_num,
+                                 cm_node->tcp_cntxt.rem_ack_num);
 
                        spin_unlock_irqrestore(&cm_node->retrans_list_lock,
-                               flags);
+                                              flags);
                        ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
                        spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
                        if (ret != NETDEV_TX_OK) {
                                nes_debug(NES_DBG_CM, "rexmit failed for "
-                                       "node=%p\n", cm_node);
+                                         "node=%p\n", cm_node);
                                cm_packets_bounced++;
                                send_entry->retrycount--;
                                nexttimeout = jiffies + NES_SHORT_TIME;
@@ -685,18 +903,18 @@ static void nes_cm_timer_tick(unsigned long pass)
                                cm_packets_sent++;
                        }
                        nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
-                               "%u, retry count = %u.\n",
-                               send_entry->retranscount,
-                               send_entry->retrycount);
+                                 "%u, retry count = %u.\n",
+                                 send_entry->retranscount,
+                                 send_entry->retrycount);
                        if (send_entry->send_retrans) {
                                send_entry->retranscount--;
                                timetosend = (NES_RETRY_TIMEOUT <<
-                                       (NES_DEFAULT_RETRANS - send_entry->retranscount));
+                                             (NES_DEFAULT_RETRANS - send_entry->retranscount));
 
                                send_entry->timetosend = jiffies +
-                                       min(timetosend, NES_MAX_TIMEOUT);
+                                                        min(timetosend, NES_MAX_TIMEOUT);
                                if (nexttimeout > send_entry->timetosend ||
-                                       !settimer) {
+                                   !settimer) {
                                        nexttimeout = send_entry->timetosend;
                                        settimer = 1;
                                }
@@ -705,11 +923,11 @@ static void nes_cm_timer_tick(unsigned long pass)
                                close_when_complete =
                                        send_entry->close_when_complete;
                                nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
-                                       cm_node, cm_node->state);
+                                         cm_node, cm_node->state);
                                free_retrans_entry(cm_node);
                                if (close_when_complete)
                                        rem_ref_cm_node(cm_node->cm_core,
-                                               cm_node);
+                                                       cm_node);
                        }
                } while (0);
 
@@ -719,7 +937,7 @@ static void nes_cm_timer_tick(unsigned long pass)
 
        if (settimer) {
                if (!timer_pending(&cm_core->tcp_timer)) {
-                       cm_core->tcp_timer.expires  = nexttimeout;
+                       cm_core->tcp_timer.expires = nexttimeout;
                        add_timer(&cm_core->tcp_timer);
                }
        }
@@ -730,13 +948,13 @@ static void nes_cm_timer_tick(unsigned long pass)
  * send_syn
  */
 static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
-       struct sk_buff *skb)
+                   struct sk_buff *skb)
 {
        int ret;
        int flags = SET_SYN;
        char optionsbuffer[sizeof(struct option_mss) +
-               sizeof(struct option_windowscale) + sizeof(struct option_base) +
-               TCP_OPTIONS_PADDING];
+                          sizeof(struct option_windowscale) + sizeof(struct option_base) +
+                          TCP_OPTIONS_PADDING];
 
        int optionssize = 0;
        /* Sending MSS option */
@@ -863,7 +1081,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
  * find_node - find a cm node that matches the reference cm node
  */
 static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
-               u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
+                                    u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
 {
        unsigned long flags;
        struct list_head *hte;
@@ -877,12 +1095,12 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
        list_for_each_entry(cm_node, hte, list) {
                /* compare quad, return node handle if a match */
                nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
-                               cm_node->loc_addr, cm_node->loc_port,
-                               loc_addr, loc_port,
-                               cm_node->rem_addr, cm_node->rem_port,
-                               rem_addr, rem_port);
+                         cm_node->loc_addr, cm_node->loc_port,
+                         loc_addr, loc_port,
+                         cm_node->rem_addr, cm_node->rem_port,
+                         rem_addr, rem_port);
                if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
-                               (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+                   (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
                        add_ref_cm_node(cm_node);
                        spin_unlock_irqrestore(&cm_core->ht_lock, flags);
                        return cm_node;
@@ -899,7 +1117,7 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
  * find_listener - find a cm node listening on this addr-port pair
  */
 static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
-               nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+                                            nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
 {
        unsigned long flags;
        struct nes_cm_listener *listen_node;
@@ -909,9 +1127,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
        list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
                /* compare node pair, return node handle if a match */
                if (((listen_node->loc_addr == dst_addr) ||
-                               listen_node->loc_addr == 0x00000000) &&
-                               (listen_node->loc_port == dst_port) &&
-                               (listener_state & listen_node->listener_state)) {
+                    listen_node->loc_addr == 0x00000000) &&
+                   (listen_node->loc_port == dst_port) &&
+                   (listener_state & listen_node->listener_state)) {
                        atomic_inc(&listen_node->ref_count);
                        spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
                        return listen_node;
@@ -936,7 +1154,7 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
                return -EINVAL;
 
        nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
-               cm_node);
+                 cm_node);
 
        spin_lock_irqsave(&cm_core->ht_lock, flags);
 
@@ -955,7 +1173,7 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
  * mini_cm_dec_refcnt_listen
  */
 static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
-       struct nes_cm_listener *listener, int free_hanging_nodes)
+                                    struct nes_cm_listener *listener, int free_hanging_nodes)
 {
        int ret = -EINVAL;
        int err = 0;
@@ -966,8 +1184,8 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
        struct list_head reset_list;
 
        nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
-               "refcnt=%d\n", listener, free_hanging_nodes,
-               atomic_read(&listener->ref_count));
+                 "refcnt=%d\n", listener, free_hanging_nodes,
+                 atomic_read(&listener->ref_count));
        /* free non-accelerated child nodes for this listener */
        INIT_LIST_HEAD(&reset_list);
        if (free_hanging_nodes) {
@@ -975,7 +1193,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
                list_for_each_safe(list_pos, list_temp,
                                   &g_cm_core->connected_nodes) {
                        cm_node = container_of(list_pos, struct nes_cm_node,
-                               list);
+                                              list);
                        if ((cm_node->listener == listener) &&
                            (!cm_node->accelerated)) {
                                add_ref_cm_node(cm_node);
@@ -987,7 +1205,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
 
        list_for_each_safe(list_pos, list_temp, &reset_list) {
                cm_node = container_of(list_pos, struct nes_cm_node,
-                               reset_entry);
+                                      reset_entry);
                {
                        struct nes_cm_node *loopback = cm_node->loopbackpartner;
                        enum nes_cm_node_state old_state;
@@ -999,7 +1217,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
                                        err = send_reset(cm_node, NULL);
                                        if (err) {
                                                cm_node->state =
-                                                        NES_CM_STATE_CLOSED;
+                                                       NES_CM_STATE_CLOSED;
                                                WARN_ON(1);
                                        } else {
                                                old_state = cm_node->state;
@@ -1044,10 +1262,9 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
 
                spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
-               if (listener->nesvnic) {
+               if (listener->nesvnic)
                        nes_manage_apbvt(listener->nesvnic, listener->loc_port,
-                                       PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
-               }
+                                        PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
 
                nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
 
@@ -1061,8 +1278,8 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
        if (listener) {
                if (atomic_read(&listener->pend_accepts_cnt) > 0)
                        nes_debug(NES_DBG_CM, "destroying listener (%p)"
-                                       " with non-zero pending accepts=%u\n",
-                                       listener, atomic_read(&listener->pend_accepts_cnt));
+                                 " with non-zero pending accepts=%u\n",
+                                 listener, atomic_read(&listener->pend_accepts_cnt));
        }
 
        return ret;
@@ -1073,7 +1290,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
  * mini_cm_del_listen
  */
 static int mini_cm_del_listen(struct nes_cm_core *cm_core,
-               struct nes_cm_listener *listener)
+                             struct nes_cm_listener *listener)
 {
        listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
        listener->cm_id = NULL; /* going to be destroyed pretty soon */
@@ -1085,9 +1302,10 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
  * mini_cm_accelerated
  */
 static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
-               struct nes_cm_node *cm_node)
+                                     struct nes_cm_node *cm_node)
 {
        u32 was_timer_set;
+
        cm_node->accelerated = 1;
 
        if (cm_node->accept_pend) {
@@ -1121,7 +1339,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
        rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0);
        if (IS_ERR(rt)) {
                printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
-                               __func__, dst_ip);
+                      __func__, dst_ip);
                return rc;
        }
 
@@ -1139,7 +1357,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 
                        if (arpindex >= 0) {
                                if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
-                                                       neigh->ha, ETH_ALEN)){
+                                           neigh->ha, ETH_ALEN)) {
                                        /* Mac address same as in nes_arp_table */
                                        neigh_release(neigh);
                                        ip_rt_put(rt);
@@ -1147,8 +1365,8 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
                                }
 
                                nes_manage_arp_cache(nesvnic->netdev,
-                                               nesadapter->arp_table[arpindex].mac_addr,
-                                               dst_ip, NES_ARP_DELETE);
+                                                    nesadapter->arp_table[arpindex].mac_addr,
+                                                    dst_ip, NES_ARP_DELETE);
                        }
 
                        nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
@@ -1170,8 +1388,8 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
  * make_cm_node - create a new instance of a cm node
  */
 static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
-               struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
-               struct nes_cm_listener *listener)
+                                       struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
+                                       struct nes_cm_listener *listener)
 {
        struct nes_cm_node *cm_node;
        struct timespec ts;
@@ -1190,7 +1408,12 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        cm_node->rem_addr = cm_info->rem_addr;
        cm_node->loc_port = cm_info->loc_port;
        cm_node->rem_port = cm_info->rem_port;
-       cm_node->send_write0 = send_first;
+
+       cm_node->mpa_frame_rev = mpa_version;
+       cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+       cm_node->ird_size = IETF_NO_IRD_ORD;
+       cm_node->ord_size = IETF_NO_IRD_ORD;
+
        nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
                  &cm_node->loc_addr, cm_node->loc_port,
                  &cm_node->rem_addr, cm_node->rem_port);
@@ -1200,7 +1423,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
 
        nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
-                       cm_node->cm_id);
+                 cm_node->cm_id);
 
        spin_lock_init(&cm_node->retrans_list_lock);
 
@@ -1211,11 +1434,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
        cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
        cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
-                       NES_CM_DEFAULT_RCV_WND_SCALE;
+                                    NES_CM_DEFAULT_RCV_WND_SCALE;
        ts = current_kernel_time();
        cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
        cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
-                       sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
+                                sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
        cm_node->tcp_cntxt.rcv_nxt = 0;
        /* get a unique session ID , add thread_id to an upcounter to handle race */
        atomic_inc(&cm_core->node_cnt);
@@ -1231,12 +1454,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        cm_node->loopbackpartner = NULL;
 
        /* get the mac addr for the remote node */
-       if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
+       if (ipv4_is_loopback(htonl(cm_node->rem_addr))) {
                arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
-       else {
+       } else {
                oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
                arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
-
        }
        if (arpindex < 0) {
                kfree(cm_node);
@@ -1269,7 +1491,7 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node)
  * rem_ref_cm_node - destroy an instance of a cm node
  */
 static int rem_ref_cm_node(struct nes_cm_core *cm_core,
-       struct nes_cm_node *cm_node)
+                          struct nes_cm_node *cm_node)
 {
        unsigned long flags;
        struct nes_qp *nesqp;
@@ -1300,9 +1522,9 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
        } else {
                if (cm_node->apbvt_set && cm_node->nesvnic) {
                        nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
-                               PCI_FUNC(
-                               cm_node->nesvnic->nesdev->pcidev->devfn),
-                               NES_MANAGE_APBVT_DEL);
+                                        PCI_FUNC(
+                                                cm_node->nesvnic->nesdev->pcidev->devfn),
+                                        NES_MANAGE_APBVT_DEL);
                }
        }
 
@@ -1323,7 +1545,7 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
  * process_options
  */
 static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
-       u32 optionsize, u32 syn_packet)
+                          u32 optionsize, u32 syn_packet)
 {
        u32 tmp;
        u32 offset = 0;
@@ -1341,15 +1563,15 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
                        continue;
                case OPTION_NUMBER_MSS:
                        nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
-                               "Size: %d\n", __func__,
-                               all_options->as_mss.length, offset, optionsize);
+                                 "Size: %d\n", __func__,
+                                 all_options->as_mss.length, offset, optionsize);
                        got_mss_option = 1;
                        if (all_options->as_mss.length != 4) {
                                return 1;
                        } else {
                                tmp = ntohs(all_options->as_mss.mss);
                                if (tmp > 0 && tmp <
-                                       cm_node->tcp_cntxt.mss)
+                                   cm_node->tcp_cntxt.mss)
                                        cm_node->tcp_cntxt.mss = tmp;
                        }
                        break;
@@ -1357,12 +1579,9 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
                        cm_node->tcp_cntxt.snd_wscale =
                                all_options->as_windowscale.shiftcount;
                        break;
-               case OPTION_NUMBER_WRITE0:
-                       cm_node->send_write0 = 1;
-                       break;
                default:
                        nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
-                               all_options->as_base.optionnum);
+                                 all_options->as_base.optionnum);
                        break;
                }
                offset += all_options->as_base.length;
@@ -1381,8 +1600,8 @@ static void drop_packet(struct sk_buff *skb)
 static void handle_fin_pkt(struct nes_cm_node *cm_node)
 {
        nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
-               "refcnt=%d\n", cm_node, cm_node->state,
-               atomic_read(&cm_node->ref_count));
+                 "refcnt=%d\n", cm_node, cm_node->state,
+                 atomic_read(&cm_node->ref_count));
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_RCVD:
        case NES_CM_STATE_SYN_SENT:
@@ -1448,7 +1667,20 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
                        "listener=%p state=%d\n", __func__, __LINE__, cm_node,
                        cm_node->listener, cm_node->state);
-               active_open_err(cm_node, skb, reset);
+               switch (cm_node->mpa_frame_rev) {
+               case IETF_MPA_V2:
+                       cm_node->mpa_frame_rev = IETF_MPA_V1;
+                       /* send a syn and goto syn sent state */
+                       cm_node->state = NES_CM_STATE_SYN_SENT;
+                       if (send_syn(cm_node, 0, NULL)) {
+                               active_open_err(cm_node, skb, reset);
+                       }
+                       break;
+               case IETF_MPA_V1:
+               default:
+                       active_open_err(cm_node, skb, reset);
+                       break;
+               }
                break;
        case NES_CM_STATE_MPAREQ_RCVD:
                atomic_inc(&cm_node->passive_state);
@@ -1484,21 +1716,21 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 
 static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
 {
-
-       int     ret = 0;
+       int ret = 0;
        int datasize = skb->len;
        u8 *dataloc = skb->data;
 
        enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN;
-       u32     res_type;
+       u32 res_type;
+
        ret = parse_mpa(cm_node, dataloc, &res_type, datasize);
        if (ret) {
                nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
                if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) {
                        nes_debug(NES_DBG_CM, "%s[%u] create abort for "
-                               "cm_node=%p listener=%p state=%d\n", __func__,
-                               __LINE__, cm_node, cm_node->listener,
-                               cm_node->state);
+                                 "cm_node=%p listener=%p state=%d\n", __func__,
+                                 __LINE__, cm_node, cm_node->listener,
+                                 cm_node->state);
                        active_open_err(cm_node, skb, 1);
                } else {
                        passive_open_err(cm_node, skb, 1);
@@ -1508,16 +1740,15 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
 
        switch (cm_node->state) {
        case NES_CM_STATE_ESTABLISHED:
-               if (res_type == NES_MPA_REQUEST_REJECT) {
+               if (res_type == NES_MPA_REQUEST_REJECT)
                        /*BIG problem as we are receiving the MPA.. So should
-                       * not be REJECT.. This is Passive Open.. We can
-                       * only receive it Reject for Active Open...*/
+                        * not be REJECT.. This is Passive Open.. We can
+                        * only receive it Reject for Active Open...*/
                        WARN_ON(1);
-               }
                cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
                type = NES_CM_EVENT_MPA_REQ;
                atomic_set(&cm_node->passive_state,
-                               NES_PASSIVE_STATE_INDICATED);
+                          NES_PASSIVE_STATE_INDICATED);
                break;
        case NES_CM_STATE_MPAREQ_SENT:
                cleanup_retrans_entry(cm_node);
@@ -1544,8 +1775,8 @@ static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
        case NES_CM_STATE_SYN_SENT:
        case NES_CM_STATE_MPAREQ_SENT:
                nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
-                       "listener=%p state=%d\n", __func__, __LINE__, cm_node,
-                       cm_node->listener, cm_node->state);
+                         "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+                         cm_node->listener, cm_node->state);
                active_open_err(cm_node, skb, 1);
                break;
        case NES_CM_STATE_ESTABLISHED:
@@ -1559,11 +1790,11 @@ static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
 }
 
 static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-       struct sk_buff *skb)
+                    struct sk_buff *skb)
 {
        int err;
 
-       err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1;
+       err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1;
        if (err)
                active_open_err(cm_node, skb, 1);
 
@@ -1571,7 +1802,7 @@ static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
 }
 
 static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-       struct sk_buff *skb)
+                    struct sk_buff *skb)
 {
        int err = 0;
        u32 seq;
@@ -1579,21 +1810,22 @@ static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
        u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
        u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
        u32 rcv_wnd;
+
        seq = ntohl(tcph->seq);
        ack_seq = ntohl(tcph->ack_seq);
        rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
        if (ack_seq != loc_seq_num)
                err = 1;
-       else if (!between(seq, rcv_nxt, (rcv_nxt+rcv_wnd)))
+       else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
                err = 1;
        if (err) {
                nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
-                       "listener=%p state=%d\n", __func__, __LINE__, cm_node,
-                       cm_node->listener, cm_node->state);
+                         "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+                         cm_node->listener, cm_node->state);
                indicate_pkt_err(cm_node, skb);
                nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
-                       "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
-                       rcv_wnd);
+                         "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
+                         rcv_wnd);
        }
        return err;
 }
@@ -1603,9 +1835,8 @@ static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
  * is created with a listener or it may comein as rexmitted packet which in
  * that case will be just dropped.
  */
-
 static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
-       struct tcphdr *tcph)
+                          struct tcphdr *tcph)
 {
        int ret;
        u32 inc_sequence;
@@ -1624,15 +1855,15 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        case NES_CM_STATE_LISTENING:
                /* Passive OPEN */
                if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
-                               cm_node->listener->backlog) {
+                   cm_node->listener->backlog) {
                        nes_debug(NES_DBG_CM, "drop syn due to backlog "
-                               "pressure \n");
+                                 "pressure \n");
                        cm_backlog_drops++;
                        passive_open_err(cm_node, skb, 0);
                        break;
                }
                ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
-                       1);
+                                        1);
                if (ret) {
                        passive_open_err(cm_node, skb, 0);
                        /* drop pkt */
@@ -1666,9 +1897,8 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 }
 
 static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
-       struct tcphdr *tcph)
+                             struct tcphdr *tcph)
 {
-
        int ret;
        u32 inc_sequence;
        int optionsize;
@@ -1687,7 +1917,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
                if (ret) {
                        nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
-                               cm_node);
+                                 cm_node);
                        break;
                }
                cleanup_retrans_entry(cm_node);
@@ -1726,12 +1956,13 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 }
 
 static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
-       struct tcphdr *tcph)
+                         struct tcphdr *tcph)
 {
        int datasize = 0;
        u32 inc_sequence;
        int ret = 0;
        int optionsize;
+
        optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
 
        if (check_seq(cm_node, tcph, skb))
@@ -1752,8 +1983,9 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else  /* rcvd ACK only */
+               } else { /* rcvd ACK only */
                        dev_kfree_skb_any(skb);
+               }
                break;
        case NES_CM_STATE_ESTABLISHED:
                /* Passive OPEN */
@@ -1761,16 +1993,18 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else
+               } else {
                        drop_packet(skb);
+               }
                break;
        case NES_CM_STATE_MPAREQ_SENT:
                cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else  /* Could be just an ack pkt.. */
+               } else { /* Could be just an ack pkt.. */
                        dev_kfree_skb_any(skb);
+               }
                break;
        case NES_CM_STATE_LISTENING:
                cleanup_retrans_entry(cm_node);
@@ -1811,14 +2045,15 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 
 
 static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-       struct sk_buff *skb, int optionsize, int passive)
+                             struct sk_buff *skb, int optionsize, int passive)
 {
        u8 *optionsloc = (u8 *)&tcph[1];
+
        if (optionsize) {
                if (process_options(cm_node, optionsloc, optionsize,
-                       (u32)tcph->syn)) {
+                                   (u32)tcph->syn)) {
                        nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
-                               __func__, cm_node);
+                                 __func__, cm_node);
                        if (passive)
                                passive_open_err(cm_node, skb, 1);
                        else
@@ -1828,7 +2063,7 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
        }
 
        cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
-                       cm_node->tcp_cntxt.snd_wscale;
+                                    cm_node->tcp_cntxt.snd_wscale;
 
        if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
                cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
@@ -1839,18 +2074,18 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
  * active_open_err() will send reset() if flag set..
  * It will also send ABORT event.
  */
-
 static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
-       int reset)
+                           int reset)
 {
        cleanup_retrans_entry(cm_node);
        if (reset) {
                nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
-                               "state=%d\n", cm_node, cm_node->state);
+                         "state=%d\n", cm_node, cm_node->state);
                add_ref_cm_node(cm_node);
                send_reset(cm_node, skb);
-       } else
+       } else {
                dev_kfree_skb_any(skb);
+       }
 
        cm_node->state = NES_CM_STATE_CLOSED;
        create_event(cm_node, NES_CM_EVENT_ABORTED);
@@ -1860,15 +2095,14 @@ static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
  * passive_open_err() will either do a reset() or will free up the skb and
  * remove the cm_node.
  */
-
 static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
-       int reset)
+                            int reset)
 {
        cleanup_retrans_entry(cm_node);
        cm_node->state = NES_CM_STATE_CLOSED;
        if (reset) {
                nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
-                       "cm_node=%p state =%d\n", cm_node, cm_node->state);
+                         "cm_node=%p state =%d\n", cm_node, cm_node->state);
                send_reset(cm_node, skb);
        } else {
                dev_kfree_skb_any(skb);
@@ -1883,6 +2117,7 @@ static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
 static void free_retrans_entry(struct nes_cm_node *cm_node)
 {
        struct nes_timer_entry *send_entry;
+
        send_entry = cm_node->send_entry;
        if (send_entry) {
                cm_node->send_entry = NULL;
@@ -1906,26 +2141,28 @@ static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
  * Returns skb if to be freed, else it will return NULL if already used..
  */
 static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
-       struct nes_cm_core *cm_core)
+                          struct nes_cm_core *cm_core)
 {
-       enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
+       enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
        struct tcphdr *tcph = tcp_hdr(skb);
-       u32     fin_set = 0;
+       u32 fin_set = 0;
        int ret = 0;
+
        skb_pull(skb, ip_hdr(skb)->ihl << 2);
 
        nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
-               "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
-               tcph->ack, tcph->rst, tcph->fin);
+                 "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
+                 tcph->ack, tcph->rst, tcph->fin);
 
-       if (tcph->rst)
+       if (tcph->rst) {
                pkt_type = NES_PKT_TYPE_RST;
-       else if (tcph->syn) {
+       } else if (tcph->syn) {
                pkt_type = NES_PKT_TYPE_SYN;
                if (tcph->ack)
                        pkt_type = NES_PKT_TYPE_SYNACK;
-       } else if (tcph->ack)
+       } else if (tcph->ack) {
                pkt_type = NES_PKT_TYPE_ACK;
+       }
        if (tcph->fin)
                fin_set = 1;
 
@@ -1956,17 +2193,17 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
  * mini_cm_listen - create a listen node with params
  */
 static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
-       struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+                                             struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
 {
        struct nes_cm_listener *listener;
        unsigned long flags;
 
        nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
-               cm_info->loc_addr, cm_info->loc_port);
+                 cm_info->loc_addr, cm_info->loc_port);
 
        /* cannot have multiple matching listeners */
        listener = find_listener(cm_core, htonl(cm_info->loc_addr),
-                       htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+                                htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
        if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
                /* find automatically incs ref count ??? */
                atomic_dec(&listener->ref_count);
@@ -2012,9 +2249,9 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
        }
 
        nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
-                       " listener = %p, backlog = %d, cm_id = %p.\n",
-                       cm_info->loc_addr, cm_info->loc_port,
-                       listener, listener->backlog, listener->cm_id);
+                 " listener = %p, backlog = %d, cm_id = %p.\n",
+                 cm_info->loc_addr, cm_info->loc_port,
+                 listener, listener->backlog, listener->cm_id);
 
        return listener;
 }
@@ -2024,26 +2261,20 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
  * mini_cm_connect - make a connection node with params
  */
 static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
-       struct nes_vnic *nesvnic, u16 private_data_len,
-       void *private_data, struct nes_cm_info *cm_info)
+                                          struct nes_vnic *nesvnic, u16 private_data_len,
+                                          void *private_data, struct nes_cm_info *cm_info)
 {
        int ret = 0;
        struct nes_cm_node *cm_node;
        struct nes_cm_listener *loopbackremotelistener;
        struct nes_cm_node *loopbackremotenode;
        struct nes_cm_info loopback_cm_info;
-       u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len;
-       struct ietf_mpa_frame *mpa_frame = NULL;
+       u8 *start_buff;
 
        /* create a CM connection node */
        cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
        if (!cm_node)
                return NULL;
-       mpa_frame = &cm_node->mpa_frame;
-       memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
-       mpa_frame->flags = IETF_MPA_FLAGS_CRC;
-       mpa_frame->rev =  IETF_MPA_VERSION;
-       mpa_frame->priv_data_len = htons(private_data_len);
 
        /* set our node side to client (active) side */
        cm_node->tcp_cntxt.client = 1;
@@ -2051,8 +2282,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
 
        if (cm_info->loc_addr == cm_info->rem_addr) {
                loopbackremotelistener = find_listener(cm_core,
-                               ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
-                               NES_CM_LISTENER_ACTIVE_STATE);
+                                                      ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
+                                                      NES_CM_LISTENER_ACTIVE_STATE);
                if (loopbackremotelistener == NULL) {
                        create_event(cm_node, NES_CM_EVENT_ABORTED);
                } else {
@@ -2061,7 +2292,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
                        loopback_cm_info.rem_port = cm_info->loc_port;
                        loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
                        loopbackremotenode = make_cm_node(cm_core, nesvnic,
-                               &loopback_cm_info, loopbackremotelistener);
+                                                         &loopback_cm_info, loopbackremotelistener);
                        if (!loopbackremotenode) {
                                rem_ref_cm_node(cm_node->cm_core, cm_node);
                                return NULL;
@@ -2072,7 +2303,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
                                NES_CM_DEFAULT_RCV_WND_SCALE;
                        cm_node->loopbackpartner = loopbackremotenode;
                        memcpy(loopbackremotenode->mpa_frame_buf, private_data,
-                               private_data_len);
+                              private_data_len);
                        loopbackremotenode->mpa_frame_size = private_data_len;
 
                        /* we are done handling this state. */
@@ -2100,12 +2331,10 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
                return cm_node;
        }
 
-       /* set our node side to client (active) side */
-       cm_node->tcp_cntxt.client = 1;
-       /* init our MPA frame ptr */
-       memcpy(mpa_frame->priv_data, private_data, private_data_len);
+       start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
+       cm_node->mpa_frame_size = private_data_len;
 
-       cm_node->mpa_frame_size = mpa_frame_size;
+       memcpy(start_buff, private_data, private_data_len);
 
        /* send a syn and goto syn sent state */
        cm_node->state = NES_CM_STATE_SYN_SENT;
@@ -2114,18 +2343,19 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
        if (ret) {
                /* error in sending the syn free up the cm_node struct */
                nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
-                       "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
-                       cm_node->rem_addr, cm_node->rem_port, cm_node,
-                       cm_node->cm_id);
+                         "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
+                         cm_node->rem_addr, cm_node->rem_port, cm_node,
+                         cm_node->cm_id);
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                cm_node = NULL;
        }
 
-       if (cm_node)
+       if (cm_node) {
                nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
-                       "port=0x%04x, cm_node=%p, cm_id = %p.\n",
-                       cm_node->rem_addr, cm_node->rem_port, cm_node,
-                       cm_node->cm_id);
+                         "port=0x%04x, cm_node=%p, cm_id = %p.\n",
+                         cm_node->rem_addr, cm_node->rem_port, cm_node,
+                         cm_node->cm_id);
+       }
 
        return cm_node;
 }
@@ -2135,8 +2365,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
  * mini_cm_accept - accept a connection
  * This function is never called
  */
-static int mini_cm_accept(struct nes_cm_core *cm_core,
-       struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
+static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
        return 0;
 }
@@ -2145,8 +2374,7 @@ static int mini_cm_accept(struct nes_cm_core *cm_core,
 /**
  * mini_cm_reject - reject and teardown a connection
  */
-static int mini_cm_reject(struct nes_cm_core *cm_core,
-       struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
+static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
        int ret = 0;
        int err = 0;
@@ -2156,7 +2384,7 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
        struct nes_cm_node *loopback = cm_node->loopbackpartner;
 
        nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
-               __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
+                 __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
 
        if (cm_node->tcp_cntxt.client)
                return ret;
@@ -2177,8 +2405,9 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
                                        err = send_reset(cm_node, NULL);
                                        if (err)
                                                WARN_ON(1);
-                               } else
+                               } else {
                                        cm_id->add_ref(cm_id);
+                               }
                        }
                }
        } else {
@@ -2253,7 +2482,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
        case NES_CM_STATE_TSA:
                if (cm_node->send_entry)
                        printk(KERN_ERR "ERROR Close got called from STATE_TSA "
-                               "send_entry=%p\n", cm_node->send_entry);
+                              "send_entry=%p\n", cm_node->send_entry);
                ret = rem_ref_cm_node(cm_core, cm_node);
                break;
        }
@@ -2266,7 +2495,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
  * node state machine
  */
 static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
-       struct nes_vnic *nesvnic, struct sk_buff *skb)
+                           struct nes_vnic *nesvnic, struct sk_buff *skb)
 {
        struct nes_cm_node *cm_node = NULL;
        struct nes_cm_listener *listener = NULL;
@@ -2278,9 +2507,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 
        if (!skb)
                return 0;
-       if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
+       if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr))
                return 0;
-       }
 
        iph = (struct iphdr *)skb->data;
        tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
@@ -2298,8 +2526,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 
        do {
                cm_node = find_node(cm_core,
-                       nfo.rem_port, nfo.rem_addr,
-                       nfo.loc_port, nfo.loc_addr);
+                                   nfo.rem_port, nfo.rem_addr,
+                                   nfo.loc_port, nfo.loc_addr);
 
                if (!cm_node) {
                        /* Only type of packet accepted are for */
@@ -2309,8 +2537,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
                                break;
                        }
                        listener = find_listener(cm_core, nfo.loc_addr,
-                               nfo.loc_port,
-                               NES_CM_LISTENER_ACTIVE_STATE);
+                                                nfo.loc_port,
+                                                NES_CM_LISTENER_ACTIVE_STATE);
                        if (!listener) {
                                nfo.cm_id = NULL;
                                nfo.conn_type = 0;
@@ -2321,10 +2549,10 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
                        nfo.cm_id = listener->cm_id;
                        nfo.conn_type = listener->conn_type;
                        cm_node = make_cm_node(cm_core, nesvnic, &nfo,
-                               listener);
+                                              listener);
                        if (!cm_node) {
                                nes_debug(NES_DBG_CM, "Unable to allocate "
-                                       "node\n");
+                                         "node\n");
                                cm_packets_dropped++;
                                atomic_dec(&listener->ref_count);
                                dev_kfree_skb_any(skb);
@@ -2376,7 +2604,7 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
        init_timer(&cm_core->tcp_timer);
        cm_core->tcp_timer.function = nes_cm_timer_tick;
 
-       cm_core->mtu   = NES_CM_DEFAULT_MTU;
+       cm_core->mtu = NES_CM_DEFAULT_MTU;
        cm_core->state = NES_CM_STATE_INITED;
        cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
 
@@ -2414,9 +2642,8 @@ static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
 
        barrier();
 
-       if (timer_pending(&cm_core->tcp_timer)) {
+       if (timer_pending(&cm_core->tcp_timer))
                del_timer(&cm_core->tcp_timer);
-       }
 
        destroy_workqueue(cm_core->event_wq);
        destroy_workqueue(cm_core->disconn_wq);
@@ -2471,8 +2698,8 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
                return -EINVAL;
 
        nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
-                       NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
-                       NES_QPCONTEXT_MISC_DROS);
+                                                 NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
+                                                 NES_QPCONTEXT_MISC_DROS);
 
        if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
                nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
@@ -2482,15 +2709,15 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
        nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
 
        nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
-                       (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
+               (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
 
        nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
-                       (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
-                       NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
+               (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
+               NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
 
        nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
-                       (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
-                       NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
+               (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
+               NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
 
        nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
        nesqp->nesqp_context->ts_recent = 0;
@@ -2499,24 +2726,24 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
        nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
        nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
        nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
-                       cm_node->tcp_cntxt.rcv_wscale);
+                                                   cm_node->tcp_cntxt.rcv_wscale);
        nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
        nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
        nesqp->nesqp_context->srtt = 0;
        nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
        nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
-       nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss);
+       nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
        nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
        nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
        nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
 
        nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
-                       " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
-                       nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
-                       le32_to_cpu(nesqp->nesqp_context->snd_nxt),
-                       cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
-                       le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
-                       le32_to_cpu(nesqp->nesqp_context->misc));
+                 " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
+                 nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+                 le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+                 cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
+                 le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
+                 le32_to_cpu(nesqp->nesqp_context->misc));
        nes_debug(NES_DBG_CM, "  snd_wnd  = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
        nes_debug(NES_DBG_CM, "  snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
        nes_debug(NES_DBG_CM, "  max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
@@ -2537,7 +2764,7 @@ int nes_cm_disconn(struct nes_qp *nesqp)
 
        work = kzalloc(sizeof *work, GFP_ATOMIC);
        if (!work)
-               return -ENOMEM; /* Timer will clean up */
+               return -ENOMEM;  /* Timer will clean up */
 
        nes_add_ref(&nesqp->ibqp);
        work->nesqp = nesqp;
@@ -2557,7 +2784,7 @@ static void nes_disconnect_worker(struct work_struct *work)
 
        kfree(dwork);
        nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
-                       nesqp->last_aeq, nesqp->hwqp.qp_id);
+                 nesqp->last_aeq, nesqp->hwqp.qp_id);
        nes_cm_disconn_true(nesqp);
        nes_rem_ref(&nesqp->ibqp);
 }
@@ -2593,7 +2820,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
        /* make sure we havent already closed this connection */
        if (!cm_id) {
                nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
-                               nesqp->hwqp.qp_id);
+                         nesqp->hwqp.qp_id);
                spin_unlock_irqrestore(&nesqp->lock, flags);
                return -1;
        }
@@ -2602,7 +2829,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
        nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
 
        original_hw_tcp_state = nesqp->hw_tcp_state;
-       original_ibqp_state   = nesqp->ibqp_state;
+       original_ibqp_state = nesqp->ibqp_state;
        last_ae = nesqp->last_aeq;
 
        if (nesqp->term_flags) {
@@ -2660,16 +2887,16 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
                        cm_event.private_data_len = 0;
 
                        nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
-                               " for  QP%u, SQ Head = %u, SQ Tail = %u. "
-                               "cm_id = %p, refcount = %u.\n",
-                               nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
-                               nesqp->hwqp.sq_tail, cm_id,
-                               atomic_read(&nesqp->refcount));
+                                 " for  QP%u, SQ Head = %u, SQ Tail = %u. "
+                                 "cm_id = %p, refcount = %u.\n",
+                                 nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
+                                 nesqp->hwqp.sq_tail, cm_id,
+                                 atomic_read(&nesqp->refcount));
 
                        ret = cm_id->event_handler(cm_id, &cm_event);
                        if (ret)
                                nes_debug(NES_DBG_CM, "OFA CM event_handler "
-                                       "returned, ret=%d\n", ret);
+                                         "returned, ret=%d\n", ret);
                }
 
                if (issue_close) {
@@ -2687,9 +2914,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
                        cm_event.private_data_len = 0;
 
                        ret = cm_id->event_handler(cm_id, &cm_event);
-                       if (ret) {
+                       if (ret)
                                nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-                       }
 
                        cm_id->rem_ref(cm_id);
                }
@@ -2729,8 +2955,8 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
                        if (nesqp->lsmm_mr)
                                nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
                        pci_free_consistent(nesdev->pcidev,
-                                       nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
-                                       nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+                                           nesqp->private_data_len + nesqp->ietf_frame_size,
+                                           nesqp->ietf_frame, nesqp->ietf_frame_pbase);
                }
        }
 
@@ -2769,6 +2995,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct ib_phys_buf ibphysbuf;
        struct nes_pd *nespd;
        u64 tagged_offset;
+       u8 mpa_frame_offset = 0;
+       struct ietf_mpa_v2 *mpa_v2_frame;
+       u8 start_addr = 0;
+       u8 *start_ptr = &start_addr;
+       u8 **start_buff = &start_ptr;
+       u16 buff_len = 0;
 
        ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
@@ -2809,53 +3041,49 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
                        netdev_refcnt_read(nesvnic->netdev));
 
+       nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2);
        /* allocate the ietf frame and space for private data */
        nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
-               sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
-               &nesqp->ietf_frame_pbase);
+                                                nesqp->ietf_frame_size + conn_param->private_data_len,
+                                                &nesqp->ietf_frame_pbase);
 
        if (!nesqp->ietf_frame) {
-               nes_debug(NES_DBG_CM, "Unable to allocate memory for private "
-                       "data\n");
+               nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
                return -ENOMEM;
        }
+       mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame;
 
+       if (cm_node->mpa_frame_rev == IETF_MPA_V1)
+               mpa_frame_offset = 4;
 
-       /* setup the MPA frame */
-       nesqp->private_data_len = conn_param->private_data_len;
-       memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
-
-       memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
-                       conn_param->private_data_len);
+       memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
+              conn_param->private_data_len);
 
-       nesqp->ietf_frame->priv_data_len =
-               cpu_to_be16(conn_param->private_data_len);
-       nesqp->ietf_frame->rev = mpa_version;
-       nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
+       cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY);
+       nesqp->private_data_len = conn_param->private_data_len;
 
        /* setup our first outgoing iWarp send WQE (the IETF frame response) */
        wqe = &nesqp->hwqp.sq_vbase[0];
 
        if (cm_id->remote_addr.sin_addr.s_addr !=
-                       cm_id->local_addr.sin_addr.s_addr) {
+           cm_id->local_addr.sin_addr.s_addr) {
                u64temp = (unsigned long)nesqp;
                nesibdev = nesvnic->nesibdev;
                nespd = nesqp->nespd;
-               ibphysbuf.addr = nesqp->ietf_frame_pbase;
-               ibphysbuf.size = conn_param->private_data_len +
-                                       sizeof(struct ietf_mpa_frame);
-               tagged_offset = (u64)(unsigned long)nesqp->ietf_frame;
+               ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
+               ibphysbuf.size = buff_len;
+               tagged_offset = (u64)(unsigned long)*start_buff;
                ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
-                                               &ibphysbuf, 1,
-                                               IB_ACCESS_LOCAL_WRITE,
-                                               &tagged_offset);
+                                                  &ibphysbuf, 1,
+                                                  IB_ACCESS_LOCAL_WRITE,
+                                                  &tagged_offset);
                if (!ibmr) {
                        nes_debug(NES_DBG_CM, "Unable to register memory region"
-                                       "for lSMM for cm_node = %p \n",
-                                       cm_node);
+                                 "for lSMM for cm_node = %p \n",
+                                 cm_node);
                        pci_free_consistent(nesdev->pcidev,
-                               nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
-                               nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+                                           nesqp->private_data_len + nesqp->ietf_frame_size,
+                                           nesqp->ietf_frame, nesqp->ietf_frame_pbase);
                        return -ENOMEM;
                }
 
@@ -2863,22 +3091,20 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                ibmr->device = nespd->ibpd.device;
                nesqp->lsmm_mr = ibmr;
 
-               u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+               u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
                set_wqe_64bit_value(wqe->wqe_words,
-                       NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
-                       u64temp);
+                                   NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+                                   u64temp);
                wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
                        cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
-                       NES_IWARP_SQ_WQE_WRPDU);
+                                   NES_IWARP_SQ_WQE_WRPDU);
                wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
-                       cpu_to_le32(conn_param->private_data_len +
-                       sizeof(struct ietf_mpa_frame));
+                       cpu_to_le32(buff_len);
                set_wqe_64bit_value(wqe->wqe_words,
-                                       NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
-                                       (u64)(unsigned long)nesqp->ietf_frame);
+                                   NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+                                   (u64)(unsigned long)(*start_buff));
                wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
-                       cpu_to_le32(conn_param->private_data_len +
-                       sizeof(struct ietf_mpa_frame));
+                       cpu_to_le32(buff_len);
                wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
                if (nesqp->sq_kmapped) {
                        nesqp->sq_kmapped = 0;
@@ -2887,7 +3113,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
                nesqp->nesqp_context->ird_ord_sizes |=
                        cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
-                       NES_QPCONTEXT_ORDIRD_WRPDU);
+                                   NES_QPCONTEXT_ORDIRD_WRPDU);
        } else {
                nesqp->nesqp_context->ird_ord_sizes |=
                        cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU);
@@ -2901,11 +3127,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        /*  nesqp->cm_node = (void *)cm_id->provider_data; */
        cm_id->provider_data = nesqp;
-       nesqp->active_conn   = 0;
+       nesqp->active_conn = 0;
 
        if (cm_node->state == NES_CM_STATE_TSA)
                nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
-                       cm_node);
+                         cm_node);
 
        nes_cm_init_tsa_conn(nesqp, cm_node);
 
@@ -2922,13 +3148,13 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                        cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
 
        nesqp->nesqp_context->misc2 |= cpu_to_le32(
-                       (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
-                       NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+               (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+               NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
 
        nesqp->nesqp_context->arp_index_vlan |=
                cpu_to_le32(nes_arp_table(nesdev,
-                       le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
-                       NES_ARP_RESOLVE) << 16);
+                                         le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
+                                         NES_ARP_RESOLVE) << 16);
 
        nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
                jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
@@ -2954,7 +3180,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        crc_value = get_crc_value(&nes_quad);
        nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
        nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
-               nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
+                 nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
 
        nesqp->hte_index &= adapter->hte_index_mask;
        nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
@@ -2962,17 +3188,15 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
 
        nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
-                       "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
-                       "private data length=%zu.\n", nesqp->hwqp.qp_id,
-                       ntohl(cm_id->remote_addr.sin_addr.s_addr),
-                       ntohs(cm_id->remote_addr.sin_port),
-                       ntohl(cm_id->local_addr.sin_addr.s_addr),
-                       ntohs(cm_id->local_addr.sin_port),
-                       le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
-                       le32_to_cpu(nesqp->nesqp_context->snd_nxt),
-                       conn_param->private_data_len +
-                       sizeof(struct ietf_mpa_frame));
-
+                 "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
+                 "private data length=%u.\n", nesqp->hwqp.qp_id,
+                 ntohl(cm_id->remote_addr.sin_addr.s_addr),
+                 ntohs(cm_id->remote_addr.sin_port),
+                 ntohl(cm_id->local_addr.sin_addr.s_addr),
+                 ntohs(cm_id->local_addr.sin_port),
+                 le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+                 le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+                 buff_len);
 
        /* notify OF layer that accept event was successful */
        cm_id->add_ref(cm_id);
@@ -2993,12 +3217,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                        nesqp->private_data_len;
                /* copy entire MPA frame to our cm_node's frame */
                memcpy(cm_node->loopbackpartner->mpa_frame_buf,
-                       nesqp->ietf_frame->priv_data, nesqp->private_data_len);
+                      conn_param->private_data, conn_param->private_data_len);
                create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
        }
        if (ret)
                printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
-                       "ret=%d\n", __func__, __LINE__, ret);
+                      "ret=%d\n", __func__, __LINE__, ret);
 
        return 0;
 }
@@ -3011,34 +3235,28 @@ int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 {
        struct nes_cm_node *cm_node;
        struct nes_cm_node *loopback;
-
        struct nes_cm_core *cm_core;
+       u8 *start_buff;
 
        atomic_inc(&cm_rejects);
-       cm_node = (struct nes_cm_node *) cm_id->provider_data;
+       cm_node = (struct nes_cm_node *)cm_id->provider_data;
        loopback = cm_node->loopbackpartner;
        cm_core = cm_node->cm_core;
        cm_node->cm_id = cm_id;
-       cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len;
 
-       if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
+       if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
                return -EINVAL;
 
-       memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
        if (loopback) {
                memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
                loopback->mpa_frame.priv_data_len = pdata_len;
-               loopback->mpa_frame_size = sizeof(struct ietf_mpa_frame) +
-                               pdata_len;
+               loopback->mpa_frame_size = pdata_len;
        } else {
-               memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len);
-               cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len);
+               start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
+               cm_node->mpa_frame_size = pdata_len;
+               memcpy(start_buff, pdata, pdata_len);
        }
-
-       cm_node->mpa_frame.rev = mpa_version;
-       cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT;
-
-       return cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node);
+       return cm_core->api->reject(cm_core, cm_node);
 }
 
 
@@ -3065,7 +3283,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nesvnic = to_nesvnic(nesqp->ibqp.device);
        if (!nesvnic)
                return -EINVAL;
-       nesdev  = nesvnic->nesdev;
+       nesdev = nesvnic->nesdev;
        if (!nesdev)
                return -EINVAL;
 
@@ -3073,12 +3291,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                return -EINVAL;
 
        nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
-               "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
-               ntohl(nesvnic->local_ipaddr),
-               ntohl(cm_id->remote_addr.sin_addr.s_addr),
-               ntohs(cm_id->remote_addr.sin_port),
-               ntohl(cm_id->local_addr.sin_addr.s_addr),
-               ntohs(cm_id->local_addr.sin_port));
+                 "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
+                 ntohl(nesvnic->local_ipaddr),
+                 ntohl(cm_id->remote_addr.sin_addr.s_addr),
+                 ntohs(cm_id->remote_addr.sin_port),
+                 ntohl(cm_id->local_addr.sin_addr.s_addr),
+                 ntohs(cm_id->local_addr.sin_port));
 
        atomic_inc(&cm_connects);
        nesqp->active_conn = 1;
@@ -3092,12 +3310,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
        nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
        nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
-               conn_param->private_data_len);
+                 conn_param->private_data_len);
 
        if (cm_id->local_addr.sin_addr.s_addr !=
-               cm_id->remote_addr.sin_addr.s_addr) {
+           cm_id->remote_addr.sin_addr.s_addr) {
                nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
-                       PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+                                PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
                apbvt_set = 1;
        }
 
@@ -3113,13 +3331,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        /* create a connect CM node connection */
        cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
-               conn_param->private_data_len, (void *)conn_param->private_data,
-               &cm_info);
+                                         conn_param->private_data_len, (void *)conn_param->private_data,
+                                         &cm_info);
        if (!cm_node) {
                if (apbvt_set)
                        nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
-                               PCI_FUNC(nesdev->pcidev->devfn),
-                               NES_MANAGE_APBVT_DEL);
+                                        PCI_FUNC(nesdev->pcidev->devfn),
+                                        NES_MANAGE_APBVT_DEL);
 
                cm_id->rem_ref(cm_id);
                return -ENOMEM;
@@ -3169,7 +3387,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
        cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
        if (!cm_node) {
                printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
-                               __func__, __LINE__);
+                      __func__, __LINE__);
                return -ENOMEM;
        }
 
@@ -3177,12 +3395,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
 
        if (!cm_node->reused_node) {
                err = nes_manage_apbvt(nesvnic,
-                       ntohs(cm_id->local_addr.sin_port),
-                       PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
-                       NES_MANAGE_APBVT_ADD);
+                                      ntohs(cm_id->local_addr.sin_port),
+                                      PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
+                                      NES_MANAGE_APBVT_ADD);
                if (err) {
                        printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
-                               err);
+                              err);
                        g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
                        return err;
                }
@@ -3219,13 +3437,13 @@ int nes_destroy_listen(struct iw_cm_id *cm_id)
 int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
 {
        int rc = 0;
+
        cm_packets_received++;
-       if ((g_cm_core) && (g_cm_core->api)) {
+       if ((g_cm_core) && (g_cm_core->api))
                rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
-       } else {
+       else
                nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
-                               " cm is not setup properly.\n");
-       }
+                         " cm is not setup properly.\n");
 
        return rc;
 }
@@ -3240,11 +3458,10 @@ int nes_cm_start(void)
        nes_debug(NES_DBG_CM, "\n");
        /* create the primary CM core, pass this handle to subsequent core inits */
        g_cm_core = nes_cm_alloc_core();
-       if (g_cm_core) {
+       if (g_cm_core)
                return 0;
-       } else {
+       else
                return -ENOMEM;
-       }
 }
 
 
@@ -3265,7 +3482,6 @@ int nes_cm_stop(void)
  */
 static void cm_event_connected(struct nes_cm_event *event)
 {
-       u64 u64temp;
        struct nes_qp *nesqp;
        struct nes_vnic *nesvnic;
        struct nes_device *nesdev;
@@ -3274,7 +3490,6 @@ static void cm_event_connected(struct nes_cm_event *event)
        struct ib_qp_attr attr;
        struct iw_cm_id *cm_id;
        struct iw_cm_event cm_event;
-       struct nes_hw_qp_wqe *wqe;
        struct nes_v4_quad nes_quad;
        u32 crc_value;
        int ret;
@@ -3288,17 +3503,16 @@ static void cm_event_connected(struct nes_cm_event *event)
        nesdev = nesvnic->nesdev;
        nesadapter = nesdev->nesadapter;
 
-       if (nesqp->destroyed) {
+       if (nesqp->destroyed)
                return;
-       }
        atomic_inc(&cm_connecteds);
        nes_debug(NES_DBG_CM, "QP%u attempting to connect to  0x%08X:0x%04X on"
-                       " local port 0x%04X. jiffies = %lu.\n",
-                       nesqp->hwqp.qp_id,
-                       ntohl(cm_id->remote_addr.sin_addr.s_addr),
-                       ntohs(cm_id->remote_addr.sin_port),
-                       ntohs(cm_id->local_addr.sin_port),
-                       jiffies);
+                 " local port 0x%04X. jiffies = %lu.\n",
+                 nesqp->hwqp.qp_id,
+                 ntohl(cm_id->remote_addr.sin_addr.s_addr),
+                 ntohs(cm_id->remote_addr.sin_port),
+                 ntohs(cm_id->local_addr.sin_port),
+                 jiffies);
 
        nes_cm_init_tsa_conn(nesqp, cm_node);
 
@@ -3329,40 +3543,12 @@ static void cm_event_connected(struct nes_cm_event *event)
                        NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
 
        /* Adjust tail for not having a LSMM */
-       nesqp->hwqp.sq_tail = 1;
-
-#if defined(NES_SEND_FIRST_WRITE)
-       if (cm_node->send_write0) {
-               nes_debug(NES_DBG_CM, "Sending first write.\n");
-               wqe = &nesqp->hwqp.sq_vbase[0];
-               u64temp = (unsigned long)nesqp;
-               u64temp |= NES_SW_CONTEXT_ALIGN>>1;
-               set_wqe_64bit_value(wqe->wqe_words,
-                               NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
-               wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
-                       cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
-               wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
-               wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
-               wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
-               wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
-               wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+       /*nesqp->hwqp.sq_tail = 1;*/
 
-               if (nesqp->sq_kmapped) {
-                       nesqp->sq_kmapped = 0;
-                       kunmap(nesqp->page);
-               }
+       build_rdma0_msg(cm_node, &nesqp);
 
-               /* use the reserved spot on the WQ for the extra first WQE */
-               nesqp->nesqp_context->ird_ord_sizes &=
-                       cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
-                                               NES_QPCONTEXT_ORDIRD_WRPDU |
-                                               NES_QPCONTEXT_ORDIRD_ALSMM));
-               nesqp->skip_lsmm = 1;
-               nesqp->hwqp.sq_tail = 0;
-               nes_write32(nesdev->regs + NES_WQE_ALLOC,
-                               (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-       }
-#endif
+       nes_write32(nesdev->regs + NES_WQE_ALLOC,
+                   (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
 
        memset(&nes_quad, 0, sizeof(nes_quad));
 
@@ -3379,13 +3565,13 @@ static void cm_event_connected(struct nes_cm_event *event)
        crc_value = get_crc_value(&nes_quad);
        nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
        nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
-                       nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+                 nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
 
        nesqp->hte_index &= nesadapter->hte_index_mask;
        nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
 
        nesqp->ietf_frame = &cm_node->mpa_frame;
-       nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
+       nesqp->private_data_len = (u8)cm_node->mpa_frame_size;
        cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
 
        /* notify OF layer we successfully created the requested connection */
@@ -3397,7 +3583,9 @@ static void cm_event_connected(struct nes_cm_event *event)
        cm_event.remote_addr = cm_id->remote_addr;
 
        cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
-       cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
+       cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
+       cm_event.ird = cm_node->ird_size;
+       cm_event.ord = cm_node->ord_size;
 
        cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
        ret = cm_id->event_handler(cm_id, &cm_event);
@@ -3405,12 +3593,12 @@ static void cm_event_connected(struct nes_cm_event *event)
 
        if (ret)
                printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
-                       "ret=%d\n", __func__, __LINE__, ret);
+                      "ret=%d\n", __func__, __LINE__, ret);
        attr.qp_state = IB_QPS_RTS;
        nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
 
        nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
-               "%lu\n", nesqp->hwqp.qp_id, jiffies);
+                 "%lu\n", nesqp->hwqp.qp_id, jiffies);
 
        return;
 }
@@ -3431,16 +3619,14 @@ static void cm_event_connect_error(struct nes_cm_event *event)
                return;
 
        cm_id = event->cm_node->cm_id;
-       if (!cm_id) {
+       if (!cm_id)
                return;
-       }
 
        nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
        nesqp = cm_id->provider_data;
 
-       if (!nesqp) {
+       if (!nesqp)
                return;
-       }
 
        /* notify OF layer about this connection error event */
        /* cm_id->rem_ref(cm_id); */
@@ -3455,14 +3641,14 @@ static void cm_event_connect_error(struct nes_cm_event *event)
        cm_event.private_data_len = 0;
 
        nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
-               "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
-               cm_event.remote_addr.sin_addr.s_addr);
+                 "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
+                 cm_event.remote_addr.sin_addr.s_addr);
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
        if (ret)
                printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
-                       "ret=%d\n", __func__, __LINE__, ret);
+                      "ret=%d\n", __func__, __LINE__, ret);
        cm_id->rem_ref(cm_id);
 
        rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
@@ -3532,7 +3718,7 @@ static void cm_event_reset(struct nes_cm_event *event)
  */
 static void cm_event_mpa_req(struct nes_cm_event *event)
 {
-       struct iw_cm_id   *cm_id;
+       struct iw_cm_id *cm_id;
        struct iw_cm_event cm_event;
        int ret;
        struct nes_cm_node *cm_node;
@@ -3544,7 +3730,7 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
 
        atomic_inc(&cm_connect_reqs);
        nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
-                       cm_node, cm_id, jiffies);
+                 cm_node, cm_id, jiffies);
 
        cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
        cm_event.status = 0;
@@ -3558,19 +3744,21 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
        cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
        cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
        cm_event.private_data = cm_node->mpa_frame_buf;
-       cm_event.private_data_len  = (u8) cm_node->mpa_frame_size;
+       cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
+       cm_event.ird = cm_node->ird_size;
+       cm_event.ord = cm_node->ord_size;
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        if (ret)
                printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
-                               __func__, __LINE__, ret);
+                      __func__, __LINE__, ret);
        return;
 }
 
 
 static void cm_event_mpa_reject(struct nes_cm_event *event)
 {
-       struct iw_cm_id   *cm_id;
+       struct iw_cm_id *cm_id;
        struct iw_cm_event cm_event;
        struct nes_cm_node *cm_node;
        int ret;
@@ -3582,7 +3770,7 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
 
        atomic_inc(&cm_connect_reqs);
        nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
-                       cm_node, cm_id, jiffies);
+                 cm_node, cm_id, jiffies);
 
        cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
        cm_event.status = -ECONNREFUSED;
@@ -3597,17 +3785,17 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
        cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
 
        cm_event.private_data = cm_node->mpa_frame_buf;
-       cm_event.private_data_len = (u8) cm_node->mpa_frame_size;
+       cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
 
        nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
-                       "remove_addr=%08x\n",
-                       cm_event.local_addr.sin_addr.s_addr,
-                       cm_event.remote_addr.sin_addr.s_addr);
+                 "remove_addr=%08x\n",
+                 cm_event.local_addr.sin_addr.s_addr,
+                 cm_event.remote_addr.sin_addr.s_addr);
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        if (ret)
                printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
-                               __func__, __LINE__, ret);
+                      __func__, __LINE__, ret);
 
        return;
 }
@@ -3626,7 +3814,7 @@ static int nes_cm_post_event(struct nes_cm_event *event)
        event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
        INIT_WORK(&event->event_work, nes_cm_event_handler);
        nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
-               event->cm_node, event);
+                 event->cm_node, event);
 
        queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
 
@@ -3643,7 +3831,7 @@ static int nes_cm_post_event(struct nes_cm_event *event)
 static void nes_cm_event_handler(struct work_struct *work)
 {
        struct nes_cm_event *event = container_of(work, struct nes_cm_event,
-                       event_work);
+                                                 event_work);
        struct nes_cm_core *cm_core;
 
        if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
@@ -3651,29 +3839,29 @@ static void nes_cm_event_handler(struct work_struct *work)
 
        cm_core = event->cm_node->cm_core;
        nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
-               event, event->type, atomic_read(&cm_core->events_posted));
+                 event, event->type, atomic_read(&cm_core->events_posted));
 
        switch (event->type) {
        case NES_CM_EVENT_MPA_REQ:
                cm_event_mpa_req(event);
                nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
-                       event->cm_node);
+                         event->cm_node);
                break;
        case NES_CM_EVENT_RESET:
                nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
-                       event->cm_node);
+                         event->cm_node);
                cm_event_reset(event);
                break;
        case NES_CM_EVENT_CONNECTED:
                if ((!event->cm_node->cm_id) ||
-                       (event->cm_node->state != NES_CM_STATE_TSA))
+                   (event->cm_node->state != NES_CM_STATE_TSA))
                        break;
                cm_event_connected(event);
                nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
                break;
        case NES_CM_EVENT_MPA_REJECT:
                if ((!event->cm_node->cm_id) ||
-                               (event->cm_node->state == NES_CM_STATE_TSA))
+                   (event->cm_node->state == NES_CM_STATE_TSA))
                        break;
                cm_event_mpa_reject(event);
                nes_debug(NES_DBG_CM, "CM Event: REJECT\n");
@@ -3681,7 +3869,7 @@ static void nes_cm_event_handler(struct work_struct *work)
 
        case NES_CM_EVENT_ABORTED:
                if ((!event->cm_node->cm_id) ||
-                       (event->cm_node->state == NES_CM_STATE_TSA))
+                   (event->cm_node->state == NES_CM_STATE_TSA))
                        break;
                cm_event_connect_error(event);
                nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
index 130c185..bdfa1fb 100644 (file)
 #define IETF_MPA_KEY_SIZE 16
 #define IETF_MPA_VERSION  1
 #define IETF_MAX_PRIV_DATA_LEN 512
-#define IETF_MPA_FRAME_SIZE     20
+#define IETF_MPA_FRAME_SIZE    20
+#define IETF_RTR_MSG_SIZE      4
+#define IETF_MPA_V2_FLAG       0x10
+
+/* IETF RTR MSG Fields               */
+#define IETF_PEER_TO_PEER       0x8000
+#define IETF_FLPDU_ZERO_LEN     0x4000
+#define IETF_RDMA0_WRITE        0x8000
+#define IETF_RDMA0_READ         0x4000
+#define IETF_NO_IRD_ORD         0x3FFF
 
 enum ietf_mpa_flags {
        IETF_MPA_FLAGS_MARKERS = 0x80,  /* receive Markers */
@@ -56,7 +65,7 @@ enum ietf_mpa_flags {
        IETF_MPA_FLAGS_REJECT  = 0x20,  /* Reject */
 };
 
-struct ietf_mpa_frame {
+struct ietf_mpa_v1 {
        u8 key[IETF_MPA_KEY_SIZE];
        u8 flags;
        u8 rev;
@@ -66,6 +75,20 @@ struct ietf_mpa_frame {
 
 #define ietf_mpa_req_resp_frame ietf_mpa_frame
 
+struct ietf_rtr_msg {
+       __be16 ctrl_ird;
+       __be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+       u8 key[IETF_MPA_KEY_SIZE];
+       u8 flags;
+       u8 rev;
+        __be16 priv_data_len;
+       struct ietf_rtr_msg rtr_msg;
+       u8 priv_data[0];
+};
+
 struct nes_v4_quad {
        u32 rsvd0;
        __le32 DstIpAdrIndex;   /* Only most significant 5 bits are valid */
@@ -171,8 +194,7 @@ struct nes_timer_entry {
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
-#define        MAX_CM_BUFFER   (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
-
+#define        MAX_CM_BUFFER   (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN)
 
 typedef u32 nes_addr_t;
 
@@ -204,6 +226,21 @@ enum nes_cm_node_state {
        NES_CM_STATE_CLOSED
 };
 
+enum mpa_frame_version {
+       IETF_MPA_V1 = 1,
+       IETF_MPA_V2 = 2
+};
+
+enum mpa_frame_key {
+       MPA_KEY_REQUEST,
+       MPA_KEY_REPLY
+};
+
+enum send_rdma0 {
+       SEND_RDMA_READ_ZERO = 1,
+       SEND_RDMA_WRITE_ZERO = 2
+};
+
 enum nes_tcpip_pkt_type {
        NES_PKT_TYPE_UNKNOWN,
        NES_PKT_TYPE_SYN,
@@ -245,9 +282,9 @@ struct nes_cm_tcp_context {
 
 
 enum nes_cm_listener_state {
-       NES_CM_LISTENER_PASSIVE_STATE=1,
-       NES_CM_LISTENER_ACTIVE_STATE=2,
-       NES_CM_LISTENER_EITHER_STATE=3
+       NES_CM_LISTENER_PASSIVE_STATE = 1,
+       NES_CM_LISTENER_ACTIVE_STATE = 2,
+       NES_CM_LISTENER_EITHER_STATE = 3
 };
 
 struct nes_cm_listener {
@@ -283,16 +320,20 @@ struct nes_cm_node {
 
        struct nes_cm_node        *loopbackpartner;
 
-       struct nes_timer_entry  *send_entry;
-
+       struct nes_timer_entry    *send_entry;
+       struct nes_timer_entry    *recv_entry;
        spinlock_t                retrans_list_lock;
-       struct nes_timer_entry  *recv_entry;
+       enum send_rdma0           send_rdma0_op;
 
-       int                       send_write0;
        union {
-               struct ietf_mpa_frame mpa_frame;
-               u8                    mpa_frame_buf[MAX_CM_BUFFER];
+               struct ietf_mpa_v1 mpa_frame;
+               struct ietf_mpa_v2 mpa_v2_frame;
+               u8                 mpa_frame_buf[MAX_CM_BUFFER];
        };
+       enum mpa_frame_version    mpa_frame_rev;
+       u16                       ird_size;
+       u16                       ord_size;
+
        u16                       mpa_frame_size;
        struct iw_cm_id           *cm_id;
        struct list_head          list;
@@ -399,10 +440,8 @@ struct nes_cm_ops {
                        struct nes_vnic *, u16, void *,
                        struct nes_cm_info *);
        int (*close)(struct nes_cm_core *, struct nes_cm_node *);
-       int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
-                       struct nes_cm_node *);
-       int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
-                       struct nes_cm_node *);
+       int (*accept)(struct nes_cm_core *, struct nes_cm_node *);
+       int (*reject)(struct nes_cm_core *, struct nes_cm_node *);
        int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
                        struct sk_buff *);
        int (*destroy_cm_core)(struct nes_cm_core *);
index 2800573..fe6b6e9 100644 (file)
@@ -139,7 +139,8 @@ struct nes_qp {
        struct nes_cq         *nesrcq;
        struct nes_pd         *nespd;
        void *cm_node; /* handle of the node this QP is associated with */
-       struct ietf_mpa_frame *ietf_frame;
+       void                  *ietf_frame;
+       u8                    ietf_frame_size;
        dma_addr_t            ietf_frame_pbase;
        struct ib_mr          *lsmm_mr;
        struct nes_hw_qp      hwqp;
index c9624ea..b881bdc 100644 (file)
@@ -171,7 +171,9 @@ struct qib_ctxtdata {
        /* how many alloc_pages() chunks in rcvegrbuf_pages */
        u32 rcvegrbuf_chunks;
        /* how many egrbufs per chunk */
-       u32 rcvegrbufs_perchunk;
+       u16 rcvegrbufs_perchunk;
+       /* ilog2 of above */
+       u16 rcvegrbufs_perchunk_shift;
        /* order for rcvegrbuf_pages */
        size_t rcvegrbuf_size;
        /* rcvhdrq size (for freeing) */
@@ -221,6 +223,9 @@ struct qib_ctxtdata {
        /* ctxt rcvhdrq head offset */
        u32 head;
        u32 pkt_count;
+       /* lookaside fields */
+       struct qib_qp *lookaside_qp;
+       u32 lookaside_qpn;
        /* QPs waiting for context processing */
        struct list_head qp_wait_list;
 };
@@ -807,6 +812,10 @@ struct qib_devdata {
         * supports, less gives more pio bufs/ctxt, etc.
         */
        u32 cfgctxts;
+       /*
+        * number of ctxts available for PSM open
+        */
+       u32 freectxts;
 
        /*
         * hint that we should update pioavailshadow before
@@ -936,7 +945,9 @@ struct qib_devdata {
        /* chip address space used by 4k pio buffers */
        u32 align4k;
        /* size of each rcvegrbuffer */
-       u32 rcvegrbufsize;
+       u16 rcvegrbufsize;
+       /* log2 of above */
+       u16 rcvegrbufsize_shift;
        /* localbus width (1, 2,4,8,16,32) from config space  */
        u32 lbus_width;
        /* localbus speed in MHz */
index 23e584f..9a9047f 100644 (file)
@@ -279,10 +279,10 @@ bail:
  */
 static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
 {
-       const u32 chunk = etail / rcd->rcvegrbufs_perchunk;
-       const u32 idx =  etail % rcd->rcvegrbufs_perchunk;
+       const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
+       const u32 idx =  etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
 
-       return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize;
+       return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
 }
 
 /*
@@ -310,7 +310,6 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
                u32 opcode;
                u32 psn;
                int diff;
-               unsigned long flags;
 
                /* Sanity check packet */
                if (tlen < 24)
@@ -365,7 +364,6 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 
                        switch (qp->ibqp.qp_type) {
                        case IB_QPT_RC:
-                               spin_lock_irqsave(&qp->s_lock, flags);
                                ruc_res =
                                        qib_ruc_check_hdr(
                                                ibp, hdr,
@@ -373,11 +371,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
                                                qp,
                                                be32_to_cpu(ohdr->bth[0]));
                                if (ruc_res) {
-                                       spin_unlock_irqrestore(&qp->s_lock,
-                                                              flags);
                                        goto unlock;
                                }
-                               spin_unlock_irqrestore(&qp->s_lock, flags);
 
                                /* Only deal with RDMA Writes for now */
                                if (opcode <
@@ -547,6 +542,15 @@ move_along:
                        updegr = 0;
                }
        }
+       /*
+        * Notify qib_destroy_qp() if it is waiting
+        * for lookaside_qp to finish.
+        */
+       if (rcd->lookaside_qp) {
+               if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
+                       wake_up(&rcd->lookaside_qp->wait);
+               rcd->lookaside_qp = NULL;
+       }
 
        rcd->head = l;
        rcd->pkt_count += i;
index 2625303..7763366 100644 (file)
@@ -1284,6 +1284,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
        strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
        ctxt_fp(fp) = rcd;
        qib_stats.sps_ctxts++;
+       dd->freectxts++;
        ret = 0;
        goto bail;
 
@@ -1792,6 +1793,7 @@ static int qib_close(struct inode *in, struct file *fp)
                if (dd->pageshadow)
                        unlock_expected_tids(rcd);
                qib_stats.sps_ctxts--;
+               dd->freectxts--;
        }
 
        mutex_unlock(&qib_mutex);
index d8ca0a0..781a802 100644 (file)
@@ -3273,6 +3273,8 @@ static int init_6120_variables(struct qib_devdata *dd)
        /* we always allocate at least 2048 bytes for eager buffers */
        ret = ib_mtu_enum_to_int(qib_ibmtu);
        dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+       dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
 
        qib_6120_tidtemplate(dd);
 
index e1f9474..3f1d562 100644 (file)
@@ -4085,6 +4085,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
        /* we always allocate at least 2048 bytes for eager buffers */
        ret = ib_mtu_enum_to_int(qib_ibmtu);
        dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+       dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
 
        qib_7220_tidtemplate(dd);
 
index 5ea9ece..efd0a11 100644 (file)
@@ -2310,12 +2310,15 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
        val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
                QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
 
+       ppd->cpspec->ibcctrl_a = val;
        /*
         * Reset the PCS interface to the serdes (and also ibc, which is still
         * in reset from above).  Writes new value of ibcctrl_a as last step.
         */
        qib_7322_mini_pcs_reset(ppd);
        qib_write_kreg(dd, kr_scratch, 0ULL);
+       /* clear the linkinit cmds */
+       ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, LinkInitCmd);
 
        if (!ppd->cpspec->ibcctrl_b) {
                unsigned lse = ppd->link_speed_enabled;
@@ -2387,11 +2390,6 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
        qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl);
        spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
 
-       /* Hold the link state machine for mezz boards */
-       if (IS_QMH(dd) || IS_QME(dd))
-               qib_set_ib_7322_lstate(ppd, 0,
-                                      QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
-
        /* Also enable IBSTATUSCHG interrupt.  */
        val = qib_read_kreg_port(ppd, krp_errmask);
        qib_write_kreg_port(ppd, krp_errmask,
@@ -2853,9 +2851,8 @@ static irqreturn_t qib_7322intr(int irq, void *data)
                for (i = 0; i < dd->first_user_ctxt; i++) {
                        if (ctxtrbits & rmask) {
                                ctxtrbits &= ~rmask;
-                               if (dd->rcd[i]) {
+                               if (dd->rcd[i])
                                        qib_kreceive(dd->rcd[i], NULL, &npkts);
-                               }
                        }
                        rmask <<= 1;
                }
@@ -5230,6 +5227,8 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
                                     QIBL_IB_AUTONEG_INPROG)))
                        set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled);
                if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+                       struct qib_qsfp_data *qd =
+                               &ppd->cpspec->qsfp_data;
                        /* unlock the Tx settings, speed may change */
                        qib_write_kreg_port(ppd, krp_tx_deemph_override,
                                SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
@@ -5237,6 +5236,12 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
                        qib_cancel_sends(ppd);
                        /* on link down, ensure sane pcs state */
                        qib_7322_mini_pcs_reset(ppd);
+                       /* schedule the qsfp refresh which should turn the link
+                          off */
+                       if (ppd->dd->flags & QIB_HAS_QSFP) {
+                               qd->t_insert = get_jiffies_64();
+                               schedule_work(&qd->work);
+                       }
                        spin_lock_irqsave(&ppd->sdma_lock, flags);
                        if (__qib_sdma_running(ppd))
                                __qib_sdma_process_event(ppd,
@@ -5587,43 +5592,79 @@ static void qsfp_7322_event(struct work_struct *work)
        struct qib_qsfp_data *qd;
        struct qib_pportdata *ppd;
        u64 pwrup;
+       unsigned long flags;
        int ret;
        u32 le2;
 
        qd = container_of(work, struct qib_qsfp_data, work);
        ppd = qd->ppd;
-       pwrup = qd->t_insert + msecs_to_jiffies(QSFP_PWR_LAG_MSEC);
+       pwrup = qd->t_insert +
+               msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC);
 
-       /*
-        * Some QSFP's not only do not respond until the full power-up
-        * time, but may behave badly if we try. So hold off responding
-        * to insertion.
-        */
-       while (1) {
-               u64 now = get_jiffies_64();
-               if (time_after64(now, pwrup))
-                       break;
-               msleep(20);
-       }
-       ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
-       /*
-        * Need to change LE2 back to defaults if we couldn't
-        * read the cable type (to handle cable swaps), so do this
-        * even on failure to read cable information.  We don't
-        * get here for QME, so IS_QME check not needed here.
-        */
-       if (!ret && !ppd->dd->cspec->r1) {
-               if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
-                       le2 = LE2_QME;
-               else if (qd->cache.atten[1] >= qib_long_atten &&
-                        QSFP_IS_CU(qd->cache.tech))
-                       le2 = LE2_5m;
-               else
+       /* Delay for 20 msecs to allow the ModPrs resistor to set up */
+       mdelay(QSFP_MODPRS_LAG_MSEC);
+
+       if (!qib_qsfp_mod_present(ppd)) {
+               ppd->cpspec->qsfp_data.modpresent = 0;
+               /* Set the physical link to disabled */
+               qib_set_ib_7322_lstate(ppd, 0,
+                                      QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+               spin_lock_irqsave(&ppd->lflags_lock, flags);
+               ppd->lflags &= ~QIBL_LINKV;
+               spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+       } else {
+               /*
+                * Some QSFP's not only do not respond until the full power-up
+                * time, but may behave badly if we try. So hold off responding
+                * to insertion.
+                */
+               while (1) {
+                       u64 now = get_jiffies_64();
+                       if (time_after64(now, pwrup))
+                               break;
+                       msleep(20);
+               }
+
+               ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
+
+               /*
+                * Need to change LE2 back to defaults if we couldn't
+                * read the cable type (to handle cable swaps), so do this
+                * even on failure to read cable information.  We don't
+                * get here for QME, so IS_QME check not needed here.
+                */
+               if (!ret && !ppd->dd->cspec->r1) {
+                       if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
+                               le2 = LE2_QME;
+                       else if (qd->cache.atten[1] >= qib_long_atten &&
+                                QSFP_IS_CU(qd->cache.tech))
+                               le2 = LE2_5m;
+                       else
+                               le2 = LE2_DEFAULT;
+               } else
                        le2 = LE2_DEFAULT;
-       } else
-               le2 = LE2_DEFAULT;
-       ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
-       init_txdds_table(ppd, 0);
+               ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
+               /*
+                * We always change parameters, since we can choose
+                * values for cables without eeproms, and the cable may have
+                * changed from a cable with full or partial eeprom content
+                * to one with partial or no content.
+                */
+               init_txdds_table(ppd, 0);
+               /* The physical link is being re-enabled only when the
+                * previous state was DISABLED and the VALID bit is not
+                * set. This should only happen when the cable has been
+                * physically pulled. */
+               if (!ppd->cpspec->qsfp_data.modpresent &&
+                   (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) {
+                       ppd->cpspec->qsfp_data.modpresent = 1;
+                       qib_set_ib_7322_lstate(ppd, 0,
+                               QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
+                       spin_lock_irqsave(&ppd->lflags_lock, flags);
+                       ppd->lflags |= QIBL_LINKV;
+                       spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+               }
+       }
 }
 
 /*
@@ -5727,7 +5768,8 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
                        /* now change the IBC and serdes, overriding generic */
                        init_txdds_table(ppd, 1);
                        /* Re-enable the physical state machine on mezz boards
-                        * now that the correct settings have been set. */
+                        * now that the correct settings have been set.
+                        * QSFP boards are handled by the QSFP event handler */
                        if (IS_QMH(dd) || IS_QME(dd))
                                qib_set_ib_7322_lstate(ppd, 0,
                                            QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
@@ -6205,6 +6247,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
 
        /* we always allocate at least 2048 bytes for eager buffers */
        dd->rcvegrbufsize = max(mtu, 2048);
+       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+       dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
 
        qib_7322_tidtemplate(dd);
 
@@ -7147,7 +7191,8 @@ static void find_best_ent(struct qib_pportdata *ppd,
                }
        }
 
-       /* Lookup serdes setting by cable type and attenuation */
+       /* Active cables don't have attenuation so we only set SERDES
+        * settings to account for the attenuation of the board traces. */
        if (!override && QSFP_IS_ACTIVE(qd->tech)) {
                *sdr_dds = txdds_sdr + ppd->dd->board_atten;
                *ddr_dds = txdds_ddr + ppd->dd->board_atten;
@@ -7464,12 +7509,6 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
        u32 le_val, rxcaldone;
        int chan, chan_done = (1 << SERDES_CHANS) - 1;
 
-       /*
-        * Initialize the Tx DDS tables.  Also done every QSFP event,
-        * for adapters with QSFP
-        */
-       init_txdds_table(ppd, 0);
-
        /* Clear cmode-override, may be set from older driver */
        ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14);
 
@@ -7655,6 +7694,12 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
        /* VGA output common mode */
        ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2));
 
+       /*
+        * Initialize the Tx DDS tables.  Also done every QSFP event,
+        * for adapters with QSFP
+        */
+       init_txdds_table(ppd, 0);
+
        return 0;
 }
 
index a01f3fc..b093a0b 100644 (file)
@@ -183,6 +183,9 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
                rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
                        rcd->rcvegrbufs_perchunk - 1) /
                        rcd->rcvegrbufs_perchunk;
+               BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
+               rcd->rcvegrbufs_perchunk_shift =
+                       ilog2(rcd->rcvegrbufs_perchunk);
        }
        return rcd;
 }
@@ -398,6 +401,7 @@ static void enable_chip(struct qib_devdata *dd)
                if (rcd)
                        dd->f_rcvctrl(rcd->ppd, rcvmask, i);
        }
+       dd->freectxts = dd->cfgctxts - dd->first_user_ctxt;
 }
 
 static void verify_interrupt(unsigned long opaque)
@@ -581,10 +585,6 @@ int qib_init(struct qib_devdata *dd, int reinit)
                        continue;
                }
 
-               /* let link come up, and enable IBC */
-               spin_lock_irqsave(&ppd->lflags_lock, flags);
-               ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
-               spin_unlock_irqrestore(&ppd->lflags_lock, flags);
                portok++;
        }
 
index e16751f..7e7e16f 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <linux/err.h>
 #include <linux/vmalloc.h>
+#include <linux/jhash.h>
 
 #include "qib.h"
 
@@ -204,6 +205,13 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
                clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
 }
 
+static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
+{
+       return jhash_1word(qpn, dev->qp_rnd) &
+               (dev->qp_table_size - 1);
+}
+
+
 /*
  * Put the QP into the hash table.
  * The hash table holds a reference to the QP.
@@ -211,22 +219,23 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
 static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
 {
        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-       unsigned n = qp->ibqp.qp_num % dev->qp_table_size;
        unsigned long flags;
+       unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
 
        spin_lock_irqsave(&dev->qpt_lock, flags);
+       atomic_inc(&qp->refcount);
 
        if (qp->ibqp.qp_num == 0)
-               ibp->qp0 = qp;
+               rcu_assign_pointer(ibp->qp0, qp);
        else if (qp->ibqp.qp_num == 1)
-               ibp->qp1 = qp;
+               rcu_assign_pointer(ibp->qp1, qp);
        else {
                qp->next = dev->qp_table[n];
-               dev->qp_table[n] = qp;
+               rcu_assign_pointer(dev->qp_table[n], qp);
        }
-       atomic_inc(&qp->refcount);
 
        spin_unlock_irqrestore(&dev->qpt_lock, flags);
+       synchronize_rcu();
 }
 
 /*
@@ -236,29 +245,32 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
 static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
 {
        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-       struct qib_qp *q, **qpp;
+       unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
        unsigned long flags;
 
-       qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size];
-
        spin_lock_irqsave(&dev->qpt_lock, flags);
 
        if (ibp->qp0 == qp) {
-               ibp->qp0 = NULL;
                atomic_dec(&qp->refcount);
+               rcu_assign_pointer(ibp->qp0, NULL);
        } else if (ibp->qp1 == qp) {
-               ibp->qp1 = NULL;
                atomic_dec(&qp->refcount);
-       } else
+               rcu_assign_pointer(ibp->qp1, NULL);
+       } else {
+               struct qib_qp *q, **qpp;
+
+               qpp = &dev->qp_table[n];
                for (; (q = *qpp) != NULL; qpp = &q->next)
                        if (q == qp) {
-                               *qpp = qp->next;
-                               qp->next = NULL;
                                atomic_dec(&qp->refcount);
+                               rcu_assign_pointer(*qpp, qp->next);
+                               qp->next = NULL;
                                break;
                        }
+       }
 
        spin_unlock_irqrestore(&dev->qpt_lock, flags);
+       synchronize_rcu();
 }
 
 /**
@@ -280,21 +292,24 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
 
                if (!qib_mcast_tree_empty(ibp))
                        qp_inuse++;
-               if (ibp->qp0)
+               rcu_read_lock();
+               if (rcu_dereference(ibp->qp0))
                        qp_inuse++;
-               if (ibp->qp1)
+               if (rcu_dereference(ibp->qp1))
                        qp_inuse++;
+               rcu_read_unlock();
        }
 
        spin_lock_irqsave(&dev->qpt_lock, flags);
        for (n = 0; n < dev->qp_table_size; n++) {
                qp = dev->qp_table[n];
-               dev->qp_table[n] = NULL;
+               rcu_assign_pointer(dev->qp_table[n], NULL);
 
                for (; qp; qp = qp->next)
                        qp_inuse++;
        }
        spin_unlock_irqrestore(&dev->qpt_lock, flags);
+       synchronize_rcu();
 
        return qp_inuse;
 }
@@ -309,25 +324,28 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
  */
 struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
 {
-       struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-       unsigned long flags;
-       struct qib_qp *qp;
+       struct qib_qp *qp = NULL;
 
-       spin_lock_irqsave(&dev->qpt_lock, flags);
+       if (unlikely(qpn <= 1)) {
+               rcu_read_lock();
+               if (qpn == 0)
+                       qp = rcu_dereference(ibp->qp0);
+               else
+                       qp = rcu_dereference(ibp->qp1);
+       } else {
+               struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
+               unsigned n = qpn_hash(dev, qpn);
 
-       if (qpn == 0)
-               qp = ibp->qp0;
-       else if (qpn == 1)
-               qp = ibp->qp1;
-       else
-               for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp;
-                    qp = qp->next)
+               rcu_read_lock();
+               for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next)
                        if (qp->ibqp.qp_num == qpn)
                                break;
+       }
        if (qp)
-               atomic_inc(&qp->refcount);
+               if (unlikely(!atomic_inc_not_zero(&qp->refcount)))
+                       qp = NULL;
 
-       spin_unlock_irqrestore(&dev->qpt_lock, flags);
+       rcu_read_unlock();
        return qp;
 }
 
@@ -765,8 +783,10 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                }
        }
 
-       if (attr_mask & IB_QP_PATH_MTU)
+       if (attr_mask & IB_QP_PATH_MTU) {
                qp->path_mtu = pmtu;
+               qp->pmtu = ib_mtu_enum_to_int(pmtu);
+       }
 
        if (attr_mask & IB_QP_RETRY_CNT) {
                qp->s_retry_cnt = attr->retry_cnt;
@@ -781,8 +801,12 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        if (attr_mask & IB_QP_MIN_RNR_TIMER)
                qp->r_min_rnr_timer = attr->min_rnr_timer;
 
-       if (attr_mask & IB_QP_TIMEOUT)
+       if (attr_mask & IB_QP_TIMEOUT) {
                qp->timeout = attr->timeout;
+               qp->timeout_jiffies =
+                       usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+                               1000UL);
+       }
 
        if (attr_mask & IB_QP_QKEY)
                qp->qkey = attr->qkey;
@@ -1013,6 +1037,10 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_swq;
                }
+               RCU_INIT_POINTER(qp->next, NULL);
+               qp->timeout_jiffies =
+                       usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+                               1000UL);
                if (init_attr->srq)
                        sz = 0;
                else {
index 3374a52..e06c4ed 100644 (file)
@@ -273,18 +273,12 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
        int ret;
        int idx;
        u16 cks;
-       u32 mask;
        u8 peek[4];
 
        /* ensure sane contents on invalid reads, for cable swaps */
        memset(cp, 0, sizeof(*cp));
 
-       mask = QSFP_GPIO_MOD_PRS_N;
-       if (ppd->hw_pidx)
-               mask <<= QSFP_GPIO_PORT2_SHIFT;
-
-       ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
-       if (ret & mask) {
+       if (!qib_qsfp_mod_present(ppd)) {
                ret = -ENODEV;
                goto bail;
        }
@@ -444,6 +438,19 @@ const char * const qib_qsfp_devtech[16] = {
 
 static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
 
+int qib_qsfp_mod_present(struct qib_pportdata *ppd)
+{
+       u32 mask;
+       int ret;
+
+       mask = QSFP_GPIO_MOD_PRS_N <<
+               (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT);
+       ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
+
+       return !((ret & mask) >>
+                ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3));
+}
+
 /*
  * Initialize structures that control access to QSFP. Called once per port
  * on cards that support QSFP.
@@ -452,7 +459,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
                   void (*fevent)(struct work_struct *))
 {
        u32 mask, highs;
-       int pins;
 
        struct qib_devdata *dd = qd->ppd->dd;
 
@@ -480,8 +486,7 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
                mask <<= QSFP_GPIO_PORT2_SHIFT;
 
        /* Do not try to wait here. Better to let event handle it */
-       pins = dd->f_gpio_mod(dd, 0, 0, 0);
-       if (pins & mask)
+       if (!qib_qsfp_mod_present(qd->ppd))
                goto bail;
        /* We see a module, but it may be unwise to look yet. Just schedule */
        qd->t_insert = get_jiffies_64();
index c109bbd..46002a9 100644 (file)
@@ -34,6 +34,7 @@
 
 #define QSFP_DEV 0xA0
 #define QSFP_PWR_LAG_MSEC 2000
+#define QSFP_MODPRS_LAG_MSEC 20
 
 /*
  * Below are masks for various QSFP signals, for Port 1.
@@ -177,10 +178,12 @@ struct qib_qsfp_data {
        struct work_struct work;
        struct qib_qsfp_cache cache;
        u64 t_insert;
+       u8 modpresent;
 };
 
 extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
                                  struct qib_qsfp_cache *cp);
+extern int qib_qsfp_mod_present(struct qib_pportdata *ppd);
 extern void qib_qsfp_init(struct qib_qsfp_data *qd,
                          void (*fevent)(struct work_struct *));
 extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
index eca0c41..afaf4ac 100644 (file)
@@ -59,8 +59,7 @@ static void start_timer(struct qib_qp *qp)
        qp->s_flags |= QIB_S_TIMER;
        qp->s_timer.function = rc_timeout;
        /* 4.096 usec. * (1 << qp->timeout) */
-       qp->s_timer.expires = jiffies +
-               usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL);
+       qp->s_timer.expires = jiffies + qp->timeout_jiffies;
        add_timer(&qp->s_timer);
 }
 
@@ -239,7 +238,7 @@ int qib_make_rc_req(struct qib_qp *qp)
        u32 len;
        u32 bth0;
        u32 bth2;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+       u32 pmtu = qp->pmtu;
        char newreq;
        unsigned long flags;
        int ret = 0;
@@ -1519,9 +1518,7 @@ read_middle:
                 * 4.096 usec. * (1 << qp->timeout)
                 */
                qp->s_flags |= QIB_S_TIMER;
-               mod_timer(&qp->s_timer, jiffies +
-                       usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-                                        1000UL));
+               mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
                if (qp->s_flags & QIB_S_WAIT_ACK) {
                        qp->s_flags &= ~QIB_S_WAIT_ACK;
                        qib_schedule_send(qp);
@@ -1732,7 +1729,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
                 * same request.
                 */
                offset = ((psn - e->psn) & QIB_PSN_MASK) *
-                       ib_mtu_enum_to_int(qp->path_mtu);
+                       qp->pmtu;
                len = be32_to_cpu(reth->length);
                if (unlikely(offset + len != e->rdma_sge.sge_length))
                        goto unlock_done;
@@ -1876,7 +1873,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
        u32 psn;
        u32 pad;
        struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+       u32 pmtu = qp->pmtu;
        int diff;
        struct ib_reth *reth;
        unsigned long flags;
@@ -1892,10 +1889,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
        }
 
        opcode = be32_to_cpu(ohdr->bth[0]);
-       spin_lock_irqsave(&qp->s_lock, flags);
        if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
-               goto sunlock;
-       spin_unlock_irqrestore(&qp->s_lock, flags);
+               return;
 
        psn = be32_to_cpu(ohdr->bth[2]);
        opcode >>= 24;
@@ -1955,8 +1950,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
                break;
        }
 
-       memset(&wc, 0, sizeof wc);
-
        if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
                qp->r_flags |= QIB_R_COMM_EST;
                if (qp->ibqp.event_handler) {
@@ -2009,16 +2002,19 @@ send_middle:
                        goto rnr_nak;
                qp->r_rcv_len = 0;
                if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               /* FALLTHROUGH */
+                       goto no_immediate_data;
+               /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
        case OP(SEND_LAST_WITH_IMMEDIATE):
 send_last_imm:
                wc.ex.imm_data = ohdr->u.imm_data;
                hdrsize += 4;
                wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
+               goto send_last;
        case OP(SEND_LAST):
        case OP(RDMA_WRITE_LAST):
+no_immediate_data:
+               wc.wc_flags = 0;
+               wc.ex.imm_data = 0;
 send_last:
                /* Get the number of bytes the message was padded by. */
                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
@@ -2051,6 +2047,12 @@ send_last:
                wc.src_qp = qp->remote_qpn;
                wc.slid = qp->remote_ah_attr.dlid;
                wc.sl = qp->remote_ah_attr.sl;
+               /* zero fields that are N/A */
+               wc.vendor_err = 0;
+               wc.pkey_index = 0;
+               wc.dlid_path_bits = 0;
+               wc.port_num = 0;
+               wc.csum_ok = 0;
                /* Signal completion event if the solicited bit is set. */
                qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                             (ohdr->bth[0] &
@@ -2089,7 +2091,7 @@ send_last:
                if (opcode == OP(RDMA_WRITE_FIRST))
                        goto send_middle;
                else if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto send_last;
+                       goto no_immediate_data;
                ret = qib_get_rwqe(qp, 1);
                if (ret < 0)
                        goto nack_op_err;
index eb78d93..b4b37e4 100644 (file)
@@ -260,12 +260,15 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
 
 /*
  *
- * This should be called with the QP s_lock held.
+ * This should be called with the QP r_lock held.
+ *
+ * The s_lock will be acquired around the qib_migrate_qp() call.
  */
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
                      int has_grh, struct qib_qp *qp, u32 bth0)
 {
        __be64 guid;
+       unsigned long flags;
 
        if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
                if (!has_grh) {
@@ -295,7 +298,9 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
                if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
                    ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
                        goto err;
+               spin_lock_irqsave(&qp->s_lock, flags);
                qib_migrate_qp(qp);
+               spin_unlock_irqrestore(&qp->s_lock, flags);
        } else {
                if (!has_grh) {
                        if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
index c3ec8ef..d623593 100644 (file)
@@ -107,6 +107,11 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
        u32 sz;
        struct ib_srq *ret;
 
+       if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+               ret = ERR_PTR(-ENOSYS);
+               goto done;
+       }
+
        if (srq_init_attr->attr.max_sge == 0 ||
            srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
            srq_init_attr->attr.max_wr == 0 ||
index 14d129d..78fbd56 100644 (file)
@@ -515,8 +515,7 @@ static ssize_t show_nfreectxts(struct device *device,
        struct qib_devdata *dd = dd_from_dev(dev);
 
        /* Return the number of free user ports (contexts) available. */
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
-               dd->first_user_ctxt - (u32)qib_stats.sps_ctxts);
+       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
 }
 
 static ssize_t show_serial(struct device *device,
index 32ccf3c..847e7af 100644 (file)
@@ -51,7 +51,7 @@ int qib_make_uc_req(struct qib_qp *qp)
        u32 hwords;
        u32 bth0;
        u32 len;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+       u32 pmtu = qp->pmtu;
        int ret = 0;
 
        spin_lock_irqsave(&qp->s_lock, flags);
@@ -243,13 +243,12 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
                int has_grh, void *data, u32 tlen, struct qib_qp *qp)
 {
        struct qib_other_headers *ohdr;
-       unsigned long flags;
        u32 opcode;
        u32 hdrsize;
        u32 psn;
        u32 pad;
        struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+       u32 pmtu = qp->pmtu;
        struct ib_reth *reth;
        int ret;
 
@@ -263,14 +262,11 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
        }
 
        opcode = be32_to_cpu(ohdr->bth[0]);
-       spin_lock_irqsave(&qp->s_lock, flags);
        if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
-               goto sunlock;
-       spin_unlock_irqrestore(&qp->s_lock, flags);
+               return;
 
        psn = be32_to_cpu(ohdr->bth[2]);
        opcode >>= 24;
-       memset(&wc, 0, sizeof wc);
 
        /* Compare the PSN verses the expected PSN. */
        if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
@@ -370,7 +366,7 @@ send_first:
                }
                qp->r_rcv_len = 0;
                if (opcode == OP(SEND_ONLY))
-                       goto send_last;
+                       goto no_immediate_data;
                else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
                        goto send_last_imm;
                /* FALLTHROUGH */
@@ -389,8 +385,11 @@ send_last_imm:
                wc.ex.imm_data = ohdr->u.imm_data;
                hdrsize += 4;
                wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
+               goto send_last;
        case OP(SEND_LAST):
+no_immediate_data:
+               wc.ex.imm_data = 0;
+               wc.wc_flags = 0;
 send_last:
                /* Get the number of bytes the message was padded by. */
                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
@@ -418,6 +417,12 @@ last_imm:
                wc.src_qp = qp->remote_qpn;
                wc.slid = qp->remote_ah_attr.dlid;
                wc.sl = qp->remote_ah_attr.sl;
+               /* zero fields that are N/A */
+               wc.vendor_err = 0;
+               wc.pkey_index = 0;
+               wc.dlid_path_bits = 0;
+               wc.port_num = 0;
+               wc.csum_ok = 0;
                /* Signal completion event if the solicited bit is set. */
                qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                             (ohdr->bth[0] &
@@ -546,6 +551,4 @@ op_err:
        qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
        return;
 
-sunlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
 }
index 9fab404..9627cb7 100644 (file)
 #include <linux/utsname.h>
 #include <linux/rculist.h>
 #include <linux/mm.h>
+#include <linux/random.h>
 
 #include "qib.h"
 #include "qib_common.h"
 
-static unsigned int ib_qib_qp_table_size = 251;
+static unsigned int ib_qib_qp_table_size = 256;
 module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
@@ -659,17 +660,25 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                if (atomic_dec_return(&mcast->refcount) <= 1)
                        wake_up(&mcast->wait);
        } else {
-               qp = qib_lookup_qpn(ibp, qp_num);
-               if (!qp)
-                       goto drop;
+               if (rcd->lookaside_qp) {
+                       if (rcd->lookaside_qpn != qp_num) {
+                               if (atomic_dec_and_test(
+                                       &rcd->lookaside_qp->refcount))
+                                       wake_up(
+                                        &rcd->lookaside_qp->wait);
+                                       rcd->lookaside_qp = NULL;
+                               }
+               }
+               if (!rcd->lookaside_qp) {
+                       qp = qib_lookup_qpn(ibp, qp_num);
+                       if (!qp)
+                               goto drop;
+                       rcd->lookaside_qp = qp;
+                       rcd->lookaside_qpn = qp_num;
+               } else
+                       qp = rcd->lookaside_qp;
                ibp->n_unicast_rcv++;
                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
-               /*
-                * Notify qib_destroy_qp() if it is waiting
-                * for us to finish.
-                */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
        }
        return;
 
@@ -1974,6 +1983,8 @@ static void init_ibport(struct qib_pportdata *ppd)
        ibp->z_excessive_buffer_overrun_errors =
                cntrs.excessive_buffer_overrun_errors;
        ibp->z_vl15_dropped = cntrs.vl15_dropped;
+       RCU_INIT_POINTER(ibp->qp0, NULL);
+       RCU_INIT_POINTER(ibp->qp1, NULL);
 }
 
 /**
@@ -1990,12 +2001,15 @@ int qib_register_ib_device(struct qib_devdata *dd)
        int ret;
 
        dev->qp_table_size = ib_qib_qp_table_size;
-       dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table,
+       get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
+       dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table,
                                GFP_KERNEL);
        if (!dev->qp_table) {
                ret = -ENOMEM;
                goto err_qpt;
        }
+       for (i = 0; i < dev->qp_table_size; i++)
+               RCU_INIT_POINTER(dev->qp_table[i], NULL);
 
        for (i = 0; i < dd->num_pports; i++)
                init_ibport(ppd + i);
index 95e5b47..0c19ef0 100644 (file)
@@ -485,6 +485,7 @@ struct qib_qp {
        u8 alt_timeout;         /* Alternate path timeout for this QP */
        u8 port_num;
        enum ib_mtu path_mtu;
+       u32 pmtu;               /* decoded from path_mtu */
        u32 remote_qpn;
        u32 qkey;               /* QKEY for this QP (for UD or RD) */
        u32 s_size;             /* send work queue size */
@@ -495,6 +496,7 @@ struct qib_qp {
        u32 s_last;             /* last completed entry */
        u32 s_ssn;              /* SSN of tail entry */
        u32 s_lsn;              /* limit sequence number (credit) */
+       unsigned long timeout_jiffies;  /* computed from timeout */
        struct qib_swqe *s_wq;  /* send work queue */
        struct qib_swqe *s_wqe;
        struct qib_rq r_rq;             /* receive work queue */
@@ -723,7 +725,8 @@ struct qib_ibdev {
        dma_addr_t pio_hdrs_phys;
        /* list of QPs waiting for RNR timer */
        spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
-       unsigned qp_table_size; /* size of the hash table */
+       u32 qp_table_size; /* size of the hash table */
+       u32 qp_rnd; /* random bytes for hash */
        spinlock_t qpt_lock;
 
        u32 n_piowait;
index 39913a0..fe48677 100644 (file)
@@ -84,7 +84,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
        ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
        for (i = 0; i < frags; ++i)
-               ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+               ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 }
 
 static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
@@ -183,7 +183,7 @@ partial_error:
        ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
        for (; i > 0; --i)
-               ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
+               ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
 
        dev_kfree_skb_any(skb);
        return NULL;
@@ -1496,6 +1496,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_srq_init_attr srq_init_attr = {
+               .srq_type = IB_SRQT_BASIC,
                .attr = {
                        .max_wr  = ipoib_recvq_size,
                        .max_sge = max_sge
index 86eae22..0e2fe46 100644 (file)
@@ -212,16 +212,15 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
                   gid_buf, path.pathrec.dlid ? "yes" : "no");
 
        if (path.pathrec.dlid) {
-               rate = ib_rate_to_mult(path.pathrec.rate) * 25;
+               rate = ib_rate_to_mbps(path.pathrec.rate);
 
                seq_printf(file,
                           "  DLID:     0x%04x\n"
                           "  SL: %12d\n"
-                          "  rate: %*d%s Gb/sec\n",
+                          "  rate: %8d.%d Gb/sec\n",
                           be16_to_cpu(path.pathrec.dlid),
                           path.pathrec.sl,
-                          10 - ((rate % 10) ? 2 : 0),
-                          rate / 10, rate % 10 ? ".5" : "");
+                          rate / 1000, rate % 1000);
        }
 
        seq_putc(file, '\n');
index 1ad1f60..869a2c2 100644 (file)
@@ -484,7 +484,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 
        mlx4_mtt_cleanup(dev, &eq->mtt);
        for (i = 0; i < npages; ++i)
-               pci_free_consistent(dev->pdev, PAGE_SIZE,
+               dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
                                    eq->page_list[i].buf,
                                    eq->page_list[i].map);
 
index 7eb8ba8..875838b 100644 (file)
@@ -204,6 +204,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_MCG_OFFSET           0x63
 #define QUERY_DEV_CAP_RSVD_PD_OFFSET           0x64
 #define QUERY_DEV_CAP_MAX_PD_OFFSET            0x65
+#define QUERY_DEV_CAP_RSVD_XRC_OFFSET          0x66
+#define QUERY_DEV_CAP_MAX_XRC_OFFSET           0x67
 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET      0x68
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET   0x80
 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET      0x82
@@ -318,6 +320,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev_cap->reserved_pds = field >> 4;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
        dev_cap->max_pds = 1 << (field & 0x3f);
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
+       dev_cap->reserved_xrcds = field >> 4;
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
+       dev_cap->max_xrcds = 1 << (field & 0x1f);
 
        MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
        dev_cap->rdmarc_entry_sz = size;
index 1e8ecc3..bf5ec22 100644 (file)
@@ -93,6 +93,8 @@ struct mlx4_dev_cap {
        int max_mcgs;
        int reserved_pds;
        int max_pds;
+       int reserved_xrcds;
+       int max_xrcds;
        int qpc_entry_sz;
        int rdmarc_entry_sz;
        int altc_entry_sz;
index f0ee35d..94bbc85 100644 (file)
@@ -96,6 +96,8 @@ MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
 static int log_num_vlan;
 module_param_named(log_num_vlan, log_num_vlan, int, 0444);
 MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
+/* Log2 max number of VLANs per ETH port (0-7) */
+#define MLX4_LOG_NUM_VLANS 7
 
 static int use_prio;
 module_param_named(use_prio, use_prio, bool, 0444);
@@ -220,6 +222,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
        dev->caps.reserved_uars      = dev_cap->reserved_uars;
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
+       dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+                                       dev_cap->reserved_xrcds : 0;
+       dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+                                       dev_cap->max_xrcds : 0;
        dev->caps.mtt_entry_sz       = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
@@ -230,7 +236,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
 
        dev->caps.log_num_macs  = log_num_mac;
-       dev->caps.log_num_vlans = log_num_vlan;
+       dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
        dev->caps.log_num_prios = use_prio ? 3 : 0;
 
        for (i = 1; i <= dev->caps.num_ports; ++i) {
@@ -912,11 +918,18 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
                goto err_kar_unmap;
        }
 
+       err = mlx4_init_xrcd_table(dev);
+       if (err) {
+               mlx4_err(dev, "Failed to initialize "
+                        "reliable connection domain table, aborting.\n");
+               goto err_pd_table_free;
+       }
+
        err = mlx4_init_mr_table(dev);
        if (err) {
                mlx4_err(dev, "Failed to initialize "
                         "memory region table, aborting.\n");
-               goto err_pd_table_free;
+               goto err_xrcd_table_free;
        }
 
        err = mlx4_init_eq_table(dev);
@@ -998,6 +1011,13 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
                                  "ib capabilities (%d). Continuing with "
                                  "caps = 0\n", port, err);
                dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+               err = mlx4_check_ext_port_caps(dev, port);
+               if (err)
+                       mlx4_warn(dev, "failed to get port %d extended "
+                                 "port capabilities support info (%d)."
+                                 " Assuming not supported\n", port, err);
+
                err = mlx4_SET_PORT(dev, port);
                if (err) {
                        mlx4_err(dev, "Failed to set port %d, aborting\n",
@@ -1033,6 +1053,9 @@ err_eq_table_free:
 err_mr_table_free:
        mlx4_cleanup_mr_table(dev);
 
+err_xrcd_table_free:
+       mlx4_cleanup_xrcd_table(dev);
+
 err_pd_table_free:
        mlx4_cleanup_pd_table(dev);
 
@@ -1355,6 +1378,7 @@ err_port:
        mlx4_cmd_use_polling(dev);
        mlx4_cleanup_eq_table(dev);
        mlx4_cleanup_mr_table(dev);
+       mlx4_cleanup_xrcd_table(dev);
        mlx4_cleanup_pd_table(dev);
        mlx4_cleanup_uar_table(dev);
 
@@ -1416,6 +1440,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
                mlx4_cmd_use_polling(dev);
                mlx4_cleanup_eq_table(dev);
                mlx4_cleanup_mr_table(dev);
+               mlx4_cleanup_xrcd_table(dev);
                mlx4_cleanup_pd_table(dev);
 
                iounmap(priv->kar);
@@ -1489,10 +1514,9 @@ static int __init mlx4_verify_params(void)
                return -1;
        }
 
-       if ((log_num_vlan < 0) || (log_num_vlan > 7)) {
-               pr_warning("mlx4_core: bad num_vlan: %d\n", log_num_vlan);
-               return -1;
-       }
+       if (log_num_vlan != 0)
+               pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+                          MLX4_LOG_NUM_VLANS);
 
        if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
                pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
index a2fcd84..5dfa68f 100644 (file)
@@ -335,6 +335,7 @@ struct mlx4_priv {
        struct mlx4_cmd         cmd;
 
        struct mlx4_bitmap      pd_bitmap;
+       struct mlx4_bitmap      xrcd_bitmap;
        struct mlx4_uar_table   uar_table;
        struct mlx4_mr_table    mr_table;
        struct mlx4_cq_table    cq_table;
@@ -384,6 +385,7 @@ int mlx4_alloc_eq_table(struct mlx4_dev *dev);
 void mlx4_free_eq_table(struct mlx4_dev *dev);
 
 int mlx4_init_pd_table(struct mlx4_dev *dev);
+int mlx4_init_xrcd_table(struct mlx4_dev *dev);
 int mlx4_init_uar_table(struct mlx4_dev *dev);
 int mlx4_init_mr_table(struct mlx4_dev *dev);
 int mlx4_init_eq_table(struct mlx4_dev *dev);
@@ -393,6 +395,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev);
 int mlx4_init_mcg_table(struct mlx4_dev *dev);
 
 void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
 void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
 void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
@@ -450,6 +453,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
+int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port);
 
 int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
                          enum mlx4_protocol prot, enum mlx4_steer_type steer);
index 9c188bd..ab639cf 100644 (file)
@@ -139,7 +139,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
 
        buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
                              GFP_KERNEL);
-       buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+       buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
                                  GFP_KERNEL);
        if (!buddy->bits || !buddy->num_free)
                goto err_out;
index 1286b88..3736163 100644 (file)
@@ -61,6 +61,24 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
 }
 EXPORT_SYMBOL_GPL(mlx4_pd_free);
 
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap);
+       if (*xrcdn == -1)
+               return -ENOMEM;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
+
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+       mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_free);
+
 int mlx4_init_pd_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
@@ -74,6 +92,18 @@ void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
        mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
 }
 
+int mlx4_init_xrcd_table(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16),
+                               (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0);
+}
+
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
+{
+       mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap);
+}
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
 {
index 609e0ec..881592e 100644 (file)
@@ -148,22 +148,26 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap)
 
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
                err = mlx4_uc_steer_add(dev, port, mac, qpn, 1);
-               if (!err) {
-                       entry = kmalloc(sizeof *entry, GFP_KERNEL);
-                       if (!entry) {
-                               mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-                               return -ENOMEM;
-                       }
-                       entry->mac = mac;
-                       err = radix_tree_insert(&info->mac_tree, *qpn, entry);
-                       if (err) {
-                               mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-                               return err;
-                       }
-               } else
+               if (err)
                        return err;
+
+               entry = kmalloc(sizeof *entry, GFP_KERNEL);
+               if (!entry) {
+                       mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
+                       return -ENOMEM;
+               }
+
+               entry->mac = mac;
+               err = radix_tree_insert(&info->mac_tree, *qpn, entry);
+               if (err) {
+                       kfree(entry);
+                       mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
+                       return err;
+               }
        }
+
        mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+
        mutex_lock(&table->mutex);
        for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
                if (free < 0 && !table->refs[i]) {
@@ -464,6 +468,48 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
        return err;
 }
 
+int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port)
+{
+       struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
+       u8 *inbuf, *outbuf;
+       int err, packet_error;
+
+       inmailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(inmailbox))
+               return PTR_ERR(inmailbox);
+
+       outmailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(outmailbox)) {
+               mlx4_free_cmd_mailbox(dev, inmailbox);
+               return PTR_ERR(outmailbox);
+       }
+
+       inbuf = inmailbox->buf;
+       outbuf = outmailbox->buf;
+       memset(inbuf, 0, 256);
+       memset(outbuf, 0, 256);
+       inbuf[0] = 1;
+       inbuf[1] = 1;
+       inbuf[2] = 1;
+       inbuf[3] = 1;
+
+       *(__be16 *) (&inbuf[16]) = MLX4_ATTR_EXTENDED_PORT_INFO;
+       *(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
+
+       err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
+                          MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+
+       packet_error = be16_to_cpu(*(__be16 *) (outbuf + 4));
+
+       dev->caps.ext_port_cap[port] = (!err && !packet_error) ?
+                                      MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO
+                                      : 0;
+
+       mlx4_free_cmd_mailbox(dev, inmailbox);
+       mlx4_free_cmd_mailbox(dev, outmailbox);
+       return err;
+}
+
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 {
        struct mlx4_cmd_mailbox *mailbox;
index ec9350e..51c5389 100644 (file)
@@ -280,6 +280,9 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
         * We reserve 2 extra QPs per port for the special QPs.  The
         * block of special QPs must be aligned to a multiple of 8, so
         * round up.
+        *
+        * We also reserve the MSB of the 24-bit QP number to indicate
+        * that a QP is an XRC QP.
         */
        dev->caps.sqp_start =
                ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
index 3b07b80..a20b141 100644 (file)
 struct mlx4_srq_context {
        __be32                  state_logsize_srqn;
        u8                      logstride;
-       u8                      reserved1[3];
-       u8                      pg_offset;
-       u8                      reserved2[3];
-       u32                     reserved3;
+       u8                      reserved1;
+       __be16                  xrcd;
+       __be32                  pg_offset_cqn;
+       u32                     reserved2;
        u8                      log_page_size;
-       u8                      reserved4[2];
+       u8                      reserved3[2];
        u8                      mtt_base_addr_h;
        __be32                  mtt_base_addr_l;
        __be32                  pd;
        __be16                  limit_watermark;
        __be16                  wqe_cnt;
-       u16                     reserved5;
+       u16                     reserved4;
        __be16                  wqe_counter;
-       u32                     reserved6;
+       u32                     reserved5;
        __be64                  db_rec_addr;
 };
 
@@ -109,8 +109,8 @@ static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
                            MLX4_CMD_TIME_CLASS_A);
 }
 
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
-                  u64 db_rec, struct mlx4_srq *srq)
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+                  struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
 {
        struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
        struct mlx4_cmd_mailbox *mailbox;
@@ -148,6 +148,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
        srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
                                                      srq->srqn);
        srq_context->logstride          = srq->wqe_shift - 4;
+       srq_context->xrcd               = cpu_to_be16(xrcd);
+       srq_context->pg_offset_cqn      = cpu_to_be32(cqn & 0xffffff);
        srq_context->log_page_size      = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
        mtt_addr = mlx4_mtt_addr(dev, mtt);
index 53ef894..ff3ccd5 100644 (file)
@@ -61,6 +61,7 @@ enum {
        MLX4_DEV_CAP_FLAG_RC            = 1LL <<  0,
        MLX4_DEV_CAP_FLAG_UC            = 1LL <<  1,
        MLX4_DEV_CAP_FLAG_UD            = 1LL <<  2,
+       MLX4_DEV_CAP_FLAG_XRC           = 1LL <<  3,
        MLX4_DEV_CAP_FLAG_SRQ           = 1LL <<  6,
        MLX4_DEV_CAP_FLAG_IPOIB_CSUM    = 1LL <<  7,
        MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL <<  8,
@@ -82,6 +83,12 @@ enum {
        MLX4_DEV_CAP_FLAG_COUNTERS      = 1LL << 48
 };
 
+#define MLX4_ATTR_EXTENDED_PORT_INFO   cpu_to_be16(0xff90)
+
+enum {
+       MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO        = 1 <<  0
+};
+
 enum {
        MLX4_BMME_FLAG_LOCAL_INV        = 1 <<  6,
        MLX4_BMME_FLAG_REMOTE_INV       = 1 <<  7,
@@ -256,6 +263,8 @@ struct mlx4_caps {
        int                     num_qp_per_mgm;
        int                     num_pds;
        int                     reserved_pds;
+       int                     max_xrcds;
+       int                     reserved_xrcds;
        int                     mtt_entry_sz;
        u32                     max_msg_sz;
        u32                     page_size_cap;
@@ -276,6 +285,7 @@ struct mlx4_caps {
        u32                     port_mask;
        enum mlx4_port_type     possible_type[MLX4_MAX_PORTS + 1];
        u32                     max_counters;
+       u8                      ext_port_cap[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_buf_list {
@@ -499,6 +509,8 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 
 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
@@ -538,8 +550,8 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
-                  u64 db_rec, struct mlx4_srq *srq);
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
+                  struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq);
 void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
 int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
 int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark);
index 4001c82..48cc4cb 100644 (file)
@@ -75,6 +75,7 @@ enum {
        MLX4_QP_ST_UC                           = 0x1,
        MLX4_QP_ST_RD                           = 0x2,
        MLX4_QP_ST_UD                           = 0x3,
+       MLX4_QP_ST_XRC                          = 0x6,
        MLX4_QP_ST_MLX                          = 0x7
 };
 
@@ -137,7 +138,7 @@ struct mlx4_qp_context {
        __be32                  ssn;
        __be32                  params2;
        __be32                  rnr_nextrecvpsn;
-       __be32                  srcd;
+       __be32                  xrcd;
        __be32                  cqn_recv;
        __be64                  db_rec_addr;
        __be32                  qkey;
index fe5b051..81aba3a 100644 (file)
@@ -81,7 +81,11 @@ enum {
        IB_USER_VERBS_CMD_MODIFY_SRQ,
        IB_USER_VERBS_CMD_QUERY_SRQ,
        IB_USER_VERBS_CMD_DESTROY_SRQ,
-       IB_USER_VERBS_CMD_POST_SRQ_RECV
+       IB_USER_VERBS_CMD_POST_SRQ_RECV,
+       IB_USER_VERBS_CMD_OPEN_XRCD,
+       IB_USER_VERBS_CMD_CLOSE_XRCD,
+       IB_USER_VERBS_CMD_CREATE_XSRQ,
+       IB_USER_VERBS_CMD_OPEN_QP
 };
 
 /*
@@ -222,6 +226,21 @@ struct ib_uverbs_dealloc_pd {
        __u32 pd_handle;
 };
 
+struct ib_uverbs_open_xrcd {
+       __u64 response;
+       __u32 fd;
+       __u32 oflags;
+       __u64 driver_data[0];
+};
+
+struct ib_uverbs_open_xrcd_resp {
+       __u32 xrcd_handle;
+};
+
+struct ib_uverbs_close_xrcd {
+       __u32 xrcd_handle;
+};
+
 struct ib_uverbs_reg_mr {
        __u64 response;
        __u64 start;
@@ -404,6 +423,17 @@ struct ib_uverbs_create_qp {
        __u64 driver_data[0];
 };
 
+struct ib_uverbs_open_qp {
+       __u64 response;
+       __u64 user_handle;
+       __u32 pd_handle;
+       __u32 qpn;
+       __u8  qp_type;
+       __u8  reserved[7];
+       __u64 driver_data[0];
+};
+
+/* also used for open response */
 struct ib_uverbs_create_qp_resp {
        __u32 qp_handle;
        __u32 qpn;
@@ -648,11 +678,25 @@ struct ib_uverbs_create_srq {
        __u64 driver_data[0];
 };
 
+struct ib_uverbs_create_xsrq {
+       __u64 response;
+       __u64 user_handle;
+       __u32 srq_type;
+       __u32 pd_handle;
+       __u32 max_wr;
+       __u32 max_sge;
+       __u32 srq_limit;
+       __u32 reserved;
+       __u32 xrcd_handle;
+       __u32 cq_handle;
+       __u64 driver_data[0];
+};
+
 struct ib_uverbs_create_srq_resp {
        __u32 srq_handle;
        __u32 max_wr;
        __u32 max_sge;
-       __u32 reserved;
+       __u32 srqn;
 };
 
 struct ib_uverbs_modify_srq {
index 228be3e..bf5daaf 100644 (file)
@@ -112,6 +112,7 @@ enum ib_device_cap_flags {
         */
        IB_DEVICE_UD_IP_CSUM            = (1<<18),
        IB_DEVICE_UD_TSO                = (1<<19),
+       IB_DEVICE_XRC                   = (1<<20),
        IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<21),
        IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
 };
@@ -207,6 +208,7 @@ enum ib_port_cap_flags {
        IB_PORT_SM_DISABLED                     = 1 << 10,
        IB_PORT_SYS_IMAGE_GUID_SUP              = 1 << 11,
        IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP       = 1 << 12,
+       IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14,
        IB_PORT_CM_SUP                          = 1 << 16,
        IB_PORT_SNMP_TUNNEL_SUP                 = 1 << 17,
        IB_PORT_REINIT_SUP                      = 1 << 18,
@@ -415,7 +417,15 @@ enum ib_rate {
        IB_RATE_40_GBPS  = 7,
        IB_RATE_60_GBPS  = 8,
        IB_RATE_80_GBPS  = 9,
-       IB_RATE_120_GBPS = 10
+       IB_RATE_120_GBPS = 10,
+       IB_RATE_14_GBPS  = 11,
+       IB_RATE_56_GBPS  = 12,
+       IB_RATE_112_GBPS = 13,
+       IB_RATE_168_GBPS = 14,
+       IB_RATE_25_GBPS  = 15,
+       IB_RATE_100_GBPS = 16,
+       IB_RATE_200_GBPS = 17,
+       IB_RATE_300_GBPS = 18
 };
 
 /**
@@ -426,6 +436,13 @@ enum ib_rate {
  */
 int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
 
+/**
+ * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
+ * For example, IB_RATE_2_5_GBPS will be converted to 2500.
+ * @rate: rate to convert.
+ */
+int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+
 /**
  * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
  * enum.
@@ -522,6 +539,11 @@ enum ib_cq_notify_flags {
        IB_CQ_REPORT_MISSED_EVENTS      = 1 << 2,
 };
 
+enum ib_srq_type {
+       IB_SRQT_BASIC,
+       IB_SRQT_XRC
+};
+
 enum ib_srq_attr_mask {
        IB_SRQ_MAX_WR   = 1 << 0,
        IB_SRQ_LIMIT    = 1 << 1,
@@ -537,6 +559,14 @@ struct ib_srq_init_attr {
        void                  (*event_handler)(struct ib_event *, void *);
        void                   *srq_context;
        struct ib_srq_attr      attr;
+       enum ib_srq_type        srq_type;
+
+       union {
+               struct {
+                       struct ib_xrcd *xrcd;
+                       struct ib_cq   *cq;
+               } xrc;
+       } ext;
 };
 
 struct ib_qp_cap {
@@ -565,7 +595,11 @@ enum ib_qp_type {
        IB_QPT_UC,
        IB_QPT_UD,
        IB_QPT_RAW_IPV6,
-       IB_QPT_RAW_ETHERTYPE
+       IB_QPT_RAW_ETHERTYPE,
+       /* Save 8 for RAW_PACKET */
+       IB_QPT_XRC_INI = 9,
+       IB_QPT_XRC_TGT,
+       IB_QPT_MAX
 };
 
 enum ib_qp_create_flags {
@@ -579,6 +613,7 @@ struct ib_qp_init_attr {
        struct ib_cq           *send_cq;
        struct ib_cq           *recv_cq;
        struct ib_srq          *srq;
+       struct ib_xrcd         *xrcd;     /* XRC TGT QPs only */
        struct ib_qp_cap        cap;
        enum ib_sig_type        sq_sig_type;
        enum ib_qp_type         qp_type;
@@ -586,6 +621,13 @@ struct ib_qp_init_attr {
        u8                      port_num; /* special QP types only */
 };
 
+struct ib_qp_open_attr {
+       void                  (*event_handler)(struct ib_event *, void *);
+       void                   *qp_context;
+       u32                     qp_num;
+       enum ib_qp_type         qp_type;
+};
+
 enum ib_rnr_timeout {
        IB_RNR_TIMER_655_36 =  0,
        IB_RNR_TIMER_000_01 =  1,
@@ -770,6 +812,7 @@ struct ib_send_wr {
                        u32                             rkey;
                } fast_reg;
        } wr;
+       u32                     xrc_remote_srq_num;     /* XRC TGT QPs only */
 };
 
 struct ib_recv_wr {
@@ -831,6 +874,7 @@ struct ib_ucontext {
        struct list_head        qp_list;
        struct list_head        srq_list;
        struct list_head        ah_list;
+       struct list_head        xrcd_list;
        int                     closing;
 };
 
@@ -858,6 +902,15 @@ struct ib_pd {
        atomic_t                usecnt; /* count all resources */
 };
 
+struct ib_xrcd {
+       struct ib_device       *device;
+       atomic_t                usecnt; /* count all exposed resources */
+       struct inode           *inode;
+
+       struct mutex            tgt_qp_mutex;
+       struct list_head        tgt_qp_list;
+};
+
 struct ib_ah {
        struct ib_device        *device;
        struct ib_pd            *pd;
@@ -882,7 +935,16 @@ struct ib_srq {
        struct ib_uobject      *uobject;
        void                  (*event_handler)(struct ib_event *, void *);
        void                   *srq_context;
+       enum ib_srq_type        srq_type;
        atomic_t                usecnt;
+
+       union {
+               struct {
+                       struct ib_xrcd *xrcd;
+                       struct ib_cq   *cq;
+                       u32             srq_num;
+               } xrc;
+       } ext;
 };
 
 struct ib_qp {
@@ -891,6 +953,11 @@ struct ib_qp {
        struct ib_cq           *send_cq;
        struct ib_cq           *recv_cq;
        struct ib_srq          *srq;
+       struct ib_xrcd         *xrcd; /* XRC TGT QPs only */
+       struct list_head        xrcd_list;
+       atomic_t                usecnt; /* count times opened */
+       struct list_head        open_list;
+       struct ib_qp           *real_qp;
        struct ib_uobject      *uobject;
        void                  (*event_handler)(struct ib_event *, void *);
        void                   *qp_context;
@@ -1149,6 +1216,10 @@ struct ib_device {
                                                  struct ib_grh *in_grh,
                                                  struct ib_mad *in_mad,
                                                  struct ib_mad *out_mad);
+       struct ib_xrcd *           (*alloc_xrcd)(struct ib_device *device,
+                                                struct ib_ucontext *ucontext,
+                                                struct ib_udata *udata);
+       int                        (*dealloc_xrcd)(struct ib_xrcd *xrcd);
 
        struct ib_dma_mapping_ops   *dma_ops;
 
@@ -1442,6 +1513,25 @@ int ib_query_qp(struct ib_qp *qp,
  */
 int ib_destroy_qp(struct ib_qp *qp);
 
+/**
+ * ib_open_qp - Obtain a reference to an existing sharable QP.
+ * @xrcd: XRC domain the QP belongs to.
+ * @qp_open_attr: Attributes identifying the QP to open.
+ *
+ * Returns a reference to a sharable QP.
+ */
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+                        struct ib_qp_open_attr *qp_open_attr);
+
+/**
+ * ib_close_qp - Release an external reference to a QP.
+ * @qp: The QP handle to release
+ *
+ * The opened QP handle is released by the caller.  The underlying
+ * shared QP is not destroyed until all internal references are released.
+ */
+int ib_close_qp(struct ib_qp *qp);
+
 /**
  * ib_post_send - Posts a list of work requests to the send queue of
  *   the specified QP.
@@ -2060,4 +2150,16 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
  */
 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
 
+/**
+ * ib_alloc_xrcd - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ */
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+
+/**
+ * ib_dealloc_xrcd - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ */
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+
 #endif /* IB_VERBS_H */
index 2d0191c..1a046b1 100644 (file)
@@ -52,8 +52,10 @@ struct iw_cm_event {
        struct sockaddr_in local_addr;
        struct sockaddr_in remote_addr;
        void *private_data;
-       u8 private_data_len;
        void *provider_data;
+       u8 private_data_len;
+       u8 ord;
+       u8 ird;
 };
 
 /**
index 26977c1..51988f8 100644 (file)
@@ -65,6 +65,7 @@ enum rdma_cm_event_type {
 enum rdma_port_space {
        RDMA_PS_SDP   = 0x0001,
        RDMA_PS_IPOIB = 0x0002,
+       RDMA_PS_IB    = 0x013F,
        RDMA_PS_TCP   = 0x0106,
        RDMA_PS_UDP   = 0x0111,
 };
index fc82c18..5348a00 100644 (file)
@@ -77,7 +77,8 @@ struct rdma_ucm_create_id {
        __u64 uid;
        __u64 response;
        __u16 ps;
-       __u8  reserved[6];
+       __u8  qp_type;
+       __u8  reserved[5];
 };
 
 struct rdma_ucm_create_id_resp {