Merge branches 'bkl-removal', 'cma', 'ehca', 'for-2.6.27', 'mlx4', 'mthca' and 'nes...
author    Roland Dreier <rolandd@cisco.com>    Thu, 24 Jul 2008 15:38:47 +0000 (08:38 -0700)
committer Roland Dreier <rolandd@cisco.com>    Thu, 24 Jul 2008 15:38:47 +0000 (08:38 -0700)
25 files changed:
MAINTAINERS
drivers/infiniband/core/cma.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_hca.c
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ipz_pt_fn.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_mr.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/net/mlx4/cmd.c
drivers/net/mlx4/fw.c
drivers/net/mlx4/fw.h
drivers/net/mlx4/main.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/mr.c
drivers/net/mlx4/pd.c
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/rdma/rdma_cm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 11944b4..2a73da0 100644
@@ -2915,8 +2915,6 @@ P:        Faisal Latif
 M:     flatif@neteffect.com
 P:     Chien Tung
 M:     ctung@neteffect.com
-P:     Glenn Streiff
-M:     gstreiff@neteffect.com
 L:     general@lists.openfabrics.org
 W:     http://www.neteffect.com
 S:     Supported
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ae11d5c..e980ff3 100644
@@ -168,6 +168,12 @@ struct cma_work {
        struct rdma_cm_event    event;
 };
 
+struct cma_ndev_work {
+       struct work_struct      work;
+       struct rdma_id_private  *id;
+       struct rdma_cm_event    event;
+};
+
 union cma_ip_addr {
        struct in6_addr ip6;
        struct {
@@ -914,7 +920,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        struct rdma_cm_event event;
        int ret = 0;
 
-       if (cma_disable_callback(id_priv, CMA_CONNECT))
+       if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
+               cma_disable_callback(id_priv, CMA_CONNECT)) ||
+           (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
+               cma_disable_callback(id_priv, CMA_DISCONNECT)))
                return 0;
 
        memset(&event, 0, sizeof event);
@@ -950,6 +959,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IB_CM_TIMEWAIT_EXIT:
+               event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
+               break;
        case IB_CM_MRA_RECEIVED:
                /* ignore event */
                goto out;
@@ -1598,6 +1609,30 @@ out:
        kfree(work);
 }
 
+static void cma_ndev_work_handler(struct work_struct *_work)
+{
+       struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+       struct rdma_id_private *id_priv = work->id;
+       int destroy = 0;
+
+       mutex_lock(&id_priv->handler_mutex);
+       if (id_priv->state == CMA_DESTROYING ||
+           id_priv->state == CMA_DEVICE_REMOVAL)
+               goto out;
+
+       if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+               cma_exch(id_priv, CMA_DESTROYING);
+               destroy = 1;
+       }
+
+out:
+       mutex_unlock(&id_priv->handler_mutex);
+       cma_deref_id(id_priv);
+       if (destroy)
+               rdma_destroy_id(&id_priv->id);
+       kfree(work);
+}
+
 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 {
        struct rdma_route *route = &id_priv->id.route;
@@ -2723,6 +2758,65 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 }
 EXPORT_SYMBOL(rdma_leave_multicast);
 
+static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
+{
+       struct rdma_dev_addr *dev_addr;
+       struct cma_ndev_work *work;
+
+       dev_addr = &id_priv->id.route.addr.dev_addr;
+
+       if ((dev_addr->src_dev == ndev) &&
+           memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
+               printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
+                      ndev->name, &id_priv->id);
+               work = kzalloc(sizeof *work, GFP_KERNEL);
+               if (!work)
+                       return -ENOMEM;
+
+               INIT_WORK(&work->work, cma_ndev_work_handler);
+               work->id = id_priv;
+               work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
+               atomic_inc(&id_priv->refcount);
+               queue_work(cma_wq, &work->work);
+       }
+
+       return 0;
+}
+
+static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
+                              void *ctx)
+{
+       struct net_device *ndev = (struct net_device *)ctx;
+       struct cma_device *cma_dev;
+       struct rdma_id_private *id_priv;
+       int ret = NOTIFY_DONE;
+
+       if (dev_net(ndev) != &init_net)
+               return NOTIFY_DONE;
+
+       if (event != NETDEV_BONDING_FAILOVER)
+               return NOTIFY_DONE;
+
+       if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+               return NOTIFY_DONE;
+
+       mutex_lock(&lock);
+       list_for_each_entry(cma_dev, &dev_list, list)
+               list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+                       ret = cma_netdev_change(ndev, id_priv);
+                       if (ret)
+                               goto out;
+               }
+
+out:
+       mutex_unlock(&lock);
+       return ret;
+}
+
+static struct notifier_block cma_nb = {
+       .notifier_call = cma_netdev_callback
+};
+
 static void cma_add_one(struct ib_device *device)
 {
        struct cma_device *cma_dev;
@@ -2831,6 +2925,7 @@ static int cma_init(void)
 
        ib_sa_register_client(&sa_client);
        rdma_addr_register_client(&addr_client);
+       register_netdevice_notifier(&cma_nb);
 
        ret = ib_register_client(&cma_client);
        if (ret)
@@ -2838,6 +2933,7 @@ static int cma_init(void)
        return 0;
 
 err:
+       unregister_netdevice_notifier(&cma_nb);
        rdma_addr_unregister_client(&addr_client);
        ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
@@ -2847,6 +2943,7 @@ err:
 static void cma_cleanup(void)
 {
        ib_unregister_client(&cma_client);
+       unregister_netdevice_notifier(&cma_nb);
        rdma_addr_unregister_client(&addr_client);
        ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
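
Both new events reach RDMA CM consumers through their usual rdma_cm_id event handler. A minimal sketch of how a ULP might react (struct my_conn, its reconnect_work item, and its timewait_done completion are hypothetical consumer state, not part of this merge):

    static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
    {
            struct my_conn *conn = id->context;     /* hypothetical consumer state */

            switch (event->event) {
            case RDMA_CM_EVENT_ADDR_CHANGE:
                    /* Bonding failover moved the source address to another
                     * port; tear down and re-resolve on the new path. */
                    schedule_work(&conn->reconnect_work);
                    break;
            case RDMA_CM_EVENT_TIMEWAIT_EXIT:
                    /* The QP has left timewait; its resources may now be
                     * safely reused or freed. */
                    complete(&conn->timewait_done);
                    break;
            default:
                    break;
            }
            return 0;       /* a nonzero return asks the CM to destroy this id */
    }
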
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1e9e99a..0b0618e 100644
@@ -194,6 +194,7 @@ struct ehca_qp {
        u32 packet_count;
        atomic_t nr_events; /* events seen */
        wait_queue_head_t wait_completion;
+       int mig_armed;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index bc3b37d..4628822 100644
@@ -114,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
        }
 
        props->max_pkeys           = 16;
-       props->local_ca_ack_delay  = min_t(u8, rblock->local_ca_ack_delay, 255);
+       /* Some FW versions say 0 here; insert sensible value in that case */
+       props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
+               min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
        props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
        props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
        props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 0792d93..99642a6 100644
@@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
 {
        struct ib_event event;
 
+       /* PATH_MIG without the QP ever having been armed is false alarm */
+       if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+               return;
+
        event.device = &shca->ib_device;
        event.event = event_type;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 3f59587..ea13efd 100644
@@ -1460,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                        goto modify_qp_exit2;
                }
                mqpcb->path_migration_state = attr->path_mig_state + 1;
+               if (attr->path_mig_state == IB_MIG_REARM)
+                       my_qp->mig_armed = 1;
                update_mask |=
                        EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
        }
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 661f8db..c3a3284 100644
@@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
 
 out:
        ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+       mutex_unlock(&pd->lock);
        return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 299f208..0b191a4 100644
@@ -637,6 +637,7 @@ repoll:
                case MLX4_OPCODE_SEND_IMM:
                        wc->wc_flags |= IB_WC_WITH_IMM;
                case MLX4_OPCODE_SEND:
+               case MLX4_OPCODE_SEND_INVAL:
                        wc->opcode    = IB_WC_SEND;
                        break;
                case MLX4_OPCODE_RDMA_READ:
@@ -657,6 +658,12 @@ repoll:
                case MLX4_OPCODE_LSO:
                        wc->opcode    = IB_WC_LSO;
                        break;
+               case MLX4_OPCODE_FMR:
+                       wc->opcode    = IB_WC_FAST_REG_MR;
+                       break;
+               case MLX4_OPCODE_LOCAL_INVAL:
+                       wc->opcode    = IB_WC_LOCAL_INV;
+                       break;
                }
        } else {
                wc->byte_len = be32_to_cpu(cqe->byte_cnt);
@@ -667,6 +674,11 @@ repoll:
                        wc->wc_flags    = IB_WC_WITH_IMM;
                        wc->ex.imm_data = cqe->immed_rss_invalid;
                        break;
+               case MLX4_RECV_OPCODE_SEND_INVAL:
+                       wc->opcode      = IB_WC_RECV;
+                       wc->wc_flags    = IB_WC_WITH_INVALIDATE;
+                       wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
+                       break;
                case MLX4_RECV_OPCODE_SEND:
                        wc->opcode   = IB_WC_RECV;
                        wc->wc_flags = 0;
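
On the receive side, a send-with-invalidate completion looks like an ordinary receive with one extra flag, and the rkey the peer invalidated is reported through wc->ex. A hedged sketch of a poll loop (my_mark_rkey_invalid is a hypothetical consumer helper):

    struct ib_wc wc;

    while (ib_poll_cq(cq, 1, &wc) > 0) {
            if (wc.status != IB_WC_SUCCESS)
                    continue;
            if (wc.opcode == IB_WC_RECV &&
                (wc.wc_flags & IB_WC_WITH_INVALIDATE))
                    /* The peer already invalidated this rkey remotely,
                     * so no local IB_WR_LOCAL_INV is needed for it. */
                    my_mark_rkey_invalid(wc.ex.invalidate_rkey);
    }
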
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bcf5064..38d6907 100644
@@ -104,6 +104,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
        if (dev->dev->caps.max_gso_sz)
                props->device_cap_flags |= IB_DEVICE_UD_TSO;
+       if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+               props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+       if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+           (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+           (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+               props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
@@ -127,6 +133,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
        props->max_srq             = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
        props->max_srq_wr          = dev->dev->caps.max_srq_wqes - 1;
        props->max_srq_sge         = dev->dev->caps.max_srq_sge;
+       props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
        props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
        props->atomic_cap          = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
                IB_ATOMIC_HCA : IB_ATOMIC_NONE;
@@ -565,6 +572,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
        ibdev->ib_dev.owner             = THIS_MODULE;
        ibdev->ib_dev.node_type         = RDMA_NODE_IB_CA;
+       ibdev->ib_dev.local_dma_lkey    = dev->caps.reserved_lkey;
        ibdev->ib_dev.phys_port_cnt     = dev->caps.num_ports;
        ibdev->ib_dev.num_comp_vectors  = 1;
        ibdev->ib_dev.dma_device        = &dev->pdev->dev;
@@ -627,6 +635,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.get_dma_mr        = mlx4_ib_get_dma_mr;
        ibdev->ib_dev.reg_user_mr       = mlx4_ib_reg_user_mr;
        ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
+       ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+       ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+       ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
        ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
        ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
        ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
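
With these capability bits in place, a kernel ULP can probe for both features at query time. A minimal sketch, assuming ULP-local use_fastreg and lkey variables and a fallback DMA MR obtained earlier from ib_get_dma_mr():

    struct ib_device_attr attr;

    if (!ib_query_device(device, &attr) &&
        (attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
            use_fastreg = 1;        /* fast registration work requests are OK */

    if (attr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
            lkey = device->local_dma_lkey;  /* no DMA MR needed */
    else
            lkey = dma_mr->lkey;            /* fall back to ib_get_dma_mr() MR */
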
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c4cf5b6..d26a913 100644
@@ -83,6 +83,11 @@ struct mlx4_ib_mr {
        struct ib_umem         *umem;
 };
 
+struct mlx4_ib_fast_reg_page_list {
+       struct ib_fast_reg_page_list    ibfrpl;
+       dma_addr_t                      map;
+};
+
 struct mlx4_ib_fmr {
        struct ib_fmr           ibfmr;
        struct mlx4_fmr         mfmr;
@@ -199,6 +204,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
        return container_of(ibmr, struct mlx4_ib_mr, ibmr);
 }
 
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+       return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
        return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -239,6 +249,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+                                       int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+                                                              int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
 
 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 68e9248..db2086f 100644
@@ -183,6 +183,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
        return 0;
 }
 
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+                                       int max_page_list_len)
+{
+       struct mlx4_ib_dev *dev = to_mdev(pd->device);
+       struct mlx4_ib_mr *mr;
+       int err;
+
+       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+                           max_page_list_len, 0, &mr->mmr);
+       if (err)
+               goto err_free;
+
+       err = mlx4_mr_enable(dev->dev, &mr->mmr);
+       if (err)
+               goto err_mr;
+
+       return &mr->ibmr;
+
+err_mr:
+       mlx4_mr_free(dev->dev, &mr->mmr);
+
+err_free:
+       kfree(mr);
+       return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+                                                              int page_list_len)
+{
+       struct mlx4_ib_dev *dev = to_mdev(ibdev);
+       struct mlx4_ib_fast_reg_page_list *mfrpl;
+       int size = page_list_len * sizeof (u64);
+
+       if (size > PAGE_SIZE)
+               return ERR_PTR(-EINVAL);
+
+       mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+       if (!mfrpl)
+               return ERR_PTR(-ENOMEM);
+
+       mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+                                                    size, &mfrpl->map,
+                                                    GFP_KERNEL);
+       if (!mfrpl->ibfrpl.page_list)
+               goto err_free;
+
+       WARN_ON(mfrpl->map & 0x3f);
+
+       return &mfrpl->ibfrpl;
+
+err_free:
+       kfree(mfrpl);
+       return ERR_PTR(-ENOMEM);
+}
+
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+       struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+       struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+       int size = page_list->max_page_list_len * sizeof (u64);
+
+       dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
+                         mfrpl->map);
+       kfree(mfrpl);
+}
+
 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
                                 struct ib_fmr_attr *fmr_attr)
 {
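
Consumers reach these three new methods through the core verbs layer. A sketch of the allocate/free pairing under the new API (error handling elided; the page-list length of 32 is illustrative):

    struct ib_mr *mr;
    struct ib_fast_reg_page_list *frpl;

    mr = ib_alloc_fast_reg_mr(pd, 32);      /* up to 32 pages per registration */
    frpl = ib_alloc_fast_reg_page_list(pd->device, 32);

    /* ... fill frpl->page_list[] and post an IB_WR_FAST_REG_MR ... */

    ib_free_fast_reg_page_list(frpl);
    ib_dereg_mr(mr);
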
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 89eb6cb..02a99bc 100644
@@ -78,6 +78,9 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_RDMA_READ]               = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
        [IB_WR_ATOMIC_CMP_AND_SWP]      = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
        [IB_WR_ATOMIC_FETCH_AND_ADD]    = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+       [IB_WR_SEND_WITH_INV]           = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+       [IB_WR_LOCAL_INV]               = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+       [IB_WR_FAST_REG_MR]             = __constant_cpu_to_be32(MLX4_OPCODE_FMR),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -976,6 +979,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
        context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
 
+       /* Set "fast registration enabled" for all kernel QPs */
+       if (!qp->ibqp.uobject)
+               context->params1 |= cpu_to_be32(1 << 11);
+
        if (attr_mask & IB_QP_RNR_RETRY) {
                context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
                optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
@@ -1322,6 +1329,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
        return cur + nreq >= wq->max_post;
 }
 
+static __be32 convert_access(int acc)
+{
+       return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC)       : 0) |
+              (acc & IB_ACCESS_REMOTE_WRITE  ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
+              (acc & IB_ACCESS_REMOTE_READ   ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ)  : 0) |
+              (acc & IB_ACCESS_LOCAL_WRITE   ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE)  : 0) |
+               cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
+}
+
+static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+{
+       struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+
+       fseg->flags             = convert_access(wr->wr.fast_reg.access_flags);
+       fseg->mem_key           = cpu_to_be32(wr->wr.fast_reg.rkey);
+       fseg->buf_list          = cpu_to_be64(mfrpl->map);
+       fseg->start_addr        = cpu_to_be64(wr->wr.fast_reg.iova_start);
+       fseg->reg_len           = cpu_to_be64(wr->wr.fast_reg.length);
+       fseg->offset            = 0; /* XXX -- is this just for ZBVA? */
+       fseg->page_size         = cpu_to_be32(wr->wr.fast_reg.page_shift);
+       fseg->reserved[0]       = 0;
+       fseg->reserved[1]       = 0;
+}
+
+static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
+{
+       iseg->flags     = 0;
+       iseg->mem_key   = cpu_to_be32(rkey);
+       iseg->guest_id  = 0;
+       iseg->pa        = 0;
+}
+
 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
                                          u64 remote_addr, u32 rkey)
 {
@@ -1395,7 +1434,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
        dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
                         struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
 {
        unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
@@ -1423,6 +1462,21 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
        return 0;
 }
 
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+       switch (wr->opcode) {
+       case IB_WR_SEND_WITH_IMM:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               return wr->ex.imm_data;
+
+       case IB_WR_SEND_WITH_INV:
+               return cpu_to_be32(wr->ex.invalidate_rkey);
+
+       default:
+               return 0;
+       }
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr)
 {
@@ -1469,11 +1523,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
                        qp->sq_signal_bits;
 
-               if (wr->opcode == IB_WR_SEND_WITH_IMM ||
-                   wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ctrl->imm = wr->ex.imm_data;
-               else
-                       ctrl->imm = 0;
+               ctrl->imm = send_ieth(wr);
 
                wqe += sizeof *ctrl;
                size = sizeof *ctrl / 16;
@@ -1505,6 +1555,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
                                break;
 
+                       case IB_WR_LOCAL_INV:
+                               set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
+                               wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
+                               size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
+                               break;
+
+                       case IB_WR_FAST_REG_MR:
+                               set_fmr_seg(wqe, wr);
+                               wqe  += sizeof (struct mlx4_wqe_fmr_seg);
+                               size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+                               break;
+
                        default:
                                /* No extra segments required for sends */
                                break;
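
Putting the send side together: a hedged sketch of posting a fast registration and, once the peer is done with the region, a local invalidate. The mr, frpl, qp, io_addr and npages variables are assumed to exist; values are illustrative:

    struct ib_send_wr fr_wr, inv_wr, *bad_wr;
    int ret;

    memset(&fr_wr, 0, sizeof fr_wr);
    fr_wr.opcode                    = IB_WR_FAST_REG_MR;
    fr_wr.wr.fast_reg.iova_start    = io_addr;
    fr_wr.wr.fast_reg.page_list     = frpl; /* frpl->page_list[] filled by caller */
    fr_wr.wr.fast_reg.page_list_len = npages;
    fr_wr.wr.fast_reg.page_shift    = PAGE_SHIFT;
    fr_wr.wr.fast_reg.length        = npages * PAGE_SIZE;
    fr_wr.wr.fast_reg.access_flags  = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
    fr_wr.wr.fast_reg.rkey          = mr->rkey;
    ret = ib_post_send(qp, &fr_wr, &bad_wr);

    /* ... later, after the remote side has finished with the region ... */
    memset(&inv_wr, 0, sizeof inv_wr);
    inv_wr.opcode             = IB_WR_LOCAL_INV;
    inv_wr.ex.invalidate_rkey = mr->rkey;
    ret = ib_post_send(qp, &inv_wr, &bad_wr);
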
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ee4d073..2525901 100644
@@ -202,6 +202,7 @@ struct mthca_pd_table {
 
 struct mthca_buddy {
        unsigned long **bits;
+       int            *num_free;
        int             max_order;
        spinlock_t      lock;
 };
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8489b1e..882e6b7 100644
@@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
 
        spin_lock(&buddy->lock);
 
-       for (o = order; o <= buddy->max_order; ++o) {
-               m = 1 << (buddy->max_order - o);
-               seg = find_first_bit(buddy->bits[o], m);
-               if (seg < m)
-                       goto found;
-       }
+       for (o = order; o <= buddy->max_order; ++o)
+               if (buddy->num_free[o]) {
+                       m = 1 << (buddy->max_order - o);
+                       seg = find_first_bit(buddy->bits[o], m);
+                       if (seg < m)
+                               goto found;
+               }
 
        spin_unlock(&buddy->lock);
        return -1;
 
  found:
        clear_bit(seg, buddy->bits[o]);
+       --buddy->num_free[o];
 
        while (o > order) {
                --o;
                seg <<= 1;
                set_bit(seg ^ 1, buddy->bits[o]);
+               ++buddy->num_free[o];
        }
 
        spin_unlock(&buddy->lock);
@@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
 
        while (test_bit(seg ^ 1, buddy->bits[order])) {
                clear_bit(seg ^ 1, buddy->bits[order]);
+               --buddy->num_free[order];
                seg >>= 1;
                ++order;
        }
 
        set_bit(seg, buddy->bits[order]);
+       ++buddy->num_free[order];
 
        spin_unlock(&buddy->lock);
 }
@@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
 
        buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
                              GFP_KERNEL);
-       if (!buddy->bits)
+       buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int),
+                                 GFP_KERNEL);
+       if (!buddy->bits || !buddy->num_free)
                goto err_out;
 
        for (i = 0; i <= buddy->max_order; ++i) {
@@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
        }
 
        set_bit(0, buddy->bits[buddy->max_order]);
+       buddy->num_free[buddy->max_order] = 1;
 
        return 0;
 
@@ -161,9 +169,10 @@ err_out_free:
        for (i = 0; i <= buddy->max_order; ++i)
                kfree(buddy->bits[i]);
 
+err_out:
        kfree(buddy->bits);
+       kfree(buddy->num_free);
 
-err_out:
        return -ENOMEM;
 }
 
@@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
                kfree(buddy->bits[i]);
 
        kfree(buddy->bits);
+       kfree(buddy->num_free);
 }
 
 static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 3a917c1..63462ec 100644
@@ -483,6 +483,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
+       case RDMA_CM_EVENT_ADDR_CHANGE:
                iser_disconnected_handler(cma_id);
                break;
        default:
diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 70dff94..04d5bc6 100644
@@ -67,6 +67,8 @@ enum {
        CMD_STAT_BAD_INDEX      = 0x0a,
        /* FW image corrupted: */
        CMD_STAT_BAD_NVMEM      = 0x0b,
+       /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */
+       CMD_STAT_ICM_ERROR      = 0x0c,
        /* Attempt to modify a QP/EE which is not in the presumed state: */
        CMD_STAT_BAD_QP_STATE   = 0x10,
        /* Bad segment parameters (Address/Size): */
@@ -119,6 +121,7 @@ static int mlx4_status_to_errno(u8 status)
                [CMD_STAT_BAD_RES_STATE]  = -EBADF,
                [CMD_STAT_BAD_INDEX]      = -EBADF,
                [CMD_STAT_BAD_NVMEM]      = -EFAULT,
+               [CMD_STAT_ICM_ERROR]      = -ENFILE,
                [CMD_STAT_BAD_QP_STATE]   = -EINVAL,
                [CMD_STAT_BAD_SEG_PARAM]  = -EFAULT,
                [CMD_STAT_REG_BOUND]      = -EBUSY,
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 2b5006b..5727822 100644
@@ -46,6 +46,10 @@ enum {
 extern void __buggy_use_of_MLX4_GET(void);
 extern void __buggy_use_of_MLX4_PUT(void);
 
+static int enable_qos;
+module_param(enable_qos, bool, 0444);
+MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)");
+
 #define MLX4_GET(dest, source, offset)                               \
        do {                                                          \
                void *__p = (char *) (source) + (offset);             \
@@ -198,7 +202,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET    0x8e
 #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET      0x90
 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET    0x92
-#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET                0x97
+#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET                0x94
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET         0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET                0xa0
 
@@ -373,12 +377,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                }
        }
 
-       if (dev_cap->bmme_flags & 1)
-               mlx4_dbg(dev, "Base MM extensions: yes "
-                        "(flags %d, rsvd L_Key %08x)\n",
-                        dev_cap->bmme_flags, dev_cap->reserved_lkey);
-       else
-               mlx4_dbg(dev, "Base MM extensions: no\n");
+       mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+                dev_cap->bmme_flags, dev_cap->reserved_lkey);
 
        /*
         * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
@@ -737,6 +737,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
                *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
 
+       /* Enable QoS support if module parameter set */
+       if (enable_qos)
+               *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2);
+
        /* QPC/EEC/CQC/EQC/RDMARC attributes */
 
        MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
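
Since the parameter uses permissions 0444 it can only be set at module load time, e.g. "modprobe mlx4_core enable_qos=1"; when set, it lights bit 2 of the INIT_HCA flags word as shown above.
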
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index a0e046c..fbf0e22 100644
@@ -98,7 +98,7 @@ struct mlx4_dev_cap {
        int cmpt_entry_sz;
        int mtt_entry_sz;
        int resize_srq;
-       u8  bmme_flags;
+       u32 bmme_flags;
        u32 reserved_lkey;
        u64 max_icm_sz;
        int max_gso_sz;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index d373601..8e1d24c 100644
@@ -158,6 +158,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
+       dev->caps.bmme_flags         = dev_cap->bmme_flags;
+       dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
 
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index a4023c2..7803849 100644
@@ -118,6 +118,7 @@ struct mlx4_bitmap {
 
 struct mlx4_buddy {
        unsigned long         **bits;
+       unsigned int           *num_free;
        int                     max_order;
        spinlock_t              lock;
 };
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 03a9abc..a3c04c5 100644
@@ -47,7 +47,7 @@ struct mlx4_mpt_entry {
        __be32 flags;
        __be32 qpn;
        __be32 key;
-       __be32 pd;
+       __be32 pd_flags;
        __be64 start;
        __be64 length;
        __be32 lkey;
@@ -61,11 +61,15 @@ struct mlx4_mpt_entry {
 } __attribute__((packed));
 
 #define MLX4_MPT_FLAG_SW_OWNS      (0xfUL << 28)
+#define MLX4_MPT_FLAG_FREE         (0x3UL << 28)
 #define MLX4_MPT_FLAG_MIO          (1 << 17)
 #define MLX4_MPT_FLAG_BIND_ENABLE   (1 << 15)
 #define MLX4_MPT_FLAG_PHYSICAL     (1 <<  9)
 #define MLX4_MPT_FLAG_REGION       (1 <<  8)
 
+#define MLX4_MPT_PD_FLAG_FAST_REG   (1 << 26)
+#define MLX4_MPT_PD_FLAG_EN_INV            (3 << 24)
+
 #define MLX4_MTT_FLAG_PRESENT          1
 
 #define MLX4_MPT_STATUS_SW             0xF0
@@ -79,23 +83,26 @@ static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
 
        spin_lock(&buddy->lock);
 
-       for (o = order; o <= buddy->max_order; ++o) {
-               m = 1 << (buddy->max_order - o);
-               seg = find_first_bit(buddy->bits[o], m);
-               if (seg < m)
-                       goto found;
-       }
+       for (o = order; o <= buddy->max_order; ++o)
+               if (buddy->num_free[o]) {
+                       m = 1 << (buddy->max_order - o);
+                       seg = find_first_bit(buddy->bits[o], m);
+                       if (seg < m)
+                               goto found;
+               }
 
        spin_unlock(&buddy->lock);
        return -1;
 
  found:
        clear_bit(seg, buddy->bits[o]);
+       --buddy->num_free[o];
 
        while (o > order) {
                --o;
                seg <<= 1;
                set_bit(seg ^ 1, buddy->bits[o]);
+               ++buddy->num_free[o];
        }
 
        spin_unlock(&buddy->lock);
@@ -113,11 +120,13 @@ static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
 
        while (test_bit(seg ^ 1, buddy->bits[order])) {
                clear_bit(seg ^ 1, buddy->bits[order]);
+               --buddy->num_free[order];
                seg >>= 1;
                ++order;
        }
 
        set_bit(seg, buddy->bits[order]);
+       ++buddy->num_free[order];
 
        spin_unlock(&buddy->lock);
 }
@@ -131,7 +140,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
 
        buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
                              GFP_KERNEL);
-       if (!buddy->bits)
+       buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int),
+                                 GFP_KERNEL);
+       if (!buddy->bits || !buddy->num_free)
                goto err_out;
 
        for (i = 0; i <= buddy->max_order; ++i) {
@@ -143,6 +154,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
        }
 
        set_bit(0, buddy->bits[buddy->max_order]);
+       buddy->num_free[buddy->max_order] = 1;
 
        return 0;
 
@@ -150,9 +162,10 @@ err_out_free:
        for (i = 0; i <= buddy->max_order; ++i)
                kfree(buddy->bits[i]);
 
+err_out:
        kfree(buddy->bits);
+       kfree(buddy->num_free);
 
-err_out:
        return -ENOMEM;
 }
 
@@ -164,6 +177,7 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
                kfree(buddy->bits[i]);
 
        kfree(buddy->bits);
+       kfree(buddy->num_free);
 }
 
 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
@@ -314,21 +328,30 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 
        memset(mpt_entry, 0, sizeof *mpt_entry);
 
-       mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS     |
-                                      MLX4_MPT_FLAG_MIO         |
+       mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO         |
                                       MLX4_MPT_FLAG_REGION      |
                                       mr->access);
 
        mpt_entry->key         = cpu_to_be32(key_to_hw_index(mr->key));
-       mpt_entry->pd          = cpu_to_be32(mr->pd);
+       mpt_entry->pd_flags    = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
        mpt_entry->start       = cpu_to_be64(mr->iova);
        mpt_entry->length      = cpu_to_be64(mr->size);
        mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
        if (mr->mtt.order < 0) {
                mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
                mpt_entry->mtt_seg = 0;
-       } else
+       } else {
                mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+       }
+
+       if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
+               /* fast register MR in free state */
+               mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+               mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG);
+       } else {
+               mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
+       }
 
        err = mlx4_SW2HW_MPT(dev, mailbox,
                             key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 3a93c5f..aa61689 100644
@@ -91,6 +91,13 @@ EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
+       if (dev->caps.num_uars <= 128) {
+               mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
+                        dev->caps.num_uars);
+               mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
+               return -ENODEV;
+       }
+
        return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
                                dev->caps.num_uars, dev->caps.num_uars - 1,
                                max(128, dev->caps.reserved_uars));
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 81b3dd5..655ea0d 100644
@@ -68,6 +68,14 @@ enum {
        MLX4_DEV_CAP_FLAG_UD_MCAST      = 1 << 21
 };
 
+enum {
+       MLX4_BMME_FLAG_LOCAL_INV        = 1 <<  6,
+       MLX4_BMME_FLAG_REMOTE_INV       = 1 <<  7,
+       MLX4_BMME_FLAG_TYPE_2_WIN       = 1 <<  9,
+       MLX4_BMME_FLAG_RESERVED_LKEY    = 1 << 10,
+       MLX4_BMME_FLAG_FAST_REG_WR      = 1 << 11,
+};
+
 enum mlx4_event {
        MLX4_EVENT_TYPE_COMP               = 0x00,
        MLX4_EVENT_TYPE_PATH_MIG           = 0x01,
@@ -184,6 +192,8 @@ struct mlx4_caps {
        u32                     max_msg_sz;
        u32                     page_size_cap;
        u32                     flags;
+       u32                     bmme_flags;
+       u32                     reserved_lkey;
        u16                     stat_rate_support;
        u8                      port_width_cap[MLX4_MAX_PORTS + 1];
        int                     max_gso_sz;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 7f128b2..e27082c 100644
@@ -219,7 +219,7 @@ struct mlx4_wqe_datagram_seg {
        __be32                  reservd[2];
 };
 
-struct mlx4_lso_seg {
+struct mlx4_wqe_lso_seg {
        __be32                  mss_hdr_size;
        __be32                  header[0];
 };
@@ -233,6 +233,14 @@ struct mlx4_wqe_bind_seg {
        __be64                  length;
 };
 
+enum {
+       MLX4_WQE_FMR_PERM_LOCAL_READ    = 1 << 27,
+       MLX4_WQE_FMR_PERM_LOCAL_WRITE   = 1 << 28,
+       MLX4_WQE_FMR_PERM_REMOTE_READ   = 1 << 29,
+       MLX4_WQE_FMR_PERM_REMOTE_WRITE  = 1 << 30,
+       MLX4_WQE_FMR_PERM_ATOMIC        = 1 << 31
+};
+
 struct mlx4_wqe_fmr_seg {
        __be32                  flags;
        __be32                  mem_key;
@@ -255,11 +263,11 @@ struct mlx4_wqe_fmr_ext_seg {
 };
 
 struct mlx4_wqe_local_inval_seg {
-       u8                      flags;
-       u8                      reserved1[3];
+       __be32                  flags;
+       u32                     reserved1;
        __be32                  mem_key;
-       u8                      reserved2[3];
-       u8                      guest_id;
+       u32                     reserved2[2];
+       __be32                  guest_id;
        __be64                  pa;
 };
 
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 22bb2e7..df7faf0 100644
@@ -57,7 +57,9 @@ enum rdma_cm_event_type {
        RDMA_CM_EVENT_DISCONNECTED,
        RDMA_CM_EVENT_DEVICE_REMOVAL,
        RDMA_CM_EVENT_MULTICAST_JOIN,
-       RDMA_CM_EVENT_MULTICAST_ERROR
+       RDMA_CM_EVENT_MULTICAST_ERROR,
+       RDMA_CM_EVENT_ADDR_CHANGE,
+       RDMA_CM_EVENT_TIMEWAIT_EXIT
 };
 
 enum rdma_port_space {