Merge master.kernel.org:/pub/scm/linux/kernel/git/gregkh/usb-2.6
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Thu, 12 Jul 2007 23:46:58 +0000 (16:46 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Thu, 12 Jul 2007 23:46:58 +0000 (16:46 -0700)
* master.kernel.org:/pub/scm/linux/kernel/git/gregkh/usb-2.6: (149 commits)
  USB: ohci-pnx4008: Remove unnecessary cast of return value of kzalloc
  USB: additions to the quirk list
  usb-storage: implement autosuspend
  USB: cdc-acm: add new device id to option driver
  USB: goku_udc trivial cleanups
  USB: usb gadget stack can now -DDEBUG with Kconfig
  usb gadget stack: remove usb_ep_*_buffer(), part 2
  usb gadget stack: remove usb_ep_*_buffer(), part 1
  USB: pxa2xx_udc -- cleanups, mostly removing dma hooks
  USB: pxa2xx_udc: use generic gpio layer
  USB: quirk for samsung printer
  USB: usb/dma doc updates
  USB: drivers/usb/storage/unusual_devs.h whitespace cleanup
  USB: remove Makefile reference to obsolete OHCI_AT91
  USB: io_*: remove bogus termios no change checks
  USB: mos7720: remove bogus no termios change check
  USB: visor and whiteheat: remove bogus termios change checks
  USB: pl2303: remove bogus checks and fix speed support to use tty_get_baud_rate()
  USB: mos7840.c: turn this into a serial driver
  USB: make the usb_device numa_node get assigned from controller
  ...

102 files changed:
MAINTAINERS
drivers/infiniband/Kconfig
drivers/infiniband/core/agent.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cm_msgs.h
drivers/infiniband/core/cma.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/multicast.c
drivers/infiniband/core/sa.h
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/smi.c
drivers/infiniband/core/smi.h
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/umem.c
drivers/infiniband/hw/amso1100/Kconfig
drivers/infiniband/hw/cxgb3/Kconfig
drivers/infiniband/hw/cxgb3/cxio_hal.c
drivers/infiniband/hw/cxgb3/cxio_wr.h
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_cm.h
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/ehca/Kconfig
drivers/infiniband/hw/ehca/ehca_av.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
drivers/infiniband/hw/ehca/ehca_cq.c
drivers/infiniband/hw/ehca/ehca_hca.c
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_irq.h
drivers/infiniband/hw/ehca/ehca_iverbs.h
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ehca/ehca_tools.h
drivers/infiniband/hw/ehca/ehca_uverbs.c
drivers/infiniband/hw/ehca/hcp_if.c
drivers/infiniband/hw/ehca/hcp_if.h
drivers/infiniband/hw/ehca/hipz_hw.h
drivers/infiniband/hw/ehca/ipz_pt_fn.h
drivers/infiniband/hw/ipath/Kconfig
drivers/infiniband/hw/ipath/ipath_common.h
drivers/infiniband/hw/ipath/ipath_cq.c
drivers/infiniband/hw/ipath/ipath_debug.h
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_eeprom.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_fs.c
drivers/infiniband/hw/ipath/ipath_iba6110.c
drivers/infiniband/hw/ipath/ipath_iba6120.c
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_keys.c
drivers/infiniband/hw/ipath/ipath_layer.c
drivers/infiniband/hw/ipath/ipath_layer.h
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_mmap.c
drivers/infiniband/hw/ipath/ipath_mr.c
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_registers.h
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_srq.c
drivers/infiniband/hw/ipath/ipath_stats.c
drivers/infiniband/hw/ipath/ipath_sysfs.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_user_pages.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h
drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
drivers/infiniband/hw/mlx4/Kconfig
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mthca/Kconfig
drivers/infiniband/hw/mthca/mthca_allocator.c
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/ulp/ipoib/Kconfig
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/iser/Kconfig
drivers/infiniband/ulp/srp/Kconfig
drivers/net/bnx2.c
drivers/net/cxgb3/version.h
drivers/net/mlx4/fw.c
drivers/net/mlx4/fw.h
drivers/net/mlx4/main.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/qp.c
drivers/net/mlx4/srq.c
include/asm-ia64/pci.h
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/rdma/ib_cm.h
include/rdma/ib_mad.h

index 83e9195..845fbf4 100644 (file)
@@ -370,7 +370,7 @@ P:  Tom Tucker
 M:     tom@opengridcomputing.com
 P:     Steve Wise
 M:     swise@opengridcomputing.com
-L:     openib-general@openib.org
+L:     general@lists.openfabrics.org
 S:     Maintained
 
 AOA (Apple Onboard Audio) ALSA DRIVER
@@ -1395,7 +1395,7 @@ P:        Hoang-Nam Nguyen
 M:     hnguyen@de.ibm.com
 P:     Christoph Raisch
 M:     raisch@de.ibm.com
-L:     openib-general@openib.org
+L:     general@lists.openfabrics.org
 S:     Supported
 
 EMU10K1 SOUND DRIVER
@@ -1849,8 +1849,8 @@ M:        rolandd@cisco.com
 P:     Sean Hefty
 M:     mshefty@ichips.intel.com
 P:     Hal Rosenstock
-M:     halr@voltaire.com
-L:     openib-general@openib.org
+M:     hal.rosenstock@gmail.com 
+L:     general@lists.openfabrics.org
 W:     http://www.openib.org/
 T:     git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
 S:     Supported
@@ -1988,9 +1988,10 @@ M:       jjciarla@raiz.uncu.edu.ar
 S:     Maintained
 
 IPATH DRIVER:
-P:     Bryan O'Sullivan
-M:     support@pathscale.com
-L:     openib-general@openib.org
+P:     Arthur Jones
+M:     infinipath@qlogic.com
+L:     general@lists.openfabrics.org
+T:     git git://git.qlogic.com/ipath-linux-2.6
 S:     Supported
 
 IPMI SUBSYSTEM
index 994decc..a193dfb 100644 (file)
@@ -1,14 +1,14 @@
-menu "InfiniBand support"
-       depends on HAS_IOMEM
-
-config INFINIBAND
-       depends on PCI || BROKEN
+menuconfig INFINIBAND
        tristate "InfiniBand support"
+       depends on PCI || BROKEN
+       depends on HAS_IOMEM
        ---help---
          Core support for InfiniBand (IB).  Make sure to also select
          any protocols you wish to use as well as drivers for your
          InfiniBand hardware.
 
+if INFINIBAND
+
 config INFINIBAND_USER_MAD
        tristate "InfiniBand userspace MAD support"
        depends on INFINIBAND
@@ -20,7 +20,6 @@ config INFINIBAND_USER_MAD
 
 config INFINIBAND_USER_ACCESS
        tristate "InfiniBand userspace access (verbs and CM)"
-       depends on INFINIBAND
        ---help---
          Userspace InfiniBand access support.  This enables the
          kernel side of userspace verbs and the userspace
@@ -37,7 +36,7 @@ config INFINIBAND_USER_MEM
 
 config INFINIBAND_ADDR_TRANS
        bool
-       depends on INFINIBAND && INET
+       depends on INET
        default y
 
 source "drivers/infiniband/hw/mthca/Kconfig"
@@ -54,4 +53,4 @@ source "drivers/infiniband/ulp/srp/Kconfig"
 
 source "drivers/infiniband/ulp/iser/Kconfig"
 
-endmenu
+endif # INFINIBAND
index ecd1a30..db2633e 100644 (file)
@@ -3,7 +3,7 @@
  * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
  */
 
 #include <linux/slab.h>
@@ -42,6 +41,7 @@
 
 #include "agent.h"
 #include "smi.h"
+#include "mad_priv.h"
 
 #define SPFX "ib_agent: "
 
@@ -87,8 +87,13 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
        struct ib_mad_send_buf *send_buf;
        struct ib_ah *ah;
        int ret;
+       struct ib_mad_send_wr_private *mad_send_wr;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
+               port_priv = ib_get_agent_port(device, 0);
+       else
+               port_priv = ib_get_agent_port(device, port_num);
 
-       port_priv = ib_get_agent_port(device, port_num);
        if (!port_priv) {
                printk(KERN_ERR SPFX "Unable to find port agent\n");
                return -ENODEV;
@@ -113,6 +118,14 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 
        memcpy(send_buf->mad, mad, sizeof *mad);
        send_buf->ah = ah;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
+               mad_send_wr = container_of(send_buf,
+                                          struct ib_mad_send_wr_private,
+                                          send_buf);
+               mad_send_wr->send_wr.wr.ud.port_num = port_num;
+       }
+
        if ((ret = ib_post_send_mad(send_buf, NULL))) {
                printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
                goto err2;
index 40c004a..9820c67 100644 (file)
@@ -87,6 +87,7 @@ struct cm_port {
 struct cm_device {
        struct list_head list;
        struct ib_device *device;
+       u8 ack_delay;
        struct cm_port port[0];
 };
 
@@ -95,7 +96,7 @@ struct cm_av {
        union ib_gid dgid;
        struct ib_ah_attr ah_attr;
        u16 pkey_index;
-       u8 packet_life_time;
+       u8 timeout;
 };
 
 struct cm_work {
@@ -154,6 +155,7 @@ struct cm_id_private {
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
+       u8 target_ack_delay;
 
        struct list_head work_list;
        atomic_t work_count;
@@ -293,7 +295,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
        av->port = port;
        ib_init_ah_from_path(cm_dev->device, port->port_num, path,
                             &av->ah_attr);
-       av->packet_life_time = path->packet_life_time;
+       av->timeout = path->packet_life_time + 1;
        return 0;
 }
 
@@ -318,12 +320,10 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 
 static void cm_free_id(__be32 local_id)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&cm.lock, flags);
+       spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
-       spin_unlock_irqrestore(&cm.lock, flags);
+       spin_unlock_irq(&cm.lock);
 }
 
 static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
@@ -345,11 +345,10 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
 {
        struct cm_id_private *cm_id_priv;
-       unsigned long flags;
 
-       spin_lock_irqsave(&cm.lock, flags);
+       spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
-       spin_unlock_irqrestore(&cm.lock, flags);
+       spin_unlock_irq(&cm.lock);
 
        return cm_id_priv;
 }
@@ -646,6 +645,25 @@ static inline int cm_convert_to_ms(int iba_time)
        return 1 << max(iba_time - 8, 0);
 }
 
+/*
+ * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
+ * Because of how ack_timeout is stored, adding one doubles the timeout.
+ * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
+ * increment it (round up) only if the other is within 50%.
+ */
+static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
+{
+       int ack_timeout = packet_life_time + 1;
+
+       if (ack_timeout >= ca_ack_delay)
+               ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
+       else
+               ack_timeout = ca_ack_delay +
+                             (ack_timeout >= (ca_ack_delay - 1));
+
+       return min(31, ack_timeout);
+}
+
 static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
 {
        if (timewait_info->inserted_remote_id) {
@@ -689,7 +707,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
-       wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1);
+       wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
        queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                           msecs_to_jiffies(wait_time));
        cm_id_priv->timewait_info = NULL;
@@ -713,31 +731,30 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;
-       unsigned long flags;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 retest:
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                cm_id->state = IB_CM_IDLE;
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-               spin_lock_irqsave(&cm.lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
+               spin_lock_irq(&cm.lock);
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
-               spin_unlock_irqrestore(&cm.lock, flags);
+               spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                break;
        case IB_CM_REQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
@@ -747,9 +764,9 @@ retest:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
-                       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+                       spin_unlock_irq(&cm_id_priv->lock);
                } else {
-                       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+                       spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
@@ -762,25 +779,25 @@ retest:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                break;
        }
 
@@ -912,7 +929,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
        cm_req_set_primary_sl(req_msg, param->primary_path->sl);
        cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
        cm_req_set_primary_local_ack_timeout(req_msg,
-               min(31, param->primary_path->packet_life_time + 1));
+               cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+                              param->primary_path->packet_life_time));
 
        if (param->alternate_path) {
                req_msg->alt_local_lid = param->alternate_path->slid;
@@ -927,7 +945,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
                cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
                cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
                cm_req_set_alt_local_ack_timeout(req_msg,
-                       min(31, param->alternate_path->packet_life_time + 1));
+                       cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+                                      param->alternate_path->packet_life_time));
        }
 
        if (param->private_data && param->private_data_len)
@@ -1169,7 +1188,6 @@ static void cm_format_req_event(struct cm_work *work,
 static void cm_process_work(struct cm_id_private *cm_id_priv,
                            struct cm_work *work)
 {
-       unsigned long flags;
        int ret;
 
        /* We will typically only have the current event to report. */
@@ -1177,9 +1195,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
        cm_free_work(work);
 
        while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
-               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               spin_lock_irq(&cm_id_priv->lock);
                work = cm_dequeue_work(cm_id_priv);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                BUG_ON(!work);
                ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
                                                &work->cm_event);
@@ -1250,7 +1268,6 @@ static void cm_dup_req_handler(struct cm_work *work,
                               struct cm_id_private *cm_id_priv)
 {
        struct ib_mad_send_buf *msg = NULL;
-       unsigned long flags;
        int ret;
 
        /* Quick state check to discard duplicate REQs. */
@@ -1261,7 +1278,7 @@ static void cm_dup_req_handler(struct cm_work *work,
        if (ret)
                return;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id_priv->id.state) {
        case IB_CM_MRA_REQ_SENT:
                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
@@ -1276,14 +1293,14 @@ static void cm_dup_req_handler(struct cm_work *work,
        default:
                goto unlock;
        }
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                goto free;
        return;
 
-unlock:        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:        spin_unlock_irq(&cm_id_priv->lock);
 free:  cm_free_msg(msg);
 }
 
@@ -1293,17 +1310,16 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
        struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
        struct cm_timewait_info *timewait_info;
        struct cm_req_msg *req_msg;
-       unsigned long flags;
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
        /* Check for possible duplicate REQ. */
-       spin_lock_irqsave(&cm.lock, flags);
+       spin_lock_irq(&cm.lock);
        timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
        if (timewait_info) {
                cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
                                           timewait_info->work.remote_id);
-               spin_unlock_irqrestore(&cm.lock, flags);
+               spin_unlock_irq(&cm.lock);
                if (cur_cm_id_priv) {
                        cm_dup_req_handler(work, cur_cm_id_priv);
                        cm_deref_id(cur_cm_id_priv);
@@ -1315,7 +1331,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
        timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
        if (timewait_info) {
                cm_cleanup_timewait(cm_id_priv->timewait_info);
-               spin_unlock_irqrestore(&cm.lock, flags);
+               spin_unlock_irq(&cm.lock);
                cm_issue_rej(work->port, work->mad_recv_wc,
                             IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
                             NULL, 0);
@@ -1328,7 +1344,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
                                           req_msg->private_data);
        if (!listen_cm_id_priv) {
                cm_cleanup_timewait(cm_id_priv->timewait_info);
-               spin_unlock_irqrestore(&cm.lock, flags);
+               spin_unlock_irq(&cm.lock);
                cm_issue_rej(work->port, work->mad_recv_wc,
                             IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
                             NULL, 0);
@@ -1338,7 +1354,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
        atomic_inc(&cm_id_priv->refcount);
        cm_id_priv->id.state = IB_CM_REQ_RCVD;
        atomic_inc(&cm_id_priv->work_count);
-       spin_unlock_irqrestore(&cm.lock, flags);
+       spin_unlock_irq(&cm.lock);
 out:
        return listen_cm_id_priv;
 }
@@ -1440,7 +1456,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
        cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
        rep_msg->resp_resources = param->responder_resources;
        rep_msg->initiator_depth = param->initiator_depth;
-       cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay);
+       cm_rep_set_target_ack_delay(rep_msg,
+                                   cm_id_priv->av.port->cm_dev->ack_delay);
        cm_rep_set_failover(rep_msg, param->failover_accepted);
        cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
        cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
@@ -1591,7 +1608,6 @@ static void cm_dup_rep_handler(struct cm_work *work)
        struct cm_id_private *cm_id_priv;
        struct cm_rep_msg *rep_msg;
        struct ib_mad_send_buf *msg = NULL;
-       unsigned long flags;
        int ret;
 
        rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
@@ -1604,7 +1620,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
        if (ret)
                goto deref;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
                cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
                              cm_id_priv->private_data,
@@ -1616,14 +1632,14 @@ static void cm_dup_rep_handler(struct cm_work *work)
                              cm_id_priv->private_data_len);
        else
                goto unlock;
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                goto free;
        goto deref;
 
-unlock:        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:        spin_unlock_irq(&cm_id_priv->lock);
 free:  cm_free_msg(msg);
 deref: cm_deref_id(cm_id_priv);
 }
@@ -1632,7 +1648,6 @@ static int cm_rep_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_rep_msg *rep_msg;
-       unsigned long flags;
        int ret;
 
        rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1644,13 +1659,13 @@ static int cm_rep_handler(struct cm_work *work)
 
        cm_format_rep_event(work);
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                break;
        default:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ret = -EINVAL;
                goto error;
        }
@@ -1663,7 +1678,7 @@ static int cm_rep_handler(struct cm_work *work)
        /* Check for duplicate REP. */
        if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
                spin_unlock(&cm.lock);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ret = -EINVAL;
                goto error;
        }
@@ -1673,7 +1688,7 @@ static int cm_rep_handler(struct cm_work *work)
                         &cm.remote_id_table);
                cm_id_priv->timewait_info->inserted_remote_id = 0;
                spin_unlock(&cm.lock);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                cm_issue_rej(work->port, work->mad_recv_wc,
                             IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
                             NULL, 0);
@@ -1689,6 +1704,13 @@ static int cm_rep_handler(struct cm_work *work)
        cm_id_priv->responder_resources = rep_msg->initiator_depth;
        cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
        cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+       cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+       cm_id_priv->av.timeout =
+                       cm_ack_timeout(cm_id_priv->target_ack_delay,
+                                      cm_id_priv->av.timeout - 1);
+       cm_id_priv->alt_av.timeout =
+                       cm_ack_timeout(cm_id_priv->target_ack_delay,
+                                      cm_id_priv->alt_av.timeout - 1);
 
        /* todo: handle peer_to_peer */
 
@@ -1696,7 +1718,7 @@ static int cm_rep_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -1712,7 +1734,6 @@ error:
 static int cm_establish_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
-       unsigned long flags;
        int ret;
 
        /* See comment in cm_establish about lookup. */
@@ -1720,9 +1741,9 @@ static int cm_establish_handler(struct cm_work *work)
        if (!cm_id_priv)
                return -EINVAL;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
 
@@ -1730,7 +1751,7 @@ static int cm_establish_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -1746,7 +1767,6 @@ static int cm_rtu_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_rtu_msg *rtu_msg;
-       unsigned long flags;
        int ret;
 
        rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1757,10 +1777,10 @@ static int cm_rtu_handler(struct cm_work *work)
 
        work->cm_event.private_data = &rtu_msg->private_data;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_REP_SENT &&
            cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1769,7 +1789,7 @@ static int cm_rtu_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -1932,7 +1952,6 @@ static int cm_dreq_handler(struct cm_work *work)
        struct cm_id_private *cm_id_priv;
        struct cm_dreq_msg *dreq_msg;
        struct ib_mad_send_buf *msg = NULL;
-       unsigned long flags;
        int ret;
 
        dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1945,7 +1964,7 @@ static int cm_dreq_handler(struct cm_work *work)
 
        work->cm_event.private_data = &dreq_msg->private_data;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
                goto unlock;
 
@@ -1964,7 +1983,7 @@ static int cm_dreq_handler(struct cm_work *work)
                cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
                               cm_id_priv->private_data,
                               cm_id_priv->private_data_len);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
 
                if (ib_post_send_mad(msg, NULL))
                        cm_free_msg(msg);
@@ -1977,7 +1996,7 @@ static int cm_dreq_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -1985,7 +2004,7 @@ static int cm_dreq_handler(struct cm_work *work)
                cm_deref_id(cm_id_priv);
        return 0;
 
-unlock:        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:        spin_unlock_irq(&cm_id_priv->lock);
 deref: cm_deref_id(cm_id_priv);
        return -EINVAL;
 }
@@ -1994,7 +2013,6 @@ static int cm_drep_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_drep_msg *drep_msg;
-       unsigned long flags;
        int ret;
 
        drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2005,10 +2023,10 @@ static int cm_drep_handler(struct cm_work *work)
 
        work->cm_event.private_data = &drep_msg->private_data;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
            cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_enter_timewait(cm_id_priv);
@@ -2017,7 +2035,7 @@ static int cm_drep_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -2107,17 +2125,16 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 {
        struct cm_timewait_info *timewait_info;
        struct cm_id_private *cm_id_priv;
-       unsigned long flags;
        __be32 remote_id;
 
        remote_id = rej_msg->local_comm_id;
 
        if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
-               spin_lock_irqsave(&cm.lock, flags);
+               spin_lock_irq(&cm.lock);
                timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
                                                  remote_id);
                if (!timewait_info) {
-                       spin_unlock_irqrestore(&cm.lock, flags);
+                       spin_unlock_irq(&cm.lock);
                        return NULL;
                }
                cm_id_priv = idr_find(&cm.local_id_table, (__force int)
@@ -2129,7 +2146,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
                        else
                                cm_id_priv = NULL;
                }
-               spin_unlock_irqrestore(&cm.lock, flags);
+               spin_unlock_irq(&cm.lock);
        } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
                cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
        else
@@ -2142,7 +2159,6 @@ static int cm_rej_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_rej_msg *rej_msg;
-       unsigned long flags;
        int ret;
 
        rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2152,7 +2168,7 @@ static int cm_rej_handler(struct cm_work *work)
 
        cm_format_rej_event(work);
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
@@ -2176,7 +2192,7 @@ static int cm_rej_handler(struct cm_work *work)
                cm_enter_timewait(cm_id_priv);
                break;
        default:
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                ret = -EINVAL;
                goto out;
        }
@@ -2184,7 +2200,7 @@ static int cm_rej_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -2295,7 +2311,6 @@ static int cm_mra_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_mra_msg *mra_msg;
-       unsigned long flags;
        int timeout, ret;
 
        mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2307,9 +2322,9 @@ static int cm_mra_handler(struct cm_work *work)
        work->cm_event.param.mra_rcvd.service_timeout =
                                        cm_mra_get_service_timeout(mra_msg);
        timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
-                 cm_convert_to_ms(cm_id_priv->av.packet_life_time);
+                 cm_convert_to_ms(cm_id_priv->av.timeout);
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
                if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
@@ -2342,7 +2357,7 @@ static int cm_mra_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -2350,7 +2365,7 @@ static int cm_mra_handler(struct cm_work *work)
                cm_deref_id(cm_id_priv);
        return 0;
 out:
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
        cm_deref_id(cm_id_priv);
        return -EINVAL;
 }
@@ -2379,7 +2394,8 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
        cm_lap_set_sl(lap_msg, alternate_path->sl);
        cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
        cm_lap_set_local_ack_timeout(lap_msg,
-               min(31, alternate_path->packet_life_time + 1));
+               cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+                              alternate_path->packet_life_time));
 
        if (private_data && private_data_len)
                memcpy(lap_msg->private_data, private_data, private_data_len);
@@ -2410,6 +2426,9 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
        ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
        if (ret)
                goto out;
+       cm_id_priv->alt_av.timeout =
+                       cm_ack_timeout(cm_id_priv->target_ack_delay,
+                                      cm_id_priv->alt_av.timeout - 1);
 
        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
@@ -2465,7 +2484,6 @@ static int cm_lap_handler(struct cm_work *work)
        struct cm_lap_msg *lap_msg;
        struct ib_cm_lap_event_param *param;
        struct ib_mad_send_buf *msg = NULL;
-       unsigned long flags;
        int ret;
 
        /* todo: verify LAP request and send reject APR if invalid. */
@@ -2480,7 +2498,7 @@ static int cm_lap_handler(struct cm_work *work)
        cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
        work->cm_event.private_data = &lap_msg->private_data;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
                goto unlock;
 
@@ -2497,7 +2515,7 @@ static int cm_lap_handler(struct cm_work *work)
                              cm_id_priv->service_timeout,
                              cm_id_priv->private_data,
                              cm_id_priv->private_data_len);
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
 
                if (ib_post_send_mad(msg, NULL))
                        cm_free_msg(msg);
@@ -2515,7 +2533,7 @@ static int cm_lap_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -2523,7 +2541,7 @@ static int cm_lap_handler(struct cm_work *work)
                cm_deref_id(cm_id_priv);
        return 0;
 
-unlock:        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:        spin_unlock_irq(&cm_id_priv->lock);
 deref: cm_deref_id(cm_id_priv);
        return -EINVAL;
 }
@@ -2598,7 +2616,6 @@ static int cm_apr_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
        struct cm_apr_msg *apr_msg;
-       unsigned long flags;
        int ret;
 
        apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2612,11 +2629,11 @@ static int cm_apr_handler(struct cm_work *work)
        work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
        work->cm_event.private_data = &apr_msg->private_data;
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
            (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
             cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
@@ -2626,7 +2643,7 @@ static int cm_apr_handler(struct cm_work *work)
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        if (ret)
                cm_process_work(cm_id_priv, work);
@@ -2761,7 +2778,6 @@ static int cm_sidr_req_handler(struct cm_work *work)
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        struct cm_sidr_req_msg *sidr_req_msg;
        struct ib_wc *wc;
-       unsigned long flags;
 
        cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
        if (IS_ERR(cm_id))
@@ -2778,27 +2794,26 @@ static int cm_sidr_req_handler(struct cm_work *work)
                                work->mad_recv_wc->recv_buf.grh,
                                &cm_id_priv->av);
        cm_id_priv->id.remote_id = sidr_req_msg->request_id;
-       cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
        cm_id_priv->tid = sidr_req_msg->hdr.tid;
        atomic_inc(&cm_id_priv->work_count);
 
-       spin_lock_irqsave(&cm.lock, flags);
+       spin_lock_irq(&cm.lock);
        cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
        if (cur_cm_id_priv) {
-               spin_unlock_irqrestore(&cm.lock, flags);
+               spin_unlock_irq(&cm.lock);
                goto out; /* Duplicate message. */
        }
+       cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
        cur_cm_id_priv = cm_find_listen(cm_id->device,
                                        sidr_req_msg->service_id,
                                        sidr_req_msg->private_data);
        if (!cur_cm_id_priv) {
-               rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
-               spin_unlock_irqrestore(&cm.lock, flags);
-               /* todo: reply with no match */
+               spin_unlock_irq(&cm.lock);
+               cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
                goto out; /* No match. */
        }
        atomic_inc(&cur_cm_id_priv->refcount);
-       spin_unlock_irqrestore(&cm.lock, flags);
+       spin_unlock_irq(&cm.lock);
 
        cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
        cm_id_priv->id.context = cur_cm_id_priv->id.context;
@@ -2899,7 +2914,6 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 {
        struct cm_sidr_rep_msg *sidr_rep_msg;
        struct cm_id_private *cm_id_priv;
-       unsigned long flags;
 
        sidr_rep_msg = (struct cm_sidr_rep_msg *)
                                work->mad_recv_wc->recv_buf.mad;
@@ -2907,14 +2921,14 @@ static int cm_sidr_rep_handler(struct cm_work *work)
        if (!cm_id_priv)
                return -EINVAL; /* Unmatched reply. */
 
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
-               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_id_priv->id.state = IB_CM_IDLE;
        ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
 
        cm_format_sidr_rep_event(work);
        cm_process_work(cm_id_priv, work);
@@ -2930,14 +2944,13 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
        struct cm_id_private *cm_id_priv;
        struct ib_cm_event cm_event;
        enum ib_cm_state state;
-       unsigned long flags;
        int ret;
 
        memset(&cm_event, 0, sizeof cm_event);
        cm_id_priv = msg->context[0];
 
        /* Discard old sends or ones without a response. */
-       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       spin_lock_irq(&cm_id_priv->lock);
        state = (enum ib_cm_state) (unsigned long) msg->context[1];
        if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
                goto discard;
@@ -2964,7 +2977,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
        default:
                goto discard;
        }
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
        cm_event.param.send_status = wc_status;
 
        /* No other events can occur on the cm_id at this point. */
@@ -2974,7 +2987,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
                ib_destroy_cm_id(&cm_id_priv->id);
        return;
 discard:
-       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       spin_unlock_irq(&cm_id_priv->lock);
        cm_free_msg(msg);
 }
 
@@ -3269,8 +3282,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
                        *qp_attr_mask |= IB_QP_ALT_PATH;
                        qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
                        qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
-                       qp_attr->alt_timeout =
-                                       cm_id_priv->alt_av.packet_life_time + 1;
+                       qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
                        qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
                }
                ret = 0;
@@ -3308,8 +3320,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
                                *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
                                                 IB_QP_RNR_RETRY |
                                                 IB_QP_MAX_QP_RD_ATOMIC;
-                               qp_attr->timeout =
-                                       cm_id_priv->av.packet_life_time + 1;
+                               qp_attr->timeout = cm_id_priv->av.timeout;
                                qp_attr->retry_cnt = cm_id_priv->retry_count;
                                qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
                                qp_attr->max_rd_atomic =
@@ -3323,8 +3334,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
                        *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
                        qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
                        qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
-                       qp_attr->alt_timeout =
-                               cm_id_priv->alt_av.packet_life_time + 1;
+                       qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
                        qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
                        qp_attr->path_mig_state = IB_MIG_REARM;
                }
@@ -3364,6 +3374,16 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
 }
 EXPORT_SYMBOL(ib_cm_init_qp_attr);
 
+void cm_get_ack_delay(struct cm_device *cm_dev)
+{
+       struct ib_device_attr attr;
+
+       if (ib_query_device(cm_dev->device, &attr))
+               cm_dev->ack_delay = 0; /* acks will rely on packet life time */
+       else
+               cm_dev->ack_delay = attr.local_ca_ack_delay;
+}
+
 static void cm_add_one(struct ib_device *device)
 {
        struct cm_device *cm_dev;
@@ -3388,6 +3408,7 @@ static void cm_add_one(struct ib_device *device)
                return;
 
        cm_dev->device = device;
+       cm_get_ack_delay(cm_dev);
 
        set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
        for (i = 1; i <= device->phys_port_cnt; i++) {
index 4d3aee9..aec9c7a 100644 (file)
@@ -35,6 +35,7 @@
 #define CM_MSGS_H
 
 #include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
 
 /*
  * Parameters to routines below should be in network-byte order, and values
index 32a0e66..23af7a0 100644 (file)
@@ -2326,7 +2326,6 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
        rep.private_data_len = conn_param->private_data_len;
        rep.responder_resources = conn_param->responder_resources;
        rep.initiator_depth = conn_param->initiator_depth;
-       rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
        rep.failover_accepted = 0;
        rep.flow_control = conn_param->flow_control;
        rep.rnr_retry_count = conn_param->rnr_retry_count;
index 85ccf13..6b8faca 100644 (file)
@@ -675,10 +675,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        struct ib_mad_port_private *port_priv;
        struct ib_mad_agent_private *recv_mad_agent = NULL;
        struct ib_device *device = mad_agent_priv->agent.device;
-       u8 port_num = mad_agent_priv->agent.port_num;
+       u8 port_num;
        struct ib_wc mad_wc;
        struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
 
+       if (device->node_type == RDMA_NODE_IB_SWITCH &&
+           smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+               port_num = send_wr->wr.ud.port_num;
+       else
+               port_num = mad_agent_priv->agent.port_num;
+
        /*
         * Directed route handling starts if the initial LID routed part of
         * a request or the ending LID routed part of a response is empty.
@@ -1839,6 +1845,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        struct ib_mad_private *recv, *response;
        struct ib_mad_list_head *mad_list;
        struct ib_mad_agent_private *mad_agent;
+       int port_num;
 
        response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
        if (!response)
@@ -1872,25 +1879,50 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
                goto out;
 
+       if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
+               port_num = wc->port_num;
+       else
+               port_num = port_priv->port_num;
+
        if (recv->mad.mad.mad_hdr.mgmt_class ==
            IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+               enum smi_forward_action retsmi;
+
                if (smi_handle_dr_smp_recv(&recv->mad.smp,
                                           port_priv->device->node_type,
-                                          port_priv->port_num,
+                                          port_num,
                                           port_priv->device->phys_port_cnt) ==
                                           IB_SMI_DISCARD)
                        goto out;
 
-               if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL)
+               retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
+               if (retsmi == IB_SMI_LOCAL)
                        goto local;
 
-               if (smi_handle_dr_smp_send(&recv->mad.smp,
-                                          port_priv->device->node_type,
-                                          port_priv->port_num) == IB_SMI_DISCARD)
-                       goto out;
+               if (retsmi == IB_SMI_SEND) { /* don't forward */
+                       if (smi_handle_dr_smp_send(&recv->mad.smp,
+                                                  port_priv->device->node_type,
+                                                  port_num) == IB_SMI_DISCARD)
+                               goto out;
+
+                       if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
+                               goto out;
+               } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
+                       /* forward case for switches */
+                       memcpy(response, recv, sizeof(*response));
+                       response->header.recv_wc.wc = &response->header.wc;
+                       response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+                       response->header.recv_wc.recv_buf.grh = &response->grh;
+
+                       if (!agent_send_response(&response->mad.mad,
+                                                &response->grh, wc,
+                                                port_priv->device,
+                                                smi_get_fwd_port(&recv->mad.smp),
+                                                qp_info->qp->qp_num))
+                               response = NULL;
 
-               if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
                        goto out;
+               }
        }
 
 local:
@@ -1919,7 +1951,7 @@ local:
                                agent_send_response(&response->mad.mad,
                                                    &recv->grh, wc,
                                                    port_priv->device,
-                                                   port_priv->port_num,
+                                                   port_num,
                                                    qp_info->qp->qp_num);
                                goto out;
                        }
index 1e13ab4..15b4c4d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
index 24c93fd..b1d4bbf 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 6469406..20ab6b3 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -56,6 +56,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 struct ib_sa_sm_ah {
        struct ib_ah        *ah;
        struct kref          ref;
+       u16                  pkey_index;
        u8                   src_path_mask;
 };
 
@@ -382,6 +383,13 @@ static void update_sm_ah(struct work_struct *work)
        kref_init(&new_ah->ref);
        new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
 
+       new_ah->pkey_index = 0;
+       if (ib_find_pkey(port->agent->device, port->port_num,
+                        IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index) &&
+           ib_find_pkey(port->agent->device, port->port_num,
+                        IB_DEFAULT_PKEY_PARTIAL, &new_ah->pkey_index))
+               printk(KERN_ERR "Couldn't find index for default PKey\n");
+
        memset(&ah_attr, 0, sizeof ah_attr);
        ah_attr.dlid     = port_attr.sm_lid;
        ah_attr.sl       = port_attr.sm_sl;
@@ -512,6 +520,35 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
 
+static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&query->port->ah_lock, flags);
+       kref_get(&query->port->sm_ah->ref);
+       query->sm_ah = query->port->sm_ah;
+       spin_unlock_irqrestore(&query->port->ah_lock, flags);
+
+       query->mad_buf = ib_create_send_mad(query->port->agent, 1,
+                                           query->sm_ah->pkey_index,
+                                           0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+                                           gfp_mask);
+       if (!query->mad_buf) {
+               kref_put(&query->sm_ah->ref, free_sm_ah);
+               return -ENOMEM;
+       }
+
+       query->mad_buf->ah = query->sm_ah->ah;
+
+       return 0;
+}
+
+static void free_mad(struct ib_sa_query *query)
+{
+       ib_free_send_mad(query->mad_buf);
+       kref_put(&query->sm_ah->ref, free_sm_ah);
+}
+
 static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 {
        unsigned long flags;
@@ -548,20 +585,11 @@ retry:
        query->mad_buf->context[0] = query;
        query->id = id;
 
-       spin_lock_irqsave(&query->port->ah_lock, flags);
-       kref_get(&query->port->sm_ah->ref);
-       query->sm_ah = query->port->sm_ah;
-       spin_unlock_irqrestore(&query->port->ah_lock, flags);
-
-       query->mad_buf->ah = query->sm_ah->ah;
-
        ret = ib_post_send_mad(query->mad_buf, NULL);
        if (ret) {
                spin_lock_irqsave(&idr_lock, flags);
                idr_remove(&query_idr, id);
                spin_unlock_irqrestore(&idr_lock, flags);
-
-               kref_put(&query->sm_ah->ref, free_sm_ah);
        }
 
        /*
@@ -647,13 +675,10 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
        if (!query)
                return -ENOMEM;
 
-       query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
-                                                    0, IB_MGMT_SA_HDR,
-                                                    IB_MGMT_SA_DATA, gfp_mask);
-       if (!query->sa_query.mad_buf) {
-               ret = -ENOMEM;
+       query->sa_query.port     = port;
+       ret = alloc_mad(&query->sa_query, gfp_mask);
+       if (ret)
                goto err1;
-       }
 
        ib_sa_client_get(client);
        query->sa_query.client = client;
@@ -665,7 +690,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
 
        query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
        query->sa_query.release  = ib_sa_path_rec_release;
-       query->sa_query.port     = port;
        mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_PATH_REC);
        mad->sa_hdr.comp_mask    = comp_mask;
@@ -683,7 +707,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
 err2:
        *sa_query = NULL;
        ib_sa_client_put(query->sa_query.client);
-       ib_free_send_mad(query->sa_query.mad_buf);
+       free_mad(&query->sa_query);
 
 err1:
        kfree(query);
@@ -773,13 +797,10 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
        if (!query)
                return -ENOMEM;
 
-       query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
-                                                    0, IB_MGMT_SA_HDR,
-                                                    IB_MGMT_SA_DATA, gfp_mask);
-       if (!query->sa_query.mad_buf) {
-               ret = -ENOMEM;
+       query->sa_query.port     = port;
+       ret = alloc_mad(&query->sa_query, gfp_mask);
+       if (ret)
                goto err1;
-       }
 
        ib_sa_client_get(client);
        query->sa_query.client = client;
@@ -791,7 +812,6 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
 
        query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
        query->sa_query.release  = ib_sa_service_rec_release;
-       query->sa_query.port     = port;
        mad->mad_hdr.method      = method;
        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
        mad->sa_hdr.comp_mask    = comp_mask;
@@ -810,7 +830,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
 err2:
        *sa_query = NULL;
        ib_sa_client_put(query->sa_query.client);
-       ib_free_send_mad(query->sa_query.mad_buf);
+       free_mad(&query->sa_query);
 
 err1:
        kfree(query);
@@ -869,13 +889,10 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
        if (!query)
                return -ENOMEM;
 
-       query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
-                                                    0, IB_MGMT_SA_HDR,
-                                                    IB_MGMT_SA_DATA, gfp_mask);
-       if (!query->sa_query.mad_buf) {
-               ret = -ENOMEM;
+       query->sa_query.port     = port;
+       ret = alloc_mad(&query->sa_query, gfp_mask);
+       if (ret)
                goto err1;
-       }
 
        ib_sa_client_get(client);
        query->sa_query.client = client;
@@ -887,7 +904,6 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
 
        query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
        query->sa_query.release  = ib_sa_mcmember_rec_release;
-       query->sa_query.port     = port;
        mad->mad_hdr.method      = method;
        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
        mad->sa_hdr.comp_mask    = comp_mask;
@@ -906,7 +922,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
 err2:
        *sa_query = NULL;
        ib_sa_client_put(query->sa_query.client);
-       ib_free_send_mad(query->sa_query.mad_buf);
+       free_mad(&query->sa_query);
 
 err1:
        kfree(query);
@@ -939,8 +955,7 @@ static void send_handler(struct ib_mad_agent *agent,
        idr_remove(&query_idr, query->id);
        spin_unlock_irqrestore(&idr_lock, flags);
 
-       ib_free_send_mad(mad_send_wc->send_buf);
-       kref_put(&query->sm_ah->ref, free_sm_ah);
+       free_mad(query);
        ib_sa_client_put(query->client);
        query->release(query);
 }
index 2bca753..8723675 100644 (file)
@@ -192,7 +192,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
                        }
                        /* smp->hop_ptr updated when sending */
                        return (node_type == RDMA_NODE_IB_SWITCH ?
-                               IB_SMI_HANDLE: IB_SMI_DISCARD);
+                               IB_SMI_HANDLE : IB_SMI_DISCARD);
                }
 
                /* C14-13:4 -- hop_ptr = 0 -> give to SM */
@@ -211,7 +211,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
        if (!ib_get_smp_direction(smp)) {
                /* C14-9:2 -- intermediate hop */
                if (hop_ptr && hop_ptr < hop_cnt)
-                       return IB_SMI_SEND;
+                       return IB_SMI_FORWARD;
 
                /* C14-9:3 -- at the end of the DR segment of path */
                if (hop_ptr == hop_cnt)
@@ -224,7 +224,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
        } else {
                /* C14-13:2  -- intermediate hop */
                if (2 <= hop_ptr && hop_ptr <= hop_cnt)
-                       return IB_SMI_SEND;
+                       return IB_SMI_FORWARD;
 
                /* C14-13:3 -- at the end of the DR segment of path */
                if (hop_ptr == 1)
@@ -233,3 +233,13 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
        }
        return IB_SMI_LOCAL;
 }
+
+/*
+ * Return the forwarding port number from initial_path for outgoing SMP and
+ * from return_path for returning SMP
+ */
+int smi_get_fwd_port(struct ib_smp *smp)
+{
+       return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] :
+               smp->return_path[smp->hop_ptr-1]);
+}
index 9a4b349..1cfc298 100644 (file)
@@ -48,10 +48,12 @@ enum smi_action {
 enum smi_forward_action {
        IB_SMI_LOCAL,   /* SMP should be completed up the stack */
        IB_SMI_SEND,    /* received DR SMP should be forwarded to the send queue */
+       IB_SMI_FORWARD  /* SMP should be forwarded (for switches only) */
 };
 
 enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
                                       int port_num, int phys_port_cnt);
+int smi_get_fwd_port(struct ib_smp *smp);
 extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
 extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
                                              u8 node_type, int port_num);
index bf9b992..70b77ae 100644 (file)
@@ -311,7 +311,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
                return sprintf(buf, "N/A (no PMA)\n");
 
        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-       out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+       out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad) {
                ret = -ENOMEM;
                goto out;
index 2586a3e..424983f 100644 (file)
@@ -823,7 +823,6 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
        param.private_data_len    = cmd.len;
        param.responder_resources = cmd.responder_resources;
        param.initiator_depth     = cmd.initiator_depth;
-       param.target_ack_delay    = cmd.target_ack_delay;
        param.failover_accepted   = cmd.failover_accepted;
        param.flow_control        = cmd.flow_control;
        param.rnr_retry_count     = cmd.rnr_retry_count;
index d40652a..26d0470 100644 (file)
@@ -121,6 +121,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
        cur_base = addr & PAGE_MASK;
 
+       ret = 0;
        while (npages) {
                ret = get_user_pages(current, current->mm, cur_base,
                                     min_t(int, npages,
index 809cb14..e6ce5f2 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_AMSO1100
        tristate "Ammasso 1100 HCA support"
-       depends on PCI && INET && INFINIBAND
+       depends on PCI && INET
        ---help---
          This is a low-level driver for the Ammasso 1100 host
          channel adapter (HCA).
index 77977f5..2acec3f 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_CXGB3
        tristate "Chelsio RDMA Driver"
-       depends on CHELSIO_T3 && INFINIBAND && INET
+       depends on CHELSIO_T3 && INET
        select GENERIC_ALLOCATOR
        ---help---
          This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
index 76049af..1518b41 100644 (file)
@@ -144,7 +144,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
        }
        wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
        memset(wqe, 0, sizeof(*wqe));
-       build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7);
+       build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
        wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
        sge_cmd = qpid << 8 | 3;
        wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -548,7 +548,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
                        V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
        wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
        memset(wqe, 0, sizeof(*wqe));
-       build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1,
+       build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
                       T3_CTL_QP_TID, 7);
        wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
        sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@@ -833,7 +833,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
        wqe->ird = cpu_to_be32(attr->ird);
        wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
        wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
-       wqe->rsvd = 0;
+       wqe->irs = cpu_to_be32(attr->irs);
        skb->priority = 0;      /* 0=>ToeQ; 1=>CtrlQ */
        return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
 }
index ff7290e..c84d4ac 100644 (file)
@@ -294,6 +294,7 @@ struct t3_rdma_init_attr {
        u64 qp_dma_addr;
        u32 qp_dma_size;
        u32 flags;
+       u32 irs;
 };
 
 struct t3_rdma_init_wr {
@@ -314,7 +315,7 @@ struct t3_rdma_init_wr {
        __be32 ird;
        __be64 qp_dma_addr;     /* 7 */
        __be32 qp_dma_size;     /* 8 */
-       u32 rsvd;
+       u32 irs;
 };
 
 struct t3_genbit {
index b2faff5..3b41dc0 100644 (file)
@@ -254,8 +254,6 @@ static void release_ep_resources(struct iwch_ep *ep)
        cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
        dst_release(ep->dst);
        l2t_release(L2DATA(ep->com.tdev), ep->l2t);
-       if (ep->com.tdev->type == T3B)
-               release_tid(ep->com.tdev, ep->hwtid, NULL);
        put_ep(&ep->com);
 }
 
@@ -515,7 +513,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
                           V_TX_SNDBUF(snd_win>>15));
-       req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+       req->flags = htonl(F_TX_INIT);
        req->sndseq = htonl(ep->snd_seq);
        BUG_ON(ep->mpa_skb);
        ep->mpa_skb = skb;
@@ -566,7 +564,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
        req->len = htonl(mpalen);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
                           V_TX_SNDBUF(snd_win>>15));
-       req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+       req->flags = htonl(F_TX_INIT);
        req->sndseq = htonl(ep->snd_seq);
        BUG_ON(ep->mpa_skb);
        ep->mpa_skb = skb;
@@ -618,7 +616,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
                           V_TX_SNDBUF(snd_win>>15));
-       req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT);
+       req->flags = htonl(F_TX_INIT);
        req->sndseq = htonl(ep->snd_seq);
        ep->mpa_skb = skb;
        state_set(&ep->com, MPA_REP_SENT);
@@ -641,6 +639,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
 
        ep->snd_seq = ntohl(req->snd_isn);
+       ep->rcv_seq = ntohl(req->rcv_isn);
 
        set_emss(ep, ntohs(req->tcp_opt));
 
@@ -1023,6 +1022,9 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        skb_pull(skb, sizeof(*hdr));
        skb_trim(skb, dlen);
 
+       ep->rcv_seq += dlen;
+       BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
+
        switch (state_read(&ep->com)) {
        case MPA_REQ_SENT:
                process_mpa_reply(ep, skb);
@@ -1060,7 +1062,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        struct iwch_ep *ep = ctx;
        struct cpl_wr_ack *hdr = cplhdr(skb);
        unsigned int credits = ntohs(hdr->credits);
-       enum iwch_qp_attr_mask  mask;
 
        PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
 
@@ -1072,30 +1073,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        ep->mpa_skb = NULL;
        dst_confirm(ep->dst);
        if (state_read(&ep->com) == MPA_REP_SENT) {
-               struct iwch_qp_attributes attrs;
-
-               /* bind QP to EP and move to RTS */
-               attrs.mpa_attr = ep->mpa_attr;
-               attrs.max_ird = ep->ord;
-               attrs.max_ord = ep->ord;
-               attrs.llp_stream_handle = ep;
-               attrs.next_state = IWCH_QP_STATE_RTS;
-
-               /* bind QP and TID with INIT_WR */
-               mask = IWCH_QP_ATTR_NEXT_STATE |
-                                    IWCH_QP_ATTR_LLP_STREAM_HANDLE |
-                                    IWCH_QP_ATTR_MPA_ATTR |
-                                    IWCH_QP_ATTR_MAX_IRD |
-                                    IWCH_QP_ATTR_MAX_ORD;
-
-               ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp,
-                                    ep->com.qp, mask, &attrs, 1);
-
-               if (!ep->com.rpl_err) {
-                       state_set(&ep->com, FPDU_MODE);
-                       established_upcall(ep);
-               }
-
                ep->com.rpl_done = 1;
                PDBG("waking up ep %p\n", ep);
                wake_up(&ep->com.waitq);
@@ -1124,6 +1101,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        return CPL_RET_BUF_DONE;
 }
 
+/*
+ * Return nonzero if a failed active open with this status allocated a TID
+ */
+static inline int act_open_has_tid(int status)
+{
+       return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
+              status != CPL_ERR_ARP_MISS; /* only these three have no TID */
+}
+
 static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 {
        struct iwch_ep *ep = ctx;
@@ -1133,7 +1119,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
             status2errno(rpl->status));
        connect_reply_upcall(ep, status2errno(rpl->status));
        state_set(&ep->com, DEAD);
-       if (ep->com.tdev->type == T3B)
+       if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status))
                release_tid(ep->com.tdev, GET_TID(rpl), NULL);
        cxgb3_free_atid(ep->com.tdev, ep->atid);
        dst_release(ep->dst);
@@ -1378,6 +1364,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 
        PDBG("%s ep %p\n", __FUNCTION__, ep);
        ep->snd_seq = ntohl(req->snd_isn);
+       ep->rcv_seq = ntohl(req->rcv_isn);
 
        set_emss(ep, ntohs(req->tcp_opt));
 
@@ -1485,6 +1472,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        int ret;
        int state;
 
+       if (is_neg_adv_abort(req->status)) {
+               PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
+                    ep->hwtid);
+               t3_l2t_send_event(ep->com.tdev, ep->l2t);
+               return CPL_RET_BUF_DONE;
+       }
+
        /*
         * We get 2 peer aborts from the HW.  The first one must
         * be ignored except for scribbling that we need one more.
@@ -1494,13 +1488,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
                return CPL_RET_BUF_DONE;
        }
 
-       if (is_neg_adv_abort(req->status)) {
-               PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
-                    ep->hwtid);
-               t3_l2t_send_event(ep->com.tdev, ep->l2t);
-               return CPL_RET_BUF_DONE;
-       }
-
        state = state_read(&ep->com);
        PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
        switch (state) {
@@ -1732,10 +1719,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
 
        PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
-       if (state_read(&ep->com) == DEAD) {
-               put_ep(&ep->com);
+       if (state_read(&ep->com) == DEAD)
                return -ECONNRESET;
-       }
 
        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
        BUG_ON(!qp);
@@ -1755,17 +1740,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        ep->ird = conn_param->ird;
        ep->ord = conn_param->ord;
        PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
+
        get_ep(&ep->com);
-       err = send_mpa_reply(ep, conn_param->private_data,
-                            conn_param->private_data_len);
-       if (err) {
-               ep->com.cm_id = NULL;
-               ep->com.qp = NULL;
-               cm_id->rem_ref(cm_id);
-               abort_connection(ep, NULL, GFP_KERNEL);
-               put_ep(&ep->com);
-               return err;
-       }
 
        /* bind QP to EP and move to RTS */
        attrs.mpa_attr = ep->mpa_attr;
@@ -1783,16 +1759,28 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        err = iwch_modify_qp(ep->com.qp->rhp,
                             ep->com.qp, mask, &attrs, 1);
+       if (err)
+               goto err;
 
-       if (err) {
-               ep->com.cm_id = NULL;
-               ep->com.qp = NULL;
-               cm_id->rem_ref(cm_id);
-               abort_connection(ep, NULL, GFP_KERNEL);
-       } else {
-               state_set(&ep->com, FPDU_MODE);
-               established_upcall(ep);
-       }
+       err = send_mpa_reply(ep, conn_param->private_data,
+                            conn_param->private_data_len);
+       if (err)
+               goto err;
+
+       /* wait for wr_ack */
+       wait_event(ep->com.waitq, ep->com.rpl_done);
+       err = ep->com.rpl_err;
+       if (err)
+               goto err;
+
+       state_set(&ep->com, FPDU_MODE);
+       established_upcall(ep);
+       put_ep(&ep->com);
+       return 0;
+err:
+       ep->com.cm_id = NULL;
+       ep->com.qp = NULL;
+       cm_id->rem_ref(cm_id);
        put_ep(&ep->com);
        return err;
 }
index 21a388c..6107e7c 100644 (file)
@@ -175,6 +175,7 @@ struct iwch_ep {
        unsigned int atid;
        u32 hwtid;
        u32 snd_seq;
+       u32 rcv_seq;
        struct l2t_entry *l2t;
        struct dst_entry *dst;
        struct sk_buff *mpa_skb;
index e7c2c39..f0c7775 100644 (file)
@@ -1163,9 +1163,10 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.post_recv = iwch_post_receive;
 
 
-       dev->ibdev.iwcm =
-           (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
-                                          GFP_KERNEL);
+       dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+       if (!dev->ibdev.iwcm)
+               return -ENOMEM;
+
        dev->ibdev.iwcm->connect = iwch_connect;
        dev->ibdev.iwcm->accept = iwch_accept_cr;
        dev->ibdev.iwcm->reject = iwch_reject_cr;
index 714dddb..dd89b6b 100644 (file)
@@ -628,9 +628,9 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
        /* immediate data starts here. */
        term = (struct terminate_message *)wqe->send.sgl;
        build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
-       build_fw_riwrh((void *)wqe, T3_WR_SEND,
-                      T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1,
-                      qhp->ep->hwtid, 5);
+       wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
+                        V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
+       wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
        skb->priority = CPL_PRIORITY_DATA;
        return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
 }
@@ -732,6 +732,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
        init_attr.qp_dma_addr = qhp->wq.dma_addr;
        init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
        init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+       init_attr.irs = qhp->ep->rcv_seq;
        PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
             "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
             init_attr.rq_addr, init_attr.rq_size,
index 1a85459..59f807d 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_EHCA
        tristate "eHCA support"
-       depends on IBMEBUS && INFINIBAND
+       depends on IBMEBUS
        ---help---
        This driver supports the IBM pSeries eHCA InfiniBand adapter.
 
index 0d6e2c4..3cd6bf3 100644 (file)
@@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
                }
                memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
        }
-       av->av.pmtu = EHCA_MAX_MTU;
+       av->av.pmtu = shca->max_mtu;
 
        /* dgid comes in grh.word_3 */
        memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
@@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
        struct ehca_av *av;
        struct ehca_ud_av new_ehca_av;
        struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+       struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
+                                             ib_device);
        u32 cur_pid = current->tgid;
 
        if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
@@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
                memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
        }
 
-       new_ehca_av.pmtu = EHCA_MAX_MTU;
+       new_ehca_av.pmtu = shca->max_mtu;
 
        memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
               sizeof(ah_attr->grh.dgid));
index 1d286d3..daf823e 100644 (file)
@@ -5,6 +5,7 @@
  *
  *  Authors: Heiko J Schick <schickhj@de.ibm.com>
  *           Christoph Raisch <raisch@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
  *
@@ -86,11 +87,17 @@ struct ehca_eq {
        struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
 };
 
+struct ehca_sma_attr { /* per-port attributes, kept in ehca_sport.saved_attr */
+       u16 lid, lmc, sm_sl, sm_lid; /* copied from the query-port reply */
+       u16 pkey_tbl_len, pkeys[16]; /* length as reported; room for 16 keys */
+};
+
 struct ehca_sport {
        struct ib_cq *ibcq_aqp1;
        struct ib_qp *ibqp_aqp1;
        enum ib_rate  rate;
        enum ib_port_state port_state;
+       struct ehca_sma_attr saved_attr;
 };
 
 struct ehca_shca {
@@ -107,6 +114,8 @@ struct ehca_shca {
        struct ehca_pd *pd;
        struct h_galpas galpas;
        struct mutex modify_mutex;
+       u64 hca_cap;
+       int max_mtu;
 };
 
 struct ehca_pd {
@@ -115,9 +124,20 @@ struct ehca_pd {
        u32 ownpid;
 };
 
+enum ehca_ext_qp_type { /* stored in ehca_qp.ext_type */
+       EQPT_NORMAL    = 0,
+       EQPT_LLQP      = 1,
+       EQPT_SRQBASE   = 2, /* the only type for which HAS_RQ() is false */
+       EQPT_SRQ       = 3, /* IS_SRQ() is true, HAS_SQ() is false */
+};
+
 struct ehca_qp {
-       struct ib_qp ib_qp;
+       union {
+               struct ib_qp ib_qp;
+               struct ib_srq ib_srq;
+       };
        u32 qp_type;
+       enum ehca_ext_qp_type ext_type;
        struct ipz_queue ipz_squeue;
        struct ipz_queue ipz_rqueue;
        struct h_galpas galpas;
@@ -140,6 +160,10 @@ struct ehca_qp {
        u32 mm_count_galpa;
 };
 
+#define IS_SRQ(qp) ((qp)->ext_type == EQPT_SRQ)     /* ext_type is SRQ */
+#define HAS_SQ(qp) ((qp)->ext_type != EQPT_SRQ)     /* any type but SRQ */
+#define HAS_RQ(qp) ((qp)->ext_type != EQPT_SRQBASE) /* any type but SRQBASE */
+
 /* must be power of 2 */
 #define QP_HASHTAB_LEN 8
 
@@ -156,8 +180,8 @@ struct ehca_cq {
        spinlock_t cb_lock;
        struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
        struct list_head entry;
-       u32 nr_callbacks; /* #events assigned to cpu by scaling code */
-       u32 nr_events;    /* #events seen */
+       u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
+       atomic_t nr_events; /* #events seen */
        wait_queue_head_t wait_completion;
        spinlock_t task_lock;
        u32 ownpid;
@@ -275,9 +299,8 @@ void ehca_cleanup_av_cache(void);
 int ehca_init_mrmw_cache(void);
 void ehca_cleanup_mrmw_cache(void);
 
-extern spinlock_t ehca_qp_idr_lock;
-extern spinlock_t ehca_cq_idr_lock;
-extern spinlock_t hcall_lock;
+extern rwlock_t ehca_qp_idr_lock;
+extern rwlock_t ehca_cq_idr_lock;
 extern struct idr ehca_qp_idr;
 extern struct idr ehca_cq_idr;
 
@@ -305,6 +328,7 @@ struct ehca_create_qp_resp {
        u32 qp_num;
        u32 token;
        u32 qp_type;
+       u32 ext_type;
        u32 qkey;
        /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
        u32 real_qp_num;
@@ -320,14 +344,42 @@ struct ehca_alloc_cq_parms {
        struct ipz_eq_handle eq_handle;
 };
 
+enum ehca_service_type { /* type of ehca_alloc_qp_parms.servicetype */
+       ST_RC  = 0,
+       ST_UC  = 1,
+       ST_RD  = 2,
+       ST_UD  = 3,
+};
+
+enum ehca_ll_comp_flags { /* type of ehca_alloc_qp_parms.ll_comp_flags */
+       LLQP_SEND_COMP = 0x20,
+       LLQP_RECV_COMP = 0x40,
+       LLQP_COMP_MASK = 0x60, /* LLQP_SEND_COMP | LLQP_RECV_COMP */
+};
+
 struct ehca_alloc_qp_parms {
-       int servicetype;
+/* input parameters */
+       enum ehca_service_type servicetype;
        int sigtype;
-       int daqp_ctrl;
-       int max_send_sge;
-       int max_recv_sge;
+       enum ehca_ext_qp_type ext_type;
+       enum ehca_ll_comp_flags ll_comp_flags;
+
+       int max_send_wr, max_recv_wr;
+       int max_send_sge, max_recv_sge;
        int ud_av_l_key_ctl;
 
+       u32 token;
+       struct ipz_eq_handle eq_handle;
+       struct ipz_pd pd;
+       struct ipz_cq_handle send_cq_handle, recv_cq_handle;
+
+       u32 srq_qpn, srq_token, srq_limit;
+
+/* output parameters */
+       u32 real_qp_num;
+       struct ipz_qp_handle qp_handle;
+       struct h_galpas galpas;
+
        u16 act_nr_send_wqes;
        u16 act_nr_recv_wqes;
        u8  act_nr_recv_sges;
@@ -335,9 +387,6 @@ struct ehca_alloc_qp_parms {
 
        u32 nr_rq_pages;
        u32 nr_sq_pages;
-
-       struct ipz_eq_handle ipz_eq_handle;
-       struct ipz_pd pd;
 };
 
 int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
index 5665f21..fb3df5c 100644 (file)
@@ -228,8 +228,8 @@ struct hcp_modify_qp_control_block {
 #define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
 #define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
 #define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
-#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
-#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SRQ_LIMIT                    EHCA_BMASK_IBM(16,31)
 #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
 #define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)
 
index 67f0670..01d4a14 100644 (file)
@@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
 {
        unsigned int qp_num = qp->real_qp_num;
        unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-       unsigned long spl_flags;
+       unsigned long flags;
 
-       spin_lock_irqsave(&cq->spinlock, spl_flags);
+       spin_lock_irqsave(&cq->spinlock, flags);
        hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-       spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+       spin_unlock_irqrestore(&cq->spinlock, flags);
 
        ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
                 cq->cq_number, qp_num);
@@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
        unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
        struct hlist_node *iter;
        struct ehca_qp *qp;
-       unsigned long spl_flags;
+       unsigned long flags;
 
-       spin_lock_irqsave(&cq->spinlock, spl_flags);
+       spin_lock_irqsave(&cq->spinlock, flags);
        hlist_for_each(iter, &cq->qp_hashtab[key]) {
                qp = hlist_entry(iter, struct ehca_qp, list_entries);
                if (qp->real_qp_num == real_qp_num) {
@@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
                        break;
                }
        }
-       spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+       spin_unlock_irqrestore(&cq->spinlock, flags);
        if (ret)
                ehca_err(cq->ib_cq.device,
                         "qp not found cq_num=%x real_qp_num=%x",
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
        spin_lock_init(&my_cq->spinlock);
        spin_lock_init(&my_cq->cb_lock);
        spin_lock_init(&my_cq->task_lock);
+       atomic_set(&my_cq->nr_events, 0);
        init_waitqueue_head(&my_cq->wait_completion);
        my_cq->ownpid = current->tgid;
 
@@ -162,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                        goto create_cq_exit1;
                }
 
-               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+               write_lock_irqsave(&ehca_cq_idr_lock, flags);
                ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
-               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+               write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 
        } while (ret == -EAGAIN);
 
@@ -293,9 +294,9 @@ create_cq_exit3:
                         "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
 
 create_cq_exit2:
-       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+       write_lock_irqsave(&ehca_cq_idr_lock, flags);
        idr_remove(&ehca_cq_idr, my_cq->token);
-       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 
 create_cq_exit1:
        kmem_cache_free(cq_cache, my_cq);
@@ -303,16 +304,6 @@ create_cq_exit1:
        return cq;
 }
 
-static int get_cq_nr_events(struct ehca_cq *my_cq)
-{
-       int ret;
-       unsigned long flags;
-       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-       ret = my_cq->nr_events;
-       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-       return ret;
-}
-
 int ehca_destroy_cq(struct ib_cq *cq)
 {
        u64 h_ret;
@@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq)
                }
        }
 
-       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-       while (my_cq->nr_events) {
-               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-               wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
-               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-               /* recheck nr_events to assure no cqe has just arrived */
-       }
-
+       /*
+        * remove the CQ from the idr first to make sure
+        * no more interrupt tasklets will touch this CQ
+        */
+       write_lock_irqsave(&ehca_cq_idr_lock, flags);
        idr_remove(&ehca_cq_idr, my_cq->token);
-       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+       /* now wait until all pending events have completed */
+       wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
 
+       /* nobody's using our CQ any longer -- we can destroy it */
        h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
        if (h_ret == H_R_STATE) {
                /* cq in err: read err data and destroy it forcibly */
index 32b55a4..bbd3c6a 100644 (file)
 
 int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 {
-       int ret = 0;
+       int i, ret = 0;
        struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
                                              ib_device);
        struct hipz_query_hca *rblock;
 
+       static const u32 cap_mapping[] = {
+               IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
+               IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
+               IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
+               IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
+               IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
+               IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
+               IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
+               IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
+               IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
+               IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
+               IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
+       };
+
        rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
@@ -96,6 +110,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
        props->max_total_mcast_qp_attach
                = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
 
+       /* translate device capabilities */
+       props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
+               IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
+       for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
+               if (rblock->hca_cap_indicators & cap_mapping[i + 1])
+                       props->device_cap_flags |= cap_mapping[i];
+
 query_device1:
        ehca_free_fw_ctrlblock(rblock);
 
@@ -172,6 +193,40 @@ query_port1:
        return ret;
 }
 
+int ehca_query_sma_attr(struct ehca_shca *shca,
+                       u8 port, struct ehca_sma_attr *attr)
+{      /* fill *attr from a port query; returns 0, -ENOMEM or -EINVAL */
+       int ret = 0;
+       struct hipz_query_port *rblock;
+
+       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+       if (!rblock) {
+               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+               return -ENOMEM;
+       }
+
+       if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+               ehca_err(&shca->ib_device, "Can't query port properties");
+               ret = -EINVAL;
+               goto query_sma_attr1;
+       }
+       /* *attr is first written here; both error paths leave it untouched */
+       memset(attr, 0, sizeof(struct ehca_sma_attr));
+
+       attr->lid    = rblock->lid;
+       attr->lmc    = rblock->lmc;
+       attr->sm_sl  = rblock->sm_sl;
+       attr->sm_lid = rblock->sm_lid;
+       /* copies sizeof(attr->pkeys) bytes, independent of pkey_tbl_len */
+       attr->pkey_tbl_len = rblock->pkey_tbl_len;
+       memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
+
+query_sma_attr1:
+       ehca_free_fw_ctrlblock(rblock);
+
+       return ret;
+}
+
 int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
 {
        int ret = 0;
@@ -261,7 +316,7 @@ int ehca_modify_port(struct ib_device *ibdev,
        }
 
        if (mutex_lock_interruptible(&shca->modify_mutex))
-                return -ERESTARTSYS;
+               return -ERESTARTSYS;
 
        rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
@@ -290,7 +345,7 @@ modify_port2:
        ehca_free_fw_ctrlblock(rblock);
 
 modify_port1:
-        mutex_unlock(&shca->modify_mutex);
+       mutex_unlock(&shca->modify_mutex);
 
        return ret;
 }
index 100329b..96eba38 100644 (file)
@@ -5,6 +5,8 @@
  *
  *  Authors: Heiko J Schick <schickhj@de.ibm.com>
  *           Khadija Souissi <souissi@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
  *
@@ -59,6 +61,7 @@
 #define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
 #define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
 #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16,16)
 
 #define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
 #define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
@@ -178,12 +181,11 @@ static void qp_event_callback(struct ehca_shca *shca,
 {
        struct ib_event event;
        struct ehca_qp *qp;
-       unsigned long flags;
        u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
 
-       spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+       read_lock(&ehca_qp_idr_lock);
        qp = idr_find(&ehca_qp_idr, token);
-       spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+       read_unlock(&ehca_qp_idr_lock);
 
 
        if (!qp)
@@ -207,18 +209,22 @@ static void cq_event_callback(struct ehca_shca *shca,
                              u64 eqe)
 {
        struct ehca_cq *cq;
-       unsigned long flags;
        u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
 
-       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+       read_lock(&ehca_cq_idr_lock);
        cq = idr_find(&ehca_cq_idr, token);
-       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+       if (cq)
+               atomic_inc(&cq->nr_events);
+       read_unlock(&ehca_cq_idr_lock);
 
        if (!cq)
                return;
 
        ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
 
+       if (atomic_dec_and_test(&cq->nr_events))
+               wake_up(&cq->wait_completion);
+
        return;
 }
 
@@ -281,30 +287,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
        return;
 }
 
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
+static void dispatch_port_event(struct ehca_shca *shca, int port_num,
+                               enum ib_event_type type, const char *msg)
 {
        struct ib_event event;
+       /* Log "port <port_num> <msg>." and raise event @type for that port. */
+       ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
+       event.device = &shca->ib_device;
+       event.event = type;
+       event.element.port_num = port_num;
+       ib_dispatch_event(&event);      /* only device, event and port_num are set */
+}
+
+static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
+{
+       struct ehca_sma_attr  new_attr;
+       struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
+       /* Query the port's SMA attributes and report what changed vs. saved_attr. */
+       ehca_query_sma_attr(shca, port_num, &new_attr);
+       /* NOTE(review): the int result above is ignored - confirm new_attr is valid on failure. */
+       if (new_attr.sm_sl  != old_attr->sm_sl ||
+           new_attr.sm_lid != old_attr->sm_lid)
+               dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
+                                   "SM changed");
+       /* LID or LMC differs from the saved snapshot */
+       if (new_attr.lid != old_attr->lid ||
+           new_attr.lmc != old_attr->lmc)
+               dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
+                                   "LID changed");
+       /* table length differs, or same length but different P_Key contents */
+       if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
+           memcmp(new_attr.pkeys, old_attr->pkeys,
+                  sizeof(u16) * new_attr.pkey_tbl_len))
+               dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
+                                   "P_Key changed");
+       /* keep the fresh attributes as the baseline for the next comparison */
+       *old_attr = new_attr;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
        u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
        u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
 
        switch (ec) {
        case 0x30: /* port availability change */
                if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-                       ehca_info(&shca->ib_device,
-                                 "port %x is active.", port);
-                       event.device = &shca->ib_device;
-                       event.event = IB_EVENT_PORT_ACTIVE;
-                       event.element.port_num = port;
                        shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
-                       ib_dispatch_event(&event);
+                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+                                           "is active");
+                       ehca_query_sma_attr(shca, port,
+                                           &shca->sport[port - 1].saved_attr);
                } else {
-                       ehca_info(&shca->ib_device,
-                                 "port %x is inactive.", port);
-                       event.device = &shca->ib_device;
-                       event.event = IB_EVENT_PORT_ERR;
-                       event.element.port_num = port;
                        shca->sport[port - 1].port_state = IB_PORT_DOWN;
-                       ib_dispatch_event(&event);
+                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+                                           "is inactive");
                }
                break;
        case 0x31:
@@ -312,24 +349,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
                 * disruptive change is caused by
                 * LID, PKEY or SM change
                 */
-               ehca_warn(&shca->ib_device,
-                         "disruptive port %x configuration change", port);
-
-               ehca_info(&shca->ib_device,
-                         "port %x is inactive.", port);
-               event.device = &shca->ib_device;
-               event.event = IB_EVENT_PORT_ERR;
-               event.element.port_num = port;
-               shca->sport[port - 1].port_state = IB_PORT_DOWN;
-               ib_dispatch_event(&event);
-
-               ehca_info(&shca->ib_device,
-                         "port %x is active.", port);
-               event.device = &shca->ib_device;
-               event.event = IB_EVENT_PORT_ACTIVE;
-               event.element.port_num = port;
-               shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
-               ib_dispatch_event(&event);
+               if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
+                       ehca_warn(&shca->ib_device, "disruptive port "
+                                 "%d configuration change", port);
+
+                       shca->sport[port - 1].port_state = IB_PORT_DOWN;
+                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+                                           "is inactive");
+
+                       shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+                                           "is active");
+               } else
+                       notify_port_conf_change(shca, port);
                break;
        case 0x32: /* adapter malfunction */
                ehca_err(&shca->ib_device, "Adapter malfunction.");
@@ -404,7 +436,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
 {
        u64 eqe_value;
        u32 token;
-       unsigned long flags;
        struct ehca_cq *cq;
 
        eqe_value = eqe->entry;
@@ -412,27 +443,24 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
        if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
                ehca_dbg(&shca->ib_device, "Got completion event");
                token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+               read_lock(&ehca_cq_idr_lock);
                cq = idr_find(&ehca_cq_idr, token);
+               if (cq)
+                       atomic_inc(&cq->nr_events);
+               read_unlock(&ehca_cq_idr_lock);
                if (cq == NULL) {
-                       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
                        ehca_err(&shca->ib_device,
                                 "Invalid eqe for non-existing cq token=%x",
                                 token);
                        return;
                }
                reset_eq_pending(cq);
-               cq->nr_events++;
-               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
                if (ehca_scaling_code)
                        queue_comp_task(cq);
                else {
                        comp_event_callback(cq);
-                       spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-                       cq->nr_events--;
-                       if (!cq->nr_events)
+                       if (atomic_dec_and_test(&cq->nr_events))
                                wake_up(&cq->wait_completion);
-                       spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
                }
        } else {
                ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -476,17 +504,17 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
                eqe_value = eqe_cache[eqe_cnt].eqe->entry;
                if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
                        token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-                       spin_lock(&ehca_cq_idr_lock);
+                       read_lock(&ehca_cq_idr_lock);
                        eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+                       if (eqe_cache[eqe_cnt].cq)
+                               atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
+                       read_unlock(&ehca_cq_idr_lock);
                        if (!eqe_cache[eqe_cnt].cq) {
-                               spin_unlock(&ehca_cq_idr_lock);
                                ehca_err(&shca->ib_device,
                                         "Invalid eqe for non-existing cq "
                                         "token=%x", token);
                                continue;
                        }
-                       eqe_cache[eqe_cnt].cq->nr_events++;
-                       spin_unlock(&ehca_cq_idr_lock);
                } else
                        eqe_cache[eqe_cnt].cq = NULL;
                eqe_cnt++;
@@ -517,11 +545,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
                        else {
                                struct ehca_cq *cq = eq->eqe_cache[i].cq;
                                comp_event_callback(cq);
-                               spin_lock(&ehca_cq_idr_lock);
-                               cq->nr_events--;
-                               if (!cq->nr_events)
+                               if (atomic_dec_and_test(&cq->nr_events))
                                        wake_up(&cq->wait_completion);
-                               spin_unlock(&ehca_cq_idr_lock);
                        }
                } else {
                        ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -621,13 +646,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct)
        while (!list_empty(&cct->cq_list)) {
                cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
                spin_unlock_irqrestore(&cct->task_lock, flags);
-               comp_event_callback(cq);
 
-               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-               cq->nr_events--;
-               if (!cq->nr_events)
+               comp_event_callback(cq);
+               if (atomic_dec_and_test(&cq->nr_events))
                        wake_up(&cq->wait_completion);
-               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 
                spin_lock_irqsave(&cct->task_lock, flags);
                spin_lock(&cq->task_lock);
index 6ed06ee..3346cb0 100644 (file)
@@ -47,7 +47,6 @@ struct ehca_shca;
 
 #include <linux/interrupt.h>
 #include <linux/types.h>
-#include <asm/atomic.h>
 
 int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
 
index 37e7fe0..77aeca6 100644 (file)
@@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
 int ehca_query_port(struct ib_device *ibdev, u8 port,
                    struct ib_port_attr *props);
 
+int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
+                       struct ehca_sma_attr *attr);
+
 int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
 
 int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -154,6 +157,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
 int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
                   struct ib_recv_wr **bad_recv_wr);
 
+int ehca_post_srq_recv(struct ib_srq *srq,
+                      struct ib_recv_wr *recv_wr,
+                      struct ib_recv_wr **bad_recv_wr);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+                              struct ib_srq_init_attr *init_attr,
+                              struct ib_udata *udata);
+
+int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
+                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+
+int ehca_destroy_srq(struct ib_srq *srq);
+
 u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
                    struct ib_qp_init_attr *qp_init_attr);
 
index c3f99f3..28ba2dd 100644 (file)
@@ -94,17 +94,15 @@ MODULE_PARM_DESC(poll_all_eqs,
 MODULE_PARM_DESC(static_rate,
                 "set permanent static rate (default: disabled)");
 MODULE_PARM_DESC(scaling_code,
-                "set scaling code (0: disabled, 1: enabled/default)");
+                "set scaling code (0: disabled/default, 1: enabled)");
 
-spinlock_t ehca_qp_idr_lock;
-spinlock_t ehca_cq_idr_lock;
-spinlock_t hcall_lock;
+DEFINE_RWLOCK(ehca_qp_idr_lock);
+DEFINE_RWLOCK(ehca_cq_idr_lock);
 DEFINE_IDR(ehca_qp_idr);
 DEFINE_IDR(ehca_cq_idr);
 
-
-static struct list_head shca_list; /* list of all registered ehcas */
-static spinlock_t shca_list_lock;
+static LIST_HEAD(shca_list); /* list of all registered ehcas */
+static DEFINE_SPINLOCK(shca_list_lock);
 
 static struct timer_list poll_eqs_timer;
 
@@ -205,11 +203,35 @@ static void ehca_destroy_slab_caches(void)
 #define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
 #define EHCA_REVID    EHCA_BMASK_IBM(40,63)
 
+static struct cap_descr {
+       u64 mask;       /* capability bit mask, tested with EHCA_BMASK_GET() on shca->hca_cap */
+       char *descr;    /* name printed via ehca_gen_dbg() when that capability is set */
+} hca_cap_descr[] = {
+       { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
+       { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
+       { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
+       { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
+       { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
+       { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
+       { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
+       { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
+       { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
+       { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
+       { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
+       { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
+       { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
+       { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
+       { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
+       { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
+       { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
+};
+
 int ehca_sense_attributes(struct ehca_shca *shca)
 {
-       int ret = 0;
+       int i, ret = 0;
        u64 h_ret;
        struct hipz_query_hca *rblock;
+       struct hipz_query_port *port;
 
        rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
@@ -222,7 +244,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
                ehca_gen_err("Cannot query device properties. h_ret=%lx",
                             h_ret);
                ret = -EPERM;
-               goto num_ports1;
+               goto sense_attributes1;
        }
 
        if (ehca_nr_ports == 1)
@@ -242,18 +264,44 @@ int ehca_sense_attributes(struct ehca_shca *shca)
                ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
 
                if ((hcaaver == 1) && (revid == 0))
-                       shca->hw_level = 0;
+                       shca->hw_level = 0x11;
                else if ((hcaaver == 1) && (revid == 1))
-                       shca->hw_level = 1;
+                       shca->hw_level = 0x12;
                else if ((hcaaver == 1) && (revid == 2))
-                       shca->hw_level = 2;
+                       shca->hw_level = 0x13;
+               else if ((hcaaver == 2) && (revid == 0))
+                       shca->hw_level = 0x21;
+               else if ((hcaaver == 2) && (revid == 0x10))
+                       shca->hw_level = 0x22;
+               else {
+                       ehca_gen_warn("unknown hardware version"
+                                     " - assuming default level");
+                       shca->hw_level = 0x22;
+               }
        }
        ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
 
        shca->sport[0].rate = IB_RATE_30_GBPS;
        shca->sport[1].rate = IB_RATE_30_GBPS;
 
-num_ports1:
+       shca->hca_cap = rblock->hca_cap_indicators;
+       ehca_gen_dbg(" ... HCA capabilities:");
+       for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
+               if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
+                       ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
+
+       port = (struct hipz_query_port *) rblock;
+       h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
+       if (h_ret != H_SUCCESS) {
+               ehca_gen_err("Cannot query port properties. h_ret=%lx",
+                            h_ret);
+               ret = -EPERM;
+               goto sense_attributes1;
+       }
+
+       shca->max_mtu = port->max_mtu;
+
+sense_attributes1:
        ehca_free_fw_ctrlblock(rblock);
        return ret;
 }
@@ -293,7 +341,7 @@ int ehca_init_device(struct ehca_shca *shca)
        strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
        shca->ib_device.owner               = THIS_MODULE;
 
-       shca->ib_device.uverbs_abi_ver      = 6;
+       shca->ib_device.uverbs_abi_ver      = 7;
        shca->ib_device.uverbs_cmd_mask     =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
@@ -361,6 +409,20 @@ int ehca_init_device(struct ehca_shca *shca)
        /* shca->ib_device.process_mad      = ehca_process_mad;     */
        shca->ib_device.mmap                = ehca_mmap;
 
+       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+               shca->ib_device.uverbs_cmd_mask |=
+                       (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+                       (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+                       (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+                       (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+               shca->ib_device.create_srq          = ehca_create_srq;
+               shca->ib_device.modify_srq          = ehca_modify_srq;
+               shca->ib_device.query_srq           = ehca_query_srq;
+               shca->ib_device.destroy_srq         = ehca_destroy_srq;
+               shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
+       }
+
        return ret;
 }
 
@@ -800,14 +862,6 @@ int __init ehca_module_init(void)
 
        printk(KERN_INFO "eHCA Infiniband Device Driver "
               "(Rel.: SVNEHCA_0023)\n");
-       idr_init(&ehca_qp_idr);
-       idr_init(&ehca_cq_idr);
-       spin_lock_init(&ehca_qp_idr_lock);
-       spin_lock_init(&ehca_cq_idr_lock);
-       spin_lock_init(&hcall_lock);
-
-       INIT_LIST_HEAD(&shca_list);
-       spin_lock_init(&shca_list_lock);
 
        if ((ret = ehca_create_comp_pool())) {
                ehca_gen_err("Cannot create comp pool.");
index b5bc787..7467125 100644 (file)
@@ -3,7 +3,9 @@
  *
  *  QP functions
  *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
+ *           Stefan Roscher <stefan.roscher@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
  *           Reinhard Ernst <rernst@de.ibm.com>
  *           Heiko J Schick <schickhj@de.ibm.com>
@@ -234,13 +236,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
        return index;
 }
 
-enum ehca_service_type {
-       ST_RC = 0,
-       ST_UC = 1,
-       ST_RD = 2,
-       ST_UD = 3
-};
-
 /*
  * ibqptype2servicetype returns hcp service type corresponding to given
  * ib qp type used by create_qp()
@@ -268,15 +263,34 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
 }
 
 /*
- * init_qp_queues initializes/constructs r/squeue and registers queue pages.
+ * init userspace queue info from ipz_queue data
  */
-static inline int init_qp_queues(struct ehca_shca *shca,
-                                struct ehca_qp *my_qp,
-                                int nr_sq_pages,
-                                int nr_rq_pages,
-                                int swqe_size,
-                                int rwqe_size,
-                                int nr_send_sges, int nr_receive_sges)
+static inline void queue2resp(struct ipzu_queue_resp *resp,
+                             struct ipz_queue *queue)
+{      /* field-by-field copy from @queue into @resp; @queue is only read */
+       resp->qe_size = queue->qe_size;
+       resp->act_nr_of_sg = queue->act_nr_of_sg;
+       resp->queue_length = queue->queue_length;
+       resp->pagesize = queue->pagesize;
+       resp->toggle_state = queue->toggle_state;
+}
+
+static inline int ll_qp_msg_size(int nr_sge)
+{
+       return 128 << nr_sge;   /* 128, 256, 512, ... for nr_sge = 0, 1, 2, ...; nr_sge is not range-checked */
+}
+
+/*
+ * init_qp_queue initializes/constructs r/squeue and registers queue pages.
+ */
+static inline int init_qp_queue(struct ehca_shca *shca,
+                               struct ehca_qp *my_qp,
+                               struct ipz_queue *queue,
+                               int q_type,
+                               u64 expected_hret,
+                               int nr_q_pages,
+                               int wqe_size,
+                               int nr_sges)
 {
        int ret, cnt, ipz_rc;
        void *vpage;
@@ -284,127 +298,93 @@ static inline int init_qp_queues(struct ehca_shca *shca,
        struct ib_device *ib_dev = &shca->ib_device;
        struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
 
-       ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
-                               nr_sq_pages,
-                               EHCA_PAGESIZE, swqe_size, nr_send_sges);
+       if (!nr_q_pages)
+               return 0;
+
+       ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE,
+                               wqe_size, nr_sges);
        if (!ipz_rc) {
-               ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
+               ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
                         ipz_rc);
                return -EBUSY;
        }
 
-       ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
-                               nr_rq_pages,
-                               EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
-       if (!ipz_rc) {
-               ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
-                        ipz_rc);
-               ret = -EBUSY;
-               goto init_qp_queues0;
-       }
-       /* register SQ pages */
-       for (cnt = 0; cnt < nr_sq_pages; cnt++) {
-               vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+       /* register queue pages */
+       for (cnt = 0; cnt < nr_q_pages; cnt++) {
+               vpage = ipz_qpageit_get_inc(queue);
                if (!vpage) {
-                       ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+                       ehca_err(ib_dev, "ipz_qpageit_get_inc() "
                                 "failed p_vpage= %p", vpage);
                        ret = -EINVAL;
-                       goto init_qp_queues1;
+                       goto init_qp_queue1;
                }
                rpage = virt_to_abs(vpage);
 
                h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
                                                 my_qp->ipz_qp_handle,
-                                                &my_qp->pf, 0, 0,
+                                                NULL, 0, q_type,
                                                 rpage, 1,
                                                 my_qp->galpas.kernel);
-               if (h_ret < H_SUCCESS) {
-                       ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
-                                " failed rc=%lx", h_ret);
-                       ret = ehca2ib_return_code(h_ret);
-                       goto init_qp_queues1;
-               }
-       }
-
-       ipz_qeit_reset(&my_qp->ipz_squeue);
-
-       /* register RQ pages */
-       for (cnt = 0; cnt < nr_rq_pages; cnt++) {
-               vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-               if (!vpage) {
-                       ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
-                                "failed p_vpage = %p", vpage);
-                       ret = -EINVAL;
-                       goto init_qp_queues1;
-               }
-
-               rpage = virt_to_abs(vpage);
-
-               h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-                                                my_qp->ipz_qp_handle,
-                                                &my_qp->pf, 0, 1,
-                                                rpage, 1,my_qp->galpas.kernel);
-               if (h_ret < H_SUCCESS) {
-                       ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
-                                "rc=%lx", h_ret);
-                       ret = ehca2ib_return_code(h_ret);
-                       goto init_qp_queues1;
-               }
-               if (cnt == (nr_rq_pages - 1)) { /* last page! */
-                       if (h_ret != H_SUCCESS) {
-                               ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+               if (cnt == (nr_q_pages - 1)) {  /* last page! */
+                       if (h_ret != expected_hret) {
+                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
                                         "h_ret= %lx ", h_ret);
                                ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queues1;
+                               goto init_qp_queue1;
                        }
                        vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
                        if (vpage) {
                                ehca_err(ib_dev, "ipz_qpageit_get_inc() "
                                         "should not succeed vpage=%p", vpage);
                                ret = -EINVAL;
-                               goto init_qp_queues1;
+                               goto init_qp_queue1;
                        }
                } else {
                        if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
                                         "h_ret= %lx ", h_ret);
                                ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queues1;
+                               goto init_qp_queue1;
                        }
                }
        }
 
-       ipz_qeit_reset(&my_qp->ipz_rqueue);
+       ipz_qeit_reset(queue);
 
        return 0;
 
-init_qp_queues1:
-       ipz_queue_dtor(&my_qp->ipz_rqueue);
-init_qp_queues0:
-       ipz_queue_dtor(&my_qp->ipz_squeue);
+init_qp_queue1:
+       ipz_queue_dtor(queue);
        return ret;
 }
 
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *init_attr,
-                            struct ib_udata *udata)
+/*
+ * Create an ib_qp struct that is either a QP or an SRQ, depending on
+ * the value of the is_srq parameter. If init_attr and srq_init_attr share
+ * fields, the field out of init_attr is used.
+ */
+struct ehca_qp *internal_create_qp(struct ib_pd *pd,
+                                  struct ib_qp_init_attr *init_attr,
+                                  struct ib_srq_init_attr *srq_init_attr,
+                                  struct ib_udata *udata, int is_srq)
 {
-       static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
-       static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
        struct ehca_qp *my_qp;
        struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
        struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
                                              ib_device);
        struct ib_ucontext *context = NULL;
        u64 h_ret;
-       int max_send_sge, max_recv_sge, ret;
+       int is_llqp = 0, has_srq = 0;
+       int qp_type, max_send_sge, max_recv_sge, ret;
 
        /* h_call's out parameters */
        struct ehca_alloc_qp_parms parms;
-       u32 swqe_size = 0, rwqe_size = 0;
-       u8 daqp_completion, isdaqp;
+       u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
        unsigned long flags;
 
+       memset(&parms, 0, sizeof(parms));
+       qp_type = init_attr->qp_type;
+
        if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
                init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
                ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
@@ -412,41 +392,98 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
                return ERR_PTR(-EINVAL);
        }
 
-       /* save daqp completion bits */
-       daqp_completion = init_attr->qp_type & 0x60;
-       /* save daqp bit */
-       isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
-       init_attr->qp_type = init_attr->qp_type & 0x1F;
+       /* save LLQP info */
+       if (qp_type & 0x80) {
+               is_llqp = 1;
+               parms.ext_type = EQPT_LLQP;
+               parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
+       }
+       qp_type &= 0x1F;
+       init_attr->qp_type &= 0x1F;
 
-       if (init_attr->qp_type != IB_QPT_UD &&
-           init_attr->qp_type != IB_QPT_SMI &&
-           init_attr->qp_type != IB_QPT_GSI &&
-           init_attr->qp_type != IB_QPT_UC &&
-           init_attr->qp_type != IB_QPT_RC) {
-               ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
+       /* handle SRQ base QPs */
+       if (init_attr->srq) {
+               struct ehca_qp *my_srq =
+                       container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+               has_srq = 1;
+               parms.ext_type = EQPT_SRQBASE;
+               parms.srq_qpn = my_srq->real_qp_num;
+               parms.srq_token = my_srq->token;
        }
-       if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
-           && isdaqp) {
-               ehca_err(pd->device, "unsupported LL QP Type=%x",
-                        init_attr->qp_type);
+
+       if (is_llqp && has_srq) {
+               ehca_err(pd->device, "LLQPs can't have an SRQ");
                return ERR_PTR(-EINVAL);
-       } else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
-                  (init_attr->cap.max_send_wr > 255 ||
-                   init_attr->cap.max_recv_wr > 255 )) {
-                      ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
-                               "or max_rq_wr=%x for QP Type=%x",
-                               init_attr->cap.max_send_wr,
-                               init_attr->cap.max_recv_wr,init_attr->qp_type);
-                      return ERR_PTR(-EINVAL);
-       } else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
-                 init_attr->cap.max_send_wr > 255) {
-               ehca_err(pd->device,
-                        "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
-                        init_attr->cap.max_send_wr, init_attr->qp_type);
+       }
+
+       /* handle SRQs */
+       if (is_srq) {
+               parms.ext_type = EQPT_SRQ;
+               parms.srq_limit = srq_init_attr->attr.srq_limit;
+               if (init_attr->cap.max_recv_sge > 3) {
+                       ehca_err(pd->device, "no more than three SGEs "
+                                "supported for SRQ  pd=%p  max_sge=%x",
+                                pd, init_attr->cap.max_recv_sge);
+                       return ERR_PTR(-EINVAL);
+               }
+       }
+
+       /* check QP type */
+       if (qp_type != IB_QPT_UD &&
+           qp_type != IB_QPT_UC &&
+           qp_type != IB_QPT_RC &&
+           qp_type != IB_QPT_SMI &&
+           qp_type != IB_QPT_GSI) {
+               ehca_err(pd->device, "wrong QP Type=%x", qp_type);
                return ERR_PTR(-EINVAL);
        }
 
+       if (is_llqp) {
+               switch (qp_type) {
+               case IB_QPT_RC:
+                       if ((init_attr->cap.max_send_wr > 255) ||
+                           (init_attr->cap.max_recv_wr > 255)) {
+                               ehca_err(pd->device,
+                                        "Invalid Number of max_sq_wr=%x "
+                                        "or max_rq_wr=%x for RC LLQP",
+                                        init_attr->cap.max_send_wr,
+                                        init_attr->cap.max_recv_wr);
+                               return ERR_PTR(-EINVAL);
+                       }
+                       break;
+               case IB_QPT_UD:
+                       if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
+                               ehca_err(pd->device, "UD LLQP not supported "
+                                        "by this adapter");
+                               return ERR_PTR(-ENOSYS);
+                       }
+                       if (!(init_attr->cap.max_send_sge <= 5
+                           && init_attr->cap.max_send_sge >= 1
+                           && init_attr->cap.max_recv_sge <= 5
+                           && init_attr->cap.max_recv_sge >= 1)) {
+                               ehca_err(pd->device,
+                                        "Invalid Number of max_send_sge=%x "
+                                        "or max_recv_sge=%x for UD LLQP",
+                                        init_attr->cap.max_send_sge,
+                                        init_attr->cap.max_recv_sge);
+                               return ERR_PTR(-EINVAL);
+                       } else if (init_attr->cap.max_send_wr > 255) {
+                               ehca_err(pd->device,
+                                        "Invalid Number of "
+                                        "ax_send_wr=%x for UD QP_TYPE=%x",
+                                        init_attr->cap.max_send_wr, qp_type);
+                               return ERR_PTR(-EINVAL);
+                       }
+                       break;
+               default:
+                       ehca_err(pd->device, "unsupported LL QP Type=%x",
+                                qp_type);
+                       return ERR_PTR(-EINVAL);
+                       break;
+               }
+       }
+
        if (pd->uobject && udata)
                context = pd->uobject->context;
 
@@ -456,16 +493,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
                return ERR_PTR(-ENOMEM);
        }
 
-       memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
        spin_lock_init(&my_qp->spinlock_s);
        spin_lock_init(&my_qp->spinlock_r);
+       my_qp->qp_type = qp_type;
+       my_qp->ext_type = parms.ext_type;
 
-       my_qp->recv_cq =
-               container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-       my_qp->send_cq =
-               container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-       my_qp->init_attr = *init_attr;
+       if (init_attr->recv_cq)
+               my_qp->recv_cq =
+                       container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+       if (init_attr->send_cq)
+               my_qp->send_cq =
+                       container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
 
        do {
                if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
@@ -474,9 +512,9 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
                        goto create_qp_exit0;
                }
 
-               spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+               write_lock_irqsave(&ehca_qp_idr_lock, flags);
                ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
-               spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+               write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
        } while (ret == -EAGAIN);
 
@@ -486,10 +524,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
                goto create_qp_exit0;
        }
 
-       parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+       parms.servicetype = ibqptype2servicetype(qp_type);
        if (parms.servicetype < 0) {
                ret = -EINVAL;
-               ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+               ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
                goto create_qp_exit0;
        }
 
@@ -501,21 +539,25 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
        /* UD_AV CIRCUMVENTION */
        max_send_sge = init_attr->cap.max_send_sge;
        max_recv_sge = init_attr->cap.max_recv_sge;
-       if (IB_QPT_UD == init_attr->qp_type ||
-           IB_QPT_GSI == init_attr->qp_type ||
-           IB_QPT_SMI == init_attr->qp_type) {
+       if (parms.servicetype == ST_UD && !is_llqp) {
                max_send_sge += 2;
                max_recv_sge += 2;
        }
 
-       parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
-       parms.daqp_ctrl = isdaqp | daqp_completion;
+       parms.token = my_qp->token;
+       parms.eq_handle = shca->eq.ipz_eq_handle;
        parms.pd = my_pd->fw_pd;
-       parms.max_recv_sge = max_recv_sge;
-       parms.max_send_sge = max_send_sge;
+       if (my_qp->send_cq)
+               parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
+       if (my_qp->recv_cq)
+               parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
 
-       h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
+       parms.max_send_wr = init_attr->cap.max_send_wr;
+       parms.max_recv_wr = init_attr->cap.max_recv_wr;
+       parms.max_send_sge = max_send_sge;
+       parms.max_recv_sge = max_recv_sge;
 
+       h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
        if (h_ret != H_SUCCESS) {
                ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
                         h_ret);
@@ -523,18 +565,20 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
                goto create_qp_exit1;
        }
 
-       my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+       ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
+       my_qp->ipz_qp_handle = parms.qp_handle;
+       my_qp->galpas = parms.galpas;
 
-       switch (init_attr->qp_type) {
+       switch (qp_type) {
        case IB_QPT_RC:
-               if (isdaqp == 0) {
+               if (!is_llqp) {
                        swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
                                             (parms.act_nr_send_sges)]);
                        rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
                                             (parms.act_nr_recv_sges)]);
-               } else { /* for daqp we need to use msg size, not wqe size */
-                       swqe_size = da_rc_msg_size[max_send_sge];
-                       rwqe_size = da_rc_msg_size[max_recv_sge];
+               } else { /* for LLQP we need to use msg size, not wqe size */
+                       swqe_size = ll_qp_msg_size(max_send_sge);
+                       rwqe_size = ll_qp_msg_size(max_recv_sge);
                        parms.act_nr_send_sges = 1;
                        parms.act_nr_recv_sges = 1;
                }
@@ -549,29 +593,27 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
        case IB_QPT_UD:
        case IB_QPT_GSI:
        case IB_QPT_SMI:
-               /* UD circumvention */
-               parms.act_nr_recv_sges -= 2;
-               parms.act_nr_send_sges -= 2;
-               if (isdaqp) {
-                       swqe_size = da_ud_sq_msg_size[max_send_sge];
-                       rwqe_size = da_rc_msg_size[max_recv_sge];
+               if (is_llqp) {
+                       swqe_size = ll_qp_msg_size(parms.act_nr_send_sges);
+                       rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges);
                        parms.act_nr_send_sges = 1;
                        parms.act_nr_recv_sges = 1;
                } else {
+                       /* UD circumvention */
+                       parms.act_nr_send_sges -= 2;
+                       parms.act_nr_recv_sges -= 2;
                        swqe_size = offsetof(struct ehca_wqe,
                                             u.ud_av.sg_list[parms.act_nr_send_sges]);
                        rwqe_size = offsetof(struct ehca_wqe,
                                             u.ud_av.sg_list[parms.act_nr_recv_sges]);
                }
 
-               if (IB_QPT_GSI == init_attr->qp_type ||
-                   IB_QPT_SMI == init_attr->qp_type) {
+               if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
                        parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
                        parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
                        parms.act_nr_send_sges = init_attr->cap.max_send_sge;
                        parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
-                       my_qp->ib_qp.qp_num =
-                               (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+                       ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
                }
 
                break;
@@ -580,108 +622,234 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
                break;
        }
 
-       /* initializes r/squeue and registers queue pages */
-       ret = init_qp_queues(shca, my_qp,
-                            parms.nr_sq_pages, parms.nr_rq_pages,
-                            swqe_size, rwqe_size,
-                            parms.act_nr_send_sges, parms.act_nr_recv_sges);
-       if (ret) {
-               ehca_err(pd->device,
-                        "Couldn't initialize r/squeue and pages ret=%x", ret);
-               goto create_qp_exit2;
+       /* initialize r/squeue and register queue pages */
+       if (HAS_SQ(my_qp)) {
+               ret = init_qp_queue(
+                       shca, my_qp, &my_qp->ipz_squeue, 0,
+                       HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
+                       parms.nr_sq_pages, swqe_size,
+                       parms.act_nr_send_sges);
+               if (ret) {
+                       ehca_err(pd->device, "Couldn't initialize squeue "
+                                "and pages  ret=%x", ret);
+                       goto create_qp_exit2;
+               }
        }
 
-       my_qp->ib_qp.pd = &my_pd->ib_pd;
-       my_qp->ib_qp.device = my_pd->ib_pd.device;
+       if (HAS_RQ(my_qp)) {
+               ret = init_qp_queue(
+                       shca, my_qp, &my_qp->ipz_rqueue, 1,
+                       H_SUCCESS, parms.nr_rq_pages, rwqe_size,
+                       parms.act_nr_recv_sges);
+               if (ret) {
+                       ehca_err(pd->device, "Couldn't initialize rqueue "
+                                "and pages ret=%x", ret);
+                       goto create_qp_exit3;
+               }
+       }
 
-       my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-       my_qp->ib_qp.send_cq = init_attr->send_cq;
+       if (is_srq) {
+               my_qp->ib_srq.pd = &my_pd->ib_pd;
+               my_qp->ib_srq.device = my_pd->ib_pd.device;
 
-       my_qp->ib_qp.qp_type = init_attr->qp_type;
+               my_qp->ib_srq.srq_context = init_attr->qp_context;
+               my_qp->ib_srq.event_handler = init_attr->event_handler;
+       } else {
+               my_qp->ib_qp.qp_num = ib_qp_num;
+               my_qp->ib_qp.pd = &my_pd->ib_pd;
+               my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+               my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+               my_qp->ib_qp.send_cq = init_attr->send_cq;
 
-       my_qp->qp_type = init_attr->qp_type;
-       my_qp->ib_qp.srq = init_attr->srq;
+               my_qp->ib_qp.qp_type = qp_type;
+               my_qp->ib_qp.srq = init_attr->srq;
 
-       my_qp->ib_qp.qp_context = init_attr->qp_context;
-       my_qp->ib_qp.event_handler = init_attr->event_handler;
+               my_qp->ib_qp.qp_context = init_attr->qp_context;
+               my_qp->ib_qp.event_handler = init_attr->event_handler;
+       }
 
        init_attr->cap.max_inline_data = 0; /* not supported yet */
        init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
        init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
        init_attr->cap.max_send_sge = parms.act_nr_send_sges;
        init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+       my_qp->init_attr = *init_attr;
 
        /* NOTE: define_apq0() not supported yet */
-       if (init_attr->qp_type == IB_QPT_GSI) {
+       if (qp_type == IB_QPT_GSI) {
                h_ret = ehca_define_sqp(shca, my_qp, init_attr);
                if (h_ret != H_SUCCESS) {
                        ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
                                 h_ret);
                        ret = ehca2ib_return_code(h_ret);
-                       goto create_qp_exit3;
+                       goto create_qp_exit4;
                }
        }
-       if (init_attr->send_cq) {
-               struct ehca_cq *cq = container_of(init_attr->send_cq,
-                                                 struct ehca_cq, ib_cq);
-               ret = ehca_cq_assign_qp(cq, my_qp);
+
+       if (my_qp->send_cq) {
+               ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
                if (ret) {
                        ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
                                 ret);
-                       goto create_qp_exit3;
+                       goto create_qp_exit4;
                }
-               my_qp->send_cq = cq;
        }
+
        /* copy queues, galpa data to user space */
        if (context && udata) {
-               struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
-               struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
                struct ehca_create_qp_resp resp;
                memset(&resp, 0, sizeof(resp));
 
                resp.qp_num = my_qp->real_qp_num;
                resp.token = my_qp->token;
                resp.qp_type = my_qp->qp_type;
+               resp.ext_type = my_qp->ext_type;
                resp.qkey = my_qp->qkey;
                resp.real_qp_num = my_qp->real_qp_num;
-               /* rqueue properties */
-               resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
-               resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
-               resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
-               resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
-               resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
-               /* squeue properties */
-               resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
-               resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
-               resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
-               resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
-               resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+               if (HAS_SQ(my_qp))
+                       queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
+               if (HAS_RQ(my_qp))
+                       queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+
                if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
                        ehca_err(pd->device, "Copy to udata failed");
                        ret = -EINVAL;
-                       goto create_qp_exit3;
+                       goto create_qp_exit4;
                }
        }
 
-       return &my_qp->ib_qp;
+       return my_qp;
+
+create_qp_exit4:
+       if (HAS_RQ(my_qp))
+               ipz_queue_dtor(&my_qp->ipz_rqueue);
 
 create_qp_exit3:
-       ipz_queue_dtor(&my_qp->ipz_rqueue);
-       ipz_queue_dtor(&my_qp->ipz_squeue);
+       if (HAS_SQ(my_qp))
+               ipz_queue_dtor(&my_qp->ipz_squeue);
 
 create_qp_exit2:
        hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
 
 create_qp_exit1:
-       spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+       write_lock_irqsave(&ehca_qp_idr_lock, flags);
        idr_remove(&ehca_qp_idr, my_qp->token);
-       spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
 create_qp_exit0:
        kmem_cache_free(qp_cache, my_qp);
        return ERR_PTR(ret);
 }
 
+/*
+ * Verbs entry point for creating a regular QP: delegates to
+ * internal_create_qp() without SRQ attributes and hands back the ib_qp
+ * embedded in the resulting ehca_qp, or the ERR_PTR unchanged on failure.
+ */
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+                            struct ib_qp_init_attr *qp_init_attr,
+                            struct ib_udata *udata)
+{
+       struct ehca_qp *my_qp =
+               internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
+
+       /* an error pointer is passed through, only its type changes */
+       if (IS_ERR(my_qp))
+               return (struct ib_qp *) my_qp;
+
+       return &my_qp->ib_qp;
+}
+
+int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+                       struct ib_uobject *uobject);
+
+/*
+ * ehca_create_srq() - create a shared receive queue (SRQ).
+ * @pd:            protection domain the SRQ belongs to
+ * @srq_init_attr: requested attributes; attr.max_wr and attr.max_sge are
+ *                 overwritten with the values reported back by
+ *                 internal_create_qp()
+ * @udata:         user-space data, passed through to internal_create_qp()
+ *
+ * The SRQ is created through internal_create_qp() using a temporary RC
+ * ib_qp_init_attr that carries only the receive-side capabilities. It is
+ * then set to INIT, enabled and moved to RTR with three
+ * hipz_h_modify_qp() calls.
+ *
+ * Returns the ib_srq embedded in the new ehca_qp, or an ERR_PTR(). If any
+ * step after internal_create_qp() fails, the queue is destroyed again via
+ * internal_destroy_qp().
+ */
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+                              struct ib_srq_init_attr *srq_init_attr,
+                              struct ib_udata *udata)
+{
+       struct ib_qp_init_attr qp_init_attr;
+       struct ehca_qp *my_qp;
+       struct ib_srq *ret;
+       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+                                             ib_device);
+       struct hcp_modify_qp_control_block *mqpcb;
+       u64 hret, update_mask;
+
+       /* For common attributes, internal_create_qp() takes its info
+        * out of qp_init_attr, so copy all common attrs there.
+        */
+       memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+       qp_init_attr.event_handler = srq_init_attr->event_handler;
+       qp_init_attr.qp_context = srq_init_attr->srq_context;
+       qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+       qp_init_attr.qp_type = IB_QPT_RC;
+       qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
+       qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
+
+       my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
+       if (IS_ERR(my_qp))
+               return (struct ib_srq *) my_qp;
+
+       /* copy back return values */
+       srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
+       srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge;
+
+       /* drive SRQ into RTR state */
+       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+       if (!mqpcb) {
+               ehca_err(pd->device, "Could not get zeroed page for mqpcb "
+                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+               ret = ERR_PTR(-ENOMEM);
+               goto create_srq1;
+       }
+
+       /* step 1: RESET -> INIT */
+       mqpcb->qp_state = EHCA_QPS_INIT;
+       mqpcb->prim_phys_port = 1;
+       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+                               my_qp->ipz_qp_handle,
+                               &my_qp->pf,
+                               update_mask,
+                               mqpcb, my_qp->galpas.kernel);
+       if (hret != H_SUCCESS) {
+               ehca_err(pd->device, "Could not modify SRQ to INIT "
+                        "ehca_qp=%p qp_num=%x hret=%lx",
+                        my_qp, my_qp->real_qp_num, hret);
+               goto create_srq2;
+       }
+
+       /* step 2: enable the queue */
+       mqpcb->qp_enable = 1;
+       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+                               my_qp->ipz_qp_handle,
+                               &my_qp->pf,
+                               update_mask,
+                               mqpcb, my_qp->galpas.kernel);
+       if (hret != H_SUCCESS) {
+               ehca_err(pd->device, "Could not enable SRQ "
+                        "ehca_qp=%p qp_num=%x hret=%lx",
+                        my_qp, my_qp->real_qp_num, hret);
+               goto create_srq2;
+       }
+
+       /* step 3: INIT -> RTR */
+       mqpcb->qp_state  = EHCA_QPS_RTR;
+       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+                               my_qp->ipz_qp_handle,
+                               &my_qp->pf,
+                               update_mask,
+                               mqpcb, my_qp->galpas.kernel);
+       if (hret != H_SUCCESS) {
+               ehca_err(pd->device, "Could not modify SRQ to RTR "
+                        "ehca_qp=%p qp_num=%x hret=%lx",
+                        my_qp, my_qp->real_qp_num, hret);
+               goto create_srq2;
+       }
+
+       /* the control block is only a scratch buffer for the calls above;
+        * release it on success too, otherwise every created SRQ leaks it
+        */
+       ehca_free_fw_ctrlblock(mqpcb);
+
+       return &my_qp->ib_srq;
+
+create_srq2:
+       ret = ERR_PTR(ehca2ib_return_code(hret));
+       ehca_free_fw_ctrlblock(mqpcb);
+
+create_srq1:
+       internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
+
+       return ret;
+}
+
 /*
  * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
  * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
@@ -765,7 +933,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
        u64 h_ret;
        int bad_wqe_cnt = 0;
        int squeue_locked = 0;
-       unsigned long spl_flags = 0;
+       unsigned long flags = 0;
 
        /* do query_qp to obtain current attr values */
        mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
@@ -886,6 +1054,17 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
                 my_qp, ibqp->qp_num, statetrans);
 
+       /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
+        * in non-LL UD QPs.
+        */
+       if ((my_qp->qp_type == IB_QPT_UD) &&
+           (my_qp->ext_type != EQPT_LLQP) &&
+           (statetrans == IB_QPST_INIT2RTR) &&
+           (shca->hw_level >= 0x22)) {
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+               mqpcb->send_grh_flag = 1;
+       }
+
        /* sqe -> rts: set purge bit of bad wqe before actual trans */
        if ((my_qp->qp_type == IB_QPT_UD ||
             my_qp->qp_type == IB_QPT_GSI ||
@@ -895,7 +1074,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                if (!ibqp->uobject) {
                        struct ehca_wqe *wqe;
                        /* lock send queue */
-                       spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+                       spin_lock_irqsave(&my_qp->spinlock_s, flags);
                        squeue_locked = 1;
                        /* mark next free wqe */
                        wqe = (struct ehca_wqe*)
@@ -1181,7 +1360,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 
 modify_qp_exit2:
        if (squeue_locked) { /* this means: sqe -> rts */
-               spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+               spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
                my_qp->sqerr_purgeflag = 1;
        }
 
@@ -1312,6 +1491,9 @@ int ehca_query_qp(struct ib_qp *qp,
        qp_attr->alt_port_num = qpcb->alt_phys_port;
        qp_attr->alt_timeout = qpcb->timeout_al;
 
+       qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
+       qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
+
        /* primary av */
        qp_attr->ah_attr.sl = qpcb->service_level;
 
@@ -1367,53 +1549,170 @@ query_qp_exit1:
        return ret;
 }
 
-int ehca_destroy_qp(struct ib_qp *ibqp)
+/*
+ * Modify the attributes of a shared receive queue.
+ *
+ * IB_SRQ_LIMIT is the only attribute bit acted upon; any other bit still
+ * set in attr_mask afterwards is rejected with -EINVAL. When the PD has a
+ * user context, the caller's tgid must match my_pd->ownpid.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+       struct ehca_shca *shca =
+               container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
+       struct ehca_pd *my_pd =
+               container_of(ibsrq->pd, struct ehca_pd, ib_pd);
+       struct ehca_qp *my_qp =
+               container_of(ibsrq, struct ehca_qp, ib_srq);
+       struct hcp_modify_qp_control_block *mqpcb;
+       u32 cur_pid = current->tgid;
+       u64 update_mask = 0;
+       u64 hret;
+       int rc = 0;
+
+       /* a PD with a user context may only be used by its owning process */
+       if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context) {
+               if (my_pd->ownpid != cur_pid) {
+                       ehca_err(ibsrq->pd->device,
+                                "Invalid caller pid=%x ownpid=%x",
+                                cur_pid, my_pd->ownpid);
+                       return -EINVAL;
+               }
+       }
+
+       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+       if (!mqpcb) {
+               ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
+                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+               return -ENOMEM;
+       }
+
+       /* translate each supported attribute and clear its bit */
+       if (attr_mask & IB_SRQ_LIMIT) {
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1);
+               update_mask |=
+                       EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
+               mqpcb->curr_srq_limit =
+                       EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
+               mqpcb->qp_aff_asyn_ev_log_reg =
+                       EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
+               attr_mask &= ~IB_SRQ_LIMIT;
+       }
+
+       if (attr_mask) {
+               /* whatever is left over is an attribute we do not handle */
+               ehca_err(ibsrq->device, "invalid attribute mask bits set  "
+                        "attr_mask=%x", attr_mask);
+               rc = -EINVAL;
+       } else {
+               if (ehca_debug_level)
+                       ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+               hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+                                       my_qp->ipz_qp_handle,
+                                       NULL, update_mask, mqpcb,
+                                       my_qp->galpas.kernel);
+               if (hret != H_SUCCESS) {
+                       rc = ehca2ib_return_code(hret);
+                       ehca_err(ibsrq->device,
+                                "hipz_h_modify_qp() failed rc=%lx "
+                                "ehca_qp=%p qp_num=%x",
+                                hret, my_qp, my_qp->real_qp_num);
+               }
+       }
+
+       /* the control block is released on every path past its allocation */
+       ehca_free_fw_ctrlblock(mqpcb);
+
+       return rc;
+}
+
+/*
+ * Query a shared receive queue.
+ *
+ * Reads the control block via hipz_h_query_qp() and fills in
+ * srq_attr->max_wr and srq_attr->srq_limit; no other field of srq_attr is
+ * written here. When the PD has a user context, the caller's tgid must
+ * match my_pd->ownpid.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
+{
+       struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd);
+       struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
+       struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
                                              ib_device);
+       struct hcp_modify_qp_control_block *qpcb;
+       u32 cur_pid = current->tgid;
+       u64 hret;
+       int rc;
+
+       /* a PD with a user context may only be used by its owning process */
+       if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context) {
+               if (my_pd->ownpid != cur_pid) {
+                       ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x",
+                                cur_pid, my_pd->ownpid);
+                       return -EINVAL;
+               }
+       }
+
+       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+       if (!qpcb) {
+               ehca_err(srq->device, "Out of memory for qpcb "
+                        "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
+               return -ENOMEM;
+       }
+
+       hret = hipz_h_query_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+                              NULL, qpcb, my_qp->galpas.kernel);
+       if (hret == H_SUCCESS) {
+               rc = 0;
+               /* NOTE(review): reported value is the firmware count minus
+                * one; the reason is not visible here - confirm
+                */
+               srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
+               srq_attr->srq_limit = EHCA_BMASK_GET(
+                       MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
+
+               if (ehca_debug_level)
+                       ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+       } else {
+               rc = ehca2ib_return_code(hret);
+               ehca_err(srq->device, "hipz_h_query_qp() failed "
+                        "ehca_qp=%p qp_num=%x h_ret=%lx",
+                        my_qp, my_qp->real_qp_num, hret);
+       }
+
+       /* the control block is released on both outcomes */
+       ehca_free_fw_ctrlblock(qpcb);
+
+       return rc;
+}
+
+int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+                       struct ib_uobject *uobject)
+{
+       struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
        struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
                                             ib_pd);
        u32 cur_pid = current->tgid;
-       u32 qp_num = ibqp->qp_num;
+       u32 qp_num = my_qp->real_qp_num;
        int ret;
        u64 h_ret;
        u8 port_num;
        enum ib_qp_type qp_type;
        unsigned long flags;
 
-       if (ibqp->uobject) {
+       if (uobject) {
                if (my_qp->mm_count_galpa ||
                    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-                       ehca_err(ibqp->device, "Resources still referenced in "
-                                "user space qp_num=%x", ibqp->qp_num);
+                       ehca_err(dev, "Resources still referenced in "
+                                "user space qp_num=%x", qp_num);
                        return -EINVAL;
                }
                if (my_pd->ownpid != cur_pid) {
-                       ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+                       ehca_err(dev, "Invalid caller pid=%x ownpid=%x",
                                 cur_pid, my_pd->ownpid);
                        return -EINVAL;
                }
        }
 
        if (my_qp->send_cq) {
-               ret = ehca_cq_unassign_qp(my_qp->send_cq,
-                                             my_qp->real_qp_num);
+               ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
                if (ret) {
-                       ehca_err(ibqp->device, "Couldn't unassign qp from "
+                       ehca_err(dev, "Couldn't unassign qp from "
                                 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
-                                my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+                                qp_num, my_qp->send_cq->cq_number);
                        return ret;
                }
        }
 
-       spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+       write_lock_irqsave(&ehca_qp_idr_lock, flags);
        idr_remove(&ehca_qp_idr, my_qp->token);
-       spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
        h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
        if (h_ret != H_SUCCESS) {
-               ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+               ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx "
                         "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
                return ehca2ib_return_code(h_ret);
        }
@@ -1424,7 +1723,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
        /* no support for IB_QPT_SMI yet */
        if (qp_type == IB_QPT_GSI) {
                struct ib_event event;
-               ehca_info(ibqp->device, "device %s: port %x is inactive.",
+               ehca_info(dev, "device %s: port %x is inactive.",
                          shca->ib_device.name, port_num);
                event.device = &shca->ib_device;
                event.event = IB_EVENT_PORT_ERR;
@@ -1433,12 +1732,28 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
                ib_dispatch_event(&event);
        }
 
-       ipz_queue_dtor(&my_qp->ipz_rqueue);
-       ipz_queue_dtor(&my_qp->ipz_squeue);
+       if (HAS_RQ(my_qp))
+               ipz_queue_dtor(&my_qp->ipz_rqueue);
+       if (HAS_SQ(my_qp))
+               ipz_queue_dtor(&my_qp->ipz_squeue);
        kmem_cache_free(qp_cache, my_qp);
        return 0;
 }
 
+/*
+ * Verbs entry point for destroying a QP: resolves the ehca_qp that embeds
+ * @qp and forwards to internal_destroy_qp(), returning its result.
+ */
+int ehca_destroy_qp(struct ib_qp *qp)
+{
+       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+
+       return internal_destroy_qp(qp->device, my_qp, qp->uobject);
+}
+
+/*
+ * Verbs entry point for destroying an SRQ: resolves the ehca_qp that embeds
+ * @srq and forwards to internal_destroy_qp(), returning its result.
+ */
+int ehca_destroy_srq(struct ib_srq *srq)
+{
+       struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
+
+       return internal_destroy_qp(srq->device, my_qp, srq->uobject);
+}
+
 int ehca_init_qp_cache(void)
 {
        qp_cache = kmem_cache_create("ehca_cache_qp",
index caec9de..61da65e 100644 (file)
@@ -3,8 +3,9 @@
  *
  *  post_send/recv, poll_cq, req_notify
  *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *           Reinhard Ernst <rernst@de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
@@ -362,10 +363,10 @@ int ehca_post_send(struct ib_qp *qp,
        struct ehca_wqe *wqe_p;
        int wqe_cnt = 0;
        int ret = 0;
-       unsigned long spl_flags;
+       unsigned long flags;
 
        /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+       spin_lock_irqsave(&my_qp->spinlock_s, flags);
 
        /* loop processes list of send reqs */
        for (cur_send_wr = send_wr; cur_send_wr != NULL;
@@ -406,26 +407,31 @@ int ehca_post_send(struct ib_qp *qp,
        } /* eof for cur_send_wr */
 
 post_send_exit0:
-       /* UNLOCK the QUEUE */
-       spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
        iosync(); /* serialize GAL register access */
        hipz_update_sqa(my_qp, wqe_cnt);
+       spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
        return ret;
 }
 
-int ehca_post_recv(struct ib_qp *qp,
-                  struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr)
+static int internal_post_recv(struct ehca_qp *my_qp,
+                             struct ib_device *dev,
+                             struct ib_recv_wr *recv_wr,
+                             struct ib_recv_wr **bad_recv_wr)
 {
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
        struct ib_recv_wr *cur_recv_wr;
        struct ehca_wqe *wqe_p;
        int wqe_cnt = 0;
        int ret = 0;
-       unsigned long spl_flags;
+       unsigned long flags;
+
+       if (unlikely(!HAS_RQ(my_qp))) {
+               ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
+                        my_qp, my_qp->real_qp_num, my_qp->ext_type);
+               return -ENODEV;
+       }
 
        /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+       spin_lock_irqsave(&my_qp->spinlock_r, flags);
 
        /* loop processes list of send reqs */
        for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
@@ -439,8 +445,8 @@ int ehca_post_recv(struct ib_qp *qp,
                                *bad_recv_wr = cur_recv_wr;
                        if (wqe_cnt == 0) {
                                ret = -ENOMEM;
-                               ehca_err(qp->device, "Too many posted WQEs "
-                                        "qp_num=%x", qp->qp_num);
+                               ehca_err(dev, "Too many posted WQEs "
+                                        "qp_num=%x", my_qp->real_qp_num);
                        }
                        goto post_recv_exit0;
                }
@@ -455,23 +461,39 @@ int ehca_post_recv(struct ib_qp *qp,
                        *bad_recv_wr = cur_recv_wr;
                        if (wqe_cnt == 0) {
                                ret = -EINVAL;
-                               ehca_err(qp->device, "Could not write WQE "
-                                        "qp_num=%x", qp->qp_num);
+                               ehca_err(dev, "Could not write WQE "
+                                        "qp_num=%x", my_qp->real_qp_num);
                        }
                        goto post_recv_exit0;
                }
                wqe_cnt++;
-               ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
-                    my_qp, qp->qp_num, wqe_cnt);
+               ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+                        my_qp, my_qp->real_qp_num, wqe_cnt);
        } /* eof for cur_recv_wr */
 
 post_recv_exit0:
-       spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
        iosync(); /* serialize GAL register access */
        hipz_update_rqa(my_qp, wqe_cnt);
+       spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
        return ret;
 }
 
+int ehca_post_recv(struct ib_qp *qp,
+                  struct ib_recv_wr *recv_wr,
+                  struct ib_recv_wr **bad_recv_wr)
+{
+       return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp),
+                                 qp->device, recv_wr, bad_recv_wr);
+}
+
+int ehca_post_srq_recv(struct ib_srq *srq,
+                      struct ib_recv_wr *recv_wr,
+                      struct ib_recv_wr **bad_recv_wr)
+{
+       return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
+                                 srq->device, recv_wr, bad_recv_wr);
+}
+
 /*
  * ib_wc_opcode table converts ehca wc opcode to ib
  * Since we use zero to indicate invalid opcode, the actual ib opcode must
@@ -494,6 +516,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
        int ret = 0;
        struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
        struct ehca_cqe *cqe;
+       struct ehca_qp *my_qp;
        int cqe_count = 0;
 
 poll_cq_one_read_cqe:
@@ -513,7 +536,7 @@ poll_cq_one_read_cqe:
        if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
                struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
                int purgeflag;
-               unsigned long spl_flags;
+               unsigned long flags;
                if (!qp) {
                        ehca_err(cq->device, "cq_num=%x qp_num=%x "
                                 "could not find qp -> ignore cqe",
@@ -523,9 +546,9 @@ poll_cq_one_read_cqe:
                        /* ignore this purged cqe */
                        goto poll_cq_one_read_cqe;
                }
-               spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+               spin_lock_irqsave(&qp->spinlock_s, flags);
                purgeflag = qp->sqerr_purgeflag;
-               spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+               spin_unlock_irqrestore(&qp->spinlock_s, flags);
 
                if (purgeflag) {
                        ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
@@ -545,7 +568,7 @@ poll_cq_one_read_cqe:
        }
 
        /* tracing cqe */
-       if (ehca_debug_level) {
+       if (unlikely(ehca_debug_level)) {
                ehca_dbg(cq->device,
                         "Received COMPLETION ehca_cq=%p cq_num=%x -----",
                         my_cq, my_cq->cq_number);
@@ -579,7 +602,11 @@ poll_cq_one_read_cqe:
        } else
                wc->status = IB_WC_SUCCESS;
 
-       wc->qp = NULL;
+       read_lock(&ehca_qp_idr_lock);
+       my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+       wc->qp = &my_qp->ib_qp;
+       read_unlock(&ehca_qp_idr_lock);
+
        wc->byte_len = cqe->nr_bytes_transferred;
        wc->pkey_index = cqe->pkey_index;
        wc->slid = cqe->rlid;
@@ -589,7 +616,7 @@ poll_cq_one_read_cqe:
        wc->imm_data = cpu_to_be32(cqe->immediate_data);
        wc->sl = cqe->service_level;
 
-       if (wc->status != IB_WC_SUCCESS)
+       if (unlikely(wc->status != IB_WC_SUCCESS))
                ehca_dbg(cq->device,
                         "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
                         "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
@@ -610,7 +637,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
        int nr;
        struct ib_wc *current_wc = wc;
        int ret = 0;
-       unsigned long spl_flags;
+       unsigned long flags;
 
        if (num_entries < 1) {
                ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -619,14 +646,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
                goto poll_cq_exit0;
        }
 
-       spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+       spin_lock_irqsave(&my_cq->spinlock, flags);
        for (nr = 0; nr < num_entries; nr++) {
                ret = ehca_poll_cq_one(cq, current_wc);
                if (ret)
                        break;
                current_wc++;
        } /* eof for nr */
-       spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+       spin_unlock_irqrestore(&my_cq->spinlock, flags);
        if (ret == -EAGAIN  || !ret)
                ret = nr;
 
@@ -637,7 +664,6 @@ poll_cq_exit0:
 int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
 {
        struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       unsigned long spl_flags;
        int ret = 0;
 
        switch (notify_flags & IB_CQ_SOLICITED_MASK) {
@@ -652,6 +678,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
        }
 
        if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+               unsigned long spl_flags;
                spin_lock_irqsave(&my_cq->spinlock, spl_flags);
                ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
                spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
index 973c4b5..03b185f 100644 (file)
@@ -59,6 +59,7 @@
 #include <linux/cpu.h>
 #include <linux/device.h>
 
+#include <asm/atomic.h>
 #include <asm/abs_addr.h>
 #include <asm/ibmebus.h>
 #include <asm/io.h>
index 73db920..3031b3b 100644 (file)
@@ -253,16 +253,16 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
        u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
        u32 cur_pid = current->tgid;
        u32 ret;
-       unsigned long flags;
        struct ehca_cq *cq;
        struct ehca_qp *qp;
        struct ehca_pd *pd;
+       struct ib_uobject *uobject;
 
        switch (q_type) {
        case  1: /* CQ */
-               spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+               read_lock(&ehca_cq_idr_lock);
                cq = idr_find(&ehca_cq_idr, idr_handle);
-               spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+               read_unlock(&ehca_cq_idr_lock);
 
                /* make sure this mmap really belongs to the authorized user */
                if (!cq)
@@ -288,9 +288,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                break;
 
        case 2: /* QP */
-               spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+               read_lock(&ehca_qp_idr_lock);
                qp = idr_find(&ehca_qp_idr, idr_handle);
-               spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+               read_unlock(&ehca_qp_idr_lock);
 
                /* make sure this mmap really belongs to the authorized user */
                if (!qp)
@@ -304,7 +304,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                        return -ENOMEM;
                }
 
-               if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+               uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
+               if (!uobject || uobject->context != context)
                        return -EINVAL;
 
                ret = ehca_mmap_qp(vma, qp, rsrc_type);
index 5766ae3..4776a8b 100644 (file)
@@ -5,6 +5,7 @@
  *
  *  Authors: Christoph Raisch <raisch@de.ibm.com>
  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *           Gerd Bayer <gerd.bayer@de.ibm.com>
  *           Waleri Fomin <fomin@de.ibm.com>
  *
 #define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
 #define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
 
+#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
+#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
+/* handle occupies the whole 64-bit register: inclusive bits 0..63 */
+#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 63)
+#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
+
 #define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
 #define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
 #define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
 #define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
 #define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
 
-/* direct access qp controls */
-#define DAQP_CTRL_ENABLE    0x01
-#define DAQP_CTRL_SEND_COMP 0x20
-#define DAQP_CTRL_RECV_COMP 0x40
+static DEFINE_SPINLOCK(hcall_lock);
 
 static u32 get_longbusy_msecs(int longbusy_rc)
 {
@@ -155,7 +159,7 @@ static long ehca_plpar_hcall9(unsigned long opcode,
 {
        long ret;
        int i, sleep_msecs, lock_is_set = 0;
-       unsigned long flags;
+       unsigned long flags = 0;
 
        ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
                     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
@@ -284,53 +288,53 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
 }
 
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_qp *qp,
                             struct ehca_alloc_qp_parms *parms)
 {
        u64 ret;
-       u64 allocate_controls;
-       u64 max_r10_reg;
+       u64 allocate_controls, max_r10_reg, r11, r12;
        u64 outs[PLPAR_HCALL9_BUFSIZE];
-       u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
-       u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
-       int daqp_ctrl = parms->daqp_ctrl;
 
        allocate_controls =
-               EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
-                              (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+               EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
                | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
                | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
                | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
                | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-                                (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+                                !!(parms->ll_comp_flags & LLQP_RECV_COMP))
                | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-                                (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+                                !!(parms->ll_comp_flags & LLQP_SEND_COMP))
                | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
                                 parms->ud_av_l_key_ctl)
                | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
 
        max_r10_reg =
                EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-                              max_nr_send_wqes)
+                              parms->max_send_wr + 1)
                | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-                                max_nr_receive_wqes)
+                                parms->max_recv_wr + 1)
                | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
                                 parms->max_send_sge)
                | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
                                 parms->max_recv_sge);
 
+       r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
+
+       if (parms->ext_type == EQPT_SRQ)
+               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
+       else
+               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
+
        ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
                                adapter_handle.handle,             /* r4  */
                                allocate_controls,                 /* r5  */
-                               qp->send_cq->ipz_cq_handle.handle,
-                               qp->recv_cq->ipz_cq_handle.handle,
-                               parms->ipz_eq_handle.handle,
-                               ((u64)qp->token << 32) | parms->pd.value,
-                               max_r10_reg,                       /* r10 */
-                               parms->ud_av_l_key_ctl,            /* r11 */
-                               0);
-       qp->ipz_qp_handle.handle = outs[0];
-       qp->real_qp_num = (u32)outs[1];
+                               parms->send_cq_handle.handle,
+                               parms->recv_cq_handle.handle,
+                               parms->eq_handle.handle,
+                               ((u64)parms->token << 32) | parms->pd.value,
+                               max_r10_reg, r11, r12);
+
+       parms->qp_handle.handle = outs[0];
+       parms->real_qp_num = (u32)outs[1];
        parms->act_nr_send_wqes =
                (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
        parms->act_nr_recv_wqes =
@@ -345,7 +349,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
                (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 
        if (ret == H_SUCCESS)
-               hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+               hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
                ehca_gen_err("Not enough resources. ret=%lx", ret);
index 2869f7d..60ce02b 100644 (file)
@@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
  * initialize resources, create empty QPPTs (2 rings).
  */
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_qp *qp,
                             struct ehca_alloc_qp_parms *parms);
 
 u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
index fad9136..dad6dea 100644 (file)
@@ -163,6 +163,7 @@ struct hipz_qptemm {
 
 #define QPX_SQADDER EHCA_BMASK_IBM(48,63)
 #define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3)
 
 #define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
 
@@ -360,6 +361,24 @@ struct hipz_query_hca {
        u32 max_neq;
 } __attribute__ ((packed));
 
+#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
+#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
+#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
+#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
+#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
+#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
+#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
+#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
+#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
+#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
+#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
+#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
+#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
+#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
+#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
+#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
+#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
+
 /* query port response block */
 struct hipz_query_port {
        u32 state;
index 57f141a..007f088 100644 (file)
@@ -105,7 +105,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue);
  * step in struct ipz_queue, will wrap in ringbuffer
  * returns address (kv) of Queue Entry BEFORE increment
  * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
  */
 static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
 {
@@ -120,32 +119,25 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
        return ret;
 }
 
+/*
+ * return a bool indicating whether current Queue Entry is valid
+ */
+static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
+{
+       struct ehca_cqe *cqe = ipz_qeit_get(queue);
+       return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
+}
+
 /*
  * return current Queue Entry, increment Queue Entry iterator by one
  * step in struct ipz_queue, will wrap in ringbuffer
  * returns address (kv) of Queue Entry BEFORE increment
  * returns 0 and does not increment, if wrong valid state
  * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
  */
 static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
 {
-       struct ehca_cqe *cqe = ipz_qeit_get(queue);
-       u32 cqe_flags = cqe->cqe_flags;
-
-       if ((cqe_flags >> 7) != (queue->toggle_state & 1))
-               return NULL;
-
-       ipz_qeit_get_inc(queue);
-       return cqe;
-}
-
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-       struct ehca_cqe *cqe = ipz_qeit_get(queue);
-       u32 cqe_flags = cqe->cqe_flags;
-
-       return cqe_flags >> 7 == (queue->toggle_state & 1);
+       return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
 }
 
 /*
index 90c1454..044da58 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_IPATH
        tristate "QLogic InfiniPath Driver"
-       depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET
+       depends on (PCI_MSI || HT_IRQ) && 64BIT && NET
        ---help---
        This is a driver for QLogic InfiniPath host channel adapters,
        including InfiniBand verbs support.  This driver allows these
index 10c008f..b4b786d 100644 (file)
@@ -189,8 +189,7 @@ typedef enum _ipath_ureg {
 #define IPATH_RUNTIME_FORCE_WC_ORDER   0x4
 #define IPATH_RUNTIME_RCVHDR_COPY      0x8
 #define IPATH_RUNTIME_MASTER   0x10
-#define IPATH_RUNTIME_PBC_REWRITE 0x20
-#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
+/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
 
 /*
  * This structure is returned by ipath_userinit() immediately after
@@ -432,8 +431,15 @@ struct ipath_user_info {
 #define IPATH_CMD_UNUSED_1     25
 #define IPATH_CMD_UNUSED_2     26
 #define IPATH_CMD_PIOAVAILUPD  27      /* force an update of PIOAvail reg */
+#define IPATH_CMD_POLL_TYPE    28      /* set the kind of polling we want */
 
-#define IPATH_CMD_MAX          27
+#define IPATH_CMD_MAX          28
+
+/*
+ * Poll types
+ */
+#define IPATH_POLL_TYPE_URGENT  0x01
+#define IPATH_POLL_TYPE_OVERFLOW 0x02
 
 struct ipath_port_info {
        __u32 num_active;       /* number of active units */
@@ -474,6 +480,8 @@ struct ipath_cmd {
                __u16 part_key;
                /* user address of __u32 bitmask of active slaves */
                __u64 slave_mask_addr;
+               /* type of polling we want */
+               __u16 poll_type;
        } cmd;
 };
 
@@ -502,13 +510,30 @@ struct __ipath_sendpkt {
        struct ipath_iovec sps_iov[4];
 };
 
-/* Passed into diag data special file's ->write method. */
+/*
+ * Diagnostics can send a packet by "writing" one of the following
+ * two structs to the diag data special file.
+ * The first is the legacy version for backward compatibility.
+ */
 struct ipath_diag_pkt {
        __u32 unit;
        __u64 data;
        __u32 len;
 };
 
+/* The second diag_pkt struct is the expanded version that allows
+ * more control over the packet, specifically, by allowing a custom
+ * pbc (+ extra) qword, so that special modes and deliberate
+ * changes to CRCs can be used. The elements were also re-ordered
+ * for better alignment and to avoid padding issues.
+ */
+struct ipath_diag_xpkt {
+       __u64 data;
+       __u64 pbc_wd;
+       __u32 unit;
+       __u32 len;
+};
+
 /*
  * Data layout in I2C flash (for GUID, etc.)
  * All fields are little-endian binary unless otherwise stated
index 3e9241b..a6f04d2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -90,6 +90,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
        wc->queue[head].sl = entry->sl;
        wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
        wc->queue[head].port_num = entry->port_num;
+       /* Make sure queue entry is written before the head index. */
+       smp_wmb();
        wc->head = next;
 
        if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -139,7 +141,8 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 
                if (tail == wc->head)
                        break;
-
+               /* Make sure entry is read after head index is read. */
+               smp_rmb();
                qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
                                      wc->queue[tail].qp_num);
                entry->qp = &qp->ibqp;
index 42bfbdb..19c56e6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 63e8368..a698f19 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -323,13 +323,14 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 {
        u32 __iomem *piobuf;
        u32 plen, clen, pbufn;
-       struct ipath_diag_pkt dp;
+       struct ipath_diag_pkt odp;
+       struct ipath_diag_xpkt dp;
        u32 *tmpbuf = NULL;
        struct ipath_devdata *dd;
        ssize_t ret = 0;
        u64 val;
 
-       if (count < sizeof(dp)) {
+       if (count != sizeof(dp)) {
                ret = -EINVAL;
                goto bail;
        }
@@ -339,6 +340,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                goto bail;
        }
 
+       /*
+        * Due to padding/alignment issues (lessened with new struct)
+        * the old and new structs are the same length. We need to
+        * disambiguate them, which we can do because odp.len has never
+        * been less than the total of LRH+BTH+DETH so far, while
+        * dp.unit (same offset) is unlikely to get that high.
+        * Similarly, dp.data, the pointer to user at the same offset
+        * as odp.unit, is almost certainly at least one (512byte)page
+        * "above" NULL. The if-block below can be omitted if compatibility
+        * between a new driver and older diagnostic code is unimportant.
+        * Compatibility in the other direction (new diags, old driver) is
+        * handled in the diagnostic code, with a warning.
+        */
+       if (dp.unit >= 20 && dp.data < 512) {
+               /* very probable version mismatch. Fix it up */
+               memcpy(&odp, &dp, sizeof(odp));
+               /* We got a legacy dp, copy elements to dp */
+               dp.unit = odp.unit;
+               dp.data = odp.data;
+               dp.len = odp.len;
+               dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+       }
+
        /* send count must be an exact number of dwords */
        if (dp.len & 3) {
                ret = -EINVAL;
@@ -371,9 +395,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                ret = -ENODEV;
                goto bail;
        }
+       /* Check link state, but not if we have custom PBC */
        val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-       if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
-           val != IPATH_IBSTATE_ACTIVE) {
+       if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
+               val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
                ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
                           dd->ipath_unit, (unsigned long long) val);
                ret = -EINVAL;
@@ -419,9 +444,13 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
                           dd->ipath_unit, plen - 1, pbufn);
 
+       if (dp.pbc_wd == 0)
+               /* Legacy operation, use computed pbc_wd */
+               dp.pbc_wd = plen;
+
        /* we have to flush after the PBC for correctness on some cpus
         * or WC buffer can be written out of order */
-       writeq(plen, piobuf);
+       writeq(dp.pbc_wd, piobuf);
        ipath_flush_wc();
        /* copy all by the trigger word, then flush, so it's written
         * to chip before trigger word, then write trigger word, then
index 834e86f..9361f5a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -104,6 +104,9 @@ static int __devinit ipath_init_one(struct pci_dev *,
 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
 
+/* Number of seconds before our card status check...  */
+#define STATUS_TIMEOUT 60
+
 static const struct pci_device_id ipath_pci_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
        { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
@@ -119,6 +122,18 @@ static struct pci_driver ipath_driver = {
        .id_table = ipath_pci_tbl,
 };
 
+static void ipath_check_status(struct work_struct *work)
+{
+       struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
+                                               status_work.work);
+
+       /*
+        * If we don't have any interrupts, let the user know and
+        * don't bother checking again.
+        */
+       if (dd->ipath_int_counter == 0)
+               dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
+}
 
 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
                             u32 *bar0, u32 *bar1)
@@ -187,6 +202,8 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
        dd->pcidev = pdev;
        pci_set_drvdata(pdev, dd);
 
+       INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
+
        list_add(&dd->ipath_list, &ipath_dev_list);
 
 bail_unlock:
@@ -504,6 +521,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
        ipath_diag_add(dd);
        ipath_register_ib_device(dd);
 
+       /* Check the card status in STATUS_TIMEOUT seconds. */
+       schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
+
        goto bail;
 
 bail_irqsetup:
@@ -631,6 +651,9 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
         */
        ipath_shutdown_device(dd);
 
+       cancel_delayed_work(&dd->status_work);
+       flush_scheduled_work();
+
        if (dd->verbs_dev)
                ipath_unregister_ib_device(dd->verbs_dev);
 
@@ -699,9 +722,9 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
        u64 sendctrl, sendorig;
 
        ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-       sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
+       sendorig = dd->ipath_sendctrl;
        for (i = first; i < last; i++) {
-               sendctrl = sendorig |
+               sendctrl = sendorig  | INFINIPATH_S_DISARM |
                        (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
                ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
                                 sendctrl);
@@ -712,12 +735,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
         * while we were looping; no critical bits that would require
         * locking.
         *
-        * Write a 0, and then the original value, reading scratch in
+        * disable PIOAVAILUPD, then re-enable, reading scratch in
         * between.  This seems to avoid a chip timing race that causes
         * pioavail updates to memory to stop.
         */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        0);
+                        sendorig & ~IPATH_S_PIOBUFAVAILUPD);
        sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
                         dd->ipath_sendctrl);
@@ -1014,14 +1037,10 @@ void ipath_kreceive(struct ipath_devdata *dd)
                goto bail;
        }
 
-       /* There is already a thread processing this queue. */
-       if (test_and_set_bit(0, &dd->ipath_rcv_pending))
-               goto bail;
-
        l = dd->ipath_port0head;
        hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
        if (l == hdrqtail)
-               goto done;
+               goto bail;
 
 reloop:
        for (i = 0; l != hdrqtail; i++) {
@@ -1156,10 +1175,6 @@ reloop:
        ipath_stats.sps_avgpkts_call =
                ipath_stats.sps_port0pkts / ++totcalls;
 
-done:
-       clear_bit(0, &dd->ipath_rcv_pending);
-       smp_mb__after_clear_bit();
-
 bail:;
 }
 
@@ -1589,6 +1604,35 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
        return ret;
 }
 
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent.   Used whenever we need to be
+ * sure the send side is idle.  Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo.  The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if normal send had happened.
+ */
+void ipath_cancel_sends(struct ipath_devdata *dd)
+{
+       ipath_dbg("Cancelling all in-progress send buffers\n");
+       dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
+       /*
+        * the abort bit is auto-clearing.  We read scratch to be sure
+        * that cancels and the abort have taken effect in the chip.
+        */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+               INFINIPATH_S_ABORT);
+       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+       ipath_disarm_piobufs(dd, 0,
+               (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
+
+       /* and again, be sure all have hit the chip */
+       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+}
+
+
 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
        static const char *what[4] = {
@@ -1610,14 +1654,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
                           INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
        /* flush all queued sends when going to DOWN or INIT, to be sure that
         * they don't block MAD packets */
-       if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                INFINIPATH_S_ABORT);
-               ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-                                   (unsigned)(dd->ipath_piobcnt2k +
-                                   dd->ipath_piobcnt4k) -
-                                   dd->ipath_lastport_piobuf);
-       }
+       if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT)
+               ipath_cancel_sends(dd);
 
        ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
                         dd->ipath_ibcctrl | which);
@@ -1839,6 +1877,87 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
        ipath_write_kreg(dd, where, value);
 }
 
+/*
+ * The following routines deal with the "obviously simple" task of
+ * overriding the state of the LEDs, which normally indicate link physical
+ * and logical status.  The complications arise in dealing with different
+ * hardware mappings and the board-dependent routine being called from
+ * interrupts.  And then there's the requirement to _flash_ them.
+ */
+#define LED_OVER_FREQ_SHIFT 8 /* blink-frequency field starts at bit 8 of the override value */
+#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) /* 8-bit field: bits 8..15 */
+/* Below is "non-zero" to force override, but both actual LEDs are off */
+#define LED_OVER_BOTH_OFF (8)
+
+void ipath_run_led_override(unsigned long opaque) /* LED override timer callback */
+{
+       struct ipath_devdata *dd = (struct ipath_devdata *)opaque; /* opaque is the devdata pointer */
+       int timeoff;
+       int pidx;
+       u64 lstate, ltstate, val;
+
+       if (!(dd->ipath_flags & IPATH_INITTED))
+               return; /* NOTE(review): the timer is not re-armed on this path - confirm intended */
+
+       pidx = dd->ipath_led_override_phase++ & 1; /* alternate between the two phase values */
+       dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
+       timeoff = dd->ipath_led_override_timeoff;
+
+       /*
+        * below potentially restores the LED values per current status,
+        * should also possibly setup the traffic-blink register,
+        * but leave that to per-chip functions.
+        *
+        * The link state and link training state are extracted from
+        * kr_ibcstatus and handed to the per-chip ipath_f_setextled()
+        * hook; the override value stored above is presumably applied
+        * inside that hook (not visible here).  The timer is then
+        * re-armed to fire again ipath_led_override_timeoff jiffies
+        * from now.
+        */
+       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+       ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+                 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
+       lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
+                INFINIPATH_IBCS_LINKSTATE_MASK;
+
+       dd->ipath_f_setextled(dd, lstate, ltstate);
+       mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
+}
+
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) /* request an LED override */
+{
+       int timeoff, freq;
+
+       if (!(dd->ipath_flags & IPATH_INITTED))
+               return;
+
+       /*
+        * First check if we are blinking. If not, use 1HZ polling.
+        *
+        * Layout of val as decoded below: bits 8..15 hold the blink
+        * frequency (0 means no blinking), bits 0..3 are the LED value
+        * for phase 0 and, when blinking, bits 4..7 are the LED value
+        * for phase 1.
+        */
+       timeoff = HZ;
+       freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+
+       if (freq) {
+               /* For blink, set each phase from one nybble of val */
+               dd->ipath_led_override_vals[0] = val & 0xF;
+               dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
+               timeoff = (HZ << 4)/freq; /* 16 * HZ / freq jiffies per phase */
+       } else {
+               /* Non-blink set both phases the same. */
+               dd->ipath_led_override_vals[0] = val & 0xF;
+               dd->ipath_led_override_vals[1] = val & 0xF;
+       }
+       dd->ipath_led_override_timeoff = timeoff;
+
+       /*
+        * If the timer has not already been started, do so. Use a "quick"
+        * timeout so the function will be called soon, to look at our request.
+        *
+        * The active counter is bumped atomically: only the caller that
+        * takes it from 0 to 1 initializes and adds the timer; every other
+        * caller just undoes its own increment.
+        */
+       if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
+               /* Need to start timer */
+               init_timer(&dd->ipath_led_override_timer);
+               dd->ipath_led_override_timer.function =
+                                                ipath_run_led_override;
+               dd->ipath_led_override_timer.data = (unsigned long) dd;
+               dd->ipath_led_override_timer.expires = jiffies + 1;
+               add_timer(&dd->ipath_led_override_timer);
+       } else {
+               atomic_dec(&dd->ipath_led_override_timer_active);
+       }
+}
+
 /**
  * ipath_shutdown_device - shut down a device
  * @dd: the infinipath device
@@ -1879,17 +1998,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
         */
        udelay(5);
 
-       /*
-        * abort any armed or launched PIO buffers that didn't go. (self
-        * clearing).  Will cause any packet currently being transmitted to
-        * go out with an EBP, and may also cause a short packet error on
-        * the receiver.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        INFINIPATH_S_ABORT);
-
        ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
                            INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+       ipath_cancel_sends(dd);
 
        /* disable IBC */
        dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
@@ -1902,7 +2013,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
         * Turn the LEDs off explictly for the same reason.
         */
        dd->ipath_f_quiet_serdes(dd);
-       dd->ipath_f_setextled(dd, 0, 0);
 
        if (dd->ipath_stats_timer_active) {
                del_timer_sync(&dd->ipath_stats_timer);
@@ -1918,6 +2028,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
                         ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+       ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
+       ipath_update_eeprom_log(dd);
 }
 
 /**
@@ -2078,6 +2191,16 @@ int ipath_reset_device(int unit)
                goto bail;
        }
 
+       if (atomic_read(&dd->ipath_led_override_timer_active)) {
+               /* Need to stop LED timer, _then_ shut off LEDs */
+               del_timer_sync(&dd->ipath_led_override_timer);
+               atomic_set(&dd->ipath_led_override_timer_active, 0);
+       }
+
+       /* Shut off LEDs after we are sure timer is not running */
+       dd->ipath_led_override = LED_OVER_BOTH_OFF;
+       dd->ipath_f_setextled(dd, 0, 0);
+
        dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
 
        if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
index 030185f..6b91479 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -95,39 +95,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
                        enum i2c_type line,
                        enum i2c_state new_line_state)
 {
-       u64 read_val, write_val, mask, *gpioval;
+       u64 out_mask, dir_mask, *gpioval;
+       unsigned long flags = 0;
 
        gpioval = &dd->ipath_gpio_out;
-       read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-       if (line == i2c_line_scl)
-               mask = dd->ipath_gpio_scl;
-       else
-               mask = dd->ipath_gpio_sda;
 
-       if (new_line_state == i2c_line_high)
+       if (line == i2c_line_scl) {
+               dir_mask = dd->ipath_gpio_scl;
+               out_mask = (1UL << dd->ipath_gpio_scl_num);
+       } else {
+               dir_mask = dd->ipath_gpio_sda;
+               out_mask = (1UL << dd->ipath_gpio_sda_num);
+       }
+
+       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+       if (new_line_state == i2c_line_high) {
                /* tri-state the output rather than force high */
-               write_val = read_val & ~mask;
-       else
+               dd->ipath_extctrl &= ~dir_mask;
+       } else {
                /* config line to be an output */
-               write_val = read_val | mask;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+               dd->ipath_extctrl |= dir_mask;
+       }
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
 
-       /* set high and verify */
+       /* set output as well (no real verify) */
        if (new_line_state == i2c_line_high)
-               write_val = 0x1UL;
+               *gpioval |= out_mask;
        else
-               write_val = 0x0UL;
+               *gpioval &= ~out_mask;
 
-       if (line == i2c_line_scl) {
-               write_val <<= dd->ipath_gpio_scl_num;
-               *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
-               *gpioval |= write_val;
-       } else {
-               write_val <<= dd->ipath_gpio_sda_num;
-               *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
-               *gpioval |= write_val;
-       }
        ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
+       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 
        return 0;
 }
@@ -145,8 +143,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
                        enum i2c_type line,
                        enum i2c_state *curr_statep)
 {
-       u64 read_val, write_val, mask;
+       u64 read_val, mask;
        int ret;
+       unsigned long flags = 0;
 
        /* check args */
        if (curr_statep == NULL) {
@@ -154,15 +153,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
                goto bail;
        }
 
-       read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
        /* config line to be an input */
        if (line == i2c_line_scl)
                mask = dd->ipath_gpio_scl;
        else
                mask = dd->ipath_gpio_sda;
-       write_val = read_val & ~mask;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+
+       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+       dd->ipath_extctrl &= ~mask;
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
+       /*
+        * Below is very unlikely to reflect true input state if Output
+        * Enable actually changed.
+        */
        read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 
        if (read_val & mask)
                *curr_statep = i2c_line_high;
@@ -192,6 +197,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd)
 
 static void scl_out(struct ipath_devdata *dd, u8 bit)
 {
+       udelay(1);
        i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
 
        i2c_wait_for_writes(dd);
@@ -314,12 +320,18 @@ static int eeprom_reset(struct ipath_devdata *dd)
        int clock_cycles_left = 9;
        u64 *gpioval = &dd->ipath_gpio_out;
        int ret;
+       unsigned long flags;
 
-       eeprom_init = 1;
+       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+       /* Make sure shadows are consistent */
+       dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
        *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
+       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
        ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
                   "is %llx\n", (unsigned long long) *gpioval);
 
+       eeprom_init = 1;
        /*
         * This is to get the i2c into a known state, by first going low,
         * then tristate sda (and then tristate scl as first thing
@@ -355,8 +367,8 @@ bail:
  * @len: number of bytes to receive
  */
 
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-                     void *buffer, int len)
+static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
+                                       u8 eeprom_offset, void *buffer, int len)
 {
        /* compiler complains unless initialized */
        u8 single_byte = 0;
@@ -406,6 +418,7 @@ bail:
        return ret;
 }
 
+
 /**
  * ipath_eeprom_write - writes data to the eeprom via I2C
  * @dd: the infinipath device
@@ -413,8 +426,8 @@ bail:
  * @buffer: data to write
  * @len: number of bytes to write
  */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                      const void *buffer, int len)
+int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
+                               const void *buffer, int len)
 {
        u8 single_byte;
        int sub_len;
@@ -488,6 +501,38 @@ bail:
        return ret;
 }
 
+/*
+ * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
+ * are now just wrappers around the internal functions.
+ *
+ * ipath_eeprom_read() takes ipath_eep_sem (interruptibly) around the call
+ * to ipath_eeprom_internal_read() and releases it afterwards.  It returns
+ * the non-zero result of down_interruptible() if the semaphore could not
+ * be taken, otherwise whatever the internal read returned.
+ */
+int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
+                       void *buff, int len)
+{
+       int ret;
+
+       ret = down_interruptible(&dd->ipath_eep_sem);
+       if (!ret) {
+               ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
+               up(&dd->ipath_eep_sem);
+       }
+
+       return ret;
+}
+
+int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
+                       const void *buff, int len) /* semaphore-protected wrapper around the internal write */
+{
+       int ret;
+
+       ret = down_interruptible(&dd->ipath_eep_sem); /* non-zero: semaphore was not taken */
+       if (!ret) { /* semaphore held: do the write, then release it */
+               ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
+               up(&dd->ipath_eep_sem);
+       }
+
+       return ret; /* semaphore failure code, or the internal write's result */
+}
+
 static u8 flash_csum(struct ipath_flash *ifp, int adjust)
 {
        u8 *ip = (u8 *) ifp;
@@ -515,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
        void *buf;
        struct ipath_flash *ifp;
        __be64 guid;
-       int len;
+       int len, eep_stat;
        u8 csum, *bguid;
        int t = dd->ipath_unit;
        struct ipath_devdata *dd0 = ipath_lookup(0);
@@ -559,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
                goto bail;
        }
 
-       if (ipath_eeprom_read(dd, 0, buf, len)) {
+       down(&dd->ipath_eep_sem);
+       eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
+       up(&dd->ipath_eep_sem);
+
+       if (eep_stat) {
                ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
                goto done;
        }
@@ -634,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
        ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
                   (unsigned long long) be64_to_cpu(dd->ipath_guid));
 
+       memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
+       /*
+        * Power-on (actually "active") hours are kept as little-endian value
+        * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+        * atomic_t while running.
+        */
+       atomic_set(&dd->ipath_active_time, 0);
+       dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
 done:
        vfree(buf);
 
 bail:;
 }
+
+/**
+ * ipath_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the infinipath device
+ *
+ * Although the time is kept as seconds in the ipath_devdata struct, it is
+ * truncated to whole hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct ipath_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ * Returns 0 when there was nothing to log or the update succeeded, 1 if
+ * the buffer could not be allocated or the stored checksum did not match,
+ * and otherwise the non-zero result of down_interruptible() or of the
+ * internal EEPROM read/write.
+ */
+
+int ipath_update_eeprom_log(struct ipath_devdata *dd)
+{
+       void *buf;
+       struct ipath_flash *ifp;
+       int len, hi_water;
+       uint32_t new_time, new_hrs;
+       u8 csum;
+       int ret, idx;
+       unsigned long flags;
+
+       /*
+        * first, check if we actually need to do anything.
+        * (This is an unlocked peek at the pending error counts; the
+        * counts are re-read under ipath_eep_st_lock further down.)
+        */
+       ret = 0;
+       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+               if (dd->ipath_eep_st_new_errs[idx]) {
+                       ret = 1;
+                       break;
+               }
+       }
+       new_time = atomic_read(&dd->ipath_active_time);
+
+       if (ret == 0 && new_time < 3600)
+               return 0;
+
+       /*
+        * The quick-check above determined that there is something worthy
+        * of logging, so get current contents and take a more detailed look.
+        */
+       len = offsetof(struct ipath_flash, if_future);
+       buf = vmalloc(len);
+       ret = 1; /* returned if the allocation failed */
+       if (!buf) {
+               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+                               "bytes from eeprom for logging\n", len);
+               goto bail;
+       }
+
+       /* Grab semaphore and read current EEPROM. If we get an
+        * error, let go, but if not, keep it until we finish write.
+        */
+       ret = down_interruptible(&dd->ipath_eep_sem);
+       if (ret) {
+               ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+               goto free_bail;
+       }
+       ret = ipath_eeprom_internal_read(dd, 0, buf, len);
+       if (ret) {
+               up(&dd->ipath_eep_sem);
+               ipath_dev_err(dd, "Unable read EEPROM for logging\n");
+               goto free_bail;
+       }
+       ifp = (struct ipath_flash *)buf;
+
+       csum = flash_csum(ifp, 0);
+       if (csum != ifp->if_csum) {
+               up(&dd->ipath_eep_sem);
+               ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+                               csum, ifp->if_csum);
+               ret = 1;
+               goto free_bail;
+       }
+       hi_water = 0; /* last struct offset modified below; 0 means nothing changed */
+       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+               int new_val = dd->ipath_eep_st_new_errs[idx];
+               if (new_val) {
+                       /*
+                        * If we have seen any errors, add to EEPROM values
+                        * We need to saturate at 0xFF (255) and we also
+                        * would need to adjust the checksum if we were
+                        * trying to minimize EEPROM traffic
+                        * Note that we add to actual current count in EEPROM,
+                        * in case it was altered while we were running.
+                        */
+                       new_val += ifp->if_errcntp[idx];
+                       if (new_val > 0xFF)
+                               new_val = 0xFF;
+                       if (ifp->if_errcntp[idx] != new_val) {
+                               ifp->if_errcntp[idx] = new_val;
+                               hi_water = offsetof(struct ipath_flash,
+                                               if_errcntp) + idx;
+                       }
+                       /*
+                        * update our shadow (used to minimize EEPROM
+                        * traffic), to match what we are about to write.
+                        */
+                       dd->ipath_eep_st_errs[idx] = new_val;
+                       dd->ipath_eep_st_new_errs[idx] = 0;
+               }
+       }
+       /*
+        * now update active-time. We would like to round to the nearest hour
+        * but unless atomic_t are sure to be proper signed ints we cannot,
+        * because we need to account for what we "transfer" to EEPROM and
+        * if we log an hour at 31 minutes, then we would need to set
+        * active_time to -29 to accurately count the _next_ hour.
+        *
+        * NOTE(review): the early exit at the top tests "< 3600" while the
+        * test below is "> 3600", so a reading of exactly 3600 seconds gets
+        * this far but transfers no hour; the seconds stay in
+        * ipath_active_time for a later call.
+        */
+       if (new_time > 3600) {
+               new_hrs = new_time / 3600;
+               atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
+               new_hrs += dd->ipath_eep_hrs;
+               if (new_hrs > 0xFFFF)
+                       new_hrs = 0xFFFF;
+               dd->ipath_eep_hrs = new_hrs;
+               if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+                       ifp->if_powerhour[0] = new_hrs & 0xFF;
+                       hi_water = offsetof(struct ipath_flash, if_powerhour);
+               }
+               if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+                       ifp->if_powerhour[1] = new_hrs >> 8;
+                       hi_water = offsetof(struct ipath_flash, if_powerhour)
+                                       + 1;
+               }
+       }
+       /*
+        * There is a tiny possibility that we could somehow fail to write
+        * the EEPROM after updating our shadows, but problems from holding
+        * the spinlock too long are a much bigger issue.
+        */
+       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+       if (hi_water) {
+               /*
+                * we made some change to the data, update cksum and write.
+                * The value returned into csum is not used afterwards;
+                * presumably flash_csum() with a non-zero second argument
+                * stores the new checksum in *ifp - confirm in flash_csum().
+                * Bytes 0..hi_water of the buffer are written back.
+                * NOTE(review): hi_water is simply overwritten by each
+                * change, so this relies on if_powerhour following
+                * if_errcntp in struct ipath_flash - confirm.
+                */
+               csum = flash_csum(ifp, 1);
+               ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
+       }
+       up(&dd->ipath_eep_sem);
+       if (ret)
+               ipath_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+       vfree(buf);
+bail:
+       return ret;
+
+}
+
+/**
+ * ipath_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the infinipath device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ *
+ * The pending count in ipath_eep_st_new_errs[] is updated under
+ * ipath_eep_st_lock with interrupts saved and restored.
+ * NOTE(review): @eidx is used as an array index without a range check
+ * here, so callers must keep it within the counter array.
+ */
+
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
+{
+       uint new_val;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+       new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
+       if (new_val > 255)
+               new_val = 255; /* saturate rather than wrap */
+       dd->ipath_eep_st_new_errs[eidx] = new_val;
+       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+       return;
+}
index 1272aaf..33ab0d6 100644 (file)
@@ -396,7 +396,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
                           "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
                           tid, vaddr, (unsigned long long) physaddr,
                           pagep[i]);
-               dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr);
+               dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
+                                   physaddr);
                /*
                 * don't check this tid in ipath_portshadow, since we
                 * just filled it in; start with the next one.
@@ -422,7 +423,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
                        if (dd->ipath_pageshadow[porttid + tid]) {
                                ipath_cdbg(VERBOSE, "Freeing TID %u\n",
                                           tid);
-                               dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+                               dd->ipath_f_put_tid(dd, &tidbase[tid],
+                                                   RCVHQ_RCV_TYPE_EXPECTED,
                                                    dd->ipath_tidinvalid);
                                pci_unmap_page(dd->pcidev,
                                        dd->ipath_physshadow[porttid + tid],
@@ -538,7 +540,8 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
                if (dd->ipath_pageshadow[porttid + tid]) {
                        ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
                                   pd->port_pid, tid);
-                       dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+                       dd->ipath_f_put_tid(dd, &tidbase[tid],
+                                           RCVHQ_RCV_TYPE_EXPECTED,
                                            dd->ipath_tidinvalid);
                        pci_unmap_page(dd->pcidev,
                                dd->ipath_physshadow[porttid + tid],
@@ -921,7 +924,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
                                            (u64 __iomem *)
                                            ((char __iomem *)
                                             dd->ipath_kregbase +
-                                            dd->ipath_rcvegrbase), 0, pa);
+                                            dd->ipath_rcvegrbase),
+                                           RCVHQ_RCV_TYPE_EAGER, pa);
                        pa += egrsize;
                }
                cond_resched(); /* don't hog the cpu */
@@ -1337,68 +1341,133 @@ bail:
        return ret;
 }
 
-static unsigned int ipath_poll(struct file *fp,
-                              struct poll_table_struct *pt)
+static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
+                                     struct file *fp,
+                                     struct poll_table_struct *pt)
 {
-       struct ipath_portdata *pd;
-       u32 head, tail;
-       int bit;
        unsigned pollflag = 0;
        struct ipath_devdata *dd;
 
-       pd = port_fp(fp);
-       if (!pd)
-               goto bail;
        dd = pd->port_dd;
 
-       bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
-       set_bit(bit, &dd->ipath_rcvctrl);
+       if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
+               pollflag |= POLLERR;
+               clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
+       }
 
-       /*
-        * Before blocking, make sure that head is still == tail,
-        * reading from the chip, so we can be sure the interrupt
-        * enable has made it to the chip.  If not equal, disable
-        * interrupt again and return immediately.  This avoids races,
-        * and the overhead of the chip read doesn't matter much at
-        * this point, since we are waiting for something anyway.
-        */
+       if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) {
+               pollflag |= POLLIN | POLLRDNORM;
+               clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag);
+       }
 
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
+       if (!pollflag) {
+               set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
+               if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
+                       set_bit(IPATH_PORT_WAITING_OVERFLOW,
+                               &pd->port_flag);
+
+               poll_wait(fp, &pd->port_wait, pt);
+       }
+
+       return pollflag;
+}
+
+static unsigned int ipath_poll_next(struct ipath_portdata *pd,
+                                   struct file *fp,
+                                   struct poll_table_struct *pt)
+{
+       u32 head, tail;
+       unsigned pollflag = 0;
+       struct ipath_devdata *dd;
+
+       dd = pd->port_dd;
 
        head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-       tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+       tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
 
-       if (tail == head) {
+       if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
+               pollflag |= POLLERR;
+               clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
+       }
+
+       if (tail != head ||
+           test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
+               pollflag |= POLLIN | POLLRDNORM;
+               clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag);
+       }
+
+       if (!pollflag) {
                set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
+               if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
+                       set_bit(IPATH_PORT_WAITING_OVERFLOW,
+                               &pd->port_flag);
+
+               set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
+                       &dd->ipath_rcvctrl);
+
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+                                dd->ipath_rcvctrl);
+
                if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-                       (void)ipath_write_ureg(dd, ur_rcvhdrhead,
-                                              dd->ipath_rhdrhead_intr_off
-                                              | head, pd->port_port);
-               poll_wait(fp, &pd->port_wait, pt);
+                       ipath_write_ureg(dd, ur_rcvhdrhead,
+                                        dd->ipath_rhdrhead_intr_off | head,
+                                        pd->port_port);
 
-               if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-                       /* timed out, no packets received */
-                       clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-                       pd->port_rcvwait_to++;
-               }
-               else
-                       pollflag = POLLIN | POLLRDNORM;
-       }
-       else {
-               /* it's already happened; don't do wait_event overhead */
-               pollflag = POLLIN | POLLRDNORM;
-               pd->port_rcvnowait++;
+               poll_wait(fp, &pd->port_wait, pt);
        }
 
-       clear_bit(bit, &dd->ipath_rcvctrl);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
+       return pollflag;
+}
+
+static unsigned int ipath_poll(struct file *fp,
+                              struct poll_table_struct *pt)
+{
+       struct ipath_portdata *pd;
+       unsigned pollflag;
+
+       pd = port_fp(fp);
+       if (!pd)
+               pollflag = 0;
+       else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
+               pollflag = ipath_poll_urgent(pd, fp, pt);
+       else
+               pollflag = ipath_poll_next(pd, fp, pt);
 
-bail:
        return pollflag;
 }
 
+static int ipath_supports_subports(int user_swmajor, int user_swminor)
+{
+       /*
+        * no subport implementation prior to software version 1.3
+        *
+        * Returns non-zero when the given user library version is new
+        * enough to know about subports.
+        * NOTE(review): as written, any major above 1 qualifies, and so
+        * does any version whose minor is >= 3 regardless of its major
+        * (e.g. 0.3) - confirm that a major of 0 cannot reach here.
+        */
+       return (user_swmajor > 1) || (user_swminor >= 3);
+}
+
+static int ipath_compatible_subports(int user_swmajor, int user_swminor)
+{
+       /*
+        * this code is written long-hand for clarity
+        *
+        * Returns non-zero if a user library of version
+        * user_swmajor.user_swminor may share a port (subports) with
+        * this driver, whose version is
+        * IPATH_USER_SWMAJOR.IPATH_USER_SWMINOR.  The switch below is on
+        * the driver's own minor version; the user's minor is only
+        * compared inside each case.
+        */
+       if (IPATH_USER_SWMAJOR != user_swmajor) {
+               /* no promise of compatibility if major mismatch */
+               return 0;
+       }
+       if (IPATH_USER_SWMAJOR == 1) {
+               switch (IPATH_USER_SWMINOR) {
+               case 0:
+               case 1:
+               case 2:
+                       /* no subport implementation so cannot be compatible */
+                       return 0;
+               case 3:
+                       /* 3 is only compatible with itself */
+                       return user_swminor == 3;
+               default:
+                       /* >= 4 are compatible (or are expected to be) */
+                       return user_swminor >= 4;
+               }
+       }
+       /* make no promises yet for future major versions */
+       return 0;
+}
+
 static int init_subports(struct ipath_devdata *dd,
                         struct ipath_portdata *pd,
                         const struct ipath_user_info *uinfo)
@@ -1408,20 +1477,32 @@ static int init_subports(struct ipath_devdata *dd,
        size_t size;
 
        /*
-        * If the user is requesting zero or one port,
+        * If the user is requesting zero subports,
         * skip the subport allocation.
         */
-       if (uinfo->spu_subport_cnt <= 1)
+       if (uinfo->spu_subport_cnt <= 0)
+               goto bail;
+
+       /* Self-consistency check for ipath_compatible_subports() */
+       if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
+           !ipath_compatible_subports(IPATH_USER_SWMAJOR,
+                                      IPATH_USER_SWMINOR)) {
+               dev_info(&dd->pcidev->dev,
+                        "Inconsistent ipath_compatible_subports()\n");
                goto bail;
+       }
 
-       /* Old user binaries don't know about new subport implementation */
-       if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+       /* Check for subport compatibility */
+       if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
+                                      uinfo->spu_userversion & 0xffff)) {
                dev_info(&dd->pcidev->dev,
-                        "Mismatched user minor version (%d) and driver "
-                         "minor version (%d) while port sharing. Ensure "
+                        "Mismatched user version (%d.%d) and driver "
+                        "version (%d.%d) while port sharing. Ensure "
                          "that driver and library are from the same "
                          "release.\n",
+                        (int) (uinfo->spu_userversion >> 16),
                          (int) (uinfo->spu_userversion & 0xffff),
+                        IPATH_USER_SWMAJOR,
                         IPATH_USER_SWMINOR);
                goto bail;
        }
@@ -1725,14 +1806,13 @@ static int ipath_open(struct inode *in, struct file *fp)
        return fp->private_data ? 0 : -ENOMEM;
 }
 
-
 /* Get port early, so can set affinity prior to memory allocation */
 static int ipath_assign_port(struct file *fp,
                              const struct ipath_user_info *uinfo)
 {
        int ret;
        int i_minor;
-       unsigned swminor;
+       unsigned swmajor, swminor;
 
        /* Check to be sure we haven't already initialized this file */
        if (port_fp(fp)) {
@@ -1741,7 +1821,8 @@ static int ipath_assign_port(struct file *fp,
        }
 
        /* for now, if major version is different, bail */
-       if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+       swmajor = uinfo->spu_userversion >> 16;
+       if (swmajor != IPATH_USER_SWMAJOR) {
                ipath_dbg("User major version %d not same as driver "
                          "major %d\n", uinfo->spu_userversion >> 16,
                          IPATH_USER_SWMAJOR);
@@ -1756,7 +1837,8 @@ static int ipath_assign_port(struct file *fp,
 
        mutex_lock(&ipath_mutex);
 
-       if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+       if (ipath_compatible_subports(swmajor, swminor) &&
+           uinfo->spu_subport_cnt &&
            (ret = find_shared_port(fp, uinfo))) {
                mutex_unlock(&ipath_mutex);
                if (ret > 0)
@@ -2020,7 +2102,8 @@ static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
        info.port = pd->port_port;
        info.subport = subport;
        /* Don't return new fields if old library opened the port. */
-       if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+       if (ipath_supports_subports(pd->userversion >> 16,
+                                   pd->userversion & 0xffff)) {
                /* Number of user ports available for this device. */
                info.num_ports = pd->port_dd->ipath_cfgports - 1;
                info.num_subports = pd->port_subport_cnt;
@@ -2123,6 +2206,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
                src = NULL;
                dest = NULL;
                break;
+       case IPATH_CMD_POLL_TYPE:
+               copy = sizeof(cmd.cmd.poll_type);
+               dest = &cmd.cmd.poll_type;
+               src = &ucmd->cmd.poll_type;
+               break;
        default:
                ret = -EINVAL;
                goto bail;
@@ -2195,6 +2283,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
        case IPATH_CMD_PIOAVAILUPD:
                ret = ipath_force_pio_avail_update(pd->port_dd);
                break;
+       case IPATH_CMD_POLL_TYPE:
+               pd->poll_type = cmd.cmd.poll_type;
+               break;
        }
 
        if (ret >= 0)
index ebd5c7b..2e689b9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -257,9 +257,14 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
                /* Notimpl InitType (actually, an SMA decision) */
                /* VLHighLimit is 0 (only one VL) */
                ; /* VLArbitrationHighCap is 0 (only one VL) */
+       /*
+        * Note: the chips support a maximum MTU of 4096, but the driver
+        * hasn't implemented this feature yet, so set the maximum
+        * to 2048.
+        */
        portinfo[10] =  /* VLArbitrationLowCap is 0 (only one VL) */
                /* InitTypeReply is SMA decision */
-               (5 << 16)       /* MTUCap 4096 */
+               (4 << 16)       /* MTUCap 2048 */
                | (7 << 13)     /* VLStallCount */
                | (0x1f << 8)   /* HOQLife */
                | (1 << 4)
index 4171198..650745d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -36,6 +36,7 @@
  * HT chip.
  */
 
+#include <linux/vmalloc.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/htirq.h>
@@ -439,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
        u32 bits, ctrl;
        int isfatal = 0;
        char bitsmsg[64];
+       int log_idx;
 
        hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
 
@@ -467,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
        hwerrs &= dd->ipath_hwerrmask;
 
+       /* We log some errors to EEPROM, check if we have any of those. */
+       for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+               if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+                       ipath_inc_eeprom_err(dd, log_idx, 1);
+
        /*
         * make sure we get this much out, unless told to be quiet,
         * it's a parity error we may recover from,
@@ -502,9 +509,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
                if (!hwerrs) {
                        ipath_dbg("Clearing freezemode on ignored or "
                                  "recovered hardware error\n");
-                       ctrl &= ~INFINIPATH_C_FREEZEMODE;
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                        ctrl);
+                       ipath_clear_freeze(dd);
                }
        }
 
@@ -672,10 +677,16 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
        if (n)
                snprintf(name, namelen, "%s", n);
 
+       if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 &&
+           dd->ipath_boardrev != 11) {
+               ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
+               ret = 1;
+               goto bail;
+       }
        if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-               dd->ipath_minrev > 3)) {
+               dd->ipath_minrev > 4)) {
                /*
-                * This version of the driver only supports Rev 3.2 and 3.3
+                * This version of the driver only supports Rev 3.2 - 3.4
                 */
                ipath_dev_err(dd,
                              "Unsupported InfiniPath hardware revision %u.%u!\n",
@@ -689,36 +700,11 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
         * copies
         */
        dd->ipath_flags |= IPATH_32BITCOUNTERS;
+       dd->ipath_flags |= IPATH_GPIO_INTR;
        if (dd->ipath_htspeed != 800)
                ipath_dev_err(dd,
                              "Incorrectly configured for HT @ %uMHz\n",
                              dd->ipath_htspeed);
-       if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
-           dd->ipath_boardrev == 6)
-               dd->ipath_flags |= IPATH_GPIO_INTR;
-       else
-               dd->ipath_flags |= IPATH_POLL_RX_INTR;
-       if (dd->ipath_boardrev == 8) {  /* LS/X-1 */
-               u64 val;
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-               if (val & INFINIPATH_EXTS_SERDESSEL) {
-                       /*
-                        * hardware disabled
-                        *
-                        * This means that the chip is hardware disabled,
-                        * and will not be able to bring up the link,
-                        * in any case.  We special case this and abort
-                        * early, to avoid later messages.  We also set
-                        * the DISABLED status bit
-                        */
-                       ipath_dbg("Unit %u is hardware-disabled\n",
-                                 dd->ipath_unit);
-                       *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
-                       /* this value is handled differently */
-                       ret = 2;
-                       goto bail;
-               }
-       }
        ret = 0;
 
 bail:
@@ -1058,12 +1044,24 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
                                     u64 lst, u64 ltst)
 {
        u64 extctl;
+       unsigned long flags = 0;
 
        /* the diags use the LED to indicate diag info, so we leave
         * the external LED alone when the diags are running */
        if (ipath_diag_inuse)
                return;
 
+       /* Allow override of LED display for, e.g. Locating system in rack */
+       if (dd->ipath_led_override) {
+               ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+                       ? INFINIPATH_IBCS_LT_STATE_LINKUP
+                       : INFINIPATH_IBCS_LT_STATE_DISABLED;
+               lst = (dd->ipath_led_override & IPATH_LED_LOG)
+                       ? INFINIPATH_IBCS_L_STATE_ACTIVE
+                       : INFINIPATH_IBCS_L_STATE_DOWN;
+       }
+
+       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
        /*
         * start by setting both LED control bits to off, then turn
         * on the appropriate bit(s).
@@ -1092,6 +1090,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
        }
        dd->ipath_extctrl = extctl;
        ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 }
 
 static void ipath_init_ht_variables(struct ipath_devdata *dd)
@@ -1157,6 +1156,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
 
        dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
        dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+       /*
+        * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+        * 2 is Some Misc, 3 is reserved for future.
+        */
+       dd->ipath_eep_st_masks[0].hwerrs_to_log =
+               INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+               INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+       dd->ipath_eep_st_masks[1].hwerrs_to_log =
+               INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+               INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+       dd->ipath_eep_st_masks[2].errs_to_log =
+               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
 }
 
 /**
@@ -1372,7 +1387,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
  * ipath_pe_put_tid - write a TID in chip
  * @dd: the infinipath device
  * @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
  * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
  *
  * This exists as a separate routine to allow for special locking etc.
@@ -1393,7 +1408,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
                                 "40 bits, using only 40!!!\n", pa);
                        pa &= INFINIPATH_RT_ADDR_MASK;
                }
-               if (type == 0)
+               if (type == RCVHQ_RCV_TYPE_EAGER)
                        pa |= dd->ipath_tidtemplate;
                else {
                        /* in words (fixed, full page).  */
@@ -1433,7 +1448,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
                                   port * dd->ipath_rcvtidcnt *
                                   sizeof(*tidbase));
        for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
+               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+                                dd->ipath_tidinvalid);
 
        tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
                                   dd->ipath_rcvegrbase +
@@ -1441,7 +1457,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
                                   sizeof(*tidbase));
 
        for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
+               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+                                dd->ipath_tidinvalid);
 }
 
 /**
@@ -1528,11 +1545,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
                writel(16, piobuf);
                piobuf += pioincr;
        }
-       /*
-        * self-clearing
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        INFINIPATH_S_ABORT);
 
        ipath_get_eeprom_info(dd);
        if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
@@ -1543,8 +1555,10 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
                 * with 128, rather than 112.
                 */
                dd->ipath_flags |= IPATH_GPIO_INTR;
-               dd->ipath_flags &= ~IPATH_POLL_RX_INTR;
-       }
+       } else
+               ipath_dev_err(dd, "Unsupported InfiniPath serial "
+                             "number %.16s!\n", dd->ipath_serial);
+
        return 0;
 }
 
@@ -1561,7 +1575,6 @@ static int ipath_ht_txe_recover(struct ipath_devdata *dd)
        }
        dev_info(&dd->pcidev->dev,
                "Recovering from TXE PIO parity error\n");
-       ipath_disarm_senderrbufs(dd, 1);
        return 1;
 }
 
index 4e2e3df..9868ccd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -296,13 +296,6 @@ static const struct ipath_cregs ipath_pe_cregs = {
 #define IPATH_GPIO_SCL (1ULL << \
        (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
 
-/*
- * Rev2 silicon allows suppressing check for ArmLaunch errors.
- * this can speed up short packet sends on systems that do
- * not guaranteee write-order.
- */
-#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
-
 /* 6120 specific hardware errors... */
 static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
        INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
@@ -347,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
        u32 bits, ctrl;
        int isfatal = 0;
        char bitsmsg[64];
+       int log_idx;
 
        hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
        if (!hwerrs) {
@@ -374,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
        hwerrs &= dd->ipath_hwerrmask;
 
+       /* We log some errors to EEPROM, check if we have any of those. */
+       for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+               if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+                       ipath_inc_eeprom_err(dd, log_idx, 1);
+
        /*
         * make sure we get this much out, unless told to be quiet,
         * or it's occurred within the last 5 seconds
@@ -431,10 +430,12 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
                        *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
                        dd->ipath_flags &= ~IPATH_INITTED;
                } else {
-                       ipath_dbg("Clearing freezemode on ignored hardware "
-                                 "error\n");
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                        dd->ipath_control);
+                       static u32 freeze_cnt;
+
+                       freeze_cnt++;
+                       ipath_dbg("Clearing freezemode on ignored or recovered "
+                                 "hardware error (%u)\n", freeze_cnt);
+                       ipath_clear_freeze(dd);
                }
        }
 
@@ -680,17 +681,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
                val |= dd->ipath_rx_pol_inv <<
                        INFINIPATH_XGXS_RX_POL_SHIFT;
        }
-       if (dd->ipath_minrev >= 2) {
-               /* Rev 2. can tolerate multiple writes to PBC, and
-                * allowing them can provide lower latency on some
-                * CPUs, but this feature is off by default, only
-                * turned on by setting D63 of XGXSconfig reg.
-                * May want to make this conditional more
-                * fine-grained in future. This is not exactly
-                * related to XGXS, but where the bit ended up.
-                */
-               val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
-       }
        if (val != prev_val)
                ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
@@ -791,12 +781,24 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
                                     u64 ltst)
 {
        u64 extctl;
+       unsigned long flags = 0;
 
        /* the diags use the LED to indicate diag info, so we leave
         * the external LED alone when the diags are running */
        if (ipath_diag_inuse)
                return;
 
+       /* Allow override of LED display for, e.g. Locating system in rack */
+       if (dd->ipath_led_override) {
+               ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+                       ? INFINIPATH_IBCS_LT_STATE_LINKUP
+                       : INFINIPATH_IBCS_LT_STATE_DISABLED;
+               lst = (dd->ipath_led_override & IPATH_LED_LOG)
+                       ? INFINIPATH_IBCS_L_STATE_ACTIVE
+                       : INFINIPATH_IBCS_L_STATE_DOWN;
+       }
+
+       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
        extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
                                       INFINIPATH_EXTC_LED2PRIPORT_ON);
 
@@ -806,6 +808,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
                extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
        dd->ipath_extctrl = extctl;
        ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 }
 
 /**
@@ -955,6 +958,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
 
        dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
        dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+       /*
+        * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+        * 2 is Some Misc, 3 is reserved for future.
+        */
+       dd->ipath_eep_st_masks[0].hwerrs_to_log =
+               INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+               INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+       /* Ignore errors in PIO/PBC on systems with unordered write-combining */
+       if (ipath_unordered_wc())
+               dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
+
+       dd->ipath_eep_st_masks[1].hwerrs_to_log =
+               INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+               INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+       dd->ipath_eep_st_masks[2].errs_to_log =
+               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
+
 }
 
 /* setup the MSI stuff again after a reset.  I'd like to just call
@@ -1082,7 +1106,7 @@ bail:
  * ipath_pe_put_tid - write a TID in chip
  * @dd: the infinipath device
  * @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
  * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
  *
  * This exists as a separate routine to allow for special locking etc.
@@ -1108,7 +1132,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
                                      "BUG: Physical page address 0x%lx "
                                      "has bits set in 31-29\n", pa);
 
-               if (type == 0)
+               if (type == RCVHQ_RCV_TYPE_EAGER)
                        pa |= dd->ipath_tidtemplate;
                else /* for now, always full 4KB page */
                        pa |= 2 << 29;
@@ -1132,7 +1156,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
  * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
  * @dd: the infinipath device
  * @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
  * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
  *
  * This exists as a separate routine to allow for selection of the
@@ -1157,7 +1181,7 @@ static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
                                      "BUG: Physical page address 0x%lx "
                                      "has bits set in 31-29\n", pa);
 
-               if (type == 0)
+               if (type == RCVHQ_RCV_TYPE_EAGER)
                        pa |= dd->ipath_tidtemplate;
                else /* for now, always full 4KB page */
                        pa |= 2 << 29;
@@ -1196,7 +1220,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
                 port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
 
        for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
+               ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+                                tidinv);
 
        tidbase = (u64 __iomem *)
                ((char __iomem *)(dd->ipath_kregbase) +
@@ -1204,7 +1229,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
                 port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
 
        for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
+               ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+                                tidinv);
 }
 
 /**
@@ -1311,13 +1337,6 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 
        dd = pd->port_dd;
 
-       if (dd != NULL && dd->ipath_minrev >= 2) {
-               ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
-               ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
-       }
-
 done:
        kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
        return 0;
@@ -1354,7 +1373,6 @@ static int ipath_pe_txe_recover(struct ipath_devdata *dd)
                dev_info(&dd->pcidev->dev,
                        "Recovering from TXE PIO parity error\n");
        }
-       ipath_disarm_senderrbufs(dd, 1);
        return 1;
 }
 
index 7045ba6..49951d5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -133,7 +133,8 @@ static int create_port0_egr(struct ipath_devdata *dd)
                                   dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
                dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
                                    ((char __iomem *) dd->ipath_kregbase +
-                                    dd->ipath_rcvegrbase), 0,
+                                    dd->ipath_rcvegrbase),
+                                   RCVHQ_RCV_TYPE_EAGER,
                                    dd->ipath_port0_skbinfo[e].phys);
        }
 
@@ -310,7 +311,12 @@ static int init_chip_first(struct ipath_devdata *dd,
        val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
        dd->ipath_piosize2k = val & ~0U;
        dd->ipath_piosize4k = val >> 32;
-       dd->ipath_ibmtu = 4096; /* default to largest legal MTU */
+       /*
+        * Note: the chips support a maximum MTU of 4096, but the driver
+        * hasn't implemented this feature yet, so set the initial value
+        * to 2048.
+        */
+       dd->ipath_ibmtu = 2048;
        val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
        dd->ipath_piobcnt2k = val & ~0U;
        dd->ipath_piobcnt4k = val >> 32;
@@ -340,6 +346,10 @@ static int init_chip_first(struct ipath_devdata *dd,
 
        spin_lock_init(&dd->ipath_tid_lock);
 
+       spin_lock_init(&dd->ipath_gpio_lock);
+       spin_lock_init(&dd->ipath_eep_st_lock);
+       sema_init(&dd->ipath_eep_sem, 1);
+
 done:
        *pdp = pd;
        return ret;
@@ -646,7 +656,7 @@ static int init_housekeeping(struct ipath_devdata *dd,
        ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
 
        snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-                "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
+                "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
                 "SW Compat %u\n",
                 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
                 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
@@ -727,7 +737,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
        uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
        if (ipath_kpiobufs == 0) {
                /* not set by user (this is default) */
-               if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
+               if (piobufs > 144)
                        kpiobufs = 32;
                else
                        kpiobufs = 16;
@@ -767,6 +777,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
                   piobufs, dd->ipath_pbufsport, uports);
 
        dd->ipath_f_early_init(dd);
+       /*
+        * cancel any possible active sends from early driver load.
+        * Follows early_init because some chips have to initialize
+        * PIO buffers in early_init to avoid false parity errors.
+        */
+       ipath_cancel_sends(dd);
 
        /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
         * done after early_init */
index a90d3b5..47aa434 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -93,7 +93,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 
        if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
                int i;
-               if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+               if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
+                       dd->ipath_lastcancel > jiffies) {
                        __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
                                          "SendbufErrs %lx %lx", sbuf[0],
                                          sbuf[1]);
@@ -108,7 +109,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
                                        ipath_clrpiobuf(dd, i);
                                ipath_disarm_piobufs(dd, i, 1);
                        }
-               dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
+               /* ignore armlaunch errs for a bit */
+               dd->ipath_lastcancel = jiffies+3;
        }
 }
 
@@ -130,6 +132,17 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
         INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
         INFINIPATH_E_INVALIDADDR)
 
+/*
+ * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
+ * errors not related to freeze and cancelling buffers.  Can't ignore
+ * armlaunch because could get more while still cleaning up, and need
+ * to cancel those as they happen.
+ */
+#define E_SPKT_ERRS_IGNORE \
+        (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
+        INFINIPATH_E_SPKTLEN)
+
 /*
  * these are errors that can occur when the link changes state while
  * a packet is being sent or received.  This doesn't cover things
@@ -290,12 +303,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                 * Flush all queued sends when link went to DOWN or INIT,
                 * to be sure that they don't block SMA and other MAD packets
                 */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                INFINIPATH_S_ABORT);
-               ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-                                                       (unsigned)(dd->ipath_piobcnt2k +
-                                       dd->ipath_piobcnt4k) -
-                                       dd->ipath_lastport_piobuf);
+               ipath_cancel_sends(dd);
        }
        else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
            lstate == IPATH_IBSTATE_ACTIVE) {
@@ -505,6 +513,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
        int i, iserr = 0;
        int chkerrpkts = 0, noprint = 0;
        unsigned supp_msgs;
+       int log_idx;
 
        supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
 
@@ -518,6 +527,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
        if (errs & INFINIPATH_E_HARDWARE) {
                /* reuse same msg buf */
                dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
+       } else {
+               u64 mask;
+               for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
+                       mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
+                       if (errs & mask)
+                               ipath_inc_eeprom_err(dd, log_idx, 1);
+               }
        }
 
        if (!noprint && (errs & ~dd->ipath_e_bitsextant))
@@ -675,6 +691,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                                        chkerrpkts = 1;
                                dd->ipath_lastrcvhdrqtails[i] = tl;
                                pd->port_hdrqfull++;
+                               if (test_bit(IPATH_PORT_WAITING_OVERFLOW,
+                                            &pd->port_flag)) {
+                                       clear_bit(
+                                         IPATH_PORT_WAITING_OVERFLOW,
+                                         &pd->port_flag);
+                                       set_bit(
+                                         IPATH_PORT_WAITING_OVERFLOW,
+                                         &pd->int_flag);
+                                       wake_up_interruptible(
+                                         &pd->port_wait);
+                               }
                        }
                }
        }
@@ -744,6 +771,72 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
        return chkerrpkts;
 }
 
+
+/*
+ * try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it for anything changing while in freeze mode
+ * (we don't want to wait for the next pio buffer state change).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ */
+void ipath_clear_freeze(struct ipath_devdata *dd)
+{
+       int i, im;
+       __le64 val;
+
+       /* disable error interrupts, to avoid confusion */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
+
+       /*
+        * clear all sends, because they may have been
+        * completed by usercode while in freeze mode, and
+        * therefore would not be sent, and eventually
+        * might cause the process to run out of bufs
+        */
+       ipath_cancel_sends(dd);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+                        dd->ipath_control);
+
+       /* ensure pio avail updates continue */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+                dd->ipath_sendctrl & ~IPATH_S_PIOBUFAVAILUPD);
+       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+                dd->ipath_sendctrl);
+
+       /*
+        * We just enabled pioavailupdate, so dma copy is almost certainly
+        * not yet right, so read the registers directly.  Similar to init
+        */
+       for (i = 0; i < dd->ipath_pioavregs; i++) {
+               /* deal with 6110 chip bug: swap even/odd regs above 3 */
+               im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
+               val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
+               dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
+                       = le64_to_cpu(val);
+       }
+
+       /*
+        * force new interrupt if any hwerr, error or interrupt bits are
+        * still set, and clear "safe" send packet errors related to freeze
+        * and cancelling sends.  Re-enable error interrupts before possible
+        * force of re-interrupt on pending interrupts.
+        */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+               E_SPKT_ERRS_IGNORE);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+               ~dd->ipath_maskederrs);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+}
+
+
 /* this is separate to allow for better optimization of ipath_intr() */
 
 static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
@@ -872,14 +965,25 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
                   dd->ipath_i_rcvurg_mask);
        for (i = 1; i < dd->ipath_cfgports; i++) {
                struct ipath_portdata *pd = dd->ipath_pd[i];
-               if (portr & (1 << i) && pd && pd->port_cnt &&
-                       test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-                       clear_bit(IPATH_PORT_WAITING_RCV,
-                                 &pd->port_flag);
-                       clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
-                                 &dd->ipath_rcvctrl);
-                       wake_up_interruptible(&pd->port_wait);
-                       rcvdint = 1;
+               if (portr & (1 << i) && pd && pd->port_cnt) {
+                       if (test_bit(IPATH_PORT_WAITING_RCV,
+                                    &pd->port_flag)) {
+                               clear_bit(IPATH_PORT_WAITING_RCV,
+                                         &pd->port_flag);
+                               set_bit(IPATH_PORT_WAITING_RCV,
+                                       &pd->int_flag);
+                               clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+                                         &dd->ipath_rcvctrl);
+                               wake_up_interruptible(&pd->port_wait);
+                               rcvdint = 1;
+                       } else if (test_bit(IPATH_PORT_WAITING_URG,
+                                           &pd->port_flag)) {
+                               clear_bit(IPATH_PORT_WAITING_URG,
+                                         &pd->port_flag);
+                               set_bit(IPATH_PORT_WAITING_URG,
+                                       &pd->int_flag);
+                               wake_up_interruptible(&pd->port_wait);
+                       }
                }
        }
        if (rcvdint) {
@@ -905,6 +1009,9 @@ irqreturn_t ipath_intr(int irq, void *data)
 
        ipath_stats.sps_ints++;
 
+       if (dd->ipath_int_counter != (u32) -1)
+               dd->ipath_int_counter++;
+
        if (!(dd->ipath_flags & IPATH_PRESENT)) {
                /*
                 * This return value is not great, but we do not want the
index 12194f3..3105005 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _IPATH_KERNEL_H
 #define _IPATH_KERNEL_H
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
 extern struct infinipath_stats ipath_stats;
 
 #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
+/*
+ * First-cut criterion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define IPATH_EEP_LOG_CNT (4)
+struct ipath_eep_log_mask {
+       u64 errs_to_log;        /* error-register bits counted */
+       u64 hwerrs_to_log;      /* hwerror-register bits counted */
+};
 
 struct ipath_portdata {
        void **port_rcvegrbuf;
@@ -109,6 +127,8 @@ struct ipath_portdata {
        u32 port_tidcursor;
        /* next expected TID to check */
        unsigned long port_flag;
+       /* IPATH_PORT_WAITING_* events that have occurred */
+       unsigned long int_flag;
        /* WAIT_RCV that timed out, no interrupt */
        u32 port_rcvwait_to;
        /* WAIT_PIO that timed out, no interrupt */
@@ -137,6 +157,8 @@ struct ipath_portdata {
        u32 userversion;
        /* Bitmask of active slaves */
        u32 active_slaves;
+       /* Type of packets or conditions we want to poll for */
+       u16 poll_type;
 };
 
 struct sk_buff;
@@ -275,6 +297,8 @@ struct ipath_devdata {
        u32 ipath_lastport_piobuf;
        /* is a stats timer active */
        u32 ipath_stats_timer_active;
+       /* number of interrupts for this device -- saturates... */
+       u32 ipath_int_counter;
        /* dwords sent read from counter */
        u32 ipath_lastsword;
        /* dwords received read from counter */
@@ -369,9 +393,6 @@ struct ipath_devdata {
        struct class_device *diag_class_dev;
        /* timer used to prevent stats overflow, error throttling, etc. */
        struct timer_list ipath_stats_timer;
-       /* check for stale messages in rcv queue */
-       /* only allow one intr at a time. */
-       unsigned long ipath_rcv_pending;
        void *ipath_dummy_hdrq; /* used after port close */
        dma_addr_t ipath_dummy_hdrq_phys;
 
@@ -399,6 +420,8 @@ struct ipath_devdata {
        u64 ipath_gpio_out;
        /* shadow the gpio mask register */
        u64 ipath_gpio_mask;
+       /* shadow the gpio output enable, etc... */
+       u64 ipath_extctrl;
        /* kr_revision shadow */
        u64 ipath_revision;
        /*
@@ -473,8 +496,6 @@ struct ipath_devdata {
        u32 ipath_cregbase;
        /* shadow the control register contents */
        u32 ipath_control;
-       /* shadow the gpio output contents */
-       u32 ipath_extctrl;
        /* PCI revision register (HTC rev on FPGA) */
        u32 ipath_pcirev;
 
@@ -552,6 +573,9 @@ struct ipath_devdata {
        u32 ipath_overrun_thresh_errs;
        u32 ipath_lli_errs;
 
+       /* status check work */
+       struct delayed_work status_work;
+
        /*
         * Not all devices managed by a driver instance are the same
         * type, so these fields must be per-device.
@@ -575,6 +599,37 @@ struct ipath_devdata {
        u16 ipath_gpio_scl_num;
        u64 ipath_gpio_sda;
        u64 ipath_gpio_scl;
+
+       /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
+       spinlock_t ipath_gpio_lock;
+
+       /* used to override LED behavior */
+       u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
+       u16 ipath_led_override_timeoff; /* delta to next timer event */
+       u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
+       u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
+       atomic_t ipath_led_override_timer_active;
+       /* Used to flash LEDs in override mode */
+       struct timer_list ipath_led_override_timer;
+
+       /* Support (including locks) for EEPROM logging of errors and time */
+       /* control access to actual counters, timer */
+       spinlock_t ipath_eep_st_lock;
+       /* control high-level access to EEPROM */
+       struct semaphore ipath_eep_sem;
+       /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
+       uint64_t ipath_traffic_wds;
+       /* active time is kept in seconds, but logged in hours */
+       atomic_t ipath_active_time;
+       /* Below are nominal shadow of EEPROM, new since last EEPROM update */
+       uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
+       uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
+       uint16_t ipath_eep_hrs;
+       /*
+        * masks for which bits of errs, hwerrs that cause
+        * each of the counters to increment.
+        */
+       struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
 };
 
 /* Private data for file operations */
@@ -592,6 +647,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_clear_freeze(struct ipath_devdata *);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -627,6 +683,7 @@ int ipath_unordered_wc(void);
 
 void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
                          unsigned cnt);
+void ipath_cancel_sends(struct ipath_devdata *);
 
 int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
 void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
@@ -685,7 +742,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
                 * are 64bit */
 #define IPATH_32BITCOUNTERS 0x20000
                /* can miss port0 rx interrupts */
-#define IPATH_POLL_RX_INTR  0x40000
 #define IPATH_DISABLED      0x80000 /* administratively disabled */
                /* Use GPIO interrupts for new counters */
 #define IPATH_GPIO_ERRINTRS 0x100000
@@ -704,6 +760,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_PORT_WAITING_PIO   3
                /* master has not finished initializing */
 #define IPATH_PORT_MASTER_UNINIT 4
+               /* waiting for an urgent packet to arrive */
+#define IPATH_PORT_WAITING_URG 5
+               /* waiting for a header overflow */
+#define IPATH_PORT_WAITING_OVERFLOW 6
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
@@ -713,9 +773,20 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
 void ipath_init_iba6120_funcs(struct ipath_devdata *);
 void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
+int ipath_update_eeprom_log(struct ipath_devdata *dd);
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
 void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
 
+/*
+ * Set LED override, only the two LSBs have "public" meaning, but
+ * any non-zero value substitutes them for the Link and LinkTrain
+ * LED states.
+ */
+#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
+#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
+
 /*
  * number of words used for protocol header if not set by ipath_userinit();
  */
index dd487c1..85a4aef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 05a1d2b..82616b7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 3854a4e..415709c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 25908b0..d61c030 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -103,7 +103,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
        /* This is already in network order */
        nip->sys_guid = to_idev(ibdev)->sys_image_guid;
        nip->node_guid = dd->ipath_guid;
-       nip->port_guid = nip->sys_guid;
+       nip->port_guid = dd->ipath_guid;
        nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
        nip->device_id = cpu_to_be16(dd->ipath_deviceid);
        majrev = dd->ipath_majrev;
@@ -292,7 +292,12 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
        /* pip->vl_arb_high_cap; // only one VL */
        /* pip->vl_arb_low_cap; // only one VL */
        /* InitTypeReply = 0 */
-       pip->inittypereply_mtucap = IB_MTU_4096;
+       /*
+        * Note: the chips support a maximum MTU of 4096, but the driver
+        * hasn't implemented this feature yet, so set the maximum value
+        * to 2048.
+        */
+       pip->inittypereply_mtucap = IB_MTU_2048;
        // HCAs ignore VLStallCount and HOQLife
        /* pip->vlstallcnt_hoqlife; */
        pip->operationalvl_pei_peo_fpi_fpo = 0x10;      /* OVLs = 1 */
index 937bc33..fa830e2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
index bdeef8d..e442470 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index bfef08e..1324b35 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -336,7 +336,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
        qp->qkey = 0;
        qp->qp_access_flags = 0;
        qp->s_busy = 0;
-       qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
+       qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
        qp->s_hdrwords = 0;
        qp->s_psn = 0;
        qp->r_psn = 0;
@@ -507,16 +507,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                    attr->port_num > ibqp->device->phys_port_cnt)
                        goto inval;
 
+       /*
+        * Note: the chips support a maximum MTU of 4096, but the driver
+        * hasn't implemented this feature yet, so don't allow Path MTU
+        * values greater than 2048.
+        */
        if (attr_mask & IB_QP_PATH_MTU)
-               if (attr->path_mtu > IB_MTU_4096)
-                       goto inval;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               if (attr->max_dest_rd_atomic > 1)
-                       goto inval;
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-               if (attr->max_rd_atomic > 1)
+               if (attr->path_mtu > IB_MTU_2048)
                        goto inval;
 
        if (attr_mask & IB_QP_PATH_MIG_STATE)
index 1915771..46744ea 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                        if (len > pmtu) {
                                len = pmtu;
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-                       } else
+                       } else {
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+                               e->sent = 1;
+                       }
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_rdma_psn = e->psn;
@@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                                cpu_to_be32(e->atomic_data);
                        hwords += sizeof(ohdr->u.at) / sizeof(u32);
                        bth2 = e->psn;
+                       e->sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                break;
@@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+                       qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -188,7 +192,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
        }
        qp->s_hdrwords = hwords;
        qp->s_cur_size = len;
-       *bth0p = bth0;
+       *bth0p = bth0 | (1 << 22); /* Set M bit */
        *bth2p = bth2;
        return 1;
 
@@ -240,7 +244,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 
        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
-       bth0 = 0;
+       bth0 = 1 << 22; /* Set M bit */
 
        /* Send a request. */
        wqe = get_swqe_ptr(qp, qp->s_cur);
@@ -604,7 +608,7 @@ static void send_rc_ack(struct ipath_qp *qp)
        }
        /* read pkey_index w/o lock (its atomic) */
        bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
-               OP(ACKNOWLEDGE) << 24;
+               (OP(ACKNOWLEDGE) << 24) | (1 << 22);
        if (qp->r_nak_state)
                ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
                                            (qp->r_nak_state <<
@@ -806,13 +810,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
  * Called at interrupt level with the QP s_lock held and interrupts disabled.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
+static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
+                    u64 val)
 {
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ib_wc wc;
        struct ipath_swqe *wqe;
        int ret = 0;
        u32 ack_psn;
+       int diff;
 
        /*
         * Remove the QP from the timeout queue (or RNR timeout queue).
@@ -840,7 +846,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
         * The MSN might be for a later WQE than the PSN indicates so
         * only complete WQEs that the PSN finishes.
         */
-       while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
+       while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+               /*
+                * RDMA_READ_RESPONSE_ONLY is a special case since
+                * we want to generate completion events for everything
+                * before the RDMA read, copy the data, then generate
+                * the completion for the read.
+                */
+               if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+                   opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+                   diff == 0) {
+                       ret = 1;
+                       goto bail;
+               }
                /*
                 * If this request is a RDMA read or atomic, and the ACK is
                 * for a later operation, this ACK NAKs the RDMA read or
@@ -851,12 +869,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                 * is sent but before the response is received.
                 */
                if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-                    (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
-                     ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+                    (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
                    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
                      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-                    (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
-                     ipath_cmp24(wqe->psn, psn) != 0))) {
+                    (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
                        /*
                         * The last valid PSN seen is the previous
                         * request's.
@@ -870,6 +886,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                         */
                        goto bail;
                }
+               if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                   wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+                       *(u64 *) wqe->sg_list[0].vaddr = val;
                if (qp->s_num_rd_atomic &&
                    (wqe->wr.opcode == IB_WR_RDMA_READ ||
                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -1079,6 +1098,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
        int diff;
        u32 pad;
        u32 aeth;
+       u64 val;
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1118,8 +1138,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                        data += sizeof(__be32);
                }
                if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-                       u64 val;
-
                        if (!header_in_data) {
                                __be32 *p = ohdr->u.at.atomic_ack_eth;
 
@@ -1127,12 +1145,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                                        be32_to_cpu(p[1]);
                        } else
                                val = be64_to_cpu(((__be64 *) data)[0]);
-                       *(u64 *) wqe->sg_list[0].vaddr = val;
-               }
-               if (!do_rc_ack(qp, aeth, psn, opcode) ||
+               } else
+                       val = 0;
+               if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
                    opcode != OP(RDMA_READ_RESPONSE_FIRST))
                        goto ack_done;
                hdrsize += 4;
+               wqe = get_swqe_ptr(qp, qp->s_last);
                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
                        goto ack_op_err;
                /*
@@ -1176,13 +1195,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                goto bail;
 
        case OP(RDMA_READ_RESPONSE_ONLY):
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+               if (!header_in_data)
+                       aeth = be32_to_cpu(ohdr->u.aeth);
+               else
+                       aeth = be32_to_cpu(((__be32 *) data)[0]);
+               if (!do_rc_ack(qp, aeth, psn, opcode, 0))
                        goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
                /* Get the number of bytes the message was padded by. */
                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
                /*
@@ -1197,6 +1215,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                 * have to be careful to copy the data to the right
                 * location.
                 */
+               wqe = get_swqe_ptr(qp, qp->s_last);
                qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
                                                  wqe, psn, pmtu);
                goto read_last;
@@ -1230,7 +1249,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                        data += sizeof(__be32);
                }
                ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-               (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
+               (void) do_rc_ack(qp, aeth, psn,
+                                OP(RDMA_READ_RESPONSE_LAST), 0);
                goto ack_done;
        }
 
@@ -1344,8 +1364,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                        e = NULL;
                        break;
                }
-               if (ipath_cmp24(psn, e->psn) >= 0)
+               if (ipath_cmp24(psn, e->psn) >= 0) {
+                       if (prev == qp->s_tail_ack_queue)
+                               old_req = 0;
                        break;
+               }
        }
        switch (opcode) {
        case OP(RDMA_READ_REQUEST): {
@@ -1460,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
        spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+       unsigned long flags;
+       unsigned next;  /* ack queue index that follows n */
+
+       next = n + 1;
+       if (next > IPATH_MAX_RDMA_ATOMIC)
+               next = 0;       /* wrap back to index 0 */
+       spin_lock_irqsave(&qp->s_lock, flags);
+       if (n == qp->s_tail_ack_queue) {        /* n is still the tail */
+               qp->s_tail_ack_queue = next;    /* move the tail past n */
+               qp->s_ack_state = OP(ACKNOWLEDGE);
+       }
+       spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
 /**
  * ipath_rc_rcv - process an incoming RC packet
  * @dev: the device this packet came in on
@@ -1672,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        case OP(RDMA_WRITE_FIRST):
        case OP(RDMA_WRITE_ONLY):
        case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+               if (unlikely(!(qp->qp_access_flags &
+                              IB_ACCESS_REMOTE_WRITE)))
+                       goto nack_inv;
                /* consume RWQE */
                /* RETH comes after BTH */
                if (!header_in_data)
@@ -1701,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                        qp->r_sge.sge.length = 0;
                        qp->r_sge.sge.sge_length = 0;
                }
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE)))
-                       goto nack_acc;
                if (opcode == OP(RDMA_WRITE_FIRST))
                        goto send_middle;
                else if (opcode == OP(RDMA_WRITE_ONLY))
@@ -1717,13 +1756,17 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                u32 len;
                u8 next;
 
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-                       goto nack_acc;
+               if (unlikely(!(qp->qp_access_flags &
+                              IB_ACCESS_REMOTE_READ)))
+                       goto nack_inv;
                next = qp->r_head_ack_queue + 1;
                if (next > IPATH_MAX_RDMA_ATOMIC)
                        next = 0;
-               if (unlikely(next == qp->s_tail_ack_queue))
-                       goto nack_inv;
+               if (unlikely(next == qp->s_tail_ack_queue)) {
+                       if (!qp->s_ack_queue[next].sent)
+                               goto nack_inv;
+                       ipath_update_ack_queue(qp, next);
+               }
                e = &qp->s_ack_queue[qp->r_head_ack_queue];
                /* RETH comes after BTH */
                if (!header_in_data)
@@ -1758,6 +1801,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                        e->rdma_sge.sge.sge_length = 0;
                }
                e->opcode = opcode;
+               e->sent = 0;
                e->psn = psn;
                /*
                 * We need to increment the MSN here instead of when we
@@ -1789,12 +1833,15 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
                if (unlikely(!(qp->qp_access_flags &
                               IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_acc;
+                       goto nack_inv;
                next = qp->r_head_ack_queue + 1;
                if (next > IPATH_MAX_RDMA_ATOMIC)
                        next = 0;
-               if (unlikely(next == qp->s_tail_ack_queue))
-                       goto nack_inv;
+               if (unlikely(next == qp->s_tail_ack_queue)) {
+                       if (!qp->s_ack_queue[next].sent)
+                               goto nack_inv;
+                       ipath_update_ack_queue(qp, next);
+               }
                if (!header_in_data)
                        ateth = &ohdr->u.atomic_eth;
                else
@@ -1819,6 +1866,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                                      be64_to_cpu(ateth->compare_data),
                                      sdata);
                e->opcode = opcode;
+               e->sent = 0;
                e->psn = psn & IPATH_PSN_MASK;
                qp->r_msn++;
                qp->r_psn++;
index c182bcd..708eba3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index d9c2a9b..8525674 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -194,6 +194,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
                        ret = 0;
                        goto bail;
                }
+               /* Make sure entry is read after head index is read. */
+               smp_rmb();
                wqe = get_rwqe_ptr(rq, tail);
                if (++tail >= rq->size)
                        tail = 0;
@@ -267,7 +269,7 @@ again:
        spin_lock_irqsave(&sqp->s_lock, flags);
 
        if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
-           qp->s_rnr_timeout) {
+           sqp->s_rnr_timeout) {
                spin_unlock_irqrestore(&sqp->s_lock, flags);
                goto done;
        }
@@ -319,12 +321,22 @@ again:
                break;
 
        case IB_WR_RDMA_WRITE_WITH_IMM:
+               if (unlikely(!(qp->qp_access_flags &
+                              IB_ACCESS_REMOTE_WRITE))) {
+                       wc.status = IB_WC_REM_INV_REQ_ERR;
+                       goto err;
+               }
                wc.wc_flags = IB_WC_WITH_IMM;
                wc.imm_data = wqe->wr.imm_data;
                if (!ipath_get_rwqe(qp, 1))
                        goto rnr_nak;
                /* FALLTHROUGH */
        case IB_WR_RDMA_WRITE:
+               if (unlikely(!(qp->qp_access_flags &
+                              IB_ACCESS_REMOTE_WRITE))) {
+                       wc.status = IB_WC_REM_INV_REQ_ERR;
+                       goto err;
+               }
                if (wqe->length == 0)
                        break;
                if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
@@ -354,8 +366,10 @@ again:
 
        case IB_WR_RDMA_READ:
                if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_READ)))
-                       goto acc_err;
+                              IB_ACCESS_REMOTE_READ))) {
+                       wc.status = IB_WC_REM_INV_REQ_ERR;
+                       goto err;
+               }
                if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
                                            wqe->wr.wr.rdma.remote_addr,
                                            wqe->wr.wr.rdma.rkey,
@@ -369,8 +383,10 @@ again:
        case IB_WR_ATOMIC_CMP_AND_SWP:
        case IB_WR_ATOMIC_FETCH_AND_ADD:
                if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_ATOMIC)))
-                       goto acc_err;
+                              IB_ACCESS_REMOTE_ATOMIC))) {
+                       wc.status = IB_WC_REM_INV_REQ_ERR;
+                       goto err;
+               }
                if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
                                            wqe->wr.wr.atomic.remote_addr,
                                            wqe->wr.wr.atomic.rkey,
@@ -396,6 +412,8 @@ again:
 
                if (len > sge->length)
                        len = sge->length;
+               if (len > sge->sge_length)
+                       len = sge->sge_length;
                BUG_ON(len == 0);
                ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
                sge->vaddr += len;
@@ -503,11 +521,9 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
         * could be called.  If we are still in the tasklet function,
         * tasklet_hi_schedule() will not call us until the next time
         * tasklet_hi_schedule() is called.
-        * We clear the tasklet flag now since we are committing to return
-        * from the tasklet function.
+        * We leave the busy flag set so that another post send doesn't
+        * try to put the same QP on the piowait list again.
         */
-       clear_bit(IPATH_S_BUSY, &qp->s_busy);
-       tasklet_unlock(&qp->s_task);
        want_buffer(dev->dd);
        dev->n_piowait++;
 }
index 03acae6..40c36ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -80,6 +80,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                wqe->num_sge = wr->num_sge;
                for (i = 0; i < wr->num_sge; i++)
                        wqe->sg_list[i] = wr->sg_list[i];
+               /* Make sure queue entry is written before the head index. */
+               smp_wmb();
                wq->head = next;
                spin_unlock_irqrestore(&srq->rq.lock, flags);
        }
index d8b5e4c..73ed17d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
        u64 val64;
        unsigned long t0, t1;
        u64 ret;
+       unsigned long flags;
 
        t0 = jiffies;
        /* If fast increment counters are only 32 bits, snapshot them,
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
        if (creg == dd->ipath_cregs->cr_wordsendcnt) {
                if (val != dd->ipath_lastsword) {
                        dd->ipath_sword += val - dd->ipath_lastsword;
+                       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+                       dd->ipath_traffic_wds += val - dd->ipath_lastsword;
+                       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
                        dd->ipath_lastsword = val;
                }
                val64 = dd->ipath_sword;
        } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
                if (val != dd->ipath_lastrword) {
                        dd->ipath_rword += val - dd->ipath_lastrword;
+                       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+                       dd->ipath_traffic_wds += val - dd->ipath_lastrword;
+                       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
                        dd->ipath_lastrword = val;
                }
                val64 = dd->ipath_rword;
@@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
        struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
        u32 val;
        static unsigned cnt;
+       unsigned long flags;
 
        /*
         * don't access the chip while running diags, or memory diags can
@@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque)
                /* but re-arm the timer, for diags case; won't hurt other */
                goto done;
 
+       /*
+        * We now try to maintain a "active timer", based on traffic
+        * exceeding a threshold, so we need to check the word-counts
+        * even if they are 64-bit.
+        */
+       ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+       ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+       if (dd->ipath_traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+               atomic_add(5, &dd->ipath_active_time); /* S/B #define */
+       dd->ipath_traffic_wds = 0;
+       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+
        if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
        }
index 4dc398d..16238cd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -596,6 +596,43 @@ bail:
        return ret;
 }
 
+/*
+ * sysfs store handler for the "led_override" device attribute.
+ *
+ * Parses @buf with ipath_parse_ushort().  When that returns a positive
+ * value the parsed u16 is handed to ipath_set_led_override(); otherwise
+ * an error is logged against the device.  In both cases the return value
+ * of ipath_parse_ushort() is passed straight back to the caller.
+ * @attr and @count are not used.
+ */
+static ssize_t store_led_override(struct device *dev,
+                         struct device_attribute *attr,
+                         const char *buf,
+                         size_t count)
+{
+       struct ipath_devdata *dd = dev_get_drvdata(dev);
+       int ret;
+       u16 val;
+
+       ret = ipath_parse_ushort(buf, &val);
+       if (ret > 0)
+               ipath_set_led_override(dd, val);
+       else
+               ipath_dev_err(dd, "attempt to set invalid LED override\n");
+       return ret;
+}
+
+/*
+ * sysfs show handler for the "logged_errors" device attribute.
+ *
+ * Calls ipath_update_eeprom_log() first and fails with -ENXIO if it
+ * returns non-zero.  Otherwise prints the IPATH_EEP_LOG_CNT entries of
+ * dd->ipath_eep_st_errs[] into @buf as decimal numbers separated by
+ * single spaces, with a newline after the last one, and returns the
+ * number of characters written.  @attr is not used.
+ */
+static ssize_t show_logged_errs(struct device *dev,
+                               struct device_attribute *attr,
+                               char *buf)
+{
+       struct ipath_devdata *dd = dev_get_drvdata(dev);
+       int idx, count;
+
+       /* force consistency with actual EEPROM */
+       if (ipath_update_eeprom_log(dd) != 0)
+               return -ENXIO;
+
+       count = 0;
+       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+               /* output is bounded by what is left of the PAGE_SIZE buffer */
+               count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+                       dd->ipath_eep_st_errs[idx],
+                       idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
+       }
+
+       return count;
+}
 
 static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
 static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -625,6 +662,8 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
 static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
+static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
 
 static struct attribute *dev_attributes[] = {
        &dev_attr_guid.attr,
@@ -641,6 +680,8 @@ static struct attribute *dev_attributes[] = {
        &dev_attr_unit.attr,
        &dev_attr_enabled.attr,
        &dev_attr_rx_pol_inv.attr,
+       &dev_attr_led_override.attr,
+       &dev_attr_logged_errors.attr,
        NULL
 };
 
index 1c2b03c..8380fbc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -58,7 +58,6 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
                wc->port_num = 0;
                ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
        }
-       wqe = get_swqe_ptr(qp, qp->s_last);
 }
 
 /**
@@ -87,7 +86,7 @@ int ipath_make_uc_req(struct ipath_qp *qp,
 
        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
-       bth0 = 0;
+       bth0 = 1 << 22; /* Set M bit */
 
        /* Get the next send request. */
        wqe = get_swqe_ptr(qp, qp->s_last);
@@ -97,8 +96,10 @@ int ipath_make_uc_req(struct ipath_qp *qp,
                 * Signal the completion of the last send
                 * (if there is one).
                 */
-               if (qp->s_last != qp->s_tail)
+               if (qp->s_last != qp->s_tail) {
                        complete_last_send(qp, wqe, &wc);
+                       wqe = get_swqe_ptr(qp, qp->s_last);
+               }
 
                /* Check if send work queue is empty. */
                if (qp->s_tail == qp->s_head)
index a518f7c..f9a3338 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -176,6 +176,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
                        dev->n_pkt_drops++;
                        goto bail_sge;
                }
+               /* Make sure entry is read after head index is read. */
+               smp_rmb();
                wqe = get_rwqe_ptr(rq, tail);
                if (++tail >= rq->size)
                        tail = 0;
@@ -231,6 +233,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 
                if (len > length)
                        len = length;
+               if (len > sge->sge_length)
+                       len = sge->sge_length;
                BUG_ON(len == 0);
                ipath_copy_sge(&rsge, sge->vaddr, len);
                sge->vaddr += len;
index 8536aeb..27034d3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index bb70845..65f7181 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
        while (length) {
                u32 len = sge->length;
 
-               BUG_ON(len == 0);
                if (len > length)
                        len = length;
+               if (len > sge->sge_length)
+                       len = sge->sge_length;
+               BUG_ON(len == 0);
                memcpy(sge->vaddr, data, len);
                sge->vaddr += len;
                sge->length -= len;
@@ -202,9 +204,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
        while (length) {
                u32 len = sge->length;
 
-               BUG_ON(len == 0);
                if (len > length)
                        len = length;
+               if (len > sge->sge_length)
+                       len = sge->sge_length;
+               BUG_ON(len == 0);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
@@ -323,6 +327,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                wqe->num_sge = wr->num_sge;
                for (i = 0; i < wr->num_sge; i++)
                        wqe->sg_list[i] = wr->sg_list[i];
+               /* Make sure queue entry is written before the head index. */
+               smp_wmb();
                wq->head = next;
                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
        }
@@ -948,6 +954,7 @@ int ipath_ib_piobufavail(struct ipath_ibdev *dev)
                qp = list_entry(dev->piowait.next, struct ipath_qp,
                                piowait);
                list_del_init(&qp->piowait);
+               clear_bit(IPATH_S_BUSY, &qp->s_busy);
                tasklet_hi_schedule(&qp->s_task);
        }
        spin_unlock_irqrestore(&dev->pending_lock, flags);
@@ -981,6 +988,8 @@ static int ipath_query_device(struct ib_device *ibdev,
        props->max_ah = ib_ipath_max_ahs;
        props->max_cqe = ib_ipath_max_cqes;
        props->max_mr = dev->lk_table.max;
+       props->max_fmr = dev->lk_table.max;
+       props->max_map_per_fmr = 32767;
        props->max_pd = ib_ipath_max_pds;
        props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
        props->max_qp_init_rd_atom = 255;
@@ -1051,7 +1060,12 @@ static int ipath_query_port(struct ib_device *ibdev,
        props->max_vl_num = 1;          /* VLCap = VL0 */
        props->init_type_reply = 0;
 
-       props->max_mtu = IB_MTU_4096;
+       /*
+        * Note: the chips support a maximum MTU of 4096, but the driver
+        * hasn't implemented this feature yet, so set the maximum value
+        * to 2048.
+        */
+       props->max_mtu = IB_MTU_2048;
        switch (dev->dd->ipath_ibmtu) {
        case 4096:
                mtu = IB_MTU_4096;
@@ -1361,13 +1375,6 @@ static void __verbs_timer(unsigned long arg)
 {
        struct ipath_devdata *dd = (struct ipath_devdata *) arg;
 
-       /*
-        * If port 0 receive packet interrupts are not available, or
-        * can be missed, poll the receive queue
-        */
-       if (dd->ipath_flags & IPATH_POLL_RX_INTR)
-               ipath_kreceive(dd);
-
        /* Handle verbs layer timeouts. */
        ipath_ib_timer(dd->verbs_dev);
 
index 088b837..f3d1f2c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -321,6 +321,7 @@ struct ipath_sge_state {
  */
 struct ipath_ack_entry {
        u8 opcode;
+       u8 sent;
        u32 psn;
        union {
                struct ipath_sge_state rdma_sge;
index dd691cf..9e5abf9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 0095bb7..1d7bd82 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
index 04696e6..3428acb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd)
         * of 2 address matching the length (which has to be a power of 2).
         * For rev1, that means the base address, for rev2, it will be just
         * the PIO buffers themselves.
+        * For chips with two sets of buffers, the calculations are
+        * somewhat more complicated; we need to sum, and the piobufbase
+        * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+        * The buffers are still packed, so a single range covers both.
         */
-       pioaddr = addr + dd->ipath_piobufbase;
-       piolen = (dd->ipath_piobcnt2k +
-                 dd->ipath_piobcnt4k) *
-               ALIGN(dd->ipath_piobcnt2k +
-                     dd->ipath_piobcnt4k, dd->ipath_palign);
+       if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
+               unsigned long pio2kbase, pio4kbase;
+               pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
+               pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
+               if (pio2kbase < pio4kbase) { /* all, for now */
+                       pioaddr = addr + pio2kbase;
+                       piolen = pio4kbase - pio2kbase +
+                               dd->ipath_piobcnt4k * dd->ipath_4kalign;
+               } else {
+                       pioaddr = addr + pio4kbase;
+                       piolen = pio2kbase - pio4kbase +
+                               dd->ipath_piobcnt2k * dd->ipath_palign;
+               }
+       } else {  /* single buffer size (2K, currently) */
+               pioaddr = addr + dd->ipath_piobufbase;
+               piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
+                       dd->ipath_piobcnt4k * dd->ipath_4kalign;
+       }
 
        for (bits = 0; !(piolen & (1ULL << bits)); bits++)
                /* do nothing */ ;
index b8912cd..4175a4b 100644 (file)
@@ -1,6 +1,5 @@
 config MLX4_INFINIBAND
        tristate "Mellanox ConnectX HCA support"
-       depends on INFINIBAND
        select MLX4_CORE
        ---help---
          This driver provides low-level InfiniBand support for
index c591616..dde8fe9 100644 (file)
@@ -169,7 +169,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
        props->phys_state       = out_mad->data[33] >> 4;
        props->port_cap_flags   = be32_to_cpup((__be32 *) (out_mad->data + 20));
        props->gid_tbl_len      = to_mdev(ibdev)->dev->caps.gid_table_len[port];
-       props->max_msg_sz       = 0x80000000;
+       props->max_msg_sz       = to_mdev(ibdev)->dev->caps.max_msg_sz;
        props->pkey_tbl_len     = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
        props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
@@ -523,11 +523,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
+               (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 
        ibdev->ib_dev.query_device      = mlx4_ib_query_device;
@@ -546,10 +548,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.destroy_ah        = mlx4_ib_destroy_ah;
        ibdev->ib_dev.create_srq        = mlx4_ib_create_srq;
        ibdev->ib_dev.modify_srq        = mlx4_ib_modify_srq;
+       ibdev->ib_dev.query_srq         = mlx4_ib_query_srq;
        ibdev->ib_dev.destroy_srq       = mlx4_ib_destroy_srq;
        ibdev->ib_dev.post_srq_recv     = mlx4_ib_post_srq_recv;
        ibdev->ib_dev.create_qp         = mlx4_ib_create_qp;
        ibdev->ib_dev.modify_qp         = mlx4_ib_modify_qp;
+       ibdev->ib_dev.query_qp          = mlx4_ib_query_qp;
        ibdev->ib_dev.destroy_qp        = mlx4_ib_destroy_qp;
        ibdev->ib_dev.post_send         = mlx4_ib_post_send;
        ibdev->ib_dev.post_recv         = mlx4_ib_post_recv;
index 24ccadd..705ff2f 100644 (file)
@@ -35,6 +35,7 @@
 
 #include <linux/compiler.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
@@ -255,6 +256,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                                  struct ib_udata *udata);
 int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mlx4_ib_destroy_srq(struct ib_srq *srq);
 void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
 int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -266,6 +268,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 int mlx4_ib_destroy_qp(struct ib_qp *qp);
 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+                    struct ib_qp_init_attr *qp_init_attr);
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
index 28a08bd..4004218 100644 (file)
@@ -1455,3 +1455,140 @@ out:
 
        return err;
 }
+
+/*
+ * Translate an mlx4 QP state into the corresponding IB verbs QP state.
+ * MLX4_QP_STATE_SQ_DRAINING and MLX4_QP_STATE_SQD both map to IB_QPS_SQD.
+ * Any state not listed yields -1.
+ */
+static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
+{
+       switch (mlx4_state) {
+       case MLX4_QP_STATE_RST:      return IB_QPS_RESET;
+       case MLX4_QP_STATE_INIT:     return IB_QPS_INIT;
+       case MLX4_QP_STATE_RTR:      return IB_QPS_RTR;
+       case MLX4_QP_STATE_RTS:      return IB_QPS_RTS;
+       case MLX4_QP_STATE_SQ_DRAINING:
+       case MLX4_QP_STATE_SQD:      return IB_QPS_SQD;
+       case MLX4_QP_STATE_SQER:     return IB_QPS_SQE;
+       case MLX4_QP_STATE_ERR:      return IB_QPS_ERR;
+       default:                     return -1;
+       }
+}
+
+/*
+ * Translate an mlx4 path-migration state value into the IB verbs
+ * migration state.  Any value other than the three handled here
+ * yields -1.
+ */
+static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
+{
+       switch (mlx4_mig_state) {
+       case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;
+       case MLX4_QP_PM_REARM:          return IB_MIG_REARM;
+       case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;
+       default: return -1;
+       }
+}
+
+/*
+ * Convert the mlx4 remote-access enable bits found in @mlx4_flags into
+ * an IB_ACCESS_* mask: RRE -> REMOTE_READ, RWE -> REMOTE_WRITE,
+ * RAE -> REMOTE_ATOMIC.  All other bits of @mlx4_flags are ignored.
+ */
+static int to_ib_qp_access_flags(int mlx4_flags)
+{
+       int ib_flags = 0;
+
+       if (mlx4_flags & MLX4_QP_BIT_RRE)
+               ib_flags |= IB_ACCESS_REMOTE_READ;
+       if (mlx4_flags & MLX4_QP_BIT_RWE)
+               ib_flags |= IB_ACCESS_REMOTE_WRITE;
+       if (mlx4_flags & MLX4_QP_BIT_RAE)
+               ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+       return ib_flags;
+}
+
+/*
+ * Fill an IB address-handle attribute structure from an mlx4 QP path.
+ *
+ * @dev:        device whose caps.num_ports bounds the valid port numbers
+ * @ib_ah_attr: destination; always zeroed first, then filled in
+ * @path:       path entry taken from the QP context
+ *
+ * If the port number decoded from @path is not valid for @dev, only
+ * port_num is set and every other field is left zero.  The GRH fields
+ * are filled in only when bit 7 of path->grh_mylmc is set.
+ */
+static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
+                               struct mlx4_qp_path *path)
+{
+       /*
+        * Clear the destination using the destination's own size.  Sizing
+        * the memset by *path (a different structure) would either leave
+        * part of *ib_ah_attr uninitialized or write past its end.
+        */
+       memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
+       /* bit 6 of sched_queue selects port 2, otherwise port 1 */
+       ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;
+
+       if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+               return;
+
+       ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
+       ib_ah_attr->sl            = (path->sched_queue >> 2) & 0xf;
+       ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
+       ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
+       ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+       if (ib_ah_attr->ah_flags) {
+               ib_ah_attr->grh.sgid_index = path->mgid_index;
+               ib_ah_attr->grh.hop_limit  = path->hop_limit;
+               /*
+                * tclass_flowlabel packs the traffic class in bits 20..27
+                * and the 20-bit flow label in bits 0..19; mask the flow
+                * label to 20 bits so that it does not pick up the low
+                * bits of the traffic class.
+                */
+               ib_ah_attr->grh.traffic_class =
+                       (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+               ib_ah_attr->grh.flow_label =
+                       be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
+               memcpy(ib_ah_attr->grh.dgid.raw,
+                       path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+       }
+}
+
+/*
+ * Query the current attributes of a QP.
+ *
+ * A QP whose software state is IB_QPS_RESET is reported as such without
+ * querying the device.  Otherwise the QP context is fetched with
+ * mlx4_qp_query() and decoded field by field into @qp_attr; any failure
+ * of that query is reported as -EINVAL.  @qp_attr_mask is not consulted.
+ *
+ * The capability fields (and @qp_init_attr->cap) are filled in only when
+ * the QP has no uobject attached -- presumably a kernel-owned QP; confirm
+ * against the callers.
+ *
+ * Returns 0 on success.
+ */
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+                    struct ib_qp_init_attr *qp_init_attr)
+{
+       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);
+       struct mlx4_qp_context context;
+       int mlx4_state;
+       int err;
+
+       if (qp->state == IB_QPS_RESET) {
+               qp_attr->qp_state = IB_QPS_RESET;
+               goto done;
+       }
+
+       err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
+       if (err)
+               return -EINVAL;
+
+       /* the QP state is taken from the top four bits of the flags word */
+       mlx4_state = be32_to_cpu(context.flags) >> 28;
+
+       qp_attr->qp_state            = to_ib_qp_state(mlx4_state);
+       /* path MTU is taken from the bits above the low five of mtu_msgmax */
+       qp_attr->path_mtu            = context.mtu_msgmax >> 5;
+       /* migration state is the two-bit field at bits 11..12 of flags */
+       qp_attr->path_mig_state      =
+               to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
+       qp_attr->qkey                = be32_to_cpu(context.qkey);
+       /* PSNs and the remote QP number are the low 24 bits of their words */
+       qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
+       qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;
+       qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;
+       qp_attr->qp_access_flags     =
+               to_ib_qp_access_flags(be32_to_cpu(context.params2));
+
+       /* primary/alternate path attributes are decoded for RC and UC only */
+       if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+               to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
+               to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
+               qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+               qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
+       }
+
+       qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+       /* bit 6 of sched_queue selects port 2, otherwise port 1 */
+       qp_attr->port_num   = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+
+       /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+       qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
+
+       /* bits 21..23 of params1/params2 are used as a power-of-two exponent */
+       qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
+
+       qp_attr->max_dest_rd_atomic =
+               1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
+       qp_attr->min_rnr_timer      =
+               (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
+       qp_attr->timeout            = context.pri_path.ackto >> 3;
+       qp_attr->retry_cnt          = (be32_to_cpu(context.params1) >> 16) & 0x7;
+       qp_attr->rnr_retry          = (be32_to_cpu(context.params1) >> 13) & 0x7;
+       qp_attr->alt_timeout        = context.alt_path.ackto >> 3;
+
+done:
+       /* reached both from the RESET shortcut and after a full decode */
+       qp_attr->cur_qp_state        = qp_attr->qp_state;
+       if (!ibqp->uobject) {
+               qp_attr->cap.max_send_wr     = qp->sq.wqe_cnt;
+               qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
+               qp_attr->cap.max_send_sge    = qp->sq.max_gs;
+               qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+               /* send WQE size minus the per-WQE and inline-segment overhead */
+               qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) -
+                       send_wqe_overhead(qp->ibqp.qp_type) -
+                       sizeof (struct mlx4_wqe_inline_seg);
+               qp_init_attr->cap            = qp_attr->cap;
+       }
+
+       return 0;
+}
+
index 12fac1c..408748f 100644 (file)
@@ -240,6 +240,24 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
        return 0;
 }
 
+/*
+ * Query the attributes of a shared receive queue.
+ *
+ * The limit watermark is obtained from mlx4_srq_query(); if that call
+ * fails its error code is returned unchanged.  max_wr is reported as
+ * srq->msrq.max - 1 and max_sge as srq->msrq.max_gs.
+ *
+ * Returns 0 on success.
+ */
+int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+       struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
+       struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+       int ret;
+       int limit_watermark;
+
+       ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
+       if (ret)
+               return ret;
+
+       /*
+        * NOTE(review): limit_watermark is an int, yet it is byte-swapped
+        * as a 16-bit big-endian value here; presumably mlx4_srq_query()
+        * stores the raw value from the SRQ context -- confirm.
+        */
+       srq_attr->srq_limit = be16_to_cpu(limit_watermark);
+       srq_attr->max_wr    = srq->msrq.max - 1;
+       srq_attr->max_sge   = srq->msrq.max_gs;
+
+       return 0;
+}
+
 int mlx4_ib_destroy_srq(struct ib_srq *srq)
 {
        struct mlx4_ib_dev *dev = to_mdev(srq->device);
index 9aa5a44..03efc07 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_MTHCA
        tristate "Mellanox HCA support"
-       depends on PCI && INFINIBAND
+       depends on PCI
        ---help---
          This is a low-level driver for Mellanox InfiniHost host
          channel adapters (HCAs), including the MT23108 PCI-X HCA
index f930e55..a763067 100644 (file)
@@ -255,7 +255,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
                        dma_list[i] = t;
                        pci_unmap_addr_set(&buf->page_list[i], mapping, t);
 
-                       memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+                       clear_page(buf->page_list[i].buf);
                }
        }
 
index 8ec9fa1..8592b26 100644 (file)
@@ -522,7 +522,7 @@ static int mthca_create_eq(struct mthca_dev *dev,
                dma_list[i] = t;
                pci_unmap_addr_set(&eq->page_list[i], mapping, t);
 
-               memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+               clear_page(eq->page_list[i].buf);
        }
 
        for (i = 0; i < eq->nent; ++i)
index af78ccc..1f76bad 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_IPOIB
        tristate "IP-over-InfiniBand"
-       depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
+       depends on NETDEVICES && INET && (IPV6 || IPV6=n)
        ---help---
          Support for the IP-over-InfiniBand protocol (IPoIB). This
          transports IP packets over InfiniBand so you can use your IB
index ea74d1e..08b4676 100644 (file)
@@ -281,7 +281,6 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
        rep.private_data_len = sizeof data;
        rep.flow_control = 0;
        rep.rnr_retry_count = req->rnr_retry_count;
-       rep.target_ack_delay = 20; /* FIXME */
        rep.srq = 1;
        rep.qp_num = qp->qp_num;
        rep.starting_psn = psn;
@@ -1148,7 +1147,6 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   cm.skb_task);
-       struct net_device *dev = priv->dev;
        struct sk_buff *skb;
 
        unsigned mtu = priv->mcast_mtu;
@@ -1162,7 +1160,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                else if (skb->protocol == htons(ETH_P_IPV6))
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
 #endif
                dev_kfree_skb_any(skb);
                spin_lock_irq(&priv->tx_lock);
index 8404f05..1094488 100644 (file)
@@ -196,6 +196,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                return;
        }
 
+       /*
+        * Drop packets that this interface sent, i.e. multicast packets
+        * that the HCA has replicated.
+        */
+       if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+               goto repost;
+
        /*
         * If we can't allocate a new RX buffer, dump
         * this packet and reuse the old buffer.
@@ -213,24 +220,18 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        skb_put(skb, wc->byte_len);
        skb_pull(skb, IB_GRH_BYTES);
 
-       if (wc->slid != priv->local_lid ||
-           wc->src_qp != priv->qp->qp_num) {
-               skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-               skb_reset_mac_header(skb);
-               skb_pull(skb, IPOIB_ENCAP_LEN);
+       skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+       skb_reset_mac_header(skb);
+       skb_pull(skb, IPOIB_ENCAP_LEN);
 
-               dev->last_rx = jiffies;
-               ++priv->stats.rx_packets;
-               priv->stats.rx_bytes += skb->len;
+       dev->last_rx = jiffies;
+       ++priv->stats.rx_packets;
+       priv->stats.rx_bytes += skb->len;
 
-               skb->dev = dev;
-               /* XXX get correct PACKET_ type here */
-               skb->pkt_type = PACKET_HOST;
-               netif_receive_skb(skb);
-       } else {
-               ipoib_dbg_data(priv, "dropping loopback packet\n");
-               dev_kfree_skb_any(skb);
-       }
+       skb->dev = dev;
+       /* XXX get correct PACKET_ type here */
+       skb->pkt_type = PACKET_HOST;
+       netif_receive_skb(skb);
 
 repost:
        if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
index aecbb90..fe604c8 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_ISER
        tristate "iSCSI Extensions for RDMA (iSER)"
-       depends on INFINIBAND && SCSI && INET
+       depends on SCSI && INET
        select SCSI_ISCSI_ATTRS
        ---help---
          Support for the iSCSI Extensions for RDMA (iSER) Protocol
index 8fe3be4..3432dce 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_SRP
        tristate "InfiniBand SCSI RDMA Protocol"
-       depends on INFINIBAND && SCSI
+       depends on SCSI
        ---help---
          Support for the SCSI RDMA Protocol over InfiniBand.  This
          allows you to access storage devices that speak SRP over
index 02e994b..4e5e1cb 100644 (file)
@@ -40,6 +40,7 @@
 #define BCM_VLAN 1
 #endif
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/checksum.h>
 #include <linux/workqueue.h>
 #include <linux/crc32.h>
index 8eddd23..eb508bf 100644 (file)
@@ -39,6 +39,6 @@
 
 /* Firmware version */
 #define FW_VERSION_MAJOR 4
-#define FW_VERSION_MINOR 1
+#define FW_VERSION_MINOR 3
 #define FW_VERSION_MICRO 0
 #endif                         /* __CHELSIO_VERSION_H */
index d2b0653..c45cbe4 100644 (file)
@@ -138,6 +138,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET         0x35
 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET         0x36
 #define QUERY_DEV_CAP_VL_PORT_OFFSET           0x37
+#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET                0x38
 #define QUERY_DEV_CAP_MAX_GID_OFFSET           0x3b
 #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET      0x3c
 #define QUERY_DEV_CAP_MAX_PKEY_OFFSET          0x3f
@@ -220,6 +221,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev_cap->local_ca_ack_delay = field & 0x1f;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
        dev_cap->num_ports = field & 0xf;
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
+       dev_cap->max_msg_sz = 1 << (field & 0x1f);
        MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
        dev_cap->stat_rate_support = stat_rate;
        MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
index 296254a..7e1dd9e 100644 (file)
@@ -60,6 +60,7 @@ struct mlx4_dev_cap {
        int max_rdma_global;
        int local_ca_ack_delay;
        int num_ports;
+       u32 max_msg_sz;
        int max_mtu[MLX4_MAX_PORTS + 1];
        int max_port_width[MLX4_MAX_PORTS + 1];
        int max_vl[MLX4_MAX_PORTS + 1];
index c3da2a2..a4f2e04 100644 (file)
@@ -154,6 +154,7 @@ static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev
        dev->caps.reserved_uars      = dev_cap->reserved_uars;
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.mtt_entry_sz       = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
+       dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
index 3d3b6d2..d9c91a7 100644 (file)
@@ -37,6 +37,7 @@
 #ifndef MLX4_H
 #define MLX4_H
 
+#include <linux/mutex.h>
 #include <linux/radix-tree.h>
 
 #include <linux/mlx4/device.h>
index 492cfaa..19b48c7 100644 (file)
@@ -277,3 +277,24 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
        mlx4_CONF_SPECIAL_QP(dev, 0);
        mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
 }
+
+/*
+ * Run the QUERY_QP command for @qp and copy the result into @context.
+ *
+ * Returns 0 on success, the PTR_ERR() value of a failed mailbox
+ * allocation, or the error from mlx4_cmd_box().  @context is written
+ * only when the command succeeds.
+ */
+int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
+                 struct mlx4_qp_context *context)
+{
+       struct mlx4_cmd_mailbox *mbox = mlx4_alloc_cmd_mailbox(dev);
+       int ret;
+
+       if (IS_ERR(mbox))
+               return PTR_ERR(mbox);
+
+       ret = mlx4_cmd_box(dev, 0, mbox->dma, qp->qpn, 0,
+                          MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
+       if (ret)
+               goto out;
+
+       /* The context is copied from 8 bytes into the mailbox buffer. */
+       memcpy(context, mbox->buf + 8, sizeof *context);
+
+out:
+       mlx4_free_cmd_mailbox(dev, mbox);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_query);
+
index 2134f83..b061c86 100644 (file)
@@ -102,6 +102,13 @@ static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
                        MLX4_CMD_TIME_CLASS_B);
 }
 
+/*
+ * Issue the QUERY_SRQ command for @srq_num, passing the DMA address of
+ * @mailbox to mlx4_cmd_box().  Returns whatever mlx4_cmd_box() returns.
+ */
+static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+                         int srq_num)
+{
+       int err;
+
+       err = mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0,
+                          MLX4_CMD_QUERY_SRQ, MLX4_CMD_TIME_CLASS_A);
+       return err;
+}
+
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
                   u64 db_rec, struct mlx4_srq *srq)
 {
@@ -205,6 +212,29 @@ int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark
 }
 EXPORT_SYMBOL_GPL(mlx4_srq_arm);
 
+/*
+ * Query @srq and store its limit watermark in *@limit_watermark.
+ *
+ * The field is copied out of the SRQ context as-is; no byte-order
+ * conversion is done here.
+ *
+ * Returns 0 on success, the PTR_ERR() value of a failed mailbox
+ * allocation, or the error from mlx4_QUERY_SRQ().  *@limit_watermark is
+ * written only on success.
+ */
+int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark)
+{
+       struct mlx4_cmd_mailbox *mbox = mlx4_alloc_cmd_mailbox(dev);
+       struct mlx4_srq_context *ctx;
+       int ret;
+
+       if (IS_ERR(mbox))
+               return PTR_ERR(mbox);
+
+       ret = mlx4_QUERY_SRQ(dev, mbox, srq->srqn);
+       if (!ret) {
+               /* On success the mailbox buffer is read as the SRQ context. */
+               ctx = mbox->buf;
+               *limit_watermark = ctx->limit_watermark;
+       }
+
+       mlx4_free_cmd_mailbox(dev, mbox);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_query);
+
 int __devinit mlx4_init_srq_table(struct mlx4_dev *dev)
 {
        struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
index 3523d25..0149097 100644 (file)
@@ -96,10 +96,12 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 #define HAVE_PCI_LEGACY
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
                                      struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off,
-                                 size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off,
-                                  size_t count);
+extern ssize_t pci_read_legacy_io(struct kobject *kobj,
+                                 struct bin_attribute *bin_attr,
+                                 char *buf, loff_t off, size_t count);
+extern ssize_t pci_write_legacy_io(struct kobject *kobj,
+                                  struct bin_attribute *bin_attr,
+                                  char *buf, loff_t off, size_t count);
 extern int pci_mmap_legacy_mem(struct kobject *kobj,
                               struct bin_attribute *attr,
                               struct vm_area_struct *vma);
index b372f59..cfb78fb 100644 (file)
@@ -172,6 +172,7 @@ struct mlx4_caps {
        int                     num_pds;
        int                     reserved_pds;
        int                     mtt_entry_sz;
+       u32                     max_msg_sz;
        u32                     page_size_cap;
        u32                     flags;
        u16                     stat_rate_support;
@@ -322,6 +323,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
                   u64 db_rec, struct mlx4_srq *srq);
 void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
 int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
+int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark);
 
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
index 10c57d2..3968b94 100644 (file)
@@ -282,6 +282,9 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                   struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
                   int sqd_event, struct mlx4_qp *qp);
 
+int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
+                 struct mlx4_qp_context *context);
+
 static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 {
        return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
index 5c07017..12243e8 100644 (file)
@@ -385,7 +385,6 @@ struct ib_cm_rep_param {
        u8              private_data_len;
        u8              responder_resources;
        u8              initiator_depth;
-       u8              target_ack_delay;
        u8              failover_accepted;
        u8              flow_control;
        u8              rnr_retry_count;
index 739fa4d..30712dd 100644 (file)
 #define IB_QP1_QKEY    0x80010000
 #define IB_QP_SET_QKEY 0x80000000
 
+#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
+#define IB_DEFAULT_PKEY_FULL   0xFFFF
+
 enum {
        IB_MGMT_MAD_HDR = 24,
        IB_MGMT_MAD_DATA = 232,