IB/ipath: Limit # of packets sent without an ACK received
author Bryan O'Sullivan <bos@pathscale.com>
Thu, 28 Sep 2006 15:59:57 +0000 (08:59 -0700)
committer Roland Dreier <rolandd@cisco.com>
Thu, 28 Sep 2006 18:16:21 +0000 (11:16 -0700)
The sender requests an ACK every 1/2 MB to avoid retransmit timeouts that
were causing MVAPICH mod_bw to fail after a predictable number of sends.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h

index 224b0f4..ecfaca7 100644 (file)
@@ -342,6 +342,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
        qp->s_last = 0;
        qp->s_ssn = 1;
        qp->s_lsn = 0;
+       qp->s_wait_credit = 0;
        if (qp->r_rq.wq) {
                qp->r_rq.wq->head = 0;
                qp->r_rq.wq->tail = 0;
@@ -516,7 +517,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                qp->remote_qpn = attr->dest_qp_num;
 
        if (attr_mask & IB_QP_SQ_PSN) {
-               qp->s_next_psn = attr->sq_psn;
+               qp->s_psn = qp->s_next_psn = attr->sq_psn;
                qp->s_last_psn = qp->s_next_psn - 1;
        }
 
index a086540..52caa2e 100644 (file)
@@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp,
            qp->s_rnr_timeout)
                goto done;
 
+       /* Limit the number of packets sent without an ACK. */
+       if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
+               qp->s_wait_credit = 1;
+               dev->n_rc_stalls++;
+               spin_lock(&dev->pending_lock);
+               if (list_empty(&qp->timerwait))
+                       list_add_tail(&qp->timerwait,
+                                     &dev->pending[dev->pending_index]);
+               spin_unlock(&dev->pending_lock);
+               goto done;
+       }
+
        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
        bth0 = 0;
@@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
                        /* Check if send work queue is empty. */
                        if (qp->s_tail == qp->s_head)
                                goto done;
-                       qp->s_psn = wqe->psn = qp->s_next_psn;
+                       wqe->psn = qp->s_next_psn;
                        newreq = 1;
                }
                /*
@@ -393,12 +405,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
                ss = &qp->s_sge;
                len = qp->s_len;
                if (len > pmtu) {
-                       /*
-                        * Request an ACK every 1/2 MB to avoid retransmit
-                        * timeouts.
-                        */
-                       if (((wqe->length - len) % (512 * 1024)) == 0)
-                               bth2 |= 1 << 31;
                        len = pmtu;
                        break;
                }
@@ -435,12 +441,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
                ss = &qp->s_sge;
                len = qp->s_len;
                if (len > pmtu) {
-                       /*
-                        * Request an ACK every 1/2 MB to avoid retransmit
-                        * timeouts.
-                        */
-                       if (((wqe->length - len) % (512 * 1024)) == 0)
-                               bth2 |= 1 << 31;
                        len = pmtu;
                        break;
                }
@@ -498,6 +498,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
                 */
                goto done;
        }
+       if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+               bth2 |= 1 << 31;        /* Request ACK. */
        qp->s_len -= len;
        qp->s_hdrwords = hwords;
        qp->s_cur_sge = ss;
@@ -737,6 +739,15 @@ bail:
        return;
 }
 
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
+{      /* Store psn in qp->s_last_psn; if s_wait_credit is set, clear it and schedule qp->s_task. */
+       if (qp->s_wait_credit) {        /* set by ipath_make_rc_req when it hits the IPATH_PSN_CREDIT limit */
+               qp->s_wait_credit = 0;
+               tasklet_hi_schedule(&qp->s_task);       /* NOTE(review): presumably resumes sending - confirm */
+       }
+       qp->s_last_psn = psn;   /* callers pass the last valid PSN seen */
+}
+
 /**
  * do_rc_ack - process an incoming RC ACK
  * @qp: the QP the ACK came in on
@@ -805,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                         * The last valid PSN seen is the previous
                         * request's.
                         */
-                       qp->s_last_psn = wqe->psn - 1;
+                       update_last_psn(qp, wqe->psn - 1);
                        /* Retry this request. */
                        ipath_restart_rc(qp, wqe->psn, &wc);
                        /*
@@ -864,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                ipath_get_credit(qp, aeth);
                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
                qp->s_retry = qp->s_retry_cnt;
-               qp->s_last_psn = psn;
+               update_last_psn(qp, psn);
                ret = 1;
                goto bail;
 
@@ -883,7 +894,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                        goto bail;
 
                /* The last valid PSN is the previous PSN. */
-               qp->s_last_psn = psn - 1;
+               update_last_psn(qp, psn - 1);
 
                dev->n_rc_resends += (int)qp->s_psn - (int)psn;
 
@@ -898,7 +909,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
        case 3:         /* NAK */
                /* The last valid PSN seen is the previous request's. */
                if (qp->s_last != qp->s_tail)
-                       qp->s_last_psn = wqe->psn - 1;
+                       update_last_psn(qp, wqe->psn - 1);
                switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
                        IPATH_AETH_CREDIT_MASK) {
                case 0: /* PSN sequence error */
@@ -1071,7 +1082,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                 * since we don't want s_sge modified.
                 */
                qp->s_len -= pmtu;
-               qp->s_last_psn = psn;
+               update_last_psn(qp, psn);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ipath_copy_sge(&qp->s_sge, data, pmtu);
                goto bail;
index b8381c5..a4bf870 100644 (file)
@@ -1683,6 +1683,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
                      "RC OTH NAKs %d\n"
                      "RC timeouts %d\n"
                      "RC RDMA dup %d\n"
+                     "RC stalls   %d\n"
                      "piobuf wait %d\n"
                      "no piobuf   %d\n"
                      "PKT drops   %d\n"
@@ -1690,7 +1691,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
                      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
                      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
                      dev->n_other_naks, dev->n_timeouts,
-                     dev->n_rdma_dup_busy, dev->n_piowait,
+                     dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
                      dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
        for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
                const struct ipath_opcode_stats *si = &dev->opstats[i];
index 09bbb3f..3fffaa0 100644 (file)
@@ -370,6 +370,7 @@ struct ipath_qp {
        u8 s_rnr_retry_cnt;
        u8 s_retry;             /* requester retry counter */
        u8 s_rnr_retry;         /* requester RNR retry counter */
+       u8 s_wait_credit;       /* limit number of unacked packets sent */
        u8 s_pkey_index;        /* PKEY index to use */
        u8 timeout;             /* Timeout for this QP */
        enum ib_mtu path_mtu;
@@ -393,6 +394,8 @@ struct ipath_qp {
 #define IPATH_S_BUSY           0
 #define IPATH_S_SIGNAL_REQ_WR  1
 
+#define IPATH_PSN_CREDIT       2048
+
 /*
  * Since struct ipath_swqe is not a fixed size, we can't simply index into
  * struct ipath_qp.s_wq.  This function does the array index computation.
@@ -521,6 +524,7 @@ struct ipath_ibdev {
        u32 n_rnr_naks;
        u32 n_other_naks;
        u32 n_timeouts;
+       u32 n_rc_stalls;
        u32 n_pkt_drops;
        u32 n_vl15_dropped;
        u32 n_wqe_errs;