RDMA/nes: Fix hang issues for large cluster dynamic connections
authorFaisal Latif <faisal.latif@intel.com>
Mon, 27 Apr 2009 20:41:06 +0000 (13:41 -0700)
committerRoland Dreier <rolandd@cisco.com>
Mon, 27 Apr 2009 20:41:06 +0000 (13:41 -0700)
Running large cluster setup, we are hanging after many hours of
testing.  Fixing this required going over the code and making sure the
rexmit entry was properly removed based on the cm_node's state and
packet received.  Also when receiving a FIN packet, check seq# and
make sure there were no errors before calling handle_fin().

Following are the changes done in nes_cm.c:

* handle_ack_pkt() needs to return error value, so in case of error,
  handle_fin() is not called. Some cleanup done while going over the code.

* handle_rst_pkt(), handling of cm_node's NES_CM_STATE_LAST_ACK is missing.

* process_packet(), in case of FIN only packet is received, call
  check_seq() before processing.

* in handle_fin_pkt(), we are calling cleanup_retrans_entry() for all
  conditions, even if the packets need to be dropped.

Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/nes/nes_cm.c

index 4969c38..2c90b38 100644 (file)
@@ -1326,18 +1326,20 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
        nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
                "refcnt=%d\n", cm_node, cm_node->state,
                atomic_read(&cm_node->ref_count));
-       cm_node->tcp_cntxt.rcv_nxt++;
-       cleanup_retrans_entry(cm_node);
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_RCVD:
        case NES_CM_STATE_SYN_SENT:
        case NES_CM_STATE_ESTABLISHED:
        case NES_CM_STATE_MPAREQ_SENT:
        case NES_CM_STATE_MPAREJ_RCVD:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_LAST_ACK;
                send_fin(cm_node, NULL);
                break;
        case NES_CM_STATE_FIN_WAIT1:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSING;
                send_ack(cm_node, NULL);
                /* Wait for ACK as this is simultanous close..
@@ -1345,11 +1347,15 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
                * Just rm the node.. Done.. */
                break;
        case NES_CM_STATE_FIN_WAIT2:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_TIME_WAIT;
                send_ack(cm_node, NULL);
                schedule_nes_timer(cm_node, NULL,  NES_TIMER_TYPE_CLOSE, 1, 0);
                break;
        case NES_CM_STATE_TIME_WAIT:
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                break;
@@ -1385,7 +1391,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                passive_state = atomic_add_return(1, &cm_node->passive_state);
                if (passive_state ==  NES_SEND_RESET_EVENT)
                        create_event(cm_node, NES_CM_EVENT_RESET);
-               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                dev_kfree_skb_any(skb);
                break;
@@ -1399,17 +1404,16 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                active_open_err(cm_node, skb, reset);
                break;
        case NES_CM_STATE_CLOSED:
-               cleanup_retrans_entry(cm_node);
                drop_packet(skb);
                break;
+       case NES_CM_STATE_LAST_ACK:
+               cm_node->cm_id->rem_ref(cm_node->cm_id);
        case NES_CM_STATE_TIME_WAIT:
-               cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                drop_packet(skb);
                break;
        case NES_CM_STATE_FIN_WAIT1:
-               cleanup_retrans_entry(cm_node);
                nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
        default:
                drop_packet(skb);
@@ -1456,6 +1460,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
                                NES_PASSIVE_STATE_INDICATED);
                break;
        case NES_CM_STATE_MPAREQ_SENT:
+               cleanup_retrans_entry(cm_node);
                if (res_type == NES_MPA_REQUEST_REJECT) {
                        type = NES_CM_EVENT_MPA_REJECT;
                        cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
@@ -1653,49 +1658,39 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        }
 }
 
-static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        struct tcphdr *tcph)
 {
        int datasize = 0;
        u32 inc_sequence;
        u32 rem_seq_ack;
        u32 rem_seq;
-       int ret;
+       int ret = 0;
        int optionsize;
        optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
 
        if (check_seq(cm_node, tcph, skb))
-               return;
+               return -EINVAL;
 
        skb_pull(skb, tcph->doff << 2);
        inc_sequence = ntohl(tcph->seq);
        rem_seq = ntohl(tcph->seq);
        rem_seq_ack =  ntohl(tcph->ack_seq);
        datasize = skb->len;
-       cleanup_retrans_entry(cm_node);
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_RCVD:
                /* Passive OPEN */
+               cleanup_retrans_entry(cm_node);
                ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
                if (ret)
                        break;
                cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
-               if (cm_node->tcp_cntxt.rem_ack_num !=
-                   cm_node->tcp_cntxt.loc_seq_num) {
-                       nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
-                       cleanup_retrans_entry(cm_node);
-                       send_reset(cm_node, skb);
-                       return;
-               }
                cm_node->state = NES_CM_STATE_ESTABLISHED;
-               cleanup_retrans_entry(cm_node);
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else { /* rcvd ACK only */
+               } else  /* rcvd ACK only */
                        dev_kfree_skb_any(skb);
-                       cleanup_retrans_entry(cm_node);
-                }
                break;
        case NES_CM_STATE_ESTABLISHED:
                /* Passive OPEN */
@@ -1707,15 +1702,12 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                        drop_packet(skb);
                break;
        case NES_CM_STATE_MPAREQ_SENT:
-               cleanup_retrans_entry(cm_node);
                cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
                        handle_rcv_mpa(cm_node, skb);
-               } else { /* Could be just an ack pkt.. */
-                       cleanup_retrans_entry(cm_node);
+               } else  /* Could be just an ack pkt.. */
                        dev_kfree_skb_any(skb);
-               }
                break;
        case NES_CM_STATE_LISTENING:
        case NES_CM_STATE_CLOSED:
@@ -1723,11 +1715,10 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                send_reset(cm_node, skb);
                break;
        case NES_CM_STATE_LAST_ACK:
+       case NES_CM_STATE_CLOSING:
                cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_CLOSED;
                cm_node->cm_id->rem_ref(cm_node->cm_id);
-       case NES_CM_STATE_CLOSING:
-               cleanup_retrans_entry(cm_node);
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                drop_packet(skb);
                break;
@@ -1742,9 +1733,11 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        case NES_CM_STATE_MPAREQ_RCVD:
        case NES_CM_STATE_UNKNOWN:
        default:
+               cleanup_retrans_entry(cm_node);
                drop_packet(skb);
                break;
        }
+       return ret;
 }
 
 
@@ -1850,6 +1843,7 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
        enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
        struct tcphdr *tcph = tcp_hdr(skb);
        u32     fin_set = 0;
+       int ret = 0;
        skb_pull(skb, ip_hdr(skb)->ihl << 2);
 
        nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
@@ -1875,17 +1869,17 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
                handle_synack_pkt(cm_node, skb, tcph);
                break;
        case NES_PKT_TYPE_ACK:
-               handle_ack_pkt(cm_node, skb, tcph);
-               if (fin_set)
+               ret = handle_ack_pkt(cm_node, skb, tcph);
+               if (fin_set && !ret)
                        handle_fin_pkt(cm_node);
                break;
        case NES_PKT_TYPE_RST:
                handle_rst_pkt(cm_node, skb, tcph);
                break;
        default:
-               drop_packet(skb);
-               if (fin_set)
+               if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
                        handle_fin_pkt(cm_node);
+               drop_packet(skb);
                break;
        }
 }