drivers/infiniband/hw/qib/qib_rc.c (pandora-kernel.git)
1 /*
2  * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include <linux/io.h>
35
36 #include "qib.h"
37
38 /* cut down ridiculously long IB macro names */
39 #define OP(x) IB_OPCODE_RC_##x
40
41 static void rc_timeout(unsigned long arg);
42
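/*
 * Reposition the send SGE state to the offset within the WQE that the
 * given PSN corresponds to (one pmtu of payload per PSN) and return
 * the number of bytes that remain to be (re)sent.
 */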
43 static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
44                        u32 psn, u32 pmtu)
45 {
46         u32 len;
47
48         len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
49         ss->sge = wqe->sg_list[0];
50         ss->sg_list = wqe->sg_list + 1;
51         ss->num_sge = wqe->wr.num_sge;
52         ss->total_len = wqe->length;
53         qib_skip_sge(ss, len, 0);
54         return wqe->length - len;
55 }
56
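/* Arm the retransmit timer; the caller must hold the QP s_lock. */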
57 static void start_timer(struct qib_qp *qp)
58 {
59         qp->s_flags |= QIB_S_TIMER;
60         qp->s_timer.function = rc_timeout;
61         /* 4.096 usec. * (1 << qp->timeout) */
62         qp->s_timer.expires = jiffies + qp->timeout_jiffies;
63         add_timer(&qp->s_timer);
64 }
65
66 /**
67  * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
68  * @dev: the device for this QP
69  * @qp: a pointer to the QP
70  * @ohdr: a pointer to the IB header being constructed
71  * @pmtu: the path MTU
72  *
73  * Return 1 if constructed; otherwise, return 0.
74  * Note that we are on the responder's side of the QP context.
75  * Note the QP s_lock must be held.
76  */
77 static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
78                            struct qib_other_headers *ohdr, u32 pmtu)
79 {
80         struct qib_ack_entry *e;
81         u32 hwords;
82         u32 len;
83         u32 bth0;
84         u32 bth2;
85
86         /* Don't send an ACK if we aren't supposed to. */
87         if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
88                 goto bail;
89
90         /* header size in 32-bit words LRH+BTH = (8+12)/4. */
91         hwords = 5;
92
93         switch (qp->s_ack_state) {
94         case OP(RDMA_READ_RESPONSE_LAST):
95         case OP(RDMA_READ_RESPONSE_ONLY):
96                 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
97                 if (e->rdma_sge.mr) {
98                         atomic_dec(&e->rdma_sge.mr->refcount);
99                         e->rdma_sge.mr = NULL;
100                 }
101                 /* FALLTHROUGH */
102         case OP(ATOMIC_ACKNOWLEDGE):
103                 /*
104                  * We can increment the tail pointer now that the last
105                  * response has been sent instead of only being
106                  * constructed.
107                  */
108                 if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
109                         qp->s_tail_ack_queue = 0;
110                 /* FALLTHROUGH */
111         case OP(SEND_ONLY):
112         case OP(ACKNOWLEDGE):
113                 /* Check for no next entry in the queue. */
114                 if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
115                         if (qp->s_flags & QIB_S_ACK_PENDING)
116                                 goto normal;
117                         goto bail;
118                 }
119
120                 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
121                 if (e->opcode == OP(RDMA_READ_REQUEST)) {
122                         /*
123                          * If an RDMA read response is being resent and
124                          * we haven't seen the duplicate request yet,
125                          * then stop sending the remaining responses
126                          * until the requester resends the request.
127                          */
128                         len = e->rdma_sge.sge_length;
129                         if (len && !e->rdma_sge.mr) {
130                                 qp->s_tail_ack_queue = qp->r_head_ack_queue;
131                                 goto bail;
132                         }
133                         /* Copy SGE state in case we need to resend */
134                         qp->s_rdma_mr = e->rdma_sge.mr;
135                         if (qp->s_rdma_mr)
136                                 atomic_inc(&qp->s_rdma_mr->refcount);
137                         qp->s_ack_rdma_sge.sge = e->rdma_sge;
138                         qp->s_ack_rdma_sge.num_sge = 1;
139                         qp->s_cur_sge = &qp->s_ack_rdma_sge;
140                         if (len > pmtu) {
141                                 len = pmtu;
142                                 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
143                         } else {
144                                 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
145                                 e->sent = 1;
146                         }
147                         ohdr->u.aeth = qib_compute_aeth(qp);
148                         hwords++;
149                         qp->s_ack_rdma_psn = e->psn;
150                         bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
151                 } else {
152                         /* COMPARE_SWAP or FETCH_ADD */
153                         qp->s_cur_sge = NULL;
154                         len = 0;
155                         qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
156                         ohdr->u.at.aeth = qib_compute_aeth(qp);
157                         ohdr->u.at.atomic_ack_eth[0] =
158                                 cpu_to_be32(e->atomic_data >> 32);
159                         ohdr->u.at.atomic_ack_eth[1] =
160                                 cpu_to_be32(e->atomic_data);
161                         hwords += sizeof(ohdr->u.at) / sizeof(u32);
162                         bth2 = e->psn & QIB_PSN_MASK;
163                         e->sent = 1;
164                 }
165                 bth0 = qp->s_ack_state << 24;
166                 break;
167
168         case OP(RDMA_READ_RESPONSE_FIRST):
169                 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
170                 /* FALLTHROUGH */
171         case OP(RDMA_READ_RESPONSE_MIDDLE):
172                 qp->s_cur_sge = &qp->s_ack_rdma_sge;
173                 qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
174                 if (qp->s_rdma_mr)
175                         atomic_inc(&qp->s_rdma_mr->refcount);
176                 len = qp->s_ack_rdma_sge.sge.sge_length;
177                 if (len > pmtu)
178                         len = pmtu;
179                 else {
180                         ohdr->u.aeth = qib_compute_aeth(qp);
181                         hwords++;
182                         qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
183                         e = &qp->s_ack_queue[qp->s_tail_ack_queue];
184                         e->sent = 1;
185                 }
186                 bth0 = qp->s_ack_state << 24;
187                 bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
188                 break;
189
190         default:
191 normal:
192                 /*
193                  * Send a regular ACK.
194                  * Set the s_ack_state so we wait until after sending
195                  * the ACK before setting s_ack_state to ACKNOWLEDGE
196                  * (see above).
197                  */
198                 qp->s_ack_state = OP(SEND_ONLY);
199                 qp->s_flags &= ~QIB_S_ACK_PENDING;
200                 qp->s_cur_sge = NULL;
201                 if (qp->s_nak_state)
202                         ohdr->u.aeth =
203                                 cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
204                                             (qp->s_nak_state <<
205                                              QIB_AETH_CREDIT_SHIFT));
206                 else
207                         ohdr->u.aeth = qib_compute_aeth(qp);
208                 hwords++;
209                 len = 0;
210                 bth0 = OP(ACKNOWLEDGE) << 24;
211                 bth2 = qp->s_ack_psn & QIB_PSN_MASK;
212         }
213         qp->s_rdma_ack_cnt++;
214         qp->s_hdrwords = hwords;
215         qp->s_cur_size = len;
216         qib_make_ruc_header(qp, ohdr, bth0, bth2);
217         return 1;
218
219 bail:
220         qp->s_ack_state = OP(ACKNOWLEDGE);
221         qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
222         return 0;
223 }
224
225 /**
226  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
227  * @qp: a pointer to the QP
228  *
229  * Return 1 if constructed; otherwise, return 0.
230  */
231 int qib_make_rc_req(struct qib_qp *qp)
232 {
233         struct qib_ibdev *dev = to_idev(qp->ibqp.device);
234         struct qib_other_headers *ohdr;
235         struct qib_sge_state *ss;
236         struct qib_swqe *wqe;
237         u32 hwords;
238         u32 len;
239         u32 bth0;
240         u32 bth2;
241         u32 pmtu = qp->pmtu;
242         char newreq;
243         unsigned long flags;
244         int ret = 0;
245         int delta;
246
247         ohdr = &qp->s_hdr.u.oth;
248         if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
249                 ohdr = &qp->s_hdr.u.l.oth;
250
251         /*
252          * The lock is needed to synchronize between the sending tasklet,
253          * the receive interrupt handler, and timeout resends.
254          */
255         spin_lock_irqsave(&qp->s_lock, flags);
256
257         /* Sending responses has higher priority than sending requests. */
258         if ((qp->s_flags & QIB_S_RESP_PENDING) &&
259             qib_make_rc_ack(dev, qp, ohdr, pmtu))
260                 goto done;
261
262         if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
263                 if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
264                         goto bail;
265                 /* We are in the error state, flush the work request. */
266                 if (qp->s_last == qp->s_head)
267                         goto bail;
268                 /* If DMAs are in progress, we can't flush immediately. */
269                 if (atomic_read(&qp->s_dma_busy)) {
270                         qp->s_flags |= QIB_S_WAIT_DMA;
271                         goto bail;
272                 }
273                 wqe = get_swqe_ptr(qp, qp->s_last);
274                 while (qp->s_last != qp->s_acked) {
275                         qib_send_complete(qp, wqe, IB_WC_SUCCESS);
276                         if (++qp->s_last >= qp->s_size)
277                                 qp->s_last = 0;
278                         wqe = get_swqe_ptr(qp, qp->s_last);
279                 }
280                 qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
281                 goto done;
282         }
283
284         if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
285                 goto bail;
286
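        /*
         * If we are about to (re)send a PSN that the send engine may still
         * be working on, either wait for those packets to finish or, if
         * none are outstanding, reset the in-flight tracking window.
         */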
287         if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
288                 if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
289                         qp->s_flags |= QIB_S_WAIT_PSN;
290                         goto bail;
291                 }
292                 qp->s_sending_psn = qp->s_psn;
293                 qp->s_sending_hpsn = qp->s_psn - 1;
294         }
295
296         /* header size in 32-bit words LRH+BTH = (8+12)/4. */
297         hwords = 5;
298         bth0 = 0;
299
300         /* Send a request. */
301         wqe = get_swqe_ptr(qp, qp->s_cur);
302         switch (qp->s_state) {
303         default:
304                 if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
305                         goto bail;
306                 /*
307                  * Resend an old request or start a new one.
308                  *
309                  * We keep track of the current SWQE so that
310                  * we don't reset the "furthest progress" state
311                  * if we need to back up.
312                  */
313                 newreq = 0;
314                 if (qp->s_cur == qp->s_tail) {
315                         /* Check if send work queue is empty. */
316                         if (qp->s_tail == qp->s_head)
317                                 goto bail;
318                         /*
319                          * If a fence is requested, wait for previous
320                          * RDMA read and atomic operations to finish.
321                          */
322                         if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
323                             qp->s_num_rd_atomic) {
324                                 qp->s_flags |= QIB_S_WAIT_FENCE;
325                                 goto bail;
326                         }
327                         wqe->psn = qp->s_next_psn;
328                         newreq = 1;
329                 }
330                 /*
331                  * Note that we have to be careful not to modify the
332                  * original work request since we may need to resend
333                  * it.
334                  */
335                 len = wqe->length;
336                 ss = &qp->s_sge;
337                 bth2 = qp->s_psn & QIB_PSN_MASK;
338                 switch (wqe->wr.opcode) {
339                 case IB_WR_SEND:
340                 case IB_WR_SEND_WITH_IMM:
341                         /* If no credit, return. */
342                         if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
343                             qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
344                                 qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
345                                 goto bail;
346                         }
347                         wqe->lpsn = wqe->psn;
348                         if (len > pmtu) {
349                                 wqe->lpsn += (len - 1) / pmtu;
350                                 qp->s_state = OP(SEND_FIRST);
351                                 len = pmtu;
352                                 break;
353                         }
354                         if (wqe->wr.opcode == IB_WR_SEND)
355                                 qp->s_state = OP(SEND_ONLY);
356                         else {
357                                 qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
358                                 /* Immediate data comes after the BTH */
359                                 ohdr->u.imm_data = wqe->wr.ex.imm_data;
360                                 hwords += 1;
361                         }
362                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
363                                 bth0 |= IB_BTH_SOLICITED;
364                         bth2 |= IB_BTH_REQ_ACK;
365                         if (++qp->s_cur == qp->s_size)
366                                 qp->s_cur = 0;
367                         break;
368
369                 case IB_WR_RDMA_WRITE:
370                         if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
371                                 qp->s_lsn++;
372                         /* FALLTHROUGH */
373                 case IB_WR_RDMA_WRITE_WITH_IMM:
374                         /* If no credit, return. */
375                         if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
376                             qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
377                                 qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
378                                 goto bail;
379                         }
380                         ohdr->u.rc.reth.vaddr =
381                                 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
382                         ohdr->u.rc.reth.rkey =
383                                 cpu_to_be32(wqe->wr.wr.rdma.rkey);
384                         ohdr->u.rc.reth.length = cpu_to_be32(len);
385                         hwords += sizeof(struct ib_reth) / sizeof(u32);
386                         wqe->lpsn = wqe->psn;
387                         if (len > pmtu) {
388                                 wqe->lpsn += (len - 1) / pmtu;
389                                 qp->s_state = OP(RDMA_WRITE_FIRST);
390                                 len = pmtu;
391                                 break;
392                         }
393                         if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
394                                 qp->s_state = OP(RDMA_WRITE_ONLY);
395                         else {
396                                 qp->s_state =
397                                         OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
398                                 /* Immediate data comes after RETH */
399                                 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
400                                 hwords += 1;
401                                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
402                                         bth0 |= IB_BTH_SOLICITED;
403                         }
404                         bth2 |= IB_BTH_REQ_ACK;
405                         if (++qp->s_cur == qp->s_size)
406                                 qp->s_cur = 0;
407                         break;
408
409                 case IB_WR_RDMA_READ:
410                         /*
411                          * Don't allow more operations to be started
412                          * than the QP limits allow.
413                          */
414                         if (newreq) {
415                                 if (qp->s_num_rd_atomic >=
416                                     qp->s_max_rd_atomic) {
417                                         qp->s_flags |= QIB_S_WAIT_RDMAR;
418                                         goto bail;
419                                 }
420                                 qp->s_num_rd_atomic++;
421                                 if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
422                                         qp->s_lsn++;
423                                 /*
424                                  * Adjust s_next_psn to count the
425                                  * expected number of responses.
426                                  */
427                                 if (len > pmtu)
428                                         qp->s_next_psn += (len - 1) / pmtu;
429                                 wqe->lpsn = qp->s_next_psn++;
430                         }
431                         ohdr->u.rc.reth.vaddr =
432                                 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
433                         ohdr->u.rc.reth.rkey =
434                                 cpu_to_be32(wqe->wr.wr.rdma.rkey);
435                         ohdr->u.rc.reth.length = cpu_to_be32(len);
436                         qp->s_state = OP(RDMA_READ_REQUEST);
437                         hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
438                         ss = NULL;
439                         len = 0;
440                         bth2 |= IB_BTH_REQ_ACK;
441                         if (++qp->s_cur == qp->s_size)
442                                 qp->s_cur = 0;
443                         break;
444
445                 case IB_WR_ATOMIC_CMP_AND_SWP:
446                 case IB_WR_ATOMIC_FETCH_AND_ADD:
447                         /*
448                          * Don't allow more operations to be started
449                          * than the QP limits allow.
450                          */
451                         if (newreq) {
452                                 if (qp->s_num_rd_atomic >=
453                                     qp->s_max_rd_atomic) {
454                                         qp->s_flags |= QIB_S_WAIT_RDMAR;
455                                         goto bail;
456                                 }
457                                 qp->s_num_rd_atomic++;
458                                 if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
459                                         qp->s_lsn++;
460                                 wqe->lpsn = wqe->psn;
461                         }
462                         if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
463                                 qp->s_state = OP(COMPARE_SWAP);
464                                 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
465                                         wqe->wr.wr.atomic.swap);
466                                 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
467                                         wqe->wr.wr.atomic.compare_add);
468                         } else {
469                                 qp->s_state = OP(FETCH_ADD);
470                                 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
471                                         wqe->wr.wr.atomic.compare_add);
472                                 ohdr->u.atomic_eth.compare_data = 0;
473                         }
474                         ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
475                                 wqe->wr.wr.atomic.remote_addr >> 32);
476                         ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
477                                 wqe->wr.wr.atomic.remote_addr);
478                         ohdr->u.atomic_eth.rkey = cpu_to_be32(
479                                 wqe->wr.wr.atomic.rkey);
480                         hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
481                         ss = NULL;
482                         len = 0;
483                         bth2 |= IB_BTH_REQ_ACK;
484                         if (++qp->s_cur == qp->s_size)
485                                 qp->s_cur = 0;
486                         break;
487
488                 default:
489                         goto bail;
490                 }
491                 qp->s_sge.sge = wqe->sg_list[0];
492                 qp->s_sge.sg_list = wqe->sg_list + 1;
493                 qp->s_sge.num_sge = wqe->wr.num_sge;
494                 qp->s_sge.total_len = wqe->length;
495                 qp->s_len = wqe->length;
496                 if (newreq) {
497                         qp->s_tail++;
498                         if (qp->s_tail >= qp->s_size)
499                                 qp->s_tail = 0;
500                 }
501                 if (wqe->wr.opcode == IB_WR_RDMA_READ)
502                         qp->s_psn = wqe->lpsn + 1;
503                 else {
504                         qp->s_psn++;
505                         if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
506                                 qp->s_next_psn = qp->s_psn;
507                 }
508                 break;
509
510         case OP(RDMA_READ_RESPONSE_FIRST):
511                 /*
512                  * qp->s_state is normally set to the opcode of the
513                  * last packet constructed for new requests and therefore
514                  * is never set to RDMA read response.
515                  * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
516                  * thread to indicate a SEND needs to be restarted from an
517                  * earlier PSN without interfering with the sending thread.
518                  * See qib_restart_rc().
519                  */
520                 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
521                 /* FALLTHROUGH */
522         case OP(SEND_FIRST):
523                 qp->s_state = OP(SEND_MIDDLE);
524                 /* FALLTHROUGH */
525         case OP(SEND_MIDDLE):
526                 bth2 = qp->s_psn++ & QIB_PSN_MASK;
527                 if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
528                         qp->s_next_psn = qp->s_psn;
529                 ss = &qp->s_sge;
530                 len = qp->s_len;
531                 if (len > pmtu) {
532                         len = pmtu;
533                         break;
534                 }
535                 if (wqe->wr.opcode == IB_WR_SEND)
536                         qp->s_state = OP(SEND_LAST);
537                 else {
538                         qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
539                         /* Immediate data comes after the BTH */
540                         ohdr->u.imm_data = wqe->wr.ex.imm_data;
541                         hwords += 1;
542                 }
543                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
544                         bth0 |= IB_BTH_SOLICITED;
545                 bth2 |= IB_BTH_REQ_ACK;
546                 qp->s_cur++;
547                 if (qp->s_cur >= qp->s_size)
548                         qp->s_cur = 0;
549                 break;
550
551         case OP(RDMA_READ_RESPONSE_LAST):
552                 /*
553                  * qp->s_state is normally set to the opcode of the
554                  * last packet constructed for new requests and therefore
555                  * is never set to RDMA read response.
556                  * RDMA_READ_RESPONSE_LAST is used by the ACK processing
557                  * thread to indicate an RDMA write needs to be restarted from
558                  * an earlier PSN without interfering with the sending thread.
559                  * See qib_restart_rc().
560                  */
561                 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
562                 /* FALLTHROUGH */
563         case OP(RDMA_WRITE_FIRST):
564                 qp->s_state = OP(RDMA_WRITE_MIDDLE);
565                 /* FALLTHROUGH */
566         case OP(RDMA_WRITE_MIDDLE):
567                 bth2 = qp->s_psn++ & QIB_PSN_MASK;
568                 if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
569                         qp->s_next_psn = qp->s_psn;
570                 ss = &qp->s_sge;
571                 len = qp->s_len;
572                 if (len > pmtu) {
573                         len = pmtu;
574                         break;
575                 }
576                 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
577                         qp->s_state = OP(RDMA_WRITE_LAST);
578                 else {
579                         qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
580                         /* Immediate data comes after the BTH */
581                         ohdr->u.imm_data = wqe->wr.ex.imm_data;
582                         hwords += 1;
583                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
584                                 bth0 |= IB_BTH_SOLICITED;
585                 }
586                 bth2 |= IB_BTH_REQ_ACK;
587                 qp->s_cur++;
588                 if (qp->s_cur >= qp->s_size)
589                         qp->s_cur = 0;
590                 break;
591
592         case OP(RDMA_READ_RESPONSE_MIDDLE):
593                 /*
594                  * qp->s_state is normally set to the opcode of the
595                  * last packet constructed for new requests and therefore
596                  * is never set to RDMA read response.
597                  * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
598                  * thread to indicate an RDMA read needs to be restarted from
599                  * an earlier PSN without interfering with the sending thread.
600                  * See qib_restart_rc().
601                  */
602                 len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
603                 ohdr->u.rc.reth.vaddr =
604                         cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
605                 ohdr->u.rc.reth.rkey =
606                         cpu_to_be32(wqe->wr.wr.rdma.rkey);
607                 ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
608                 qp->s_state = OP(RDMA_READ_REQUEST);
609                 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
610                 bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
611                 qp->s_psn = wqe->lpsn + 1;
612                 ss = NULL;
613                 len = 0;
614                 qp->s_cur++;
615                 if (qp->s_cur == qp->s_size)
616                         qp->s_cur = 0;
617                 break;
618         }
619         qp->s_sending_hpsn = bth2;
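        /*
         * The low 24 bits of bth2 hold the PSN; sign-extend the 24-bit
         * difference from the start of the request so the test below works
         * across a PSN wrap, and request an ACK every QIB_PSN_CREDIT
         * packets within a long request.
         */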
620         delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
621         if (delta && delta % QIB_PSN_CREDIT == 0)
622                 bth2 |= IB_BTH_REQ_ACK;
623         if (qp->s_flags & QIB_S_SEND_ONE) {
624                 qp->s_flags &= ~QIB_S_SEND_ONE;
625                 qp->s_flags |= QIB_S_WAIT_ACK;
626                 bth2 |= IB_BTH_REQ_ACK;
627         }
628         qp->s_len -= len;
629         qp->s_hdrwords = hwords;
630         qp->s_cur_sge = ss;
631         qp->s_cur_size = len;
632         qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
633 done:
634         ret = 1;
635         goto unlock;
636
637 bail:
638         qp->s_flags &= ~QIB_S_BUSY;
639 unlock:
640         spin_unlock_irqrestore(&qp->s_lock, flags);
641         return ret;
642 }
643
644 /**
645  * qib_send_rc_ack - Construct an ACK packet and send it
646  * @qp: a pointer to the QP
647  *
648  * This is called from qib_rc_rcv() and qib_kreceive().
649  * Note that RDMA reads and atomics are handled in the
650  * send side QP state and tasklet.
651  */
652 void qib_send_rc_ack(struct qib_qp *qp)
653 {
654         struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
655         struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
656         struct qib_pportdata *ppd = ppd_from_ibp(ibp);
657         u64 pbc;
658         u16 lrh0;
659         u32 bth0;
660         u32 hwords;
661         u32 pbufn;
662         u32 __iomem *piobuf;
663         struct qib_ib_header hdr;
664         struct qib_other_headers *ohdr;
665         u32 control;
666         unsigned long flags;
667
668         spin_lock_irqsave(&qp->s_lock, flags);
669
670         if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
671                 goto unlock;
672
673         /* Don't send ACK or NAK if an RDMA read or atomic is pending. */
674         if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
675                 goto queue_ack;
676
677         /* Construct the header with s_lock held so APM doesn't change it. */
678         ohdr = &hdr.u.oth;
679         lrh0 = QIB_LRH_BTH;
680         /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
681         hwords = 6;
682         if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
683                 hwords += qib_make_grh(ibp, &hdr.u.l.grh,
684                                        &qp->remote_ah_attr.grh, hwords, 0);
685                 ohdr = &hdr.u.l.oth;
686                 lrh0 = QIB_LRH_GRH;
687         }
688         /* read pkey_index w/o lock (it's atomic) */
689         bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
690         if (qp->s_mig_state == IB_MIG_MIGRATED)
691                 bth0 |= IB_BTH_MIG_REQ;
692         if (qp->r_nak_state)
693                 ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
694                                             (qp->r_nak_state <<
695                                              QIB_AETH_CREDIT_SHIFT));
696         else
697                 ohdr->u.aeth = qib_compute_aeth(qp);
698         lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
699                 qp->remote_ah_attr.sl << 4;
700         hdr.lrh[0] = cpu_to_be16(lrh0);
701         hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
702         hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
703         hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
704         ohdr->bth[0] = cpu_to_be32(bth0);
705         ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
706         ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
707
708         spin_unlock_irqrestore(&qp->s_lock, flags);
709
710         /* Don't try to send ACKs if the link isn't ACTIVE */
711         if (!(ppd->lflags & QIBL_LINKACTIVE))
712                 goto done;
713
714         control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
715                                        qp->s_srate, lrh0 >> 12);
716         /* length is + 1 for the control dword */
717         pbc = ((u64) control << 32) | (hwords + 1);
718
719         piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
720         if (!piobuf) {
721                 /*
722                  * We are out of PIO buffers at the moment.
723                  * Pass responsibility for sending the ACK to the
724                  * send tasklet so that when a PIO buffer becomes
725                  * available, the ACK is sent ahead of other outgoing
726                  * packets.
727                  */
728                 spin_lock_irqsave(&qp->s_lock, flags);
729                 goto queue_ack;
730         }
731
732         /*
733          * Write the pbc.
734          * We have to flush after the PBC for correctness
735          * on some CPUs, or the WC buffer can be written out of order.
736          */
737         writeq(pbc, piobuf);
738
739         if (dd->flags & QIB_PIO_FLUSH_WC) {
740                 u32 *hdrp = (u32 *) &hdr;
741
742                 qib_flush_wc();
743                 qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
744                 qib_flush_wc();
745                 __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
746         } else
747                 qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
748
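        /*
         * On chips flagged QIB_USE_SPCL_TRIG, a write to the special
         * trigger offset appears to tell the hardware that the PIO buffer
         * is complete and can be sent.
         */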
749         if (dd->flags & QIB_USE_SPCL_TRIG) {
750                 u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
751
752                 qib_flush_wc();
753                 __raw_writel(0xaebecede, piobuf + spcl_off);
754         }
755
756         qib_flush_wc();
757         qib_sendbuf_done(dd, pbufn);
758
759         ibp->n_unicast_xmit++;
760         goto done;
761
762 queue_ack:
763         if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
764                 ibp->n_rc_qacks++;
765                 qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
766                 qp->s_nak_state = qp->r_nak_state;
767                 qp->s_ack_psn = qp->r_ack_psn;
768
769                 /* Schedule the send tasklet. */
770                 qib_schedule_send(qp);
771         }
772 unlock:
773         spin_unlock_irqrestore(&qp->s_lock, flags);
774 done:
775         return;
776 }
777
778 /**
779  * reset_psn - reset the QP state to send starting from PSN
780  * @qp: the QP
781  * @psn: the packet sequence number to restart at
782  *
783  * This is called from qib_rc_rcv() to process an incoming RC ACK
784  * for the given QP.
785  * Called at interrupt level with the QP s_lock held.
786  */
787 static void reset_psn(struct qib_qp *qp, u32 psn)
788 {
789         u32 n = qp->s_acked;
790         struct qib_swqe *wqe = get_swqe_ptr(qp, n);
791         u32 opcode;
792
793         qp->s_cur = n;
794
795         /*
796          * If we are starting the request from the beginning,
797          * let the normal send code handle initialization.
798          */
799         if (qib_cmp24(psn, wqe->psn) <= 0) {
800                 qp->s_state = OP(SEND_LAST);
801                 goto done;
802         }
803
804         /* Find the work request opcode corresponding to the given PSN. */
805         opcode = wqe->wr.opcode;
806         for (;;) {
807                 int diff;
808
809                 if (++n == qp->s_size)
810                         n = 0;
811                 if (n == qp->s_tail)
812                         break;
813                 wqe = get_swqe_ptr(qp, n);
814                 diff = qib_cmp24(psn, wqe->psn);
815                 if (diff < 0)
816                         break;
817                 qp->s_cur = n;
818                 /*
819                  * If we are starting the request from the beginning,
820                  * let the normal send code handle initialization.
821                  */
822                 if (diff == 0) {
823                         qp->s_state = OP(SEND_LAST);
824                         goto done;
825                 }
826                 opcode = wqe->wr.opcode;
827         }
828
829         /*
830          * Set the state to restart in the middle of a request.
831          * Don't change the s_sge, s_cur_sge, or s_cur_size.
832          * See qib_make_rc_req().
833          */
834         switch (opcode) {
835         case IB_WR_SEND:
836         case IB_WR_SEND_WITH_IMM:
837                 qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
838                 break;
839
840         case IB_WR_RDMA_WRITE:
841         case IB_WR_RDMA_WRITE_WITH_IMM:
842                 qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
843                 break;
844
845         case IB_WR_RDMA_READ:
846                 qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
847                 break;
848
849         default:
850                 /*
851                  * This case shouldn't happen since there is only
852                  * one PSN per request.
853                  */
854                 qp->s_state = OP(SEND_LAST);
855         }
856 done:
857         qp->s_psn = psn;
858         /*
859          * Set QIB_S_WAIT_PSN as qib_rc_send_complete() may start the timer
860          * asynchronously before the send tasklet can get scheduled.
861          * Doing it in qib_make_rc_req() is too late.
862          */
863         if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
864             (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
865                 qp->s_flags |= QIB_S_WAIT_PSN;
866 }
867
868 /*
869  * Back up requester to resend the last un-ACKed request.
870  * The QP r_lock and s_lock should be held and interrupts disabled.
871  */
872 static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
873 {
874         struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
875         struct qib_ibport *ibp;
876
877         if (qp->s_retry == 0) {
878                 if (qp->s_mig_state == IB_MIG_ARMED) {
879                         qib_migrate_qp(qp);
880                         qp->s_retry = qp->s_retry_cnt;
881                 } else if (qp->s_last == qp->s_acked) {
882                         qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
883                         qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
884                         return;
885                 } else /* XXX need to handle delayed completion */
886                         return;
887         } else
888                 qp->s_retry--;
889
890         ibp = to_iport(qp->ibqp.device, qp->port_num);
891         if (wqe->wr.opcode == IB_WR_RDMA_READ)
892                 ibp->n_rc_resends++;
893         else
894                 ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
895
896         qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
897                          QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
898                          QIB_S_WAIT_ACK);
899         if (wait)
900                 qp->s_flags |= QIB_S_SEND_ONE;
901         reset_psn(qp, psn);
902 }
903
904 /*
905  * This is called from s_timer for missing responses.
906  */
907 static void rc_timeout(unsigned long arg)
908 {
909         struct qib_qp *qp = (struct qib_qp *)arg;
910         struct qib_ibport *ibp;
911         unsigned long flags;
912
913         spin_lock_irqsave(&qp->r_lock, flags);
914         spin_lock(&qp->s_lock);
915         if (qp->s_flags & QIB_S_TIMER) {
916                 ibp = to_iport(qp->ibqp.device, qp->port_num);
917                 ibp->n_rc_timeouts++;
918                 qp->s_flags &= ~QIB_S_TIMER;
919                 del_timer(&qp->s_timer);
920                 qib_restart_rc(qp, qp->s_last_psn + 1, 1);
921                 qib_schedule_send(qp);
922         }
923         spin_unlock(&qp->s_lock);
924         spin_unlock_irqrestore(&qp->r_lock, flags);
925 }
926
927 /*
928  * This is called from s_timer for RNR timeouts.
929  */
930 void qib_rc_rnr_retry(unsigned long arg)
931 {
932         struct qib_qp *qp = (struct qib_qp *)arg;
933         unsigned long flags;
934
935         spin_lock_irqsave(&qp->s_lock, flags);
936         if (qp->s_flags & QIB_S_WAIT_RNR) {
937                 qp->s_flags &= ~QIB_S_WAIT_RNR;
938                 del_timer(&qp->s_timer);
939                 qib_schedule_send(qp);
940         }
941         spin_unlock_irqrestore(&qp->s_lock, flags);
942 }
943
944 /*
945  * Set qp->s_sending_psn to the next PSN after the given one.
946  * This would be psn+1 except when RDMA reads are present.
947  */
948 static void reset_sending_psn(struct qib_qp *qp, u32 psn)
949 {
950         struct qib_swqe *wqe;
951         u32 n = qp->s_last;
952
953         /* Find the work request corresponding to the given PSN. */
954         for (;;) {
955                 wqe = get_swqe_ptr(qp, n);
956                 if (qib_cmp24(psn, wqe->lpsn) <= 0) {
957                         if (wqe->wr.opcode == IB_WR_RDMA_READ)
958                                 qp->s_sending_psn = wqe->lpsn + 1;
959                         else
960                                 qp->s_sending_psn = psn + 1;
961                         break;
962                 }
963                 if (++n == qp->s_size)
964                         n = 0;
965                 if (n == qp->s_tail)
966                         break;
967         }
968 }
969
970 /*
971  * This should be called with the QP s_lock held and interrupts disabled.
972  */
973 void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
974 {
975         struct qib_other_headers *ohdr;
976         struct qib_swqe *wqe;
977         struct ib_wc wc;
978         unsigned i;
979         u32 opcode;
980         u32 psn;
981
982         if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
983                 return;
984
985         /* Find out where the BTH is */
986         if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
987                 ohdr = &hdr->u.oth;
988         else
989                 ohdr = &hdr->u.l.oth;
990
991         opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
992         if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
993             opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
994                 WARN_ON(!qp->s_rdma_ack_cnt);
995                 qp->s_rdma_ack_cnt--;
996                 return;
997         }
998
999         psn = be32_to_cpu(ohdr->bth[2]);
1000         reset_sending_psn(qp, psn);
1001
1002         /*
1003          * Start timer after a packet requesting an ACK has been sent and
1004          * there are still requests that haven't been acked.
1005          */
1006         if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
1007             !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
1008             (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
1009                 start_timer(qp);
1010
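        /*
         * Retire WQEs that have already been ACKed (s_last trails s_acked)
         * once their last packet is no longer in the send engine.
         */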
1011         while (qp->s_last != qp->s_acked) {
1012                 wqe = get_swqe_ptr(qp, qp->s_last);
1013                 if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
1014                     qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
1015                         break;
1016                 for (i = 0; i < wqe->wr.num_sge; i++) {
1017                         struct qib_sge *sge = &wqe->sg_list[i];
1018
1019                         atomic_dec(&sge->mr->refcount);
1020                 }
1021                 /* Post a send completion queue entry if requested. */
1022                 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
1023                     (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
1024                         memset(&wc, 0, sizeof wc);
1025                         wc.wr_id = wqe->wr.wr_id;
1026                         wc.status = IB_WC_SUCCESS;
1027                         wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1028                         wc.byte_len = wqe->length;
1029                         wc.qp = &qp->ibqp;
1030                         qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
1031                 }
1032                 if (++qp->s_last >= qp->s_size)
1033                         qp->s_last = 0;
1034         }
1035         /*
1036          * If we were waiting for sends to complete before resending,
1037          * and they are now complete, restart sending.
1038          */
1039         if (qp->s_flags & QIB_S_WAIT_PSN &&
1040             qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1041                 qp->s_flags &= ~QIB_S_WAIT_PSN;
1042                 qp->s_sending_psn = qp->s_psn;
1043                 qp->s_sending_hpsn = qp->s_psn - 1;
1044                 qib_schedule_send(qp);
1045         }
1046 }
1047
1048 static inline void update_last_psn(struct qib_qp *qp, u32 psn)
1049 {
1050         qp->s_last_psn = psn;
1051 }
1052
1053 /*
1054  * Generate a SWQE completion.
1055  * This is similar to qib_send_complete but has to check to be sure
1056  * that the SGEs are not being referenced if the SWQE is being resent.
1057  */
1058 static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
1059                                          struct qib_swqe *wqe,
1060                                          struct qib_ibport *ibp)
1061 {
1062         struct ib_wc wc;
1063         unsigned i;
1064
1065         /*
1066          * Don't decrement refcount and don't generate a
1067          * completion if the SWQE is being resent until the send
1068          * is finished.
1069          */
1070         if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
1071             qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1072                 for (i = 0; i < wqe->wr.num_sge; i++) {
1073                         struct qib_sge *sge = &wqe->sg_list[i];
1074
1075                         atomic_dec(&sge->mr->refcount);
1076                 }
1077                 /* Post a send completion queue entry if requested. */
1078                 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
1079                     (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
1080                         memset(&wc, 0, sizeof wc);
1081                         wc.wr_id = wqe->wr.wr_id;
1082                         wc.status = IB_WC_SUCCESS;
1083                         wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1084                         wc.byte_len = wqe->length;
1085                         wc.qp = &qp->ibqp;
1086                         qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
1087                 }
1088                 if (++qp->s_last >= qp->s_size)
1089                         qp->s_last = 0;
1090         } else
1091                 ibp->n_rc_delayed_comp++;
1092
1093         qp->s_retry = qp->s_retry_cnt;
1094         update_last_psn(qp, wqe->lpsn);
1095
1096         /*
1097          * If we are completing a request which is in the process of
1098          * being resent, we can stop resending it since we know the
1099          * responder has already seen it.
1100          */
1101         if (qp->s_acked == qp->s_cur) {
1102                 if (++qp->s_cur >= qp->s_size)
1103                         qp->s_cur = 0;
1104                 qp->s_acked = qp->s_cur;
1105                 wqe = get_swqe_ptr(qp, qp->s_cur);
1106                 if (qp->s_acked != qp->s_tail) {
1107                         qp->s_state = OP(SEND_LAST);
1108                         qp->s_psn = wqe->psn;
1109                 }
1110         } else {
1111                 if (++qp->s_acked >= qp->s_size)
1112                         qp->s_acked = 0;
1113                 if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
1114                         qp->s_draining = 0;
1115                 wqe = get_swqe_ptr(qp, qp->s_acked);
1116         }
1117         return wqe;
1118 }
1119
1120 /**
1121  * do_rc_ack - process an incoming RC ACK
1122  * @qp: the QP the ACK came in on
1123  * @psn: the packet sequence number of the ACK
1124  * @opcode: the opcode of the packet containing the ACK
1125  *
1126  * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
1127  * for the given QP.
1128  * Called at interrupt level with the QP s_lock held.
1129  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
1130  */
1131 static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
1132                      u64 val, struct qib_ctxtdata *rcd)
1133 {
1134         struct qib_ibport *ibp;
1135         enum ib_wc_status status;
1136         struct qib_swqe *wqe;
1137         int ret = 0;
1138         u32 ack_psn;
1139         int diff;
1140
1141         /* Remove QP from retry timer */
1142         if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
1143                 qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
1144                 del_timer(&qp->s_timer);
1145         }
1146
1147         /*
1148          * Note that NAKs implicitly ACK outstanding SEND and RDMA write
1149          * requests and implicitly NAK RDMA read and atomic requests issued
1150          * before the NAK'ed request.  The MSN won't include the NAK'ed
1151          * request but will include any ACK'ed requests.
1152          */
1153         ack_psn = psn;
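        /*
         * For RNR NAKs and NAKs the PSN names the packet being NAK'ed,
         * so the last implicitly ACK'ed PSN is one earlier.
         */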
1154         if (aeth >> 29)
1155                 ack_psn--;
1156         wqe = get_swqe_ptr(qp, qp->s_acked);
1157         ibp = to_iport(qp->ibqp.device, qp->port_num);
1158
1159         /*
1160          * The MSN might be for a later WQE than the PSN indicates so
1161          * only complete WQEs that the PSN finishes.
1162          */
1163         while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
1164                 /*
1165                  * RDMA_READ_RESPONSE_ONLY is a special case since
1166                  * we want to generate completion events for everything
1167                  * before the RDMA read, copy the data, then generate
1168                  * the completion for the read.
1169                  */
1170                 if (wqe->wr.opcode == IB_WR_RDMA_READ &&
1171                     opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
1172                     diff == 0) {
1173                         ret = 1;
1174                         goto bail;
1175                 }
1176                 /*
1177                  * If this request is an RDMA read or atomic, and the ACK is
1178                  * for a later operation, this ACK NAKs the RDMA read or
1179                  * atomic.  In other words, only an RDMA_READ_LAST or ONLY
1180                  * can ACK an RDMA read and likewise for atomic ops.  Note
1181                  * that the NAK case can only happen if relaxed ordering is
1182                  * used and requests are sent after an RDMA read or atomic
1183                  * is sent but before the response is received.
1184                  */
1185                 if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
1186                      (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
1187                     ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1188                       wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
1189                      (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
1190                         /* Retry this request. */
1191                         if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
1192                                 qp->r_flags |= QIB_R_RDMAR_SEQ;
1193                                 qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1194                                 if (list_empty(&qp->rspwait)) {
1195                                         qp->r_flags |= QIB_R_RSP_SEND;
1196                                         atomic_inc(&qp->refcount);
1197                                         list_add_tail(&qp->rspwait,
1198                                                       &rcd->qp_wait_list);
1199                                 }
1200                         }
1201                         /*
1202                          * No need to process the ACK/NAK since we are
1203                          * restarting an earlier request.
1204                          */
1205                         goto bail;
1206                 }
1207                 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1208                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
1209                         u64 *vaddr = wqe->sg_list[0].vaddr;
1210                         *vaddr = val;
1211                 }
1212                 if (qp->s_num_rd_atomic &&
1213                     (wqe->wr.opcode == IB_WR_RDMA_READ ||
1214                      wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1215                      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
1216                         qp->s_num_rd_atomic--;
1217                         /* Restart sending task if fence is complete */
1218                         if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
1219                             !qp->s_num_rd_atomic) {
1220                                 qp->s_flags &= ~(QIB_S_WAIT_FENCE |
1221                                                  QIB_S_WAIT_ACK);
1222                                 qib_schedule_send(qp);
1223                         } else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
1224                                 qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
1225                                                  QIB_S_WAIT_ACK);
1226                                 qib_schedule_send(qp);
1227                         }
1228                 }
1229                 wqe = do_rc_completion(qp, wqe, ibp);
1230                 if (qp->s_acked == qp->s_tail)
1231                         break;
1232         }
1233
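        /*
         * The top three bits of the AETH distinguish an ACK (0), an RNR
         * NAK (1) and a NAK (3); the value 2 is reserved.
         */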
1234         switch (aeth >> 29) {
1235         case 0:         /* ACK */
1236                 ibp->n_rc_acks++;
1237                 if (qp->s_acked != qp->s_tail) {
1238                         /*
1239                          * We are expecting more ACKs so
1240                          * reset the retransmit timer.
1241                          */
1242                         start_timer(qp);
1243                         /*
1244                          * We can stop resending the earlier packets and
1245                          * continue with the next packet the receiver wants.
1246                          */
1247                         if (qib_cmp24(qp->s_psn, psn) <= 0)
1248                                 reset_psn(qp, psn + 1);
1249                 } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
1250                         qp->s_state = OP(SEND_LAST);
1251                         qp->s_psn = psn + 1;
1252                 }
1253                 if (qp->s_flags & QIB_S_WAIT_ACK) {
1254                         qp->s_flags &= ~QIB_S_WAIT_ACK;
1255                         qib_schedule_send(qp);
1256                 }
1257                 qib_get_credit(qp, aeth);
1258                 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1259                 qp->s_retry = qp->s_retry_cnt;
1260                 update_last_psn(qp, psn);
1261                 ret = 1;
1262                 goto bail;
1263
1264         case 1:         /* RNR NAK */
1265                 ibp->n_rnr_naks++;
1266                 if (qp->s_acked == qp->s_tail)
1267                         goto bail;
1268                 if (qp->s_flags & QIB_S_WAIT_RNR)
1269                         goto bail;
1270                 if (qp->s_rnr_retry == 0) {
1271                         status = IB_WC_RNR_RETRY_EXC_ERR;
1272                         goto class_b;
1273                 }
1274                 if (qp->s_rnr_retry_cnt < 7)
1275                         qp->s_rnr_retry--;
1276
1277                 /* The last valid PSN is the previous PSN. */
1278                 update_last_psn(qp, psn - 1);
1279
1280                 ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
1281
1282                 reset_psn(qp, psn);
1283
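                     /*
                      * Stop sending for the RNR delay encoded in the AETH;
                      * qib_rc_rnr_retry() restarts the request when the
                      * timer expires.
                      */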
1284                 qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
1285                 qp->s_flags |= QIB_S_WAIT_RNR;
1286                 qp->s_timer.function = qib_rc_rnr_retry;
1287                 qp->s_timer.expires = jiffies + usecs_to_jiffies(
1288                         ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
1289                                            QIB_AETH_CREDIT_MASK]);
1290                 add_timer(&qp->s_timer);
1291                 goto bail;
1292
1293         case 3:         /* NAK */
1294                 if (qp->s_acked == qp->s_tail)
1295                         goto bail;
1296                 /* The last valid PSN is the previous PSN. */
1297                 update_last_psn(qp, psn - 1);
1298                 switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
1299                         QIB_AETH_CREDIT_MASK) {
1300                 case 0: /* PSN sequence error */
1301                         ibp->n_seq_naks++;
1302                         /*
1303                          * Back up to the responder's expected PSN.
1304                          * Note that we might get a NAK in the middle of an
1305                          * RDMA READ response, which terminates the RDMA
1306                          * READ.
1307                          */
1308                         qib_restart_rc(qp, psn, 0);
1309                         qib_schedule_send(qp);
1310                         break;
1311
1312                 case 1: /* Invalid Request */
1313                         status = IB_WC_REM_INV_REQ_ERR;
1314                         ibp->n_other_naks++;
1315                         goto class_b;
1316
1317                 case 2: /* Remote Access Error */
1318                         status = IB_WC_REM_ACCESS_ERR;
1319                         ibp->n_other_naks++;
1320                         goto class_b;
1321
1322                 case 3: /* Remote Operation Error */
1323                         status = IB_WC_REM_OP_ERR;
1324                         ibp->n_other_naks++;
1325 class_b:
1326                         if (qp->s_last == qp->s_acked) {
1327                                 qib_send_complete(qp, wqe, status);
1328                                 qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1329                         }
1330                         break;
1331
1332                 default:
1333                         /* Ignore other reserved NAK error codes */
1334                         goto reserved;
1335                 }
1336                 qp->s_retry = qp->s_retry_cnt;
1337                 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1338                 goto bail;
1339
1340         default:                /* 2: reserved */
1341 reserved:
1342                 /* Ignore reserved NAK codes. */
1343                 goto bail;
1344         }
1345
1346 bail:
1347         return ret;
1348 }
1349
1350 /*
1351  * We have seen an out-of-sequence RDMA read middle or last packet.
1352  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
1353  */
1354 static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
1355                          struct qib_ctxtdata *rcd)
1356 {
1357         struct qib_swqe *wqe;
1358
1359         /* Remove QP from retry timer */
1360         if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
1361                 qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
1362                 del_timer(&qp->s_timer);
1363         }
1364
1365         wqe = get_swqe_ptr(qp, qp->s_acked);
1366
1367         while (qib_cmp24(psn, wqe->lpsn) > 0) {
1368                 if (wqe->wr.opcode == IB_WR_RDMA_READ ||
1369                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1370                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
1371                         break;
1372                 wqe = do_rc_completion(qp, wqe, ibp);
1373         }
1374
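             /*
              * Ignore responses other than the restarted read's expected
              * PSN, retransmit from the last ACKed PSN, and defer
              * rescheduling the send engine until receive processing is done.
              */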
1375         ibp->n_rdma_seq++;
1376         qp->r_flags |= QIB_R_RDMAR_SEQ;
1377         qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1378         if (list_empty(&qp->rspwait)) {
1379                 qp->r_flags |= QIB_R_RSP_SEND;
1380                 atomic_inc(&qp->refcount);
1381                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1382         }
1383 }
1384
1385 /**
1386  * qib_rc_rcv_resp - process an incoming RC response packet
1387  * @ibp: the port this packet came in on
1388  * @ohdr: the other headers for this packet
1389  * @data: the packet data
1390  * @tlen: the packet length
1391  * @qp: the QP for this packet
1392  * @opcode: the opcode for this packet
1393  * @psn: the packet sequence number for this packet
1394  * @hdrsize: the header length
1395  * @pmtu: the path MTU
      * @rcd: the context pointer
1396  *
1397  * This is called from qib_rc_rcv() to process an incoming RC response
1398  * packet for the given QP.
1399  * Called at interrupt level.
1400  */
1401 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
1402                             struct qib_other_headers *ohdr,
1403                             void *data, u32 tlen,
1404                             struct qib_qp *qp,
1405                             u32 opcode,
1406                             u32 psn, u32 hdrsize, u32 pmtu,
1407                             struct qib_ctxtdata *rcd)
1408 {
1409         struct qib_swqe *wqe;
1410         struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1411         enum ib_wc_status status;
1412         unsigned long flags;
1413         int diff;
1414         u32 pad;
1415         u32 aeth;
1416         u64 val;
1417
1418         if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
1419                 /*
1420                  * If the ACK'd PSN covers a packet still on the SDMA
1421                  * busy list, try to make progress to reclaim SDMA credits.
1422                  */
1423                 if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
1424                     (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
1425
1426                         /*
1427                          * If the send tasklet is not running, attempt to
1428                          * make progress on the SDMA queue.
1429                          */
1430                         if (!(qp->s_flags & QIB_S_BUSY)) {
1431                                 /* Acquire SDMA Lock */
1432                                 spin_lock_irqsave(&ppd->sdma_lock, flags);
1433                                 /* Invoke sdma make progress */
1434                                 qib_sdma_make_progress(ppd);
1435                                 /* Release SDMA Lock */
1436                                 spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1437                         }
1438                 }
1439         }
1440
1441         spin_lock_irqsave(&qp->s_lock, flags);
1442         if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
1443                 goto ack_done;
1444
1445         /* Ignore invalid responses. */
1446         if (qib_cmp24(psn, qp->s_next_psn) >= 0)
1447                 goto ack_done;
1448
1449         /* Ignore duplicate responses. */
1450         diff = qib_cmp24(psn, qp->s_last_psn);
1451         if (unlikely(diff <= 0)) {
1452                 /* Update credits for "ghost" ACKs */
1453                 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1454                         aeth = be32_to_cpu(ohdr->u.aeth);
1455                         if ((aeth >> 29) == 0)
1456                                 qib_get_credit(qp, aeth);
1457                 }
1458                 goto ack_done;
1459         }
1460
1461         /*
1462          * Skip everything other than the PSN we expect, if we are waiting
1463          * for a reply to a restarted RDMA read or atomic op.
1464          */
1465         if (qp->r_flags & QIB_R_RDMAR_SEQ) {
1466                 if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
1467                         goto ack_done;
1468                 qp->r_flags &= ~QIB_R_RDMAR_SEQ;
1469         }
1470
1471         if (unlikely(qp->s_acked == qp->s_tail))
1472                 goto ack_done;
1473         wqe = get_swqe_ptr(qp, qp->s_acked);
1474         status = IB_WC_SUCCESS;
1475
1476         switch (opcode) {
1477         case OP(ACKNOWLEDGE):
1478         case OP(ATOMIC_ACKNOWLEDGE):
1479         case OP(RDMA_READ_RESPONSE_FIRST):
1480                 aeth = be32_to_cpu(ohdr->u.aeth);
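                     /*
                      * An atomic ACK carries the 64-bit original value in
                      * the ATOMIC ACK ETH that follows the AETH.
                      */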
1481                 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1482                         __be32 *p = ohdr->u.at.atomic_ack_eth;
1483
1484                         val = ((u64) be32_to_cpu(p[0]) << 32) |
1485                                 be32_to_cpu(p[1]);
1486                 } else
1487                         val = 0;
1488                 if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
1489                     opcode != OP(RDMA_READ_RESPONSE_FIRST))
1490                         goto ack_done;
1491                 hdrsize += 4;
1492                 wqe = get_swqe_ptr(qp, qp->s_acked);
1493                 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1494                         goto ack_op_err;
1495                 /*
1496                  * If this is a response to a resent RDMA read, we
1497                  * have to be careful to copy the data to the right
1498                  * location.
1499                  */
1500                 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1501                                                   wqe, psn, pmtu);
1502                 goto read_middle;
1503
1504         case OP(RDMA_READ_RESPONSE_MIDDLE):
1505                 /* no AETH, no ACK */
1506                 if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1507                         goto ack_seq_err;
1508                 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1509                         goto ack_op_err;
1510 read_middle:
1511                 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1512                         goto ack_len_err;
1513                 if (unlikely(pmtu >= qp->s_rdma_read_len))
1514                         goto ack_len_err;
1515
1516                 /*
1517                  * We got a response so update the timeout.
1518                  * 4.096 usec. * (1 << qp->timeout)
1519                  */
1520                 qp->s_flags |= QIB_S_TIMER;
1521                 mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
1522                 if (qp->s_flags & QIB_S_WAIT_ACK) {
1523                         qp->s_flags &= ~QIB_S_WAIT_ACK;
1524                         qib_schedule_send(qp);
1525                 }
1526
1527                 if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1528                         qp->s_retry = qp->s_retry_cnt;
1529
1530                 /*
1531                  * Update the RDMA receive state but do the copy w/o
1532                  * holding the locks and blocking interrupts.
1533                  */
1534                 qp->s_rdma_read_len -= pmtu;
1535                 update_last_psn(qp, psn);
1536                 spin_unlock_irqrestore(&qp->s_lock, flags);
1537                 qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
1538                 goto bail;
1539
1540         case OP(RDMA_READ_RESPONSE_ONLY):
1541                 aeth = be32_to_cpu(ohdr->u.aeth);
1542                 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
1543                         goto ack_done;
1544                 /* Get the number of bytes the message was padded by. */
1545                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1546                 /*
1547                  * Check that the data size is >= 0 && <= pmtu.
1548                  * Remember to account for the AETH header (4) and
1549                  * ICRC (4).
1550                  */
1551                 if (unlikely(tlen < (hdrsize + pad + 8)))
1552                         goto ack_len_err;
1553                 /*
1554                  * If this is a response to a resent RDMA read, we
1555                  * have to be careful to copy the data to the right
1556                  * location.
1557                  */
1558                 wqe = get_swqe_ptr(qp, qp->s_acked);
1559                 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1560                                                   wqe, psn, pmtu);
1561                 goto read_last;
1562
1563         case OP(RDMA_READ_RESPONSE_LAST):
1564                 /* The AETH in this response ACKs the RDMA READ request. */
1565                 if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1566                         goto ack_seq_err;
1567                 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1568                         goto ack_op_err;
1569                 /* Get the number of bytes the message was padded by. */
1570                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1571                 /*
1572                  * Check that the data size is >= 1 && <= pmtu.
1573                  * Remember to account for the AETH header (4) and
1574                  * ICRC (4).
1575                  */
1576                 if (unlikely(tlen <= (hdrsize + pad + 8)))
1577                         goto ack_len_err;
1578 read_last:
1579                 tlen -= hdrsize + pad + 8;
1580                 if (unlikely(tlen != qp->s_rdma_read_len))
1581                         goto ack_len_err;
1582                 aeth = be32_to_cpu(ohdr->u.aeth);
1583                 qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
1584                 WARN_ON(qp->s_rdma_read_sge.num_sge);
1585                 (void) do_rc_ack(qp, aeth, psn,
1586                                  OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
1587                 goto ack_done;
1588         }
1589
1590 ack_op_err:
1591         status = IB_WC_LOC_QP_OP_ERR;
1592         goto ack_err;
1593
1594 ack_seq_err:
1595         rdma_seq_err(qp, ibp, psn, rcd);
1596         goto ack_done;
1597
1598 ack_len_err:
1599         status = IB_WC_LOC_LEN_ERR;
1600 ack_err:
1601         if (qp->s_last == qp->s_acked) {
1602                 qib_send_complete(qp, wqe, status);
1603                 qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1604         }
1605 ack_done:
1606         spin_unlock_irqrestore(&qp->s_lock, flags);
1607 bail:
1608         return;
1609 }
1610
1611 /**
1612  * qib_rc_rcv_error - process an incoming duplicate or error RC packet
1613  * @ohdr: the other headers for this packet
1614  * @data: the packet data
1615  * @qp: the QP for this packet
1616  * @opcode: the opcode for this packet
1617  * @psn: the packet sequence number for this packet
1618  * @diff: the difference between the PSN and the expected PSN
      * @rcd: the context pointer
1619  *
1620  * This is called from qib_rc_rcv() to process an unexpected
1621  * incoming RC packet for the given QP.
1622  * Called at interrupt level.
1623  * Return 1 if no more processing is needed; otherwise return 0 to
1624  * schedule a response to be sent.
1625  */
1626 static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
1627                             void *data,
1628                             struct qib_qp *qp,
1629                             u32 opcode,
1630                             u32 psn,
1631                             int diff,
1632                             struct qib_ctxtdata *rcd)
1633 {
1634         struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1635         struct qib_ack_entry *e;
1636         unsigned long flags;
1637         u8 i, prev;
1638         int old_req;
1639
1640         if (diff > 0) {
1641                 /*
1642                  * Packet sequence error.
1643                  * A NAK will ACK earlier sends and RDMA writes.
1644                  * Don't queue the NAK if we already sent one.
1645                  */
1646                 if (!qp->r_nak_state) {
1647                         ibp->n_rc_seqnak++;
1648                         qp->r_nak_state = IB_NAK_PSN_ERROR;
1649                         /* Use the expected PSN. */
1650                         qp->r_ack_psn = qp->r_psn;
1651                         /*
1652                          * Wait to send the sequence NAK until all packets
1653                          * in the receive queue have been processed.
1654                          * Otherwise, we end up propagating congestion.
1655                          */
1656                         if (list_empty(&qp->rspwait)) {
1657                                 qp->r_flags |= QIB_R_RSP_NAK;
1658                                 atomic_inc(&qp->refcount);
1659                                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1660                         }
1661                 }
1662                 goto done;
1663         }
1664
1665         /*
1666          * Handle a duplicate request.  Don't re-execute SEND, RDMA
1667          * write or atomic op.  Don't NAK errors, just silently drop
1668          * the duplicate request.  Note that r_sge, r_len, and
1669          * r_rcv_len may be in use so don't modify them.
1670          *
1671          * We are supposed to ACK the earliest duplicate PSN but we
1672          * can coalesce an outstanding duplicate ACK.  We have to
1673          * send the earliest so that RDMA reads can be restarted at
1674          * the requester's expected PSN.
1675          *
1676          * First, find where this duplicate PSN falls within the
1677          * ACKs previously sent.
1678          * old_req is true if there is an older response that is scheduled
1679          * to be sent before sending this one.
1680          */
1681         e = NULL;
1682         old_req = 1;
1683         ibp->n_rc_dupreq++;
1684
1685         spin_lock_irqsave(&qp->s_lock, flags);
1686
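             /*
              * Walk backward from the newest ACK queue entry toward the
              * oldest, looking for the entry whose PSN range covers this
              * duplicate request.
              */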
1687         for (i = qp->r_head_ack_queue; ; i = prev) {
1688                 if (i == qp->s_tail_ack_queue)
1689                         old_req = 0;
1690                 if (i)
1691                         prev = i - 1;
1692                 else
1693                         prev = QIB_MAX_RDMA_ATOMIC;
1694                 if (prev == qp->r_head_ack_queue) {
1695                         e = NULL;
1696                         break;
1697                 }
1698                 e = &qp->s_ack_queue[prev];
1699                 if (!e->opcode) {
1700                         e = NULL;
1701                         break;
1702                 }
1703                 if (qib_cmp24(psn, e->psn) >= 0) {
1704                         if (prev == qp->s_tail_ack_queue &&
1705                             qib_cmp24(psn, e->lpsn) <= 0)
1706                                 old_req = 0;
1707                         break;
1708                 }
1709         }
1710         switch (opcode) {
1711         case OP(RDMA_READ_REQUEST): {
1712                 struct ib_reth *reth;
1713                 u32 offset;
1714                 u32 len;
1715
1716                 /*
1717                  * If we didn't find the RDMA read request in the ack queue,
1718                  * we can ignore this request.
1719                  */
1720                 if (!e || e->opcode != OP(RDMA_READ_REQUEST))
1721                         goto unlock_done;
1722                 /* RETH comes after BTH */
1723                 reth = &ohdr->u.rc.reth;
1724                 /*
1725                  * Address range must be a subset of the original
1726                  * request and start on pmtu boundaries.
1727                  * We reuse the old ack_queue slot since the requester
1728                  * should not back up and request an earlier PSN for the
1729                  * same request.
1730                  */
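                     /*
                      * Each PSN covers one pmtu of payload, so the byte
                      * offset is the PSN delta times the pmtu.
                      */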
1731                 offset = ((psn - e->psn) & QIB_PSN_MASK) *
1732                         qp->pmtu;
1733                 len = be32_to_cpu(reth->length);
1734                 if (unlikely(offset + len != e->rdma_sge.sge_length))
1735                         goto unlock_done;
1736                 if (e->rdma_sge.mr) {
1737                         atomic_dec(&e->rdma_sge.mr->refcount);
1738                         e->rdma_sge.mr = NULL;
1739                 }
1740                 if (len != 0) {
1741                         u32 rkey = be32_to_cpu(reth->rkey);
1742                         u64 vaddr = be64_to_cpu(reth->vaddr);
1743                         int ok;
1744
1745                         ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
1746                                          IB_ACCESS_REMOTE_READ);
1747                         if (unlikely(!ok))
1748                                 goto unlock_done;
1749                 } else {
1750                         e->rdma_sge.vaddr = NULL;
1751                         e->rdma_sge.length = 0;
1752                         e->rdma_sge.sge_length = 0;
1753                 }
1754                 e->psn = psn;
1755                 if (old_req)
1756                         goto unlock_done;
1757                 qp->s_tail_ack_queue = prev;
1758                 break;
1759         }
1760
1761         case OP(COMPARE_SWAP):
1762         case OP(FETCH_ADD): {
1763                 /*
1764                  * If we didn't find the atomic request in the ack queue
1765                  * or the send tasklet is already backed up to send an
1766                  * earlier entry, we can ignore this request.
1767                  */
1768                 if (!e || e->opcode != (u8) opcode || old_req)
1769                         goto unlock_done;
1770                 qp->s_tail_ack_queue = prev;
1771                 break;
1772         }
1773
1774         default:
1775                 /*
1776                  * Ignore this operation if it doesn't request an ACK
1777                  * or an earlier RDMA read or atomic is going to be resent.
1778                  */
1779                 if (!(psn & IB_BTH_REQ_ACK) || old_req)
1780                         goto unlock_done;
1781                 /*
1782                  * Resend the most recent ACK if this request is
1783                  * after all the previous RDMA reads and atomics.
1784                  */
1785                 if (i == qp->r_head_ack_queue) {
1786                         spin_unlock_irqrestore(&qp->s_lock, flags);
1787                         qp->r_nak_state = 0;
1788                         qp->r_ack_psn = qp->r_psn - 1;
1789                         goto send_ack;
1790                 }
1791                 /*
1792                  * Try to send a simple ACK to work around a Mellanox bug
1793                  * which doesn't accept an RDMA read response or atomic
1794                  * response as an ACK for earlier SENDs or RDMA writes.
1795                  */
1796                 if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
1797                         spin_unlock_irqrestore(&qp->s_lock, flags);
1798                         qp->r_nak_state = 0;
1799                         qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1800                         goto send_ack;
1801                 }
1802                 /*
1803                  * Resend the RDMA read or atomic op which
1804                  * ACKs this duplicate request.
1805                  */
1806                 qp->s_tail_ack_queue = i;
1807                 break;
1808         }
1809         qp->s_ack_state = OP(ACKNOWLEDGE);
1810         qp->s_flags |= QIB_S_RESP_PENDING;
1811         qp->r_nak_state = 0;
1812         qib_schedule_send(qp);
1813
1814 unlock_done:
1815         spin_unlock_irqrestore(&qp->s_lock, flags);
1816 done:
1817         return 1;
1818
1819 send_ack:
1820         return 0;
1821 }
1822
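     /**
      * qib_rc_error - move the QP into the error state
      * @qp: the QP to transition
      * @err: the completion error status to report
      *
      * If qib_error_qp() indicates the last WQE was reached, post an
      * IB_EVENT_QP_LAST_WQE_REACHED event to the consumer.
      */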
1823 void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
1824 {
1825         unsigned long flags;
1826         int lastwqe;
1827
1828         spin_lock_irqsave(&qp->s_lock, flags);
1829         lastwqe = qib_error_qp(qp, err);
1830         spin_unlock_irqrestore(&qp->s_lock, flags);
1831
1832         if (lastwqe) {
1833                 struct ib_event ev;
1834
1835                 ev.device = qp->ibqp.device;
1836                 ev.element.qp = &qp->ibqp;
1837                 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1838                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1839         }
1840 }
1841
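     /*
      * Advance the responder's ACK queue tail past entry @n, wrapping
      * within the QIB_MAX_RDMA_ATOMIC + 1 entries, and reset s_ack_state
      * so a fresh response is built.
      */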
1842 static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
1843 {
1844         unsigned next;
1845
1846         next = n + 1;
1847         if (next > QIB_MAX_RDMA_ATOMIC)
1848                 next = 0;
1849         qp->s_tail_ack_queue = next;
1850         qp->s_ack_state = OP(ACKNOWLEDGE);
1851 }
1852
1853 /**
1854  * qib_rc_rcv - process an incoming RC packet
1855  * @rcd: the context pointer
1856  * @hdr: the header of this packet
1857  * @has_grh: true if the header has a GRH
1858  * @data: the packet data
1859  * @tlen: the packet length
1860  * @qp: the QP for this packet
1861  *
1862  * This is called from qib_qp_rcv() to process an incoming RC packet
1863  * for the given QP.
1864  * Called at interrupt level.
1865  */
1866 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
1867                 int has_grh, void *data, u32 tlen, struct qib_qp *qp)
1868 {
1869         struct qib_ibport *ibp = &rcd->ppd->ibport_data;
1870         struct qib_other_headers *ohdr;
1871         u32 opcode;
1872         u32 hdrsize;
1873         u32 psn;
1874         u32 pad;
1875         struct ib_wc wc;
1876         u32 pmtu = qp->pmtu;
1877         int diff;
1878         struct ib_reth *reth;
1879         unsigned long flags;
1880         int ret;
1881
1882         /* Check for GRH */
1883         if (!has_grh) {
1884                 ohdr = &hdr->u.oth;
1885                 hdrsize = 8 + 12;       /* LRH + BTH */
1886         } else {
1887                 ohdr = &hdr->u.l.oth;
1888                 hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
1889         }
1890
1891         opcode = be32_to_cpu(ohdr->bth[0]);
1892         if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
1893                 return;
1894
1895         psn = be32_to_cpu(ohdr->bth[2]);
1896         opcode >>= 24;
1897
1898         /*
1899          * Process responses (ACKs) before anything else.  Note that the
1900          * packet sequence number will be for something in the send work
1901          * queue rather than the expected receive packet sequence number.
1902          * In other words, this QP is the requester.
1903          */
1904         if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1905             opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1906                 qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
1907                                 hdrsize, pmtu, rcd);
1908                 return;
1909         }
1910
1911         /* Compute 24 bits worth of difference. */
1912         diff = qib_cmp24(psn, qp->r_psn);
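             /*
              * A negative difference means a duplicate request and a
              * positive one a sequence error; qib_rc_rcv_error() handles
              * both.
              */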
1913         if (unlikely(diff)) {
1914                 if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
1915                         return;
1916                 goto send_ack;
1917         }
1918
1919         /* Check for opcode sequence errors. */
1920         switch (qp->r_state) {
1921         case OP(SEND_FIRST):
1922         case OP(SEND_MIDDLE):
1923                 if (opcode == OP(SEND_MIDDLE) ||
1924                     opcode == OP(SEND_LAST) ||
1925                     opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1926                         break;
1927                 goto nack_inv;
1928
1929         case OP(RDMA_WRITE_FIRST):
1930         case OP(RDMA_WRITE_MIDDLE):
1931                 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1932                     opcode == OP(RDMA_WRITE_LAST) ||
1933                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1934                         break;
1935                 goto nack_inv;
1936
1937         default:
1938                 if (opcode == OP(SEND_MIDDLE) ||
1939                     opcode == OP(SEND_LAST) ||
1940                     opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1941                     opcode == OP(RDMA_WRITE_MIDDLE) ||
1942                     opcode == OP(RDMA_WRITE_LAST) ||
1943                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1944                         goto nack_inv;
1945                 /*
1946                  * Note that it is up to the requester to not send a new
1947                  * RDMA read or atomic operation before receiving an ACK
1948                  * for the previous operation.
1949                  */
1950                 break;
1951         }
1952
1953         if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
1954                 qp->r_flags |= QIB_R_COMM_EST;
1955                 if (qp->ibqp.event_handler) {
1956                         struct ib_event ev;
1957
1958                         ev.device = qp->ibqp.device;
1959                         ev.element.qp = &qp->ibqp;
1960                         ev.event = IB_EVENT_COMM_EST;
1961                         qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1962                 }
1963         }
1964
1965         /* OK, process the packet. */
1966         switch (opcode) {
1967         case OP(SEND_FIRST):
1968                 ret = qib_get_rwqe(qp, 0);
1969                 if (ret < 0)
1970                         goto nack_op_err;
1971                 if (!ret)
1972                         goto rnr_nak;
1973                 qp->r_rcv_len = 0;
1974                 /* FALLTHROUGH */
1975         case OP(SEND_MIDDLE):
1976         case OP(RDMA_WRITE_MIDDLE):
1977 send_middle:
1978                 /* Check for invalid length: payload must be one PMTU and fit within the posted RWQE. */
1979                 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1980                         goto nack_inv;
1981                 qp->r_rcv_len += pmtu;
1982                 if (unlikely(qp->r_rcv_len > qp->r_len))
1983                         goto nack_inv;
1984                 qib_copy_sge(&qp->r_sge, data, pmtu, 1);
1985                 break;
1986
1987         case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1988                 /* consume RWQE */
1989                 ret = qib_get_rwqe(qp, 1);
1990                 if (ret < 0)
1991                         goto nack_op_err;
1992                 if (!ret)
1993                         goto rnr_nak;
1994                 goto send_last_imm;
1995
1996         case OP(SEND_ONLY):
1997         case OP(SEND_ONLY_WITH_IMMEDIATE):
1998                 ret = qib_get_rwqe(qp, 0);
1999                 if (ret < 0)
2000                         goto nack_op_err;
2001                 if (!ret)
2002                         goto rnr_nak;
2003                 qp->r_rcv_len = 0;
2004                 if (opcode == OP(SEND_ONLY))
2005                         goto no_immediate_data;
2006                 /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
2007         case OP(SEND_LAST_WITH_IMMEDIATE):
2008 send_last_imm:
2009                 wc.ex.imm_data = ohdr->u.imm_data;
2010                 hdrsize += 4;
2011                 wc.wc_flags = IB_WC_WITH_IMM;
2012                 goto send_last;
2013         case OP(SEND_LAST):
2014         case OP(RDMA_WRITE_LAST):
2015 no_immediate_data:
2016                 wc.wc_flags = 0;
2017                 wc.ex.imm_data = 0;
2018 send_last:
2019                 /* Get the number of bytes the message was padded by. */
2020                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
2021                 /* Check for invalid length. */
2022                 /* XXX LAST len should be >= 1 */
2023                 if (unlikely(tlen < (hdrsize + pad + 4)))
2024                         goto nack_inv;
2025                 /* Don't count the CRC. */
2026                 tlen -= (hdrsize + pad + 4);
2027                 wc.byte_len = tlen + qp->r_rcv_len;
2028                 if (unlikely(wc.byte_len > qp->r_len))
2029                         goto nack_inv;
2030                 qib_copy_sge(&qp->r_sge, data, tlen, 1);
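                     /*
                      * Drop the MR references held by the receive SGE list
                      * now that the payload has been copied.
                      */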
2031                 while (qp->r_sge.num_sge) {
2032                         atomic_dec(&qp->r_sge.sge.mr->refcount);
2033                         if (--qp->r_sge.num_sge)
2034                                 qp->r_sge.sge = *qp->r_sge.sg_list++;
2035                 }
2036                 qp->r_msn++;
2037                 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
2038                         break;
2039                 wc.wr_id = qp->r_wr_id;
2040                 wc.status = IB_WC_SUCCESS;
2041                 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
2042                     opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
2043                         wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
2044                 else
2045                         wc.opcode = IB_WC_RECV;
2046                 wc.qp = &qp->ibqp;
2047                 wc.src_qp = qp->remote_qpn;
2048                 wc.slid = qp->remote_ah_attr.dlid;
2049                 wc.sl = qp->remote_ah_attr.sl;
2050                 /* zero fields that are N/A */
2051                 wc.vendor_err = 0;
2052                 wc.pkey_index = 0;
2053                 wc.dlid_path_bits = 0;
2054                 wc.port_num = 0;
2055                 wc.csum_ok = 0;
2056                 /* Signal completion event if the solicited bit is set. */
2057                 qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
2058                              (ohdr->bth[0] &
2059                               cpu_to_be32(IB_BTH_SOLICITED)) != 0);
2060                 break;
2061
2062         case OP(RDMA_WRITE_FIRST):
2063         case OP(RDMA_WRITE_ONLY):
2064         case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2065                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2066                         goto nack_inv;
2067                 /* consume RWQE */
2068                 reth = &ohdr->u.rc.reth;
2069                 hdrsize += sizeof(*reth);
2070                 qp->r_len = be32_to_cpu(reth->length);
2071                 qp->r_rcv_len = 0;
2072                 qp->r_sge.sg_list = NULL;
2073                 if (qp->r_len != 0) {
2074                         u32 rkey = be32_to_cpu(reth->rkey);
2075                         u64 vaddr = be64_to_cpu(reth->vaddr);
2076                         int ok;
2077
2078                         /* Check rkey & NAK */
2079                         ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2080                                          rkey, IB_ACCESS_REMOTE_WRITE);
2081                         if (unlikely(!ok))
2082                                 goto nack_acc;
2083                         qp->r_sge.num_sge = 1;
2084                 } else {
2085                         qp->r_sge.num_sge = 0;
2086                         qp->r_sge.sge.mr = NULL;
2087                         qp->r_sge.sge.vaddr = NULL;
2088                         qp->r_sge.sge.length = 0;
2089                         qp->r_sge.sge.sge_length = 0;
2090                 }
2091                 if (opcode == OP(RDMA_WRITE_FIRST))
2092                         goto send_middle;
2093                 else if (opcode == OP(RDMA_WRITE_ONLY))
2094                         goto no_immediate_data;
2095                 ret = qib_get_rwqe(qp, 1);
2096                 if (ret < 0)
2097                         goto nack_op_err;
2098                 if (!ret)
2099                         goto rnr_nak;
2100                 wc.ex.imm_data = ohdr->u.rc.imm_data;
2101                 hdrsize += 4;
2102                 wc.wc_flags = IB_WC_WITH_IMM;
2103                 goto send_last;
2104
2105         case OP(RDMA_READ_REQUEST): {
2106                 struct qib_ack_entry *e;
2107                 u32 len;
2108                 u8 next;
2109
2110                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2111                         goto nack_inv;
2112                 next = qp->r_head_ack_queue + 1;
2113                 /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
2114                 if (next > QIB_MAX_RDMA_ATOMIC)
2115                         next = 0;
2116                 spin_lock_irqsave(&qp->s_lock, flags);
2117                 if (unlikely(next == qp->s_tail_ack_queue)) {
2118                         if (!qp->s_ack_queue[next].sent)
2119                                 goto nack_inv_unlck;
2120                         qib_update_ack_queue(qp, next);
2121                 }
2122                 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2123                 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2124                         atomic_dec(&e->rdma_sge.mr->refcount);
2125                         e->rdma_sge.mr = NULL;
2126                 }
2127                 reth = &ohdr->u.rc.reth;
2128                 len = be32_to_cpu(reth->length);
2129                 if (len) {
2130                         u32 rkey = be32_to_cpu(reth->rkey);
2131                         u64 vaddr = be64_to_cpu(reth->vaddr);
2132                         int ok;
2133
2134                         /* Check rkey & NAK */
2135                         ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2136                                          rkey, IB_ACCESS_REMOTE_READ);
2137                         if (unlikely(!ok))
2138                                 goto nack_acc_unlck;
2139                         /*
2140                          * Update the next expected PSN.  We add 1 later
2141                          * below, so only add the remainder here.
2142                          */
2143                         if (len > pmtu)
2144                                 qp->r_psn += (len - 1) / pmtu;
2145                 } else {
2146                         e->rdma_sge.mr = NULL;
2147                         e->rdma_sge.vaddr = NULL;
2148                         e->rdma_sge.length = 0;
2149                         e->rdma_sge.sge_length = 0;
2150                 }
2151                 e->opcode = opcode;
2152                 e->sent = 0;
2153                 e->psn = psn;
2154                 e->lpsn = qp->r_psn;
2155                 /*
2156                  * We need to increment the MSN here instead of when we
2157                  * finish sending the result since a duplicate request would
2158                  * increment it more than once.
2159                  */
2160                 qp->r_msn++;
2161                 qp->r_psn++;
2162                 qp->r_state = opcode;
2163                 qp->r_nak_state = 0;
2164                 qp->r_head_ack_queue = next;
2165
2166                 /* Schedule the send tasklet. */
2167                 qp->s_flags |= QIB_S_RESP_PENDING;
2168                 qib_schedule_send(qp);
2169
2170                 goto sunlock;
2171         }
2172
2173         case OP(COMPARE_SWAP):
2174         case OP(FETCH_ADD): {
2175                 struct ib_atomic_eth *ateth;
2176                 struct qib_ack_entry *e;
2177                 u64 vaddr;
2178                 atomic64_t *maddr;
2179                 u64 sdata;
2180                 u32 rkey;
2181                 u8 next;
2182
2183                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2184                         goto nack_inv;
2185                 next = qp->r_head_ack_queue + 1;
2186                 if (next > QIB_MAX_RDMA_ATOMIC)
2187                         next = 0;
2188                 spin_lock_irqsave(&qp->s_lock, flags);
2189                 if (unlikely(next == qp->s_tail_ack_queue)) {
2190                         if (!qp->s_ack_queue[next].sent)
2191                                 goto nack_inv_unlck;
2192                         qib_update_ack_queue(qp, next);
2193                 }
2194                 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2195                 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2196                         atomic_dec(&e->rdma_sge.mr->refcount);
2197                         e->rdma_sge.mr = NULL;
2198                 }
2199                 ateth = &ohdr->u.atomic_eth;
2200                 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
2201                         be32_to_cpu(ateth->vaddr[1]);
2202                 if (unlikely(vaddr & (sizeof(u64) - 1)))
2203                         goto nack_inv_unlck;
2204                 rkey = be32_to_cpu(ateth->rkey);
2205                 /* Check rkey & NAK */
2206                 if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2207                                           vaddr, rkey,
2208                                           IB_ACCESS_REMOTE_ATOMIC)))
2209                         goto nack_acc_unlck;
2210                 /* Perform atomic OP and save result. */
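                     /*
                      * Both cases leave the original value at vaddr in
                      * e->atomic_data: atomic64_add_return() yields the
                      * post-add value so sdata is subtracted back out,
                      * while cmpxchg() returns the prior contents directly.
                      */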
2211                 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
2212                 sdata = be64_to_cpu(ateth->swap_data);
2213                 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2214                         (u64) atomic64_add_return(sdata, maddr) - sdata :
2215                         (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
2216                                       be64_to_cpu(ateth->compare_data),
2217                                       sdata);
2218                 atomic_dec(&qp->r_sge.sge.mr->refcount);
2219                 qp->r_sge.num_sge = 0;
2220                 e->opcode = opcode;
2221                 e->sent = 0;
2222                 e->psn = psn;
2223                 e->lpsn = psn;
2224                 qp->r_msn++;
2225                 qp->r_psn++;
2226                 qp->r_state = opcode;
2227                 qp->r_nak_state = 0;
2228                 qp->r_head_ack_queue = next;
2229
2230                 /* Schedule the send tasklet. */
2231                 qp->s_flags |= QIB_S_RESP_PENDING;
2232                 qib_schedule_send(qp);
2233
2234                 goto sunlock;
2235         }
2236
2237         default:
2238                 /* NAK unknown opcodes. */
2239                 goto nack_inv;
2240         }
2241         qp->r_psn++;
2242         qp->r_state = opcode;
2243         qp->r_ack_psn = psn;
2244         qp->r_nak_state = 0;
2245         /* Send an ACK if requested or required. */
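             /* Bit 31 of the PSN word is the BTH AckReq bit (IB_BTH_REQ_ACK). */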
2246         if (psn & (1 << 31))
2247                 goto send_ack;
2248         return;
2249
2250 rnr_nak:
2251         qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
2252         qp->r_ack_psn = qp->r_psn;
2253         /* Queue RNR NAK for later */
2254         if (list_empty(&qp->rspwait)) {
2255                 qp->r_flags |= QIB_R_RSP_NAK;
2256                 atomic_inc(&qp->refcount);
2257                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2258         }
2259         return;
2260
2261 nack_op_err:
2262         qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2263         qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2264         qp->r_ack_psn = qp->r_psn;
2265         /* Queue NAK for later */
2266         if (list_empty(&qp->rspwait)) {
2267                 qp->r_flags |= QIB_R_RSP_NAK;
2268                 atomic_inc(&qp->refcount);
2269                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2270         }
2271         return;
2272
2273 nack_inv_unlck:
2274         spin_unlock_irqrestore(&qp->s_lock, flags);
2275 nack_inv:
2276         qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2277         qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2278         qp->r_ack_psn = qp->r_psn;
2279         /* Queue NAK for later */
2280         if (list_empty(&qp->rspwait)) {
2281                 qp->r_flags |= QIB_R_RSP_NAK;
2282                 atomic_inc(&qp->refcount);
2283                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2284         }
2285         return;
2286
2287 nack_acc_unlck:
2288         spin_unlock_irqrestore(&qp->s_lock, flags);
2289 nack_acc:
2290         qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
2291         qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2292         qp->r_ack_psn = qp->r_psn;
2293 send_ack:
2294         qib_send_rc_ack(qp);
2295         return;
2296
2297 sunlock:
2298         spin_unlock_irqrestore(&qp->s_lock, flags);
2299 }