IB/ipath: Fix problem with next WQE after a UC completion
[pandora-kernel.git] / drivers / infiniband / hw / ipath / ipath_uc.c
1 /*
2  * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include "ipath_verbs.h"
35 #include "ipath_kernel.h"
36
37 /* cut down ridiculously long IB macro names */
38 #define OP(x) IB_OPCODE_UC_##x
39
40 static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
41                                struct ib_wc *wc)
42 {
43         if (++qp->s_last == qp->s_size)
44                 qp->s_last = 0;
45         if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
46             (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47                 wc->wr_id = wqe->wr.wr_id;
48                 wc->status = IB_WC_SUCCESS;
49                 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
50                 wc->vendor_err = 0;
51                 wc->byte_len = wqe->length;
52                 wc->qp = &qp->ibqp;
53                 wc->src_qp = qp->remote_qpn;
54                 wc->pkey_index = 0;
55                 wc->slid = qp->remote_ah_attr.dlid;
56                 wc->sl = qp->remote_ah_attr.sl;
57                 wc->dlid_path_bits = 0;
58                 wc->port_num = 0;
59                 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
60         }
61 }
62
/**
 * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 * @bth0p: pointer to the BTH opcode word
 * @bth2p: pointer to the BTH PSN word
 *
 * Builds at most one packet's worth of headers/payload bookkeeping per
 * call; multi-packet messages are continued on subsequent calls via the
 * FIRST/MIDDLE s_state values.
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held and interrupts disabled.
 */
int ipath_make_uc_req(struct ipath_qp *qp,
		      struct ipath_other_headers *ohdr,
		      u32 pmtu, u32 *bth0p, u32 *bth2p)
{
	struct ipath_swqe *wqe;
	u32 hwords;	/* header size in 32-bit words */
	u32 bth0;
	u32 len;	/* payload bytes carried by this packet */
	struct ib_wc wc;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
		goto done;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 0;

	/* Get the next send request. */
	wqe = get_swqe_ptr(qp, qp->s_last);
	switch (qp->s_state) {
	default:
		/*
		 * Signal the completion of the last send
		 * (if there is one).
		 */
		if (qp->s_last != qp->s_tail) {
			complete_last_send(qp, wqe, &wc);
			/*
			 * complete_last_send() advanced s_last, so the
			 * WQE pointer must be re-fetched before the new
			 * request is built from it.
			 */
			wqe = get_swqe_ptr(qp, qp->s_last);
		}

		/* Check if send work queue is empty. */
		if (qp->s_tail == qp->s_head)
			goto done;
		/*
		 * Start a new request.
		 */
		qp->s_psn = wqe->psn = qp->s_next_psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = len = wqe->length;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				/* Won't fit in one packet: segment it. */
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;	/* BTH solicited event bit */
			break;

		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* RETH carries the remote address/rkey/length. */
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				/* Won't fit in one packet: segment it. */
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;	/* BTH solicited event bit */
			}
			break;

		default:
			/* Unsupported opcode for UC; produce nothing. */
			goto done;
		}
		/* The request is now in flight: advance the tail pointer. */
		if (++qp->s_tail >= qp->s_size)
			qp->s_tail = 0;
		break;

	/* Continuation of a segmented SEND started above. */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			/* More than one packet still to go. */
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;	/* BTH solicited event bit */
		break;

	/* Continuation of a segmented RDMA WRITE started above. */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			/* More than one packet still to go. */
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;	/* BTH solicited event bit */
		}
		break;
	}
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = &qp->s_sge;
	qp->s_cur_size = len;
	/* Opcode goes in the top byte of BTH word 0. */
	*bth0p = bth0 | (qp->s_state << 24);
	*bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
	return 1;

done:
	return 0;
}
223
/**
 * ipath_uc_rcv - handle an incoming UC packet
 * @dev: the device the packet came in on
 * @hdr: the header of the packet
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the length of the packet
 * @qp: the QP for this packet.
 *
 * This is called from ipath_qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 *
 * UC has no acknowledgements, so errors (bad PSN, bad length, missing
 * RWQE, no write permission) are handled by silently dropping the packet
 * and bumping dev->n_pkt_drops.
 */
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	int opcode;
	u32 hdrsize;	/* bytes of header preceding the payload */
	u32 psn;
	u32 pad;	/* pad bytes from the BTH PadCnt field */
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ib_reth *reth;
	int header_in_data;	/* nonzero if trailing header words are in @data */

	/* Validate the SLID. See Ch. 9.6.1.5 */
	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
		goto done;

	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the
		 * core driver sets the eager header buffer
		 * size to 56 bytes so the last 4 bytes of
		 * the BTH header (PSN) is in the data buffer.
		 */
		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}
	/*
	 * The opcode is in the low byte when its in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;

	wc.imm_data = 0;
	wc.wc_flags = 0;

	/* Compare the PSN verses the expected PSN. */
	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
	inv:
		/*
		 * Reset receive state and resynchronize: only a packet
		 * that can legally start a new message is accepted here.
		 */
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			dev->n_pkt_drops++;
			goto done;
		}
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		/* Mid-SEND: only a SEND continuation is valid. */
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		/* Mid-RDMA-WRITE: only a WRITE continuation is valid. */
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		/* Between messages: only a message start is valid. */
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
	send_first:
		/*
		 * Reuse the RWQE saved from a previously aborted
		 * message, if any; otherwise consume a fresh one.
		 */
		if (qp->r_reuse_sge) {
			qp->r_reuse_sge = 0;
			qp->r_sge = qp->s_rdma_read_sge;
		} else if (!ipath_get_rwqe(qp, 0)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Save the WQE so we can reuse it in case of an error. */
		qp->s_rdma_read_sge = qp->r_sge;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/* MIDDLE packets must carry exactly a PMTU of payload. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			/* Message overruns the posted receive buffer. */
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;	/* account for the immediate data word */
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* XXX Need to free SGEs */
	last_imm:
		/* Copy the final payload and complete the receive. */
		ipath_copy_sge(&qp->r_sge, data, tlen);
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
	rdma_first:
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
					   vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok)) {
				dev->n_pkt_drops++;
				goto done;
			}
		} else {
			/* Zero-length write: nothing to map. */
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		/* The QP itself must also allow remote writes. */
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (opcode == OP(RDMA_WRITE_ONLY))
			goto rdma_last;
		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			goto rdma_last_imm;
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
	rdma_last_imm:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		/* Total received must exactly match the RETH length. */
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/*
		 * The immediate data consumes an RWQE; reuse a saved
		 * one if present, otherwise take a fresh one.
		 */
		if (qp->r_reuse_sge)
			qp->r_reuse_sge = 0;
		else if (!ipath_get_rwqe(qp, 1)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;	/* account for the immediate data word */
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.byte_len = 0;
		goto last_imm;

	case OP(RDMA_WRITE_LAST):
	rdma_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		/* Total received must exactly match the RETH length. */
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, tlen);
		break;

	default:
		/* Drop packet for unknown opcodes. */
		dev->n_pkt_drops++;
		goto done;
	}
	/* Packet accepted: advance expected PSN and remember the opcode. */
	qp->r_psn++;
	qp->r_state = opcode;
done:
	return;
}