Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[pandora-kernel.git] / drivers / infiniband / hw / ipath / ipath_uc.c
1 /*
2  * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include "ipath_verbs.h"
35 #include "ipath_kernel.h"
36
37 /* cut down ridiculously long IB macro names */
38 #define OP(x) IB_OPCODE_UC_##x
39
40 static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
41                                struct ib_wc *wc)
42 {
43         if (++qp->s_last == qp->s_size)
44                 qp->s_last = 0;
45         if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
46             (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47                 wc->wr_id = wqe->wr.wr_id;
48                 wc->status = IB_WC_SUCCESS;
49                 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
50                 wc->vendor_err = 0;
51                 wc->byte_len = wqe->length;
52                 wc->qp_num = qp->ibqp.qp_num;
53                 wc->src_qp = qp->remote_qpn;
54                 wc->pkey_index = 0;
55                 wc->slid = qp->remote_ah_attr.dlid;
56                 wc->sl = qp->remote_ah_attr.sl;
57                 wc->dlid_path_bits = 0;
58                 wc->port_num = 0;
59                 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
60         }
61         wqe = get_swqe_ptr(qp, qp->s_last);
62 }
63
64 /**
65  * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
66  * @qp: a pointer to the QP
67  * @ohdr: a pointer to the IB header being constructed
68  * @pmtu: the path MTU
69  * @bth0p: pointer to the BTH opcode word
70  * @bth2p: pointer to the BTH PSN word
71  *
72  * Return 1 if constructed; otherwise, return 0.
73  * Note the QP s_lock must be held and interrupts disabled.
74  */
75 int ipath_make_uc_req(struct ipath_qp *qp,
76                       struct ipath_other_headers *ohdr,
77                       u32 pmtu, u32 *bth0p, u32 *bth2p)
78 {
79         struct ipath_swqe *wqe;
80         u32 hwords;
81         u32 bth0;
82         u32 len;
83         struct ib_wc wc;
84
85         if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
86                 goto done;
87
88         /* header size in 32-bit words LRH+BTH = (8+12)/4. */
89         hwords = 5;
90         bth0 = 0;
91
92         /* Get the next send request. */
93         wqe = get_swqe_ptr(qp, qp->s_last);
94         switch (qp->s_state) {
95         default:
96                 /*
97                  * Signal the completion of the last send
98                  * (if there is one).
99                  */
100                 if (qp->s_last != qp->s_tail)
101                         complete_last_send(qp, wqe, &wc);
102
103                 /* Check if send work queue is empty. */
104                 if (qp->s_tail == qp->s_head)
105                         goto done;
106                 /*
107                  * Start a new request.
108                  */
109                 qp->s_psn = wqe->psn = qp->s_next_psn;
110                 qp->s_sge.sge = wqe->sg_list[0];
111                 qp->s_sge.sg_list = wqe->sg_list + 1;
112                 qp->s_sge.num_sge = wqe->wr.num_sge;
113                 qp->s_len = len = wqe->length;
114                 switch (wqe->wr.opcode) {
115                 case IB_WR_SEND:
116                 case IB_WR_SEND_WITH_IMM:
117                         if (len > pmtu) {
118                                 qp->s_state = OP(SEND_FIRST);
119                                 len = pmtu;
120                                 break;
121                         }
122                         if (wqe->wr.opcode == IB_WR_SEND)
123                                 qp->s_state = OP(SEND_ONLY);
124                         else {
125                                 qp->s_state =
126                                         OP(SEND_ONLY_WITH_IMMEDIATE);
127                                 /* Immediate data comes after the BTH */
128                                 ohdr->u.imm_data = wqe->wr.imm_data;
129                                 hwords += 1;
130                         }
131                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
132                                 bth0 |= 1 << 23;
133                         break;
134
135                 case IB_WR_RDMA_WRITE:
136                 case IB_WR_RDMA_WRITE_WITH_IMM:
137                         ohdr->u.rc.reth.vaddr =
138                                 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
139                         ohdr->u.rc.reth.rkey =
140                                 cpu_to_be32(wqe->wr.wr.rdma.rkey);
141                         ohdr->u.rc.reth.length = cpu_to_be32(len);
142                         hwords += sizeof(struct ib_reth) / 4;
143                         if (len > pmtu) {
144                                 qp->s_state = OP(RDMA_WRITE_FIRST);
145                                 len = pmtu;
146                                 break;
147                         }
148                         if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
149                                 qp->s_state = OP(RDMA_WRITE_ONLY);
150                         else {
151                                 qp->s_state =
152                                         OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
153                                 /* Immediate data comes after the RETH */
154                                 ohdr->u.rc.imm_data = wqe->wr.imm_data;
155                                 hwords += 1;
156                                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
157                                         bth0 |= 1 << 23;
158                         }
159                         break;
160
161                 default:
162                         goto done;
163                 }
164                 if (++qp->s_tail >= qp->s_size)
165                         qp->s_tail = 0;
166                 break;
167
168         case OP(SEND_FIRST):
169                 qp->s_state = OP(SEND_MIDDLE);
170                 /* FALLTHROUGH */
171         case OP(SEND_MIDDLE):
172                 len = qp->s_len;
173                 if (len > pmtu) {
174                         len = pmtu;
175                         break;
176                 }
177                 if (wqe->wr.opcode == IB_WR_SEND)
178                         qp->s_state = OP(SEND_LAST);
179                 else {
180                         qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
181                         /* Immediate data comes after the BTH */
182                         ohdr->u.imm_data = wqe->wr.imm_data;
183                         hwords += 1;
184                 }
185                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
186                         bth0 |= 1 << 23;
187                 break;
188
189         case OP(RDMA_WRITE_FIRST):
190                 qp->s_state = OP(RDMA_WRITE_MIDDLE);
191                 /* FALLTHROUGH */
192         case OP(RDMA_WRITE_MIDDLE):
193                 len = qp->s_len;
194                 if (len > pmtu) {
195                         len = pmtu;
196                         break;
197                 }
198                 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
199                         qp->s_state = OP(RDMA_WRITE_LAST);
200                 else {
201                         qp->s_state =
202                                 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
203                         /* Immediate data comes after the BTH */
204                         ohdr->u.imm_data = wqe->wr.imm_data;
205                         hwords += 1;
206                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
207                                 bth0 |= 1 << 23;
208                 }
209                 break;
210         }
211         qp->s_len -= len;
212         qp->s_hdrwords = hwords;
213         qp->s_cur_sge = &qp->s_sge;
214         qp->s_cur_size = len;
215         *bth0p = bth0 | (qp->s_state << 24);
216         *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
217         return 1;
218
219 done:
220         return 0;
221 }
222
223 /**
224  * ipath_uc_rcv - handle an incoming UC packet
225  * @dev: the device the packet came in on
226  * @hdr: the header of the packet
227  * @has_grh: true if the packet has a GRH
228  * @data: the packet data
229  * @tlen: the length of the packet
230  * @qp: the QP for this packet.
231  *
232  * This is called from ipath_qp_rcv() to process an incoming UC packet
233  * for the given QP.
234  * Called at interrupt level.
235  */
236 void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
237                   int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
238 {
239         struct ipath_other_headers *ohdr;
240         int opcode;
241         u32 hdrsize;
242         u32 psn;
243         u32 pad;
244         struct ib_wc wc;
245         u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
246         struct ib_reth *reth;
247         int header_in_data;
248
249         /* Check for GRH */
250         if (!has_grh) {
251                 ohdr = &hdr->u.oth;
252                 hdrsize = 8 + 12;       /* LRH + BTH */
253                 psn = be32_to_cpu(ohdr->bth[2]);
254                 header_in_data = 0;
255         } else {
256                 ohdr = &hdr->u.l.oth;
257                 hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
258                 /*
259                  * The header with GRH is 60 bytes and the
260                  * core driver sets the eager header buffer
261                  * size to 56 bytes so the last 4 bytes of
262                  * the BTH header (PSN) is in the data buffer.
263                  */
264                 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
265                 if (header_in_data) {
266                         psn = be32_to_cpu(((__be32 *) data)[0]);
267                         data += sizeof(__be32);
268                 } else
269                         psn = be32_to_cpu(ohdr->bth[2]);
270         }
271         /*
272          * The opcode is in the low byte when its in network order
273          * (top byte when in host order).
274          */
275         opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
276
277         wc.imm_data = 0;
278         wc.wc_flags = 0;
279
280         /* Compare the PSN verses the expected PSN. */
281         if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
282                 /*
283                  * Handle a sequence error.
284                  * Silently drop any current message.
285                  */
286                 qp->r_psn = psn;
287         inv:
                /*
                 * Resynchronize: resetting r_state to SEND_LAST makes the
                 * opcode-sequence check below accept only a new message
                 * start (*_FIRST or *_ONLY).  UC has no NAKs, so errors
                 * are handled by dropping and counting (n_pkt_drops).
                 */
288                 qp->r_state = OP(SEND_LAST);
289                 switch (opcode) {
290                 case OP(SEND_FIRST):
291                 case OP(SEND_ONLY):
292                 case OP(SEND_ONLY_WITH_IMMEDIATE):
293                         goto send_first;
294
295                 case OP(RDMA_WRITE_FIRST):
296                 case OP(RDMA_WRITE_ONLY):
297                 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
298                         goto rdma_first;
299
300                 default:
301                         dev->n_pkt_drops++;
302                         goto done;
303                 }
304         }
305
306         /* Check for opcode sequence errors. */
307         switch (qp->r_state) {
308         case OP(SEND_FIRST):
309         case OP(SEND_MIDDLE):
310                 if (opcode == OP(SEND_MIDDLE) ||
311                     opcode == OP(SEND_LAST) ||
312                     opcode == OP(SEND_LAST_WITH_IMMEDIATE))
313                         break;
314                 goto inv;
315
316         case OP(RDMA_WRITE_FIRST):
317         case OP(RDMA_WRITE_MIDDLE):
318                 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
319                     opcode == OP(RDMA_WRITE_LAST) ||
320                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
321                         break;
322                 goto inv;
323
324         default:
325                 if (opcode == OP(SEND_FIRST) ||
326                     opcode == OP(SEND_ONLY) ||
327                     opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
328                     opcode == OP(RDMA_WRITE_FIRST) ||
329                     opcode == OP(RDMA_WRITE_ONLY) ||
330                     opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
331                         break;
332                 goto inv;
333         }
334
335         /* OK, process the packet. */
336         switch (opcode) {
337         case OP(SEND_FIRST):
338         case OP(SEND_ONLY):
339         case OP(SEND_ONLY_WITH_IMMEDIATE):
340         send_first:
                /*
                 * r_reuse_sge is set on the drop paths below; it means a
                 * previous packet of this message failed, so reuse the SGE
                 * state saved in s_rdma_sge instead of consuming a new RWQE.
                 */
341                 if (qp->r_reuse_sge) {
342                         qp->r_reuse_sge = 0;
343                         qp->r_sge = qp->s_rdma_sge;
344                 } else if (!ipath_get_rwqe(qp, 0)) {
345                         dev->n_pkt_drops++;
346                         goto done;
347                 }
348                 /* Save the WQE so we can reuse it in case of an error. */
349                 qp->s_rdma_sge = qp->r_sge;
350                 qp->r_rcv_len = 0;
351                 if (opcode == OP(SEND_ONLY))
352                         goto send_last;
353                 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
354                         goto send_last_imm;
355                 /* FALLTHROUGH */
356         case OP(SEND_MIDDLE):
357                 /* Check for invalid length PMTU or posted rwqe len. */
358                 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
359                         qp->r_reuse_sge = 1;
360                         dev->n_pkt_drops++;
361                         goto done;
362                 }
363                 qp->r_rcv_len += pmtu;
364                 if (unlikely(qp->r_rcv_len > qp->r_len)) {
365                         qp->r_reuse_sge = 1;
366                         dev->n_pkt_drops++;
367                         goto done;
368                 }
369                 ipath_copy_sge(&qp->r_sge, data, pmtu);
370                 break;
371
372         case OP(SEND_LAST_WITH_IMMEDIATE):
373         send_last_imm:
374                 if (header_in_data) {
375                         wc.imm_data = *(__be32 *) data;
376                         data += sizeof(__be32);
377                 } else {
378                         /* Immediate data comes after BTH */
379                         wc.imm_data = ohdr->u.imm_data;
380                 }
381                 hdrsize += 4;
382                 wc.wc_flags = IB_WC_WITH_IMM;
383                 /* FALLTHROUGH */
384         case OP(SEND_LAST):
385         send_last:
386                 /* Get the number of bytes the message was padded by. */
387                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
388                 /* Check for invalid length. */
389                 /* XXX LAST len should be >= 1 */
390                 if (unlikely(tlen < (hdrsize + pad + 4))) {
391                         qp->r_reuse_sge = 1;
392                         dev->n_pkt_drops++;
393                         goto done;
394                 }
395                 /* Don't count the CRC. */
396                 tlen -= (hdrsize + pad + 4);
397                 wc.byte_len = tlen + qp->r_rcv_len;
398                 if (unlikely(wc.byte_len > qp->r_len)) {
399                         qp->r_reuse_sge = 1;
400                         dev->n_pkt_drops++;
401                         goto done;
402                 }
403                 /* XXX Need to free SGEs */
404         last_imm:
                /*
                 * Common completion path (also reached from rdma_last_imm):
                 * copy the final payload and post a receive completion.
                 */
405                 ipath_copy_sge(&qp->r_sge, data, tlen);
406                 wc.wr_id = qp->r_wr_id;
407                 wc.status = IB_WC_SUCCESS;
408                 wc.opcode = IB_WC_RECV;
409                 wc.vendor_err = 0;
410                 wc.qp_num = qp->ibqp.qp_num;
411                 wc.src_qp = qp->remote_qpn;
412                 wc.pkey_index = 0;
413                 wc.slid = qp->remote_ah_attr.dlid;
414                 wc.sl = qp->remote_ah_attr.sl;
415                 wc.dlid_path_bits = 0;
416                 wc.port_num = 0;
417                 /* Signal completion event if the solicited bit is set. */
418                 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
419                                (ohdr->bth[0] &
420                                 __constant_cpu_to_be32(1 << 23)) != 0);
421                 break;
422
423         case OP(RDMA_WRITE_FIRST):
424         case OP(RDMA_WRITE_ONLY):
425         case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
426         rdma_first:
427                 /* RETH comes after BTH */
428                 if (!header_in_data)
429                         reth = &ohdr->u.rc.reth;
430                 else {
431                         reth = (struct ib_reth *)data;
432                         data += sizeof(*reth);
433                 }
434                 hdrsize += sizeof(*reth);
435                 qp->r_len = be32_to_cpu(reth->length);
436                 qp->r_rcv_len = 0;
437                 if (qp->r_len != 0) {
438                         u32 rkey = be32_to_cpu(reth->rkey);
439                         u64 vaddr = be64_to_cpu(reth->vaddr);
440                         int ok;
441
442                         /* Check rkey */
443                         ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
444                                            vaddr, rkey,
445                                            IB_ACCESS_REMOTE_WRITE);
446                         if (unlikely(!ok)) {
447                                 dev->n_pkt_drops++;
448                                 goto done;
449                         }
450                 } else {
                        /* Zero-length write: no rkey check, empty SGE. */
451                         qp->r_sge.sg_list = NULL;
452                         qp->r_sge.sge.mr = NULL;
453                         qp->r_sge.sge.vaddr = NULL;
454                         qp->r_sge.sge.length = 0;
455                         qp->r_sge.sge.sge_length = 0;
456                 }
457                 if (unlikely(!(qp->qp_access_flags &
458                                IB_ACCESS_REMOTE_WRITE))) {
459                         dev->n_pkt_drops++;
460                         goto done;
461                 }
462                 if (opcode == OP(RDMA_WRITE_ONLY))
463                         goto rdma_last;
464                 else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
465                         goto rdma_last_imm;
466                 /* FALLTHROUGH */
467         case OP(RDMA_WRITE_MIDDLE):
468                 /* Check for invalid length PMTU or posted rwqe len. */
469                 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
470                         dev->n_pkt_drops++;
471                         goto done;
472                 }
473                 qp->r_rcv_len += pmtu;
474                 if (unlikely(qp->r_rcv_len > qp->r_len)) {
475                         dev->n_pkt_drops++;
476                         goto done;
477                 }
478                 ipath_copy_sge(&qp->r_sge, data, pmtu);
479                 break;
480
481         case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
482         rdma_last_imm:
483                 /* Get the number of bytes the message was padded by. */
484                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
485                 /* Check for invalid length. */
486                 /* XXX LAST len should be >= 1 */
487                 if (unlikely(tlen < (hdrsize + pad + 4))) {
488                         dev->n_pkt_drops++;
489                         goto done;
490                 }
491                 /* Don't count the CRC. */
492                 tlen -= (hdrsize + pad + 4);
493                 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
494                         dev->n_pkt_drops++;
495                         goto done;
496                 }
                /*
                 * The immediate data needs an RWQE to complete against;
                 * consume one here unless a saved SGE is being reused.
                 */
497                 if (qp->r_reuse_sge)
498                         qp->r_reuse_sge = 0;
499                 else if (!ipath_get_rwqe(qp, 1)) {
500                         dev->n_pkt_drops++;
501                         goto done;
502                 }
503                 if (header_in_data) {
504                         wc.imm_data = *(__be32 *) data;
505                         data += sizeof(__be32);
506                 } else {
507                         /* Immediate data comes after BTH */
508                         wc.imm_data = ohdr->u.imm_data;
509                 }
510                 hdrsize += 4;
511                 wc.wc_flags = IB_WC_WITH_IMM;
512                 wc.byte_len = 0;
513                 goto last_imm;
514
515         case OP(RDMA_WRITE_LAST):
516         rdma_last:
517                 /* Get the number of bytes the message was padded by. */
518                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
519                 /* Check for invalid length. */
520                 /* XXX LAST len should be >= 1 */
521                 if (unlikely(tlen < (hdrsize + pad + 4))) {
522                         dev->n_pkt_drops++;
523                         goto done;
524                 }
525                 /* Don't count the CRC. */
526                 tlen -= (hdrsize + pad + 4);
527                 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
528                         dev->n_pkt_drops++;
529                         goto done;
530                 }
531                 ipath_copy_sge(&qp->r_sge, data, tlen);
532                 break;
533
534         default:
535                 /* Drop packet for unknown opcodes. */
536                 dev->n_pkt_drops++;
537                 goto done;
538         }
            /*
             * Packet accepted: advance the expected PSN and remember the
             * opcode for the sequence check on the next packet.  Dropped
             * packets skip this via "goto done" so state is unchanged.
             */
539         qp->r_psn++;
540         qp->r_state = opcode;
541 done:
542         return;
543 }