[IB] mthca: Better limit checking and reporting
drivers/infiniband/hw/mthca/mthca_provider.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
 */

#include <rdma/ib_smi.h>
#include <linux/mm.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_user.h"
#include "mthca_memfree.h"

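/*
 * Note: the query routines below talk to the device's own subnet
 * management agent through the MAD_IFC firmware command.  A nonzero
 * MAD status byte is collapsed to -EINVAL throughout this file.
 */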
static int mthca_query_device(struct ib_device *ibdev,
                              struct ib_device_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        struct mthca_dev *mdev = to_mdev(ibdev);
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        props->fw_ver              = mdev->fw_ver;

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_NODE_INFO;

        err = mthca_MAD_IFC(mdev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

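        /*
         * Parse the NodeInfo response.  The byte offsets below follow
         * the IBA NodeInfo attribute layout (vendor ID at offset 36,
         * device ID at 30, revision at 32, SystemImageGUID at 4,
         * NodeGUID at 12).
         */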
        props->device_cap_flags    = mdev->device_cap_flags;
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
        props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
        memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
        memcpy(&props->node_guid,      out_mad->data + 12, 8);

        props->max_mr_size         = ~0ull;
        props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
        props->max_qp_wr           = mdev->limits.max_wqes;
        props->max_sge             = mdev->limits.max_sg;
        props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
        props->max_cqe             = mdev->limits.max_cqes;
        props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
        props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
        props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
        props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
        props->max_srq_wr          = mdev->limits.max_srq_wqes;
        props->max_srq_sge         = mdev->limits.max_sg;
        props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
        props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
                                        IB_ATOMIC_HCA : IB_ATOMIC_NONE;
        props->max_pkeys           = mdev->limits.pkey_table_len;
        props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
        props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;

        err = 0;
 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod           = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

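        /*
         * Parse the PortInfo response; offsets are byte positions in
         * the IBA PortInfo attribute (LID at 16, SMLID at 18, the
         * capability mask at 20, and so on).
         */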
        props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc               = out_mad->data[34] & 0x7;
        props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
        props->sm_sl             = out_mad->data[36] & 0xf;
        props->state             = out_mad->data[32] & 0xf;
        props->phys_state        = out_mad->data[33] >> 4;
        props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
        props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
        props->max_msg_sz        = 0x80000000;
        props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
        props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
        props->active_width      = out_mad->data[31] & 0xf;
        props->active_speed      = out_mad->data[35] >> 4;
        props->max_mtu           = out_mad->data[41] & 0xf;
        props->active_mtu        = out_mad->data[36] >> 4;
        props->subnet_timeout    = out_mad->data[51] & 0x1f;

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_modify_port(struct ib_device *ibdev,
                             u8 port, int port_modify_mask,
                             struct ib_port_modify *props)
{
        struct mthca_set_ib_param set_ib;
        struct ib_port_attr attr;
        int err;
        u8 status;

        if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                return -ERESTARTSYS;

        err = mthca_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        set_ib.set_si_guid     = 0;
        set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);

        set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

out:
        up(&to_mdev(ibdev)->cap_mask_mutex);
        return err;
}

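/*
 * A P_KeyTable MAD returns a block of 32 P_Keys; attr_mod selects the
 * block and index % 32 the entry within it.  For example, index 40
 * lands in block 1 (attr_mod = 1) at slot 8.
 */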
static int mthca_query_pkey(struct ib_device *ibdev,
                            u8 port, u16 index, u16 *pkey)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_PKEY_TABLE;
        in_mad->attr_mod           = cpu_to_be32(index / 32);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

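/*
 * A port GID is composed of two halves: the 8-byte GID prefix comes
 * from the PortInfo attribute (bytes 8..15), and the 8-byte GUID from
 * a GUIDInfo block of 8 GUIDs, so two MADs are needed per query.
 */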
static int mthca_query_gid(struct ib_device *ibdev, u8 port,
                           int index, union ib_gid *gid)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod           = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        memcpy(gid->raw, out_mad->data + 8, 8);

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_GUID_INFO;
        in_mad->attr_mod           = cpu_to_be32(index / 8);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        /* GUIDInfo holds eight 8-byte GUIDs per block, so the stride is 8 */
        memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

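/*
 * A user context owns one UAR (doorbell page) and, on mem-free HCAs,
 * a table of user doorbell records.  The sizes returned in uresp let
 * userspace lay out its doorbell mapping to match.
 */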
static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
                                                struct ib_udata *udata)
{
        struct mthca_alloc_ucontext_resp uresp;
        struct mthca_ucontext           *context;
        int                              err;

        memset(&uresp, 0, sizeof uresp);

        uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
        if (mthca_is_memfree(to_mdev(ibdev)))
                uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
        else
                uresp.uarc_size = 0;

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
        if (err) {
                kfree(context);
                return ERR_PTR(err);
        }

        context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
        if (IS_ERR(context->db_tab)) {
                err = PTR_ERR(context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(err);
        }

        if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
                mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(-EFAULT);
        }

        return &context->ibucontext;
}

static int mthca_dealloc_ucontext(struct ib_ucontext *context)
{
        mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
                                  to_mucontext(context)->db_tab);
        mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
        kfree(to_mucontext(context));

        return 0;
}

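/*
 * Map the context's UAR into userspace as a single uncached page;
 * userspace rings doorbells by writing to this mapping.
 */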
static int mthca_mmap_uar(struct ib_ucontext *context,
                          struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        if (io_remap_pfn_range(vma, vma->vm_start,
                               to_mucontext(context)->uar.pfn,
                               PAGE_SIZE, vma->vm_page_prot))
                return -EAGAIN;

        return 0;
}

static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
                                    struct ib_ucontext *context,
                                    struct ib_udata *udata)
{
        struct mthca_pd *pd;
        int err;

        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
        if (err) {
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context) {
                if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
                        mthca_pd_free(to_mdev(ibdev), pd);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }
        }

        return &pd->ibpd;
}

static int mthca_dealloc_pd(struct ib_pd *pd)
{
        mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
        kfree(pd);

        return 0;
}

static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
                                     struct ib_ah_attr *ah_attr)
{
        int err;
        struct mthca_ah *ah;

        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);

        err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
        if (err) {
                kfree(ah);
                return ERR_PTR(err);
        }

        return &ah->ibah;
}

static int mthca_ah_destroy(struct ib_ah *ah)
{
        mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
        kfree(ah);

        return 0;
}

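/*
 * For userspace SRQs the consumer allocates the work queue itself and
 * passes in its lkey plus a doorbell record location, which is pinned
 * via mthca_map_user_db() before the SRQ is handed to the hardware.
 */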
static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
                                       struct ib_srq_init_attr *init_attr,
                                       struct ib_udata *udata)
{
        struct mthca_create_srq ucmd;
        struct mthca_ucontext *context = NULL;
        struct mthca_srq *srq;
        int err;

        srq = kmalloc(sizeof *srq, GFP_KERNEL);
        if (!srq)
                return ERR_PTR(-ENOMEM);

        if (pd->uobject) {
                context = to_mucontext(pd->uobject->context);

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err_free;
                }

                err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                        context->db_tab, ucmd.db_index,
                                        ucmd.db_page);

                if (err)
                        goto err_free;

                srq->mr.ibmr.lkey = ucmd.lkey;
                srq->db_index     = ucmd.db_index;
        }

        err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
                              &init_attr->attr, srq);

        if (err && pd->uobject)
                mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
                                    context->db_tab, ucmd.db_index);

        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
                mthca_free_srq(to_mdev(pd->device), srq);
                err = -EFAULT;
                goto err_free;
        }

        return &srq->ibsrq;

err_free:
        kfree(srq);

        return ERR_PTR(err);
}

static int mthca_destroy_srq(struct ib_srq *srq)
{
        struct mthca_ucontext *context;

        if (srq->uobject) {
                context = to_mucontext(srq->uobject->context);

                mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
                                    context->db_tab, to_msrq(srq)->db_index);
        }

        mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
        kfree(srq);

        return 0;
}

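/*
 * Regular QPs (RC/UC/UD) may live in userspace, in which case both
 * send and receive doorbell records must be mapped first.  Special
 * QPs (SMI/GSI) are kernel-only and use the fixed QP numbers 0 and 1.
 */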
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata)
{
        struct mthca_create_qp ucmd;
        struct mthca_qp *qp;
        int err;

        switch (init_attr->qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
                struct mthca_ucontext *context;

                qp = kmalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                if (pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                                kfree(qp);
                                return ERR_PTR(-EFAULT);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.sq_db_index, ucmd.sq_db_page);
                        if (err) {
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.rq_db_index, ucmd.rq_db_page);
                        if (err) {
                                mthca_unmap_user_db(to_mdev(pd->device),
                                                    &context->uar,
                                                    context->db_tab,
                                                    ucmd.sq_db_index);
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        qp->mr.ibmr.lkey = ucmd.lkey;
                        qp->sq.db_index  = ucmd.sq_db_index;
                        qp->rq.db_index  = ucmd.rq_db_index;
                }

                err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
                                     to_mcq(init_attr->send_cq),
                                     to_mcq(init_attr->recv_cq),
                                     init_attr->qp_type, init_attr->sq_sig_type,
                                     &init_attr->cap, qp);

                if (err && pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.sq_db_index);
                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.rq_db_index);
                }

                qp->ibqp.qp_num = qp->qpn;
                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                /* Don't allow userspace to create special QPs */
                if (pd->uobject)
                        return ERR_PTR(-EINVAL);

                qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

                err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
                                      to_mcq(init_attr->send_cq),
                                      to_mcq(init_attr->recv_cq),
                                      init_attr->sq_sig_type, &init_attr->cap,
                                      qp->ibqp.qp_num, init_attr->port_num,
                                      to_msqp(qp));
                break;
        }
        default:
                /* Don't support raw QPs */
                return ERR_PTR(-ENOSYS);
        }

        if (err) {
                kfree(qp);
                return ERR_PTR(err);
        }

        init_attr->cap.max_inline_data = 0;
        init_attr->cap.max_send_wr     = qp->sq.max;
        init_attr->cap.max_recv_wr     = qp->rq.max;
        init_attr->cap.max_send_sge    = qp->sq.max_gs;
        init_attr->cap.max_recv_sge    = qp->rq.max_gs;

        return &qp->ibqp;
}

static int mthca_destroy_qp(struct ib_qp *qp)
{
        if (qp->uobject) {
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->sq.db_index);
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->rq.db_index);
        }
        mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
        kfree(qp);
        return 0;
}

static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
                                     struct ib_ucontext *context,
                                     struct ib_udata *udata)
{
        struct mthca_create_cq ucmd;
        struct mthca_cq *cq;
        int nent;
        int err;

        if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
                return ERR_PTR(-EINVAL);

        if (context) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                        return ERR_PTR(-EFAULT);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.set_db_index, ucmd.set_db_page);
                if (err)
                        return ERR_PTR(err);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.arm_db_index, ucmd.arm_db_page);
                if (err)
                        goto err_unmap_set;
        }

        cq = kmalloc(sizeof *cq, GFP_KERNEL);
        if (!cq) {
                err = -ENOMEM;
                goto err_unmap_arm;
        }

        if (context) {
                cq->mr.ibmr.lkey    = ucmd.lkey;
                cq->set_ci_db_index = ucmd.set_db_index;
                cq->arm_db_index    = ucmd.arm_db_index;
        }

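        /*
         * The hardware wants a power-of-two CQ; round the requested
         * size up to the next power of two strictly greater than
         * "entries" (e.g. entries == 128 gives nent == 256).
         */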
        for (nent = 1; nent <= entries; nent <<= 1)
                ; /* nothing */

        err = mthca_init_cq(to_mdev(ibdev), nent,
                            context ? to_mucontext(context) : NULL,
                            context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
                            cq);
        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
                mthca_free_cq(to_mdev(ibdev), cq);
                err = -EFAULT;
                goto err_free;
        }

        return &cq->ibcq;

err_free:
        kfree(cq);

err_unmap_arm:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.arm_db_index);

err_unmap_set:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.set_db_index);

        return ERR_PTR(err);
}

static int mthca_destroy_cq(struct ib_cq *cq)
{
        if (cq->uobject) {
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->arm_db_index);
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->set_ci_db_index);
        }
        mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
        kfree(cq);

        return 0;
}

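/*
 * Translate IB access flags into MPT entry flags.  Local read access
 * is always granted, so e.g. IB_ACCESS_REMOTE_WRITE alone yields
 * remote-write | local-read in the MPT.
 */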
static inline u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
               MTHCA_MPT_FLAG_LOCAL_READ;
}

static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mthca_mr *mr;
        int err;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mthca_mr_alloc_notrans(to_mdev(pd->device),
                                     to_mpd(pd)->pd_num,
                                     convert_access(acc), mr);

        if (err) {
                kfree(mr);
                return ERR_PTR(err);
        }

        return &mr->ibmr;
}

static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
                                       struct ib_phys_buf *buffer_list,
                                       int                 num_phys_buf,
                                       int                 acc,
                                       u64                *iova_start)
{
        struct mthca_mr *mr;
        u64 *page_list;
        u64 total_size;
        u64 mask;
        int shift;
        int npages;
        int err;
        int i, j, n;

        /* First check that we have enough alignment */
        if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
                return ERR_PTR(-EINVAL);

        if (num_phys_buf > 1 &&
            ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
                return ERR_PTR(-EINVAL);

        mask = 0;
        total_size = 0;
        for (i = 0; i < num_phys_buf; ++i) {
                if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
                        return ERR_PTR(-EINVAL);
                if (i != 0 && i != num_phys_buf - 1 &&
                    (buffer_list[i].size & ~PAGE_MASK))
                        return ERR_PTR(-EINVAL);

                total_size += buffer_list[i].size;
                if (i > 0)
                        mask |= buffer_list[i].addr;
        }

        /* Find largest page shift we can use to cover buffers */
        for (shift = PAGE_SHIFT; shift < 31; ++shift)
                if (num_phys_buf > 1) {
                        if ((1ULL << shift) & mask)
                                break;
                } else {
                        if (1ULL << shift >=
                            buffer_list[0].size +
                            (buffer_list[0].addr & ((1ULL << shift) - 1)))
                                break;
                }

        buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
        buffer_list[0].addr &= ~0ull << shift;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        npages = 0;
        for (i = 0; i < num_phys_buf; ++i)
                npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

        if (!npages)
                return &mr->ibmr;

        page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
        if (!page_list) {
                kfree(mr);
                return ERR_PTR(-ENOMEM);
        }

        n = 0;
        for (i = 0; i < num_phys_buf; ++i)
                for (j = 0;
                     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
                     ++j)
                        page_list[n++] = buffer_list[i].addr + ((u64) j << shift);

        mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
                  "in PD %x; shift %d, npages %d.\n",
                  (unsigned long long) buffer_list[0].addr,
                  (unsigned long long) *iova_start,
                  to_mpd(pd)->pd_num,
                  shift, npages);

        err = mthca_mr_alloc_phys(to_mdev(pd->device),
                                  to_mpd(pd)->pd_num,
                                  page_list, shift, npages,
                                  *iova_start, total_size,
                                  convert_access(acc), mr);

        if (err) {
                kfree(page_list);
                kfree(mr);
                return ERR_PTR(err);
        }

        kfree(page_list);
        return &mr->ibmr;
}

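/*
 * Register a userspace region: walk the pinned umem chunks, write the
 * DMA addresses into the MTT in batches sized to fit a mailbox, then
 * create the MR itself.  One page of scratch space is reused for all
 * WRITE_MTT batches.
 */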
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
                                       int acc, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(pd->device);
        struct ib_umem_chunk *chunk;
        struct mthca_mr *mr;
        u64 *pages;
        int shift, n, len;
        int i, j, k;
        int err = 0;

        shift = ffs(region->page_size) - 1;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        n = 0;
        list_for_each_entry(chunk, &region->chunk_list, list)
                n += chunk->nents;

        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
                goto err;
        }

        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages) {
                err = -ENOMEM;
                goto err_mtt;
        }

        i = n = 0;

        list_for_each_entry(chunk, &region->chunk_list, list)
                for (j = 0; j < chunk->nmap; ++j) {
                        len = sg_dma_len(&chunk->page_list[j]) >> shift;
                        for (k = 0; k < len; ++k) {
                                pages[i++] = sg_dma_address(&chunk->page_list[j]) +
                                        region->page_size * k;
                                /*
                                 * Be friendly to WRITE_MTT command
                                 * and leave two empty slots for the
                                 * index and reserved fields of the
                                 * mailbox.
                                 */
                                if (i == PAGE_SIZE / sizeof (u64) - 2) {
                                        err = mthca_write_mtt(dev, mr->mtt,
                                                              n, pages, i);
                                        if (err)
                                                goto mtt_done;
                                        n += i;
                                        i = 0;
                                }
                        }
                }

        if (i)
                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
mtt_done:
        free_page((unsigned long) pages);
        if (err)
                goto err_mtt;

        err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
                             region->length, convert_access(acc), mr);

        if (err)
                goto err_mtt;

        return &mr->ibmr;

err_mtt:
        mthca_free_mtt(dev, mr->mtt);

err:
        kfree(mr);
        return ERR_PTR(err);
}

static int mthca_dereg_mr(struct ib_mr *mr)
{
        struct mthca_mr *mmr = to_mmr(mr);
        mthca_free_mr(to_mdev(mr->device), mmr);
        kfree(mmr);
        return 0;
}

static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                                      struct ib_fmr_attr *fmr_attr)
{
        struct mthca_fmr *fmr;
        int err;

        fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
        if (!fmr)
                return ERR_PTR(-ENOMEM);

        memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
        err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
                              convert_access(mr_access_flags), fmr);

        if (err) {
                kfree(fmr);
                return ERR_PTR(err);
        }

        return &fmr->ibmr;
}

static int mthca_dealloc_fmr(struct ib_fmr *fmr)
{
        struct mthca_fmr *mfmr = to_mfmr(fmr);
        int err;

        err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
        if (err)
                return err;

        kfree(mfmr);
        return 0;
}

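/*
 * All FMRs in the list must belong to a single device, since one
 * SYNC_TPT command flushes them all.  On mem-free HCAs the unmaps are
 * plain memory updates, so a write barrier is needed to order them
 * before the SYNC_TPT command is posted.
 */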
static int mthca_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *fmr;
        int err;
        u8 status;
        struct mthca_dev *mdev = NULL;

        list_for_each_entry(fmr, fmr_list, list) {
                if (mdev && to_mdev(fmr->device) != mdev)
                        return -EINVAL;
                mdev = to_mdev(fmr->device);
        }

        if (!mdev)
                return 0;

        if (mthca_is_memfree(mdev)) {
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));

                wmb();
        } else
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));

        err = mthca_SYNC_TPT(mdev, &status);
        if (err)
                return err;
        if (status)
                return -EINVAL;
        return 0;
}

static ssize_t show_rev(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        return sprintf(buf, "%x\n", dev->rev_id);
}

static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
                       (int) (dev->fw_ver >> 16) & 0xffff,
                       (int) dev->fw_ver & 0xffff);
}

static ssize_t show_hca(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        switch (dev->pdev->device) {
        case PCI_DEVICE_ID_MELLANOX_TAVOR:
                return sprintf(buf, "MT23108\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
                return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL:
                return sprintf(buf, "MT25208\n");
        case PCI_DEVICE_ID_MELLANOX_SINAI:
        case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
                return sprintf(buf, "MT25204\n");
        default:
                return sprintf(buf, "unknown\n");
        }
}

static ssize_t show_board(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}

static CLASS_DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static CLASS_DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);

static struct class_device_attribute *mthca_class_attributes[] = {
        &class_device_attr_hw_rev,
        &class_device_attr_fw_ver,
        &class_device_attr_hca_type,
        &class_device_attr_board_id
};

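/*
 * Fill in the ib_device method table and register with the IB core.
 * SRQ and FMR methods are only wired up when the device reports the
 * corresponding capability, and the data-path entry points differ
 * between mem-free (Arbel) and Tavor-style HCAs.
 */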
int mthca_register_device(struct mthca_dev *dev)
{
        int ret;
        int i;

        strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner                = THIS_MODULE;

        dev->ib_dev.uverbs_abi_ver       = MTHCA_UVERBS_ABI_VERSION;
        dev->ib_dev.node_type            = IB_NODE_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
        dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
        dev->ib_dev.query_device         = mthca_query_device;
        dev->ib_dev.query_port           = mthca_query_port;
        dev->ib_dev.modify_port          = mthca_modify_port;
        dev->ib_dev.query_pkey           = mthca_query_pkey;
        dev->ib_dev.query_gid            = mthca_query_gid;
        dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
        dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
        dev->ib_dev.mmap                 = mthca_mmap_uar;
        dev->ib_dev.alloc_pd             = mthca_alloc_pd;
        dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
        dev->ib_dev.create_ah            = mthca_ah_create;
        dev->ib_dev.destroy_ah           = mthca_ah_destroy;

        if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
                dev->ib_dev.create_srq           = mthca_create_srq;
                dev->ib_dev.modify_srq           = mthca_modify_srq;
                dev->ib_dev.destroy_srq          = mthca_destroy_srq;

                if (mthca_is_memfree(dev))
                        dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
                else
                        dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
        }

        dev->ib_dev.create_qp            = mthca_create_qp;
        dev->ib_dev.modify_qp            = mthca_modify_qp;
        dev->ib_dev.destroy_qp           = mthca_destroy_qp;
        dev->ib_dev.create_cq            = mthca_create_cq;
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
        dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;

        if (dev->mthca_flags & MTHCA_FLAG_FMR) {
                dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
                dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
                dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
                if (mthca_is_memfree(dev))
                        dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
                else
                        dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
        }

        dev->ib_dev.attach_mcast         = mthca_multicast_attach;
        dev->ib_dev.detach_mcast         = mthca_multicast_detach;
        dev->ib_dev.process_mad          = mthca_process_mad;

        if (mthca_is_memfree(dev)) {
                dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
                dev->ib_dev.post_send     = mthca_arbel_post_send;
                dev->ib_dev.post_recv     = mthca_arbel_post_receive;
        } else {
                dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
                dev->ib_dev.post_send     = mthca_tavor_post_send;
                dev->ib_dev.post_recv     = mthca_tavor_post_receive;
        }

        init_MUTEX(&dev->cap_mask_mutex);

        ret = ib_register_device(&dev->ib_dev);
        if (ret)
                return ret;

        for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
                ret = class_device_create_file(&dev->ib_dev.class_dev,
                                               mthca_class_attributes[i]);
                if (ret) {
                        ib_unregister_device(&dev->ib_dev);
                        return ret;
                }
        }

        return 0;
}

void mthca_unregister_device(struct mthca_dev *dev)
{
        ib_unregister_device(&dev->ib_dev);
}