RDMA/cma: Override default responder_resources with user value
drivers/infiniband/core/cma.c (pandora-kernel.git)
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  *
30  */
31
32 #include <linux/completion.h>
33 #include <linux/in.h>
34 #include <linux/in6.h>
35 #include <linux/mutex.h>
36 #include <linux/random.h>
37 #include <linux/idr.h>
38 #include <linux/inetdevice.h>
39
40 #include <net/tcp.h>
41
42 #include <rdma/rdma_cm.h>
43 #include <rdma/rdma_cm_ib.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/ib_cm.h>
46 #include <rdma/ib_sa.h>
47 #include <rdma/iw_cm.h>
48
49 MODULE_AUTHOR("Sean Hefty");
50 MODULE_DESCRIPTION("Generic RDMA CM Agent");
51 MODULE_LICENSE("Dual BSD/GPL");
52
53 #define CMA_CM_RESPONSE_TIMEOUT 20
54 #define CMA_MAX_CM_RETRIES 15
55 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
56
57 static void cma_add_one(struct ib_device *device);
58 static void cma_remove_one(struct ib_device *device);
59
60 static struct ib_client cma_client = {
61         .name   = "cma",
62         .add    = cma_add_one,
63         .remove = cma_remove_one
64 };
65
66 static struct ib_sa_client sa_client;
67 static struct rdma_addr_client addr_client;
68 static LIST_HEAD(dev_list);
69 static LIST_HEAD(listen_any_list);
70 static DEFINE_MUTEX(lock);
71 static struct workqueue_struct *cma_wq;
72 static DEFINE_IDR(sdp_ps);
73 static DEFINE_IDR(tcp_ps);
74 static DEFINE_IDR(udp_ps);
75 static DEFINE_IDR(ipoib_ps);
76 static int next_port;
77
78 struct cma_device {
79         struct list_head        list;
80         struct ib_device        *device;
81         struct completion       comp;
82         atomic_t                refcount;
83         struct list_head        id_list;
84 };
85
86 enum cma_state {
87         CMA_IDLE,
88         CMA_ADDR_QUERY,
89         CMA_ADDR_RESOLVED,
90         CMA_ROUTE_QUERY,
91         CMA_ROUTE_RESOLVED,
92         CMA_CONNECT,
93         CMA_DISCONNECT,
94         CMA_ADDR_BOUND,
95         CMA_LISTEN,
96         CMA_DEVICE_REMOVAL,
97         CMA_DESTROYING
98 };
99
100 struct rdma_bind_list {
101         struct idr              *ps;
102         struct hlist_head       owners;
103         unsigned short          port;
104 };
105
106 /*
107  * Device removal can occur at anytime, so we need extra handling to
108  * serialize notifying the user of device removal with other callbacks.
109  * We do this by disabling removal notification while a callback is in process,
110  * and reporting it after the callback completes.
111  */
112 struct rdma_id_private {
113         struct rdma_cm_id       id;
114
115         struct rdma_bind_list   *bind_list;
116         struct hlist_node       node;
117         struct list_head        list; /* listen_any_list or cma_device.list */
118         struct list_head        listen_list; /* per device listens */
119         struct cma_device       *cma_dev;
120         struct list_head        mc_list;
121
122         int                     internal_id;
123         enum cma_state          state;
124         spinlock_t              lock;
125         struct mutex            qp_mutex;
126
127         struct completion       comp;
128         atomic_t                refcount;
129         wait_queue_head_t       wait_remove;
130         atomic_t                dev_remove;
131
132         int                     backlog;
133         int                     timeout_ms;
134         struct ib_sa_query      *query;
135         int                     query_id;
136         union {
137                 struct ib_cm_id *ib;
138                 struct iw_cm_id *iw;
139         } cm_id;
140
141         u32                     seq_num;
142         u32                     qkey;
143         u32                     qp_num;
144         u8                      srq;
145         u8                      tos;
146 };
147
148 struct cma_multicast {
149         struct rdma_id_private *id_priv;
150         union {
151                 struct ib_sa_multicast *ib;
152         } multicast;
153         struct list_head        list;
154         void                    *context;
155         struct sockaddr         addr;
156         u8                      pad[sizeof(struct sockaddr_in6) -
157                                     sizeof(struct sockaddr)];
158 };
159
160 struct cma_work {
161         struct work_struct      work;
162         struct rdma_id_private  *id;
163         enum cma_state          old_state;
164         enum cma_state          new_state;
165         struct rdma_cm_event    event;
166 };
167
168 union cma_ip_addr {
169         struct in6_addr ip6;
170         struct {
171                 __u32 pad[3];
172                 __u32 addr;
173         } ip4;
174 };
175
176 struct cma_hdr {
177         u8 cma_version;
178         u8 ip_version;  /* IP version: 7:4 */
179         __u16 port;
180         union cma_ip_addr src_addr;
181         union cma_ip_addr dst_addr;
182 };
183
184 struct sdp_hh {
185         u8 bsdh[16];
186         u8 sdp_version; /* Major version: 7:4 */
187         u8 ip_version;  /* IP version: 7:4 */
188         u8 sdp_specific1[10];
189         __u16 port;
190         __u16 sdp_specific2;
191         union cma_ip_addr src_addr;
192         union cma_ip_addr dst_addr;
193 };
194
195 struct sdp_hah {
196         u8 bsdh[16];
197         u8 sdp_version;
198 };
199
200 #define CMA_VERSION 0x00
201 #define SDP_MAJ_VERSION 0x2
202
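/*
 * State helpers (a descriptive note, not in the original source): cma_comp()
 * tests the current state, cma_comp_exch() atomically moves from an expected
 * state to a new one, and cma_exch() unconditionally swaps in a new state,
 * all under id_priv->lock.
 */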
203 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
204 {
205         unsigned long flags;
206         int ret;
207
208         spin_lock_irqsave(&id_priv->lock, flags);
209         ret = (id_priv->state == comp);
210         spin_unlock_irqrestore(&id_priv->lock, flags);
211         return ret;
212 }
213
214 static int cma_comp_exch(struct rdma_id_private *id_priv,
215                          enum cma_state comp, enum cma_state exch)
216 {
217         unsigned long flags;
218         int ret;
219
220         spin_lock_irqsave(&id_priv->lock, flags);
221         if ((ret = (id_priv->state == comp)))
222                 id_priv->state = exch;
223         spin_unlock_irqrestore(&id_priv->lock, flags);
224         return ret;
225 }
226
227 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
228                                enum cma_state exch)
229 {
230         unsigned long flags;
231         enum cma_state old;
232
233         spin_lock_irqsave(&id_priv->lock, flags);
234         old = id_priv->state;
235         id_priv->state = exch;
236         spin_unlock_irqrestore(&id_priv->lock, flags);
237         return old;
238 }
239
240 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
241 {
242         return hdr->ip_version >> 4;
243 }
244
245 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
246 {
247         hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
248 }
249
250 static inline u8 sdp_get_majv(u8 sdp_version)
251 {
252         return sdp_version >> 4;
253 }
254
255 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
256 {
257         return hh->ip_version >> 4;
258 }
259
260 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
261 {
262         hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
263 }
264
265 static inline int cma_is_ud_ps(enum rdma_port_space ps)
266 {
267         return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
268 }
269
270 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
271                               struct cma_device *cma_dev)
272 {
273         atomic_inc(&cma_dev->refcount);
274         id_priv->cma_dev = cma_dev;
275         id_priv->id.device = cma_dev->device;
276         list_add_tail(&id_priv->list, &cma_dev->id_list);
277 }
278
279 static inline void cma_deref_dev(struct cma_device *cma_dev)
280 {
281         if (atomic_dec_and_test(&cma_dev->refcount))
282                 complete(&cma_dev->comp);
283 }
284
285 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
286 {
287         list_del(&id_priv->list);
288         cma_deref_dev(id_priv->cma_dev);
289         id_priv->cma_dev = NULL;
290 }
291
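/*
 * Qkey selection for unreliable datagram port spaces: RDMA_PS_UDP uses the
 * well-known RDMA_UDP_QKEY, while RDMA_PS_IPOIB takes the qkey from the
 * SA-cached multicast member record for the device's MGID.
 */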
292 static int cma_set_qkey(struct ib_device *device, u8 port_num,
293                         enum rdma_port_space ps,
294                         struct rdma_dev_addr *dev_addr, u32 *qkey)
295 {
296         struct ib_sa_mcmember_rec rec;
297         int ret = 0;
298
299         switch (ps) {
300         case RDMA_PS_UDP:
301                 *qkey = RDMA_UDP_QKEY;
302                 break;
303         case RDMA_PS_IPOIB:
304                 ib_addr_get_mgid(dev_addr, &rec.mgid);
305                 ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
306                 *qkey = be32_to_cpu(rec.qkey);
307                 break;
308         default:
309                 break;
310         }
311         return ret;
312 }
313
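/*
 * Bind the ID to the RDMA device that owns the source GID resolved into
 * dev_addr, and set up the qkey for UD port spaces.  Callers serialize
 * against device addition/removal with the global cma lock.
 */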
314 static int cma_acquire_dev(struct rdma_id_private *id_priv)
315 {
316         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
317         struct cma_device *cma_dev;
318         union ib_gid gid;
319         int ret = -ENODEV;
320
321         switch (rdma_node_get_transport(dev_addr->dev_type)) {
322         case RDMA_TRANSPORT_IB:
323                 ib_addr_get_sgid(dev_addr, &gid);
324                 break;
325         case RDMA_TRANSPORT_IWARP:
326                 iw_addr_get_sgid(dev_addr, &gid);
327                 break;
328         default:
329                 return -ENODEV;
330         }
331
332         list_for_each_entry(cma_dev, &dev_list, list) {
333                 ret = ib_find_cached_gid(cma_dev->device, &gid,
334                                          &id_priv->id.port_num, NULL);
335                 if (!ret) {
336                         ret = cma_set_qkey(cma_dev->device,
337                                            id_priv->id.port_num,
338                                            id_priv->id.ps, dev_addr,
339                                            &id_priv->qkey);
340                         if (!ret)
341                                 cma_attach_to_dev(id_priv, cma_dev);
342                         break;
343                 }
344         }
345         return ret;
346 }
347
348 static void cma_deref_id(struct rdma_id_private *id_priv)
349 {
350         if (atomic_dec_and_test(&id_priv->refcount))
351                 complete(&id_priv->comp);
352 }
353
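/*
 * cma_disable_remove()/cma_enable_remove() bracket event callbacks: while
 * dev_remove is elevated, device-removal reporting waits on wait_remove, so
 * a callback cannot race with the ID being torn down underneath it (see the
 * comment above struct rdma_id_private).
 */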
354 static int cma_disable_remove(struct rdma_id_private *id_priv,
355                               enum cma_state state)
356 {
357         unsigned long flags;
358         int ret;
359
360         spin_lock_irqsave(&id_priv->lock, flags);
361         if (id_priv->state == state) {
362                 atomic_inc(&id_priv->dev_remove);
363                 ret = 0;
364         } else
365                 ret = -EINVAL;
366         spin_unlock_irqrestore(&id_priv->lock, flags);
367         return ret;
368 }
369
370 static void cma_enable_remove(struct rdma_id_private *id_priv)
371 {
372         if (atomic_dec_and_test(&id_priv->dev_remove))
373                 wake_up(&id_priv->wait_remove);
374 }
375
376 static int cma_has_cm_dev(struct rdma_id_private *id_priv)
377 {
378         return (id_priv->id.device && id_priv->cm_id.ib);
379 }
380
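/*
 * Allocate a new communication identifier in the CMA_IDLE state.  The
 * returned rdma_cm_id is embedded in a private structure carrying the
 * reference count, state lock, and listen/multicast lists.
 */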
381 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
382                                   void *context, enum rdma_port_space ps)
383 {
384         struct rdma_id_private *id_priv;
385
386         id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
387         if (!id_priv)
388                 return ERR_PTR(-ENOMEM);
389
390         id_priv->state = CMA_IDLE;
391         id_priv->id.context = context;
392         id_priv->id.event_handler = event_handler;
393         id_priv->id.ps = ps;
394         spin_lock_init(&id_priv->lock);
395         mutex_init(&id_priv->qp_mutex);
396         init_completion(&id_priv->comp);
397         atomic_set(&id_priv->refcount, 1);
398         init_waitqueue_head(&id_priv->wait_remove);
399         atomic_set(&id_priv->dev_remove, 0);
400         INIT_LIST_HEAD(&id_priv->listen_list);
401         INIT_LIST_HEAD(&id_priv->mc_list);
402         get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
403
404         return &id_priv->id;
405 }
406 EXPORT_SYMBOL(rdma_create_id);
407
408 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
409 {
410         struct ib_qp_attr qp_attr;
411         int qp_attr_mask, ret;
412
413         qp_attr.qp_state = IB_QPS_INIT;
414         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
415         if (ret)
416                 return ret;
417
418         ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
419         if (ret)
420                 return ret;
421
422         qp_attr.qp_state = IB_QPS_RTR;
423         ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
424         if (ret)
425                 return ret;
426
427         qp_attr.qp_state = IB_QPS_RTS;
428         qp_attr.sq_psn = 0;
429         ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
430
431         return ret;
432 }
433
434 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
435 {
436         struct ib_qp_attr qp_attr;
437         int qp_attr_mask, ret;
438
439         qp_attr.qp_state = IB_QPS_INIT;
440         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
441         if (ret)
442                 return ret;
443
444         return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
445 }
446
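/*
 * Create a QP on the given PD and transition it to a usable state: UD port
 * spaces are moved all the way to RTS here, while connected QPs are left in
 * INIT until connection establishment modifies them further.
 */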
447 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
448                    struct ib_qp_init_attr *qp_init_attr)
449 {
450         struct rdma_id_private *id_priv;
451         struct ib_qp *qp;
452         int ret;
453
454         id_priv = container_of(id, struct rdma_id_private, id);
455         if (id->device != pd->device)
456                 return -EINVAL;
457
458         qp = ib_create_qp(pd, qp_init_attr);
459         if (IS_ERR(qp))
460                 return PTR_ERR(qp);
461
462         if (cma_is_ud_ps(id_priv->id.ps))
463                 ret = cma_init_ud_qp(id_priv, qp);
464         else
465                 ret = cma_init_conn_qp(id_priv, qp);
466         if (ret)
467                 goto err;
468
469         id->qp = qp;
470         id_priv->qp_num = qp->qp_num;
471         id_priv->srq = (qp->srq != NULL);
472         return 0;
473 err:
474         ib_destroy_qp(qp);
475         return ret;
476 }
477 EXPORT_SYMBOL(rdma_create_qp);
478
479 void rdma_destroy_qp(struct rdma_cm_id *id)
480 {
481         struct rdma_id_private *id_priv;
482
483         id_priv = container_of(id, struct rdma_id_private, id);
484         mutex_lock(&id_priv->qp_mutex);
485         ib_destroy_qp(id_priv->id.qp);
486         id_priv->id.qp = NULL;
487         mutex_unlock(&id_priv->qp_mutex);
488 }
489 EXPORT_SYMBOL(rdma_destroy_qp);
490
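/*
 * Transition the QP to INIT and then RTR.  rdma_init_qp_attr() fills in
 * default attributes; when the caller supplies conn_param, the user's
 * responder_resources value overrides the default max_dest_rd_atomic.
 */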
491 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
492                              struct rdma_conn_param *conn_param)
493 {
494         struct ib_qp_attr qp_attr;
495         int qp_attr_mask, ret;
496
497         mutex_lock(&id_priv->qp_mutex);
498         if (!id_priv->id.qp) {
499                 ret = 0;
500                 goto out;
501         }
502
503         /* Need to update QP attributes from default values. */
504         qp_attr.qp_state = IB_QPS_INIT;
505         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
506         if (ret)
507                 goto out;
508
509         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
510         if (ret)
511                 goto out;
512
513         qp_attr.qp_state = IB_QPS_RTR;
514         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
515         if (ret)
516                 goto out;
517
518         if (conn_param)
519                 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
520         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
521 out:
522         mutex_unlock(&id_priv->qp_mutex);
523         return ret;
524 }
525
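/*
 * Transition the QP to RTS, letting a caller-supplied initiator_depth
 * override the default max_rd_atomic in the same way.
 */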
526 static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
527                              struct rdma_conn_param *conn_param)
528 {
529         struct ib_qp_attr qp_attr;
530         int qp_attr_mask, ret;
531
532         mutex_lock(&id_priv->qp_mutex);
533         if (!id_priv->id.qp) {
534                 ret = 0;
535                 goto out;
536         }
537
538         qp_attr.qp_state = IB_QPS_RTS;
539         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
540         if (ret)
541                 goto out;
542
543         if (conn_param)
544                 qp_attr.max_rd_atomic = conn_param->initiator_depth;
545         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
546 out:
547         mutex_unlock(&id_priv->qp_mutex);
548         return ret;
549 }
550
551 static int cma_modify_qp_err(struct rdma_id_private *id_priv)
552 {
553         struct ib_qp_attr qp_attr;
554         int ret;
555
556         mutex_lock(&id_priv->qp_mutex);
557         if (!id_priv->id.qp) {
558                 ret = 0;
559                 goto out;
560         }
561
562         qp_attr.qp_state = IB_QPS_ERR;
563         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
564 out:
565         mutex_unlock(&id_priv->qp_mutex);
566         return ret;
567 }
568
569 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
570                                struct ib_qp_attr *qp_attr, int *qp_attr_mask)
571 {
572         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
573         int ret;
574
575         ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
576                                   ib_addr_get_pkey(dev_addr),
577                                   &qp_attr->pkey_index);
578         if (ret)
579                 return ret;
580
581         qp_attr->port_num = id_priv->id.port_num;
582         *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
583
584         if (cma_is_ud_ps(id_priv->id.ps)) {
585                 qp_attr->qkey = id_priv->qkey;
586                 *qp_attr_mask |= IB_QP_QKEY;
587         } else {
588                 qp_attr->qp_access_flags = 0;
589                 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
590         }
591         return 0;
592 }
593
594 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
595                        int *qp_attr_mask)
596 {
597         struct rdma_id_private *id_priv;
598         int ret = 0;
599
600         id_priv = container_of(id, struct rdma_id_private, id);
601         switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
602         case RDMA_TRANSPORT_IB:
603                 if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
604                         ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
605                 else
606                         ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
607                                                  qp_attr_mask);
608                 if (qp_attr->qp_state == IB_QPS_RTR)
609                         qp_attr->rq_psn = id_priv->seq_num;
610                 break;
611         case RDMA_TRANSPORT_IWARP:
612                 if (!id_priv->cm_id.iw) {
613                         qp_attr->qp_access_flags = 0;
614                         *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
615                 } else
616                         ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
617                                                  qp_attr_mask);
618                 break;
619         default:
620                 ret = -ENOSYS;
621                 break;
622         }
623
624         return ret;
625 }
626 EXPORT_SYMBOL(rdma_init_qp_attr);
627
628 static inline int cma_zero_addr(struct sockaddr *addr)
629 {
630         struct in6_addr *ip6;
631
632         if (addr->sa_family == AF_INET)
633                 return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
634         else {
635                 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
636                 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
637                         ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
638         }
639 }
640
641 static inline int cma_loopback_addr(struct sockaddr *addr)
642 {
643         return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
644 }
645
646 static inline int cma_any_addr(struct sockaddr *addr)
647 {
648         return cma_zero_addr(addr) || cma_loopback_addr(addr);
649 }
650
651 static inline __be16 cma_port(struct sockaddr *addr)
652 {
653         if (addr->sa_family == AF_INET)
654                 return ((struct sockaddr_in *) addr)->sin_port;
655         else
656                 return ((struct sockaddr_in6 *) addr)->sin6_port;
657 }
658
659 static inline int cma_any_port(struct sockaddr *addr)
660 {
661         return !cma_port(addr);
662 }
663
664 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
665                             u8 *ip_ver, __u16 *port,
666                             union cma_ip_addr **src, union cma_ip_addr **dst)
667 {
668         switch (ps) {
669         case RDMA_PS_SDP:
670                 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
671                     SDP_MAJ_VERSION)
672                         return -EINVAL;
673
674                 *ip_ver = sdp_get_ip_ver(hdr);
675                 *port   = ((struct sdp_hh *) hdr)->port;
676                 *src    = &((struct sdp_hh *) hdr)->src_addr;
677                 *dst    = &((struct sdp_hh *) hdr)->dst_addr;
678                 break;
679         default:
680                 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
681                         return -EINVAL;
682
683                 *ip_ver = cma_get_ip_ver(hdr);
684                 *port   = ((struct cma_hdr *) hdr)->port;
685                 *src    = &((struct cma_hdr *) hdr)->src_addr;
686                 *dst    = &((struct cma_hdr *) hdr)->dst_addr;
687                 break;
688         }
689
690         if (*ip_ver != 4 && *ip_ver != 6)
691                 return -EINVAL;
692         return 0;
693 }
694
695 static void cma_save_net_info(struct rdma_addr *addr,
696                               struct rdma_addr *listen_addr,
697                               u8 ip_ver, __u16 port,
698                               union cma_ip_addr *src, union cma_ip_addr *dst)
699 {
700         struct sockaddr_in *listen4, *ip4;
701         struct sockaddr_in6 *listen6, *ip6;
702
703         switch (ip_ver) {
704         case 4:
705                 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
706                 ip4 = (struct sockaddr_in *) &addr->src_addr;
707                 ip4->sin_family = listen4->sin_family;
708                 ip4->sin_addr.s_addr = dst->ip4.addr;
709                 ip4->sin_port = listen4->sin_port;
710
711                 ip4 = (struct sockaddr_in *) &addr->dst_addr;
712                 ip4->sin_family = listen4->sin_family;
713                 ip4->sin_addr.s_addr = src->ip4.addr;
714                 ip4->sin_port = port;
715                 break;
716         case 6:
717                 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
718                 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
719                 ip6->sin6_family = listen6->sin6_family;
720                 ip6->sin6_addr = dst->ip6;
721                 ip6->sin6_port = listen6->sin6_port;
722
723                 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
724                 ip6->sin6_family = listen6->sin6_family;
725                 ip6->sin6_addr = src->ip6;
726                 ip6->sin6_port = port;
727                 break;
728         default:
729                 break;
730         }
731 }
732
733 static inline int cma_user_data_offset(enum rdma_port_space ps)
734 {
735         switch (ps) {
736         case RDMA_PS_SDP:
737                 return 0;
738         default:
739                 return sizeof(struct cma_hdr);
740         }
741 }
742
743 static void cma_cancel_route(struct rdma_id_private *id_priv)
744 {
745         switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
746         case RDMA_TRANSPORT_IB:
747                 if (id_priv->query)
748                         ib_sa_cancel_query(id_priv->query_id, id_priv->query);
749                 break;
750         default:
751                 break;
752         }
753 }
754
755 static void cma_cancel_listens(struct rdma_id_private *id_priv)
756 {
757         struct rdma_id_private *dev_id_priv;
758
759         /*
760          * Remove from listen_any_list to prevent added devices from spawning
761          * additional listen requests.
762          */
763         mutex_lock(&lock);
764         list_del(&id_priv->list);
765
766         while (!list_empty(&id_priv->listen_list)) {
767                 dev_id_priv = list_entry(id_priv->listen_list.next,
768                                          struct rdma_id_private, listen_list);
769                 /* sync with device removal to avoid duplicate destruction */
770                 list_del_init(&dev_id_priv->list);
771                 list_del(&dev_id_priv->listen_list);
772                 mutex_unlock(&lock);
773
774                 rdma_destroy_id(&dev_id_priv->id);
775                 mutex_lock(&lock);
776         }
777         mutex_unlock(&lock);
778 }
779
780 static void cma_cancel_operation(struct rdma_id_private *id_priv,
781                                  enum cma_state state)
782 {
783         switch (state) {
784         case CMA_ADDR_QUERY:
785                 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
786                 break;
787         case CMA_ROUTE_QUERY:
788                 cma_cancel_route(id_priv);
789                 break;
790         case CMA_LISTEN:
791                 if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
792                     !id_priv->cma_dev)
793                         cma_cancel_listens(id_priv);
794                 break;
795         default:
796                 break;
797         }
798 }
799
800 static void cma_release_port(struct rdma_id_private *id_priv)
801 {
802         struct rdma_bind_list *bind_list = id_priv->bind_list;
803
804         if (!bind_list)
805                 return;
806
807         mutex_lock(&lock);
808         hlist_del(&id_priv->node);
809         if (hlist_empty(&bind_list->owners)) {
810                 idr_remove(bind_list->ps, bind_list->port);
811                 kfree(bind_list);
812         }
813         mutex_unlock(&lock);
814 }
815
816 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
817 {
818         struct cma_multicast *mc;
819
820         while (!list_empty(&id_priv->mc_list)) {
821                 mc = container_of(id_priv->mc_list.next,
822                                   struct cma_multicast, list);
823                 list_del(&mc->list);
824                 ib_sa_free_multicast(mc->multicast.ib);
825                 kfree(mc);
826         }
827 }
828
829 void rdma_destroy_id(struct rdma_cm_id *id)
830 {
831         struct rdma_id_private *id_priv;
832         enum cma_state state;
833
834         id_priv = container_of(id, struct rdma_id_private, id);
835         state = cma_exch(id_priv, CMA_DESTROYING);
836         cma_cancel_operation(id_priv, state);
837
838         mutex_lock(&lock);
839         if (id_priv->cma_dev) {
840                 mutex_unlock(&lock);
841                 switch (rdma_node_get_transport(id->device->node_type)) {
842                 case RDMA_TRANSPORT_IB:
843                         if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
844                                 ib_destroy_cm_id(id_priv->cm_id.ib);
845                         break;
846                 case RDMA_TRANSPORT_IWARP:
847                         if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
848                                 iw_destroy_cm_id(id_priv->cm_id.iw);
849                         break;
850                 default:
851                         break;
852                 }
853                 cma_leave_mc_groups(id_priv);
854                 mutex_lock(&lock);
855                 cma_detach_from_dev(id_priv);
856         }
857         mutex_unlock(&lock);
858
859         cma_release_port(id_priv);
860         cma_deref_id(id_priv);
861         wait_for_completion(&id_priv->comp);
862
863         if (id_priv->internal_id)
864                 cma_deref_id(id_priv->id.context);
865
866         kfree(id_priv->id.route.path_rec);
867         kfree(id_priv);
868 }
869 EXPORT_SYMBOL(rdma_destroy_id);
870
871 static int cma_rep_recv(struct rdma_id_private *id_priv)
872 {
873         int ret;
874
875         ret = cma_modify_qp_rtr(id_priv, NULL);
876         if (ret)
877                 goto reject;
878
879         ret = cma_modify_qp_rts(id_priv, NULL);
880         if (ret)
881                 goto reject;
882
883         ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
884         if (ret)
885                 goto reject;
886
887         return 0;
888 reject:
889         cma_modify_qp_err(id_priv);
890         ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
891                        NULL, 0, NULL, 0);
892         return ret;
893 }
894
895 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
896 {
897         if (id_priv->id.ps == RDMA_PS_SDP &&
898             sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
899             SDP_MAJ_VERSION)
900                 return -EINVAL;
901
902         return 0;
903 }
904
905 static void cma_set_rep_event_data(struct rdma_cm_event *event,
906                                    struct ib_cm_rep_event_param *rep_data,
907                                    void *private_data)
908 {
909         event->param.conn.private_data = private_data;
910         event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
911         event->param.conn.responder_resources = rep_data->responder_resources;
912         event->param.conn.initiator_depth = rep_data->initiator_depth;
913         event->param.conn.flow_control = rep_data->flow_control;
914         event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
915         event->param.conn.srq = rep_data->srq;
916         event->param.conn.qp_num = rep_data->remote_qpn;
917 }
918
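/*
 * Per-connection IB CM event handler: translates IB CM events (REP, RTU,
 * DREQ, REJ, errors) into RDMA CM events and, for a REP on a connected QP,
 * drives the QP through RTR/RTS before reporting ESTABLISHED.
 */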
919 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
920 {
921         struct rdma_id_private *id_priv = cm_id->context;
922         struct rdma_cm_event event;
923         int ret = 0;
924
925         if (cma_disable_remove(id_priv, CMA_CONNECT))
926                 return 0;
927
928         memset(&event, 0, sizeof event);
929         switch (ib_event->event) {
930         case IB_CM_REQ_ERROR:
931         case IB_CM_REP_ERROR:
932                 event.event = RDMA_CM_EVENT_UNREACHABLE;
933                 event.status = -ETIMEDOUT;
934                 break;
935         case IB_CM_REP_RECEIVED:
936                 event.status = cma_verify_rep(id_priv, ib_event->private_data);
937                 if (event.status)
938                         event.event = RDMA_CM_EVENT_CONNECT_ERROR;
939                 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
940                         event.status = cma_rep_recv(id_priv);
941                         event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
942                                                      RDMA_CM_EVENT_ESTABLISHED;
943                 } else
944                         event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
945                 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
946                                        ib_event->private_data);
947                 break;
948         case IB_CM_RTU_RECEIVED:
949         case IB_CM_USER_ESTABLISHED:
950                 event.event = RDMA_CM_EVENT_ESTABLISHED;
951                 break;
952         case IB_CM_DREQ_ERROR:
953                 event.status = -ETIMEDOUT; /* fall through */
954         case IB_CM_DREQ_RECEIVED:
955         case IB_CM_DREP_RECEIVED:
956                 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
957                         goto out;
958                 event.event = RDMA_CM_EVENT_DISCONNECTED;
959                 break;
960         case IB_CM_TIMEWAIT_EXIT:
961         case IB_CM_MRA_RECEIVED:
962                 /* ignore event */
963                 goto out;
964         case IB_CM_REJ_RECEIVED:
965                 cma_modify_qp_err(id_priv);
966                 event.status = ib_event->param.rej_rcvd.reason;
967                 event.event = RDMA_CM_EVENT_REJECTED;
968                 event.param.conn.private_data = ib_event->private_data;
969                 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
970                 break;
971         default:
972                 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
973                        ib_event->event);
974                 goto out;
975         }
976
977         ret = id_priv->id.event_handler(&id_priv->id, &event);
978         if (ret) {
979                 /* Destroy the CM ID by returning a non-zero value. */
980                 id_priv->cm_id.ib = NULL;
981                 cma_exch(id_priv, CMA_DESTROYING);
982                 cma_enable_remove(id_priv);
983                 rdma_destroy_id(&id_priv->id);
984                 return ret;
985         }
986 out:
987         cma_enable_remove(id_priv);
988         return ret;
989 }
990
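/*
 * Build a child rdma_cm_id for an incoming connection request: copy the
 * addressing information out of the request's private data header and the
 * primary/alternate path records out of the REQ parameters.
 */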
991 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
992                                                struct ib_cm_event *ib_event)
993 {
994         struct rdma_id_private *id_priv;
995         struct rdma_cm_id *id;
996         struct rdma_route *rt;
997         union cma_ip_addr *src, *dst;
998         __u16 port;
999         u8 ip_ver;
1000
1001         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1002                              &ip_ver, &port, &src, &dst))
1003                 goto err;
1004
1005         id = rdma_create_id(listen_id->event_handler, listen_id->context,
1006                             listen_id->ps);
1007         if (IS_ERR(id))
1008                 goto err;
1009
1010         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1011                           ip_ver, port, src, dst);
1012
1013         rt = &id->route;
1014         rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1015         rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1016                                GFP_KERNEL);
1017         if (!rt->path_rec)
1018                 goto destroy_id;
1019
1020         rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1021         if (rt->num_paths == 2)
1022                 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1023
1024         ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1025         ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1026         ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1027         rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
1028
1029         id_priv = container_of(id, struct rdma_id_private, id);
1030         id_priv->state = CMA_CONNECT;
1031         return id_priv;
1032
1033 destroy_id:
1034         rdma_destroy_id(id);
1035 err:
1036         return NULL;
1037 }
1038
1039 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1040                                               struct ib_cm_event *ib_event)
1041 {
1042         struct rdma_id_private *id_priv;
1043         struct rdma_cm_id *id;
1044         union cma_ip_addr *src, *dst;
1045         __u16 port;
1046         u8 ip_ver;
1047         int ret;
1048
1049         id = rdma_create_id(listen_id->event_handler, listen_id->context,
1050                             listen_id->ps);
1051         if (IS_ERR(id))
1052                 return NULL;
1053
1054
1055         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1056                              &ip_ver, &port, &src, &dst))
1057                 goto err;
1058
1059         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1060                           ip_ver, port, src, dst);
1061
1062         ret = rdma_translate_ip(&id->route.addr.src_addr,
1063                                 &id->route.addr.dev_addr);
1064         if (ret)
1065                 goto err;
1066
1067         id_priv = container_of(id, struct rdma_id_private, id);
1068         id_priv->state = CMA_CONNECT;
1069         return id_priv;
1070 err:
1071         rdma_destroy_id(id);
1072         return NULL;
1073 }
1074
1075 static void cma_set_req_event_data(struct rdma_cm_event *event,
1076                                    struct ib_cm_req_event_param *req_data,
1077                                    void *private_data, int offset)
1078 {
1079         event->param.conn.private_data = private_data + offset;
1080         event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1081         event->param.conn.responder_resources = req_data->responder_resources;
1082         event->param.conn.initiator_depth = req_data->initiator_depth;
1083         event->param.conn.flow_control = req_data->flow_control;
1084         event->param.conn.retry_count = req_data->retry_count;
1085         event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1086         event->param.conn.srq = req_data->srq;
1087         event->param.conn.qp_num = req_data->remote_qpn;
1088 }
1089
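/*
 * Listen-side IB CM handler for REQ/SIDR REQ events: create a child ID,
 * bind it to the device the request arrived on, and hand the
 * CONNECT_REQUEST event to the listener's callback.
 */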
1090 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1091 {
1092         struct rdma_id_private *listen_id, *conn_id;
1093         struct rdma_cm_event event;
1094         int offset, ret;
1095
1096         listen_id = cm_id->context;
1097         if (cma_disable_remove(listen_id, CMA_LISTEN))
1098                 return -ECONNABORTED;
1099
1100         memset(&event, 0, sizeof event);
1101         offset = cma_user_data_offset(listen_id->id.ps);
1102         event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1103         if (cma_is_ud_ps(listen_id->id.ps)) {
1104                 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1105                 event.param.ud.private_data = ib_event->private_data + offset;
1106                 event.param.ud.private_data_len =
1107                                 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1108         } else {
1109                 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1110                 conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1111                 cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1112                                        ib_event->private_data, offset);
1113         }
1114         if (!conn_id) {
1115                 ret = -ENOMEM;
1116                 goto out;
1117         }
1118
1119         atomic_inc(&conn_id->dev_remove);
1120         mutex_lock(&lock);
1121         ret = cma_acquire_dev(conn_id);
1122         mutex_unlock(&lock);
1123         if (ret)
1124                 goto release_conn_id;
1125
1126         conn_id->cm_id.ib = cm_id;
1127         cm_id->context = conn_id;
1128         cm_id->cm_handler = cma_ib_handler;
1129
1130         ret = conn_id->id.event_handler(&conn_id->id, &event);
1131         if (!ret) {
1132                 cma_enable_remove(conn_id);
1133                 goto out;
1134         }
1135
1136         /* Destroy the CM ID by returning a non-zero value. */
1137         conn_id->cm_id.ib = NULL;
1138
1139 release_conn_id:
1140         cma_exch(conn_id, CMA_DESTROYING);
1141         cma_enable_remove(conn_id);
1142         rdma_destroy_id(&conn_id->id);
1143
1144 out:
1145         cma_enable_remove(listen_id);
1146         return ret;
1147 }
1148
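/*
 * The IB service ID encodes the RDMA port space above bit 16 and the 16-bit
 * port number in the low 16 bits.
 */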
1149 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1150 {
1151         return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1152 }
1153
1154 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1155                                  struct ib_cm_compare_data *compare)
1156 {
1157         struct cma_hdr *cma_data, *cma_mask;
1158         struct sdp_hh *sdp_data, *sdp_mask;
1159         __u32 ip4_addr;
1160         struct in6_addr ip6_addr;
1161
1162         memset(compare, 0, sizeof *compare);
1163         cma_data = (void *) compare->data;
1164         cma_mask = (void *) compare->mask;
1165         sdp_data = (void *) compare->data;
1166         sdp_mask = (void *) compare->mask;
1167
1168         switch (addr->sa_family) {
1169         case AF_INET:
1170                 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1171                 if (ps == RDMA_PS_SDP) {
1172                         sdp_set_ip_ver(sdp_data, 4);
1173                         sdp_set_ip_ver(sdp_mask, 0xF);
1174                         sdp_data->dst_addr.ip4.addr = ip4_addr;
1175                         sdp_mask->dst_addr.ip4.addr = ~0;
1176                 } else {
1177                         cma_set_ip_ver(cma_data, 4);
1178                         cma_set_ip_ver(cma_mask, 0xF);
1179                         cma_data->dst_addr.ip4.addr = ip4_addr;
1180                         cma_mask->dst_addr.ip4.addr = ~0;
1181                 }
1182                 break;
1183         case AF_INET6:
1184                 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1185                 if (ps == RDMA_PS_SDP) {
1186                         sdp_set_ip_ver(sdp_data, 6);
1187                         sdp_set_ip_ver(sdp_mask, 0xF);
1188                         sdp_data->dst_addr.ip6 = ip6_addr;
1189                         memset(&sdp_mask->dst_addr.ip6, 0xFF,
1190                                sizeof sdp_mask->dst_addr.ip6);
1191                 } else {
1192                         cma_set_ip_ver(cma_data, 6);
1193                         cma_set_ip_ver(cma_mask, 0xF);
1194                         cma_data->dst_addr.ip6 = ip6_addr;
1195                         memset(&cma_mask->dst_addr.ip6, 0xFF,
1196                                sizeof cma_mask->dst_addr.ip6);
1197                 }
1198                 break;
1199         default:
1200                 break;
1201         }
1202 }
1203
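/*
 * Per-connection iWARP CM event handler: maps iw_cm connect replies and
 * close events onto the corresponding RDMA CM events, copying the resolved
 * local/remote sockaddrs back into the route.
 */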
1204 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1205 {
1206         struct rdma_id_private *id_priv = iw_id->context;
1207         struct rdma_cm_event event;
1208         struct sockaddr_in *sin;
1209         int ret = 0;
1210
1211         if (cma_disable_remove(id_priv, CMA_CONNECT))
1212                 return 0;
1213
1214         memset(&event, 0, sizeof event);
1215         switch (iw_event->event) {
1216         case IW_CM_EVENT_CLOSE:
1217                 event.event = RDMA_CM_EVENT_DISCONNECTED;
1218                 break;
1219         case IW_CM_EVENT_CONNECT_REPLY:
1220                 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1221                 *sin = iw_event->local_addr;
1222                 sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1223                 *sin = iw_event->remote_addr;
1224                 switch (iw_event->status) {
1225                 case 0:
1226                         event.event = RDMA_CM_EVENT_ESTABLISHED;
1227                         break;
1228                 case -ECONNRESET:
1229                 case -ECONNREFUSED:
1230                         event.event = RDMA_CM_EVENT_REJECTED;
1231                         break;
1232                 case -ETIMEDOUT:
1233                         event.event = RDMA_CM_EVENT_UNREACHABLE;
1234                         break;
1235                 default:
1236                         event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1237                         break;
1238                 }
1239                 break;
1240         case IW_CM_EVENT_ESTABLISHED:
1241                 event.event = RDMA_CM_EVENT_ESTABLISHED;
1242                 break;
1243         default:
1244                 BUG_ON(1);
1245         }
1246
1247         event.status = iw_event->status;
1248         event.param.conn.private_data = iw_event->private_data;
1249         event.param.conn.private_data_len = iw_event->private_data_len;
1250         ret = id_priv->id.event_handler(&id_priv->id, &event);
1251         if (ret) {
1252                 /* Destroy the CM ID by returning a non-zero value. */
1253                 id_priv->cm_id.iw = NULL;
1254                 cma_exch(id_priv, CMA_DESTROYING);
1255                 cma_enable_remove(id_priv);
1256                 rdma_destroy_id(&id_priv->id);
1257                 return ret;
1258         }
1259
1260         cma_enable_remove(id_priv);
1261         return ret;
1262 }
1263
1264 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1265                                struct iw_cm_event *iw_event)
1266 {
1267         struct rdma_cm_id *new_cm_id;
1268         struct rdma_id_private *listen_id, *conn_id;
1269         struct sockaddr_in *sin;
1270         struct net_device *dev = NULL;
1271         struct rdma_cm_event event;
1272         int ret;
1273         struct ib_device_attr attr;
1274
1275         listen_id = cm_id->context;
1276         if (cma_disable_remove(listen_id, CMA_LISTEN))
1277                 return -ECONNABORTED;
1278
1279         /* Create a new RDMA id for the new IW CM ID */
1280         new_cm_id = rdma_create_id(listen_id->id.event_handler,
1281                                    listen_id->id.context,
1282                                    RDMA_PS_TCP);
1283         if (IS_ERR(new_cm_id)) {
1284                 ret = -ENOMEM;
1285                 goto out;
1286         }
1287         conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1288         atomic_inc(&conn_id->dev_remove);
1289         conn_id->state = CMA_CONNECT;
1290
1291         dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
1292         if (!dev) {
1293                 ret = -EADDRNOTAVAIL;
1294                 cma_enable_remove(conn_id);
1295                 rdma_destroy_id(new_cm_id);
1296                 goto out;
1297         }
1298         ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1299         if (ret) {
1300                 cma_enable_remove(conn_id);
1301                 rdma_destroy_id(new_cm_id);
1302                 goto out;
1303         }
1304
1305         mutex_lock(&lock);
1306         ret = cma_acquire_dev(conn_id);
1307         mutex_unlock(&lock);
1308         if (ret) {
1309                 cma_enable_remove(conn_id);
1310                 rdma_destroy_id(new_cm_id);
1311                 goto out;
1312         }
1313
1314         conn_id->cm_id.iw = cm_id;
1315         cm_id->context = conn_id;
1316         cm_id->cm_handler = cma_iw_handler;
1317
1318         sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1319         *sin = iw_event->local_addr;
1320         sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1321         *sin = iw_event->remote_addr;
1322
1323         ret = ib_query_device(conn_id->id.device, &attr);
1324         if (ret) {
1325                 cma_enable_remove(conn_id);
1326                 rdma_destroy_id(new_cm_id);
1327                 goto out;
1328         }
1329
1330         memset(&event, 0, sizeof event);
1331         event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1332         event.param.conn.private_data = iw_event->private_data;
1333         event.param.conn.private_data_len = iw_event->private_data_len;
1334         event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1335         event.param.conn.responder_resources = attr.max_qp_rd_atom;
1336         ret = conn_id->id.event_handler(&conn_id->id, &event);
1337         if (ret) {
1338                 /* User wants to destroy the CM ID */
1339                 conn_id->cm_id.iw = NULL;
1340                 cma_exch(conn_id, CMA_DESTROYING);
1341                 cma_enable_remove(conn_id);
1342                 rdma_destroy_id(&conn_id->id);
1343         }
1344
1345 out:
1346         if (dev)
1347                 dev_put(dev);
1348         cma_enable_remove(listen_id);
1349         return ret;
1350 }
1351
1352 static int cma_ib_listen(struct rdma_id_private *id_priv)
1353 {
1354         struct ib_cm_compare_data compare_data;
1355         struct sockaddr *addr;
1356         __be64 svc_id;
1357         int ret;
1358
1359         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1360                                             id_priv);
1361         if (IS_ERR(id_priv->cm_id.ib))
1362                 return PTR_ERR(id_priv->cm_id.ib);
1363
1364         addr = &id_priv->id.route.addr.src_addr;
1365         svc_id = cma_get_service_id(id_priv->id.ps, addr);
1366         if (cma_any_addr(addr))
1367                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1368         else {
1369                 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1370                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1371         }
1372
1373         if (ret) {
1374                 ib_destroy_cm_id(id_priv->cm_id.ib);
1375                 id_priv->cm_id.ib = NULL;
1376         }
1377
1378         return ret;
1379 }
1380
1381 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1382 {
1383         int ret;
1384         struct sockaddr_in *sin;
1385
1386         id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1387                                             iw_conn_req_handler,
1388                                             id_priv);
1389         if (IS_ERR(id_priv->cm_id.iw))
1390                 return PTR_ERR(id_priv->cm_id.iw);
1391
1392         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1393         id_priv->cm_id.iw->local_addr = *sin;
1394
1395         ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1396
1397         if (ret) {
1398                 iw_destroy_cm_id(id_priv->cm_id.iw);
1399                 id_priv->cm_id.iw = NULL;
1400         }
1401
1402         return ret;
1403 }
1404
1405 static int cma_listen_handler(struct rdma_cm_id *id,
1406                               struct rdma_cm_event *event)
1407 {
1408         struct rdma_id_private *id_priv = id->context;
1409
1410         id->context = id_priv->id.context;
1411         id->event_handler = id_priv->id.event_handler;
1412         return id_priv->id.event_handler(id, event);
1413 }
1414
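/*
 * For a wildcard listen, each RDMA device gets its own internal child ID
 * that forwards events to the original listener through cma_listen_handler().
 */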
1415 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1416                               struct cma_device *cma_dev)
1417 {
1418         struct rdma_id_private *dev_id_priv;
1419         struct rdma_cm_id *id;
1420         int ret;
1421
1422         id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1423         if (IS_ERR(id))
1424                 return;
1425
1426         dev_id_priv = container_of(id, struct rdma_id_private, id);
1427
1428         dev_id_priv->state = CMA_ADDR_BOUND;
1429         memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1430                ip_addr_size(&id_priv->id.route.addr.src_addr));
1431
1432         cma_attach_to_dev(dev_id_priv, cma_dev);
1433         list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1434         atomic_inc(&id_priv->refcount);
1435         dev_id_priv->internal_id = 1;
1436
1437         ret = rdma_listen(id, id_priv->backlog);
1438         if (ret)
1439                 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1440                        "listening on device %s\n", ret, cma_dev->device->name);
1441 }
1442
1443 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1444 {
1445         struct cma_device *cma_dev;
1446
1447         mutex_lock(&lock);
1448         list_add_tail(&id_priv->list, &listen_any_list);
1449         list_for_each_entry(cma_dev, &dev_list, list)
1450                 cma_listen_on_dev(id_priv, cma_dev);
1451         mutex_unlock(&lock);
1452 }
1453
1454 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1455 {
1456         struct sockaddr_in addr_in;
1457
1458         memset(&addr_in, 0, sizeof addr_in);
1459         addr_in.sin_family = af;
1460         return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1461 }
1462
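/*
 * Start listening for connection requests.  An unbound ID is first bound to
 * the IPv4 any address; an ID without a device listens across all devices on
 * listen_any_list.
 */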
1463 int rdma_listen(struct rdma_cm_id *id, int backlog)
1464 {
1465         struct rdma_id_private *id_priv;
1466         int ret;
1467
1468         id_priv = container_of(id, struct rdma_id_private, id);
1469         if (id_priv->state == CMA_IDLE) {
1470                 ret = cma_bind_any(id, AF_INET);
1471                 if (ret)
1472                         return ret;
1473         }
1474
1475         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1476                 return -EINVAL;
1477
1478         id_priv->backlog = backlog;
1479         if (id->device) {
1480                 switch (rdma_node_get_transport(id->device->node_type)) {
1481                 case RDMA_TRANSPORT_IB:
1482                         ret = cma_ib_listen(id_priv);
1483                         if (ret)
1484                                 goto err;
1485                         break;
1486                 case RDMA_TRANSPORT_IWARP:
1487                         ret = cma_iw_listen(id_priv, backlog);
1488                         if (ret)
1489                                 goto err;
1490                         break;
1491                 default:
1492                         ret = -ENOSYS;
1493                         goto err;
1494                 }
1495         } else
1496                 cma_listen_on_all(id_priv);
1497
1498         return 0;
1499 err:
1500         id_priv->backlog = 0;
1501         cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1502         return ret;
1503 }
1504 EXPORT_SYMBOL(rdma_listen);
1505
1506 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1507 {
1508         struct rdma_id_private *id_priv;
1509
1510         id_priv = container_of(id, struct rdma_id_private, id);
1511         id_priv->tos = (u8) tos;
1512 }
1513 EXPORT_SYMBOL(rdma_set_service_type);
1514
1515 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1516                               void *context)
1517 {
1518         struct cma_work *work = context;
1519         struct rdma_route *route;
1520
1521         route = &work->id->id.route;
1522
1523         if (!status) {
1524                 route->num_paths = 1;
1525                 *route->path_rec = *path_rec;
1526         } else {
1527                 work->old_state = CMA_ROUTE_QUERY;
1528                 work->new_state = CMA_ADDR_RESOLVED;
1529                 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1530                 work->event.status = status;
1531         }
1532
1533         queue_work(cma_wq, &work->work);
1534 }
1535
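/*
 * Issue an SA path record query for the resolved SGID/DGID pair.  The TOS is
 * carried as the QoS class for IPv4; for IPv6 the traffic class is taken
 * from the source address's sin6_flowinfo.
 */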
1536 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1537                               struct cma_work *work)
1538 {
1539         struct rdma_addr *addr = &id_priv->id.route.addr;
1540         struct ib_sa_path_rec path_rec;
1541         ib_sa_comp_mask comp_mask;
1542         struct sockaddr_in6 *sin6;
1543
1544         memset(&path_rec, 0, sizeof path_rec);
1545         ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1546         ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1547         path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1548         path_rec.numb_path = 1;
1549         path_rec.reversible = 1;
1550         path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);
1551
1552         comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1553                     IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1554                     IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1555
1556         if (addr->src_addr.sa_family == AF_INET) {
1557                 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1558                 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1559         } else {
1560                 sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1561                 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1562                 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1563         }
1564
1565         id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1566                                                id_priv->id.port_num, &path_rec,
1567                                                comp_mask, timeout_ms,
1568                                                GFP_KERNEL, cma_query_handler,
1569                                                work, &id_priv->query);
1570
1571         return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1572 }
1573
1574 static void cma_work_handler(struct work_struct *_work)
1575 {
1576         struct cma_work *work = container_of(_work, struct cma_work, work);
1577         struct rdma_id_private *id_priv = work->id;
1578         int destroy = 0;
1579
1580         atomic_inc(&id_priv->dev_remove);
1581         if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1582                 goto out;
1583
1584         if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1585                 cma_exch(id_priv, CMA_DESTROYING);
1586                 destroy = 1;
1587         }
1588 out:
1589         cma_enable_remove(id_priv);
1590         cma_deref_id(id_priv);
1591         if (destroy)
1592                 rdma_destroy_id(&id_priv->id);
1593         kfree(work);
1594 }
1595
1596 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1597 {
1598         struct rdma_route *route = &id_priv->id.route;
1599         struct cma_work *work;
1600         int ret;
1601
1602         work = kzalloc(sizeof *work, GFP_KERNEL);
1603         if (!work)
1604                 return -ENOMEM;
1605
1606         work->id = id_priv;
1607         INIT_WORK(&work->work, cma_work_handler);
1608         work->old_state = CMA_ROUTE_QUERY;
1609         work->new_state = CMA_ROUTE_RESOLVED;
1610         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1611
1612         route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1613         if (!route->path_rec) {
1614                 ret = -ENOMEM;
1615                 goto err1;
1616         }
1617
1618         ret = cma_query_ib_route(id_priv, timeout_ms, work);
1619         if (ret)
1620                 goto err2;
1621
1622         return 0;
1623 err2:
1624         kfree(route->path_rec);
1625         route->path_rec = NULL;
1626 err1:
1627         kfree(work);
1628         return ret;
1629 }
1630
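/**
 * rdma_set_ib_paths - Manually set the IB paths for a connection.
 * @id: Communication identifier with a resolved address.
 * @path_rec: Array of path records to copy into the route.
 * @num_paths: Number of entries in @path_rec.
 *
 * Copies the caller-supplied paths and moves the id straight from the
 * address-resolved to the route-resolved state, bypassing the SA query.
 */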
1631 int rdma_set_ib_paths(struct rdma_cm_id *id,
1632                       struct ib_sa_path_rec *path_rec, int num_paths)
1633 {
1634         struct rdma_id_private *id_priv;
1635         int ret;
1636
1637         id_priv = container_of(id, struct rdma_id_private, id);
1638         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1639                 return -EINVAL;
1640
1641         id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1642         if (!id->route.path_rec) {
1643                 ret = -ENOMEM;
1644                 goto err;
1645         }
1646
1647         memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1648         return 0;
1649 err:
1650         cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1651         return ret;
1652 }
1653 EXPORT_SYMBOL(rdma_set_ib_paths);
1654
1655 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1656 {
1657         struct cma_work *work;
1658
1659         work = kzalloc(sizeof *work, GFP_KERNEL);
1660         if (!work)
1661                 return -ENOMEM;
1662
1663         work->id = id_priv;
1664         INIT_WORK(&work->work, cma_work_handler);
1665         work->old_state = CMA_ROUTE_QUERY;
1666         work->new_state = CMA_ROUTE_RESOLVED;
1667         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1668         queue_work(cma_wq, &work->work);
1669         return 0;
1670 }
1671
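/**
 * rdma_resolve_route - Resolve the route to the destination address.
 * @id: Communication identifier with a resolved address.
 * @timeout_ms: Time to wait for route resolution to complete.
 *
 * For IB transports this issues an SA path record query; for iWARP the
 * route-resolved event is reported directly from a work item.  The
 * result is delivered asynchronously to the id's event handler as
 * RDMA_CM_EVENT_ROUTE_RESOLVED or RDMA_CM_EVENT_ROUTE_ERROR.
 */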
1672 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1673 {
1674         struct rdma_id_private *id_priv;
1675         int ret;
1676
1677         id_priv = container_of(id, struct rdma_id_private, id);
1678         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1679                 return -EINVAL;
1680
1681         atomic_inc(&id_priv->refcount);
1682         switch (rdma_node_get_transport(id->device->node_type)) {
1683         case RDMA_TRANSPORT_IB:
1684                 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1685                 break;
1686         case RDMA_TRANSPORT_IWARP:
1687                 ret = cma_resolve_iw_route(id_priv, timeout_ms);
1688                 break;
1689         default:
1690                 ret = -ENOSYS;
1691                 break;
1692         }
1693         if (ret)
1694                 goto err;
1695
1696         return 0;
1697 err:
1698         cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1699         cma_deref_id(id_priv);
1700         return ret;
1701 }
1702 EXPORT_SYMBOL(rdma_resolve_route);
1703
1704 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1705 {
1706         struct cma_device *cma_dev;
1707         struct ib_port_attr port_attr;
1708         union ib_gid gid;
1709         u16 pkey;
1710         int ret;
1711         u8 p;
1712
1713         mutex_lock(&lock);
1714         if (list_empty(&dev_list)) {
1715                 ret = -ENODEV;
1716                 goto out;
1717         }
1718         list_for_each_entry(cma_dev, &dev_list, list)
1719                 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1720                         if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1721                             port_attr.state == IB_PORT_ACTIVE)
1722                                 goto port_found;
1723
1724         p = 1;
1725         cma_dev = list_entry(dev_list.next, struct cma_device, list);
1726
1727 port_found:
1728         ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1729         if (ret)
1730                 goto out;
1731
1732         ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1733         if (ret)
1734                 goto out;
1735
1736         ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1737         ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1738         id_priv->id.port_num = p;
1739         cma_attach_to_dev(id_priv, cma_dev);
1740 out:
1741         mutex_unlock(&lock);
1742         return ret;
1743 }
1744
1745 static void addr_handler(int status, struct sockaddr *src_addr,
1746                          struct rdma_dev_addr *dev_addr, void *context)
1747 {
1748         struct rdma_id_private *id_priv = context;
1749         struct rdma_cm_event event;
1750
1751         memset(&event, 0, sizeof event);
1752         atomic_inc(&id_priv->dev_remove);
1753
1754         /*
1755          * Grab mutex to block rdma_destroy_id() from removing the device while
1756          * we're trying to acquire it.
1757          */
1758         mutex_lock(&lock);
1759         if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1760                 mutex_unlock(&lock);
1761                 goto out;
1762         }
1763
1764         if (!status && !id_priv->cma_dev)
1765                 status = cma_acquire_dev(id_priv);
1766         mutex_unlock(&lock);
1767
1768         if (status) {
1769                 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1770                         goto out;
1771                 event.event = RDMA_CM_EVENT_ADDR_ERROR;
1772                 event.status = status;
1773         } else {
1774                 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1775                        ip_addr_size(src_addr));
1776                 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1777         }
1778
1779         if (id_priv->id.event_handler(&id_priv->id, &event)) {
1780                 cma_exch(id_priv, CMA_DESTROYING);
1781                 cma_enable_remove(id_priv);
1782                 cma_deref_id(id_priv);
1783                 rdma_destroy_id(&id_priv->id);
1784                 return;
1785         }
1786 out:
1787         cma_enable_remove(id_priv);
1788         cma_deref_id(id_priv);
1789 }
1790
1791 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1792 {
1793         struct cma_work *work;
1794         struct sockaddr_in *src_in, *dst_in;
1795         union ib_gid gid;
1796         int ret;
1797
1798         work = kzalloc(sizeof *work, GFP_KERNEL);
1799         if (!work)
1800                 return -ENOMEM;
1801
1802         if (!id_priv->cma_dev) {
1803                 ret = cma_bind_loopback(id_priv);
1804                 if (ret)
1805                         goto err;
1806         }
1807
1808         ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1809         ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1810
1811         if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1812                 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1813                 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1814                 src_in->sin_family = dst_in->sin_family;
1815                 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1816         }
1817
1818         work->id = id_priv;
1819         INIT_WORK(&work->work, cma_work_handler);
1820         work->old_state = CMA_ADDR_QUERY;
1821         work->new_state = CMA_ADDR_RESOLVED;
1822         work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1823         queue_work(cma_wq, &work->work);
1824         return 0;
1825 err:
1826         kfree(work);
1827         return ret;
1828 }
1829
1830 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1831                          struct sockaddr *dst_addr)
1832 {
1833         if (src_addr && src_addr->sa_family)
1834                 return rdma_bind_addr(id, src_addr);
1835         else
1836                 return cma_bind_any(id, dst_addr->sa_family);
1837 }
1838
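/**
 * rdma_resolve_addr - Resolve destination and optional source addresses.
 * @id: Communication identifier.
 * @src_addr: Source address to bind to, or NULL/unspecified to let the
 *   CM pick one.
 * @dst_addr: Destination address to resolve.
 * @timeout_ms: Time to wait for resolution to complete.
 *
 * Maps the destination IP address to an RDMA address and associates the
 * id with a local device.  A wildcard destination is resolved to the
 * local node itself.  Completion is reported to the event handler as
 * RDMA_CM_EVENT_ADDR_RESOLVED or RDMA_CM_EVENT_ADDR_ERROR.
 */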
1839 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1840                       struct sockaddr *dst_addr, int timeout_ms)
1841 {
1842         struct rdma_id_private *id_priv;
1843         int ret;
1844
1845         id_priv = container_of(id, struct rdma_id_private, id);
1846         if (id_priv->state == CMA_IDLE) {
1847                 ret = cma_bind_addr(id, src_addr, dst_addr);
1848                 if (ret)
1849                         return ret;
1850         }
1851
1852         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1853                 return -EINVAL;
1854
1855         atomic_inc(&id_priv->refcount);
1856         memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1857         if (cma_any_addr(dst_addr))
1858                 ret = cma_resolve_loopback(id_priv);
1859         else
1860                 ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
1861                                       dst_addr, &id->route.addr.dev_addr,
1862                                       timeout_ms, addr_handler, id_priv);
1863         if (ret)
1864                 goto err;
1865
1866         return 0;
1867 err:
1868         cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1869         cma_deref_id(id_priv);
1870         return ret;
1871 }
1872 EXPORT_SYMBOL(rdma_resolve_addr);
1873
1874 static void cma_bind_port(struct rdma_bind_list *bind_list,
1875                           struct rdma_id_private *id_priv)
1876 {
1877         struct sockaddr_in *sin;
1878
1879         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1880         sin->sin_port = htons(bind_list->port);
1881         id_priv->bind_list = bind_list;
1882         hlist_add_head(&id_priv->node, &bind_list->owners);
1883 }
1884
1885 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1886                           unsigned short snum)
1887 {
1888         struct rdma_bind_list *bind_list;
1889         int port, ret;
1890
1891         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1892         if (!bind_list)
1893                 return -ENOMEM;
1894
1895         do {
1896                 ret = idr_get_new_above(ps, bind_list, snum, &port);
1897         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1898
1899         if (ret)
1900                 goto err1;
1901
1902         if (port != snum) {
1903                 ret = -EADDRNOTAVAIL;
1904                 goto err2;
1905         }
1906
1907         bind_list->ps = ps;
1908         bind_list->port = (unsigned short) port;
1909         cma_bind_port(bind_list, id_priv);
1910         return 0;
1911 err2:
1912         idr_remove(ps, port);
1913 err1:
1914         kfree(bind_list);
1915         return ret;
1916 }
1917
1918 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
1919 {
1920         struct rdma_bind_list *bind_list;
1921         int port, ret, low, high;
1922
1923         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1924         if (!bind_list)
1925                 return -ENOMEM;
1926
1927 retry:
1928         /* FIXME: add proper port randomization, as in inet_csk_get_port() */
1929         do {
1930                 ret = idr_get_new_above(ps, bind_list, next_port, &port);
1931         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1932
1933         if (ret)
1934                 goto err1;
1935
1936         inet_get_local_port_range(&low, &high);
1937         if (port > high) {
1938                 if (next_port != low) {
1939                         idr_remove(ps, port);
1940                         next_port = low;
1941                         goto retry;
1942                 }
1943                 ret = -EADDRNOTAVAIL;
1944                 goto err2;
1945         }
1946
1947         if (port == high)
1948                 next_port = low;
1949         else
1950                 next_port = port + 1;
1951
1952         bind_list->ps = ps;
1953         bind_list->port = (unsigned short) port;
1954         cma_bind_port(bind_list, id_priv);
1955         return 0;
1956 err2:
1957         idr_remove(ps, port);
1958 err1:
1959         kfree(bind_list);
1960         return ret;
1961 }
1962
1963 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1964 {
1965         struct rdma_id_private *cur_id;
1966         struct sockaddr_in *sin, *cur_sin;
1967         struct rdma_bind_list *bind_list;
1968         struct hlist_node *node;
1969         unsigned short snum;
1970
1971         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1972         snum = ntohs(sin->sin_port);
1973         if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1974                 return -EACCES;
1975
1976         bind_list = idr_find(ps, snum);
1977         if (!bind_list)
1978                 return cma_alloc_port(ps, id_priv, snum);
1979
1980         /*
1981          * We don't support binding to any address if anyone is bound to
1982          * a specific address on the same port.
1983          */
1984         if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1985                 return -EADDRNOTAVAIL;
1986
1987         hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1988                 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1989                         return -EADDRNOTAVAIL;
1990
1991                 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1992                 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1993                         return -EADDRINUSE;
1994         }
1995
1996         cma_bind_port(bind_list, id_priv);
1997         return 0;
1998 }
1999
2000 static int cma_get_port(struct rdma_id_private *id_priv)
2001 {
2002         struct idr *ps;
2003         int ret;
2004
2005         switch (id_priv->id.ps) {
2006         case RDMA_PS_SDP:
2007                 ps = &sdp_ps;
2008                 break;
2009         case RDMA_PS_TCP:
2010                 ps = &tcp_ps;
2011                 break;
2012         case RDMA_PS_UDP:
2013                 ps = &udp_ps;
2014                 break;
2015         case RDMA_PS_IPOIB:
2016                 ps = &ipoib_ps;
2017                 break;
2018         default:
2019                 return -EPROTONOSUPPORT;
2020         }
2021
2022         mutex_lock(&lock);
2023         if (cma_any_port(&id_priv->id.route.addr.src_addr))
2024                 ret = cma_alloc_any_port(ps, id_priv);
2025         else
2026                 ret = cma_use_port(ps, id_priv);
2027         mutex_unlock(&lock);
2028
2029         return ret;
2030 }
2031
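/**
 * rdma_bind_addr - Bind an RDMA identifier to a source address.
 * @id: Communication identifier to bind.
 * @addr: Local address; only AF_INET is accepted here.
 *
 * Binding to a specific address also attaches the id to a local RDMA
 * device, while binding to the wildcard address defers device
 * selection.  A port is then reserved in the id's port space.
 */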
2032 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2033 {
2034         struct rdma_id_private *id_priv;
2035         int ret;
2036
2037         if (addr->sa_family != AF_INET)
2038                 return -EAFNOSUPPORT;
2039
2040         id_priv = container_of(id, struct rdma_id_private, id);
2041         if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2042                 return -EINVAL;
2043
2044         if (!cma_any_addr(addr)) {
2045                 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2046                 if (ret)
2047                         goto err1;
2048
2049                 mutex_lock(&lock);
2050                 ret = cma_acquire_dev(id_priv);
2051                 mutex_unlock(&lock);
2052                 if (ret)
2053                         goto err1;
2054         }
2055
2056         memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2057         ret = cma_get_port(id_priv);
2058         if (ret)
2059                 goto err2;
2060
2061         return 0;
2062 err2:
2063         if (!cma_any_addr(addr)) {
2064                 mutex_lock(&lock);
2065                 cma_detach_from_dev(id_priv);
2066                 mutex_unlock(&lock);
2067         }
2068 err1:
2069         cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2070         return ret;
2071 }
2072 EXPORT_SYMBOL(rdma_bind_addr);
2073
2074 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2075                           struct rdma_route *route)
2076 {
2077         struct sockaddr_in *src4, *dst4;
2078         struct cma_hdr *cma_hdr;
2079         struct sdp_hh *sdp_hdr;
2080
2081         src4 = (struct sockaddr_in *) &route->addr.src_addr;
2082         dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2083
2084         switch (ps) {
2085         case RDMA_PS_SDP:
2086                 sdp_hdr = hdr;
2087                 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2088                         return -EINVAL;
2089                 sdp_set_ip_ver(sdp_hdr, 4);
2090                 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2091                 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2092                 sdp_hdr->port = src4->sin_port;
2093                 break;
2094         default:
2095                 cma_hdr = hdr;
2096                 cma_hdr->cma_version = CMA_VERSION;
2097                 cma_set_ip_ver(cma_hdr, 4);
2098                 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2099                 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2100                 cma_hdr->port = src4->sin_port;
2101                 break;
2102         }
2103         return 0;
2104 }
2105
2106 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2107                                 struct ib_cm_event *ib_event)
2108 {
2109         struct rdma_id_private *id_priv = cm_id->context;
2110         struct rdma_cm_event event;
2111         struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2112         int ret = 0;
2113
2114         if (cma_disable_remove(id_priv, CMA_CONNECT))
2115                 return 0;
2116
2117         memset(&event, 0, sizeof event);
2118         switch (ib_event->event) {
2119         case IB_CM_SIDR_REQ_ERROR:
2120                 event.event = RDMA_CM_EVENT_UNREACHABLE;
2121                 event.status = -ETIMEDOUT;
2122                 break;
2123         case IB_CM_SIDR_REP_RECEIVED:
2124                 event.param.ud.private_data = ib_event->private_data;
2125                 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2126                 if (rep->status != IB_SIDR_SUCCESS) {
2127                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2128                         event.status = ib_event->param.sidr_rep_rcvd.status;
2129                         break;
2130                 }
2131                 if (id_priv->qkey != rep->qkey) {
2132                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2133                         event.status = -EINVAL;
2134                         break;
2135                 }
2136                 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2137                                      id_priv->id.route.path_rec,
2138                                      &event.param.ud.ah_attr);
2139                 event.param.ud.qp_num = rep->qpn;
2140                 event.param.ud.qkey = rep->qkey;
2141                 event.event = RDMA_CM_EVENT_ESTABLISHED;
2142                 event.status = 0;
2143                 break;
2144         default:
2145                 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2146                        ib_event->event);
2147                 goto out;
2148         }
2149
2150         ret = id_priv->id.event_handler(&id_priv->id, &event);
2151         if (ret) {
2152                 /* Destroy the CM ID by returning a non-zero value. */
2153                 id_priv->cm_id.ib = NULL;
2154                 cma_exch(id_priv, CMA_DESTROYING);
2155                 cma_enable_remove(id_priv);
2156                 rdma_destroy_id(&id_priv->id);
2157                 return ret;
2158         }
2159 out:
2160         cma_enable_remove(id_priv);
2161         return ret;
2162 }
2163
2164 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2165                               struct rdma_conn_param *conn_param)
2166 {
2167         struct ib_cm_sidr_req_param req;
2168         struct rdma_route *route;
2169         int ret;
2170
        memset(&req, 0, sizeof req);
2171         req.private_data_len = sizeof(struct cma_hdr) +
2172                                conn_param->private_data_len;
2173         req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2174         if (!req.private_data)
2175                 return -ENOMEM;
2176
2177         if (conn_param->private_data && conn_param->private_data_len)
2178                 memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2179                        conn_param->private_data, conn_param->private_data_len);
2180
2181         route = &id_priv->id.route;
2182         ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2183         if (ret)
2184                 goto out;
2185
2186         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2187                                             cma_sidr_rep_handler, id_priv);
2188         if (IS_ERR(id_priv->cm_id.ib)) {
2189                 ret = PTR_ERR(id_priv->cm_id.ib);
2190                 goto out;
2191         }
2192
2193         req.path = route->path_rec;
2194         req.service_id = cma_get_service_id(id_priv->id.ps,
2195                                             &route->addr.dst_addr);
2196         req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2197         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2198
2199         ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2200         if (ret) {
2201                 ib_destroy_cm_id(id_priv->cm_id.ib);
2202                 id_priv->cm_id.ib = NULL;
2203         }
2204 out:
2205         kfree(req.private_data);
2206         return ret;
2207 }
2208
2209 static int cma_connect_ib(struct rdma_id_private *id_priv,
2210                           struct rdma_conn_param *conn_param)
2211 {
2212         struct ib_cm_req_param req;
2213         struct rdma_route *route;
2214         void *private_data;
2215         int offset, ret;
2216
2217         memset(&req, 0, sizeof req);
2218         offset = cma_user_data_offset(id_priv->id.ps);
2219         req.private_data_len = offset + conn_param->private_data_len;
2220         private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2221         if (!private_data)
2222                 return -ENOMEM;
2223
2224         if (conn_param->private_data && conn_param->private_data_len)
2225                 memcpy(private_data + offset, conn_param->private_data,
2226                        conn_param->private_data_len);
2227
2228         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2229                                             id_priv);
2230         if (IS_ERR(id_priv->cm_id.ib)) {
2231                 ret = PTR_ERR(id_priv->cm_id.ib);
2232                 goto out;
2233         }
2234
2235         route = &id_priv->id.route;
2236         ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2237         if (ret)
2238                 goto out;
2239         req.private_data = private_data;
2240
2241         req.primary_path = &route->path_rec[0];
2242         if (route->num_paths == 2)
2243                 req.alternate_path = &route->path_rec[1];
2244
2245         req.service_id = cma_get_service_id(id_priv->id.ps,
2246                                             &route->addr.dst_addr);
2247         req.qp_num = id_priv->qp_num;
2248         req.qp_type = IB_QPT_RC;
2249         req.starting_psn = id_priv->seq_num;
2250         req.responder_resources = conn_param->responder_resources;
2251         req.initiator_depth = conn_param->initiator_depth;
2252         req.flow_control = conn_param->flow_control;
2253         req.retry_count = conn_param->retry_count;
2254         req.rnr_retry_count = conn_param->rnr_retry_count;
2255         req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2256         req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2257         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2258         req.srq = id_priv->srq ? 1 : 0;
2259
2260         ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2261 out:
2262         if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2263                 ib_destroy_cm_id(id_priv->cm_id.ib);
2264                 id_priv->cm_id.ib = NULL;
2265         }
2266
2267         kfree(private_data);
2268         return ret;
2269 }
2270
2271 static int cma_connect_iw(struct rdma_id_private *id_priv,
2272                           struct rdma_conn_param *conn_param)
2273 {
2274         struct iw_cm_id *cm_id;
2275         struct sockaddr_in *sin;
2276         int ret;
2277         struct iw_cm_conn_param iw_param;
2278
2279         cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2280         if (IS_ERR(cm_id)) {
2281                 ret = PTR_ERR(cm_id);
2282                 goto out;
2283         }
2284
2285         id_priv->cm_id.iw = cm_id;
2286
2287         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2288         cm_id->local_addr = *sin;
2289
2290         sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
2291         cm_id->remote_addr = *sin;
2292
2293         ret = cma_modify_qp_rtr(id_priv, conn_param);
2294         if (ret)
2295                 goto out;
2296
2297         iw_param.ord = conn_param->initiator_depth;
2298         iw_param.ird = conn_param->responder_resources;
2299         iw_param.private_data = conn_param->private_data;
2300         iw_param.private_data_len = conn_param->private_data_len;
2301         if (id_priv->id.qp)
2302                 iw_param.qpn = id_priv->qp_num;
2303         else
2304                 iw_param.qpn = conn_param->qp_num;
2305         ret = iw_cm_connect(cm_id, &iw_param);
2306 out:
2307         if (ret && !IS_ERR(cm_id)) {
2308                 iw_destroy_cm_id(cm_id);
2309                 id_priv->cm_id.iw = NULL;
2310         }
2311         return ret;
2312 }
2313
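/*
 * Typical active-side call order implied by the state checks in this
 * file (illustrative sketch only; error handling omitted):
 *
 *	rdma_resolve_addr(id, NULL, dst_addr, timeout_ms);
 *	  ... wait for RDMA_CM_EVENT_ADDR_RESOLVED ...
 *	rdma_resolve_route(id, timeout_ms);
 *	  ... wait for RDMA_CM_EVENT_ROUTE_RESOLVED ...
 *	rdma_connect(id, &conn_param);
 *	  ... wait for RDMA_CM_EVENT_ESTABLISHED ...
 */
/**
 * rdma_connect - Initiate an active connection request.
 * @id: Connection identifier with a resolved route.
 * @conn_param: Connection parameters (responder_resources,
 *   initiator_depth, private data, etc.).
 *
 * For IB, connected port spaces send a CM REQ and unreliable-datagram
 * port spaces send a SIDR request; iWARP uses iw_cm_connect().
 */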
2314 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2315 {
2316         struct rdma_id_private *id_priv;
2317         int ret;
2318
2319         id_priv = container_of(id, struct rdma_id_private, id);
2320         if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2321                 return -EINVAL;
2322
2323         if (!id->qp) {
2324                 id_priv->qp_num = conn_param->qp_num;
2325                 id_priv->srq = conn_param->srq;
2326         }
2327
2328         switch (rdma_node_get_transport(id->device->node_type)) {
2329         case RDMA_TRANSPORT_IB:
2330                 if (cma_is_ud_ps(id->ps))
2331                         ret = cma_resolve_ib_udp(id_priv, conn_param);
2332                 else
2333                         ret = cma_connect_ib(id_priv, conn_param);
2334                 break;
2335         case RDMA_TRANSPORT_IWARP:
2336                 ret = cma_connect_iw(id_priv, conn_param);
2337                 break;
2338         default:
2339                 ret = -ENOSYS;
2340                 break;
2341         }
2342         if (ret)
2343                 goto err;
2344
2345         return 0;
2346 err:
2347         cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2348         return ret;
2349 }
2350 EXPORT_SYMBOL(rdma_connect);
2351
2352 static int cma_accept_ib(struct rdma_id_private *id_priv,
2353                          struct rdma_conn_param *conn_param)
2354 {
2355         struct ib_cm_rep_param rep;
2356         int ret;
2357
2358         ret = cma_modify_qp_rtr(id_priv, conn_param);
2359         if (ret)
2360                 goto out;
2361
2362         ret = cma_modify_qp_rts(id_priv, conn_param);
2363         if (ret)
2364                 goto out;
2365
2366         memset(&rep, 0, sizeof rep);
2367         rep.qp_num = id_priv->qp_num;
2368         rep.starting_psn = id_priv->seq_num;
2369         rep.private_data = conn_param->private_data;
2370         rep.private_data_len = conn_param->private_data_len;
2371         rep.responder_resources = conn_param->responder_resources;
2372         rep.initiator_depth = conn_param->initiator_depth;
2373         rep.failover_accepted = 0;
2374         rep.flow_control = conn_param->flow_control;
2375         rep.rnr_retry_count = conn_param->rnr_retry_count;
2376         rep.srq = id_priv->srq ? 1 : 0;
2377
2378         ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2379 out:
2380         return ret;
2381 }
2382
2383 static int cma_accept_iw(struct rdma_id_private *id_priv,
2384                   struct rdma_conn_param *conn_param)
2385 {
2386         struct iw_cm_conn_param iw_param;
2387         int ret;
2388
2389         ret = cma_modify_qp_rtr(id_priv, conn_param);
2390         if (ret)
2391                 return ret;
2392
2393         iw_param.ord = conn_param->initiator_depth;
2394         iw_param.ird = conn_param->responder_resources;
2395         iw_param.private_data = conn_param->private_data;
2396         iw_param.private_data_len = conn_param->private_data_len;
2397         if (id_priv->id.qp) {
2398                 iw_param.qpn = id_priv->qp_num;
2399         } else
2400                 iw_param.qpn = conn_param->qp_num;
2401
2402         return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2403 }
2404
2405 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2406                              enum ib_cm_sidr_status status,
2407                              const void *private_data, int private_data_len)
2408 {
2409         struct ib_cm_sidr_rep_param rep;
2410
2411         memset(&rep, 0, sizeof rep);
2412         rep.status = status;
2413         if (status == IB_SIDR_SUCCESS) {
2414                 rep.qp_num = id_priv->qp_num;
2415                 rep.qkey = id_priv->qkey;
2416         }
2417         rep.private_data = private_data;
2418         rep.private_data_len = private_data_len;
2419
2420         return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2421 }
2422
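/**
 * rdma_accept - Accept a connection request on the passive side.
 * @id: Connection identifier associated with the request.
 * @conn_param: Connection parameters, or NULL to accept using the
 *   values carried in the connect request.
 *
 * The id must be in the connect state.  On failure the QP is moved to
 * the error state and the request is rejected.
 */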
2423 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2424 {
2425         struct rdma_id_private *id_priv;
2426         int ret;
2427
2428         id_priv = container_of(id, struct rdma_id_private, id);
2429         if (!cma_comp(id_priv, CMA_CONNECT))
2430                 return -EINVAL;
2431
2432         if (!id->qp && conn_param) {
2433                 id_priv->qp_num = conn_param->qp_num;
2434                 id_priv->srq = conn_param->srq;
2435         }
2436
2437         switch (rdma_node_get_transport(id->device->node_type)) {
2438         case RDMA_TRANSPORT_IB:
2439                 if (cma_is_ud_ps(id->ps)) {
                             if (conn_param)
2440                                 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2441                                                         conn_param->private_data,
2442                                                         conn_param->private_data_len);
                             else
                                     ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
                                                             NULL, 0);
2443                 } else if (conn_param)
2444                         ret = cma_accept_ib(id_priv, conn_param);
2445                 else
2446                         ret = cma_rep_recv(id_priv);
2447                 break;
2448         case RDMA_TRANSPORT_IWARP:
2449                 ret = cma_accept_iw(id_priv, conn_param);
2450                 break;
2451         default:
2452                 ret = -ENOSYS;
2453                 break;
2454         }
2455
2456         if (ret)
2457                 goto reject;
2458
2459         return 0;
2460 reject:
2461         cma_modify_qp_err(id_priv);
2462         rdma_reject(id, NULL, 0);
2463         return ret;
2464 }
2465 EXPORT_SYMBOL(rdma_accept);
2466
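/**
 * rdma_notify - Notify the RDMA CM of an asynchronous QP event.
 * @id: Connection identifier the event applies to.
 * @event: Asynchronous event type.
 *
 * For IB CAs the event is forwarded to the IB CM; other node types
 * ignore it.
 */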
2467 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2468 {
2469         struct rdma_id_private *id_priv;
2470         int ret;
2471
2472         id_priv = container_of(id, struct rdma_id_private, id);
2473         if (!cma_has_cm_dev(id_priv))
2474                 return -EINVAL;
2475
2476         switch (id->device->node_type) {
2477         case RDMA_NODE_IB_CA:
2478                 ret = ib_cm_notify(id_priv->cm_id.ib, event);
2479                 break;
2480         default:
2481                 ret = 0;
2482                 break;
2483         }
2484         return ret;
2485 }
2486 EXPORT_SYMBOL(rdma_notify);
2487
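/**
 * rdma_reject - Reject a connection request.
 * @id: Connection identifier associated with the request.
 * @private_data: Optional private data to return with the reject.
 * @private_data_len: Size of @private_data in bytes.
 *
 * Unreliable-datagram IB port spaces reply with a SIDR reject, other IB
 * port spaces send a consumer-defined CM REJ, and iWARP uses
 * iw_cm_reject().
 */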
2488 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2489                 u8 private_data_len)
2490 {
2491         struct rdma_id_private *id_priv;
2492         int ret;
2493
2494         id_priv = container_of(id, struct rdma_id_private, id);
2495         if (!cma_has_cm_dev(id_priv))
2496                 return -EINVAL;
2497
2498         switch (rdma_node_get_transport(id->device->node_type)) {
2499         case RDMA_TRANSPORT_IB:
2500                 if (cma_is_ud_ps(id->ps))
2501                         ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2502                                                 private_data, private_data_len);
2503                 else
2504                         ret = ib_send_cm_rej(id_priv->cm_id.ib,
2505                                              IB_CM_REJ_CONSUMER_DEFINED, NULL,
2506                                              0, private_data, private_data_len);
2507                 break;
2508         case RDMA_TRANSPORT_IWARP:
2509                 ret = iw_cm_reject(id_priv->cm_id.iw,
2510                                    private_data, private_data_len);
2511                 break;
2512         default:
2513                 ret = -ENOSYS;
2514                 break;
2515         }
2516         return ret;
2517 }
2518 EXPORT_SYMBOL(rdma_reject);
2519
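/**
 * rdma_disconnect - Disconnect a connection.
 * @id: Connection identifier to disconnect.
 *
 * Transitions any associated QP to the error state, then initiates or
 * replies to a disconnect request (CM DREQ/DREP for IB,
 * iw_cm_disconnect() for iWARP).
 */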
2520 int rdma_disconnect(struct rdma_cm_id *id)
2521 {
2522         struct rdma_id_private *id_priv;
2523         int ret;
2524
2525         id_priv = container_of(id, struct rdma_id_private, id);
2526         if (!cma_has_cm_dev(id_priv))
2527                 return -EINVAL;
2528
2529         switch (rdma_node_get_transport(id->device->node_type)) {
2530         case RDMA_TRANSPORT_IB:
2531                 ret = cma_modify_qp_err(id_priv);
2532                 if (ret)
2533                         goto out;
2534                 /* Initiate or respond to a disconnect. */
2535                 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2536                         ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2537                 break;
2538         case RDMA_TRANSPORT_IWARP:
2539                 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2540                 break;
2541         default:
2542                 ret = -EINVAL;
2543                 break;
2544         }
2545 out:
2546         return ret;
2547 }
2548 EXPORT_SYMBOL(rdma_disconnect);
2549
2550 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2551 {
2552         struct rdma_id_private *id_priv;
2553         struct cma_multicast *mc = multicast->context;
2554         struct rdma_cm_event event;
2555         int ret;
2556
2557         id_priv = mc->id_priv;
2558         if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
2559             cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
2560                 return 0;
2561
2562         mutex_lock(&id_priv->qp_mutex);
2563         if (!status && id_priv->id.qp)
2564                 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2565                                          multicast->rec.mlid);
2566         mutex_unlock(&id_priv->qp_mutex);
2567
2568         memset(&event, 0, sizeof event);
2569         event.status = status;
2570         event.param.ud.private_data = mc->context;
2571         if (!status) {
2572                 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2573                 ib_init_ah_from_mcmember(id_priv->id.device,
2574                                          id_priv->id.port_num, &multicast->rec,
2575                                          &event.param.ud.ah_attr);
2576                 event.param.ud.qp_num = 0xFFFFFF;
2577                 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2578         } else
2579                 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2580
2581         ret = id_priv->id.event_handler(&id_priv->id, &event);
2582         if (ret) {
2583                 cma_exch(id_priv, CMA_DESTROYING);
2584                 cma_enable_remove(id_priv);
2585                 rdma_destroy_id(&id_priv->id);
2586                 return 0;
2587         }
2588
2589         cma_enable_remove(id_priv);
2590         return 0;
2591 }
2592
2593 static void cma_set_mgid(struct rdma_id_private *id_priv,
2594                          struct sockaddr *addr, union ib_gid *mgid)
2595 {
2596         unsigned char mc_map[MAX_ADDR_LEN];
2597         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2598         struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2599         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2600
2601         if (cma_any_addr(addr)) {
2602                 memset(mgid, 0, sizeof *mgid);
2603         } else if ((addr->sa_family == AF_INET6) &&
2604                    ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2605                                                                  0xFF10A01B)) {
2606                 /* IPv6 address is an SA assigned MGID. */
2607                 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2608         } else {
2609                 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2610                 if (id_priv->id.ps == RDMA_PS_UDP)
2611                         mc_map[7] = 0x01;       /* Use RDMA CM signature */
2612                 *mgid = *(union ib_gid *) (mc_map + 4);
2613         }
2614 }
2615
2616 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2617                                  struct cma_multicast *mc)
2618 {
2619         struct ib_sa_mcmember_rec rec;
2620         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2621         ib_sa_comp_mask comp_mask;
2622         int ret;
2623
2624         ib_addr_get_mgid(dev_addr, &rec.mgid);
2625         ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2626                                      &rec.mgid, &rec);
2627         if (ret)
2628                 return ret;
2629
2630         cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2631         if (id_priv->id.ps == RDMA_PS_UDP)
2632                 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2633         ib_addr_get_sgid(dev_addr, &rec.port_gid);
2634         rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2635         rec.join_state = 1;
2636
2637         comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2638                     IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2639                     IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2640                     IB_SA_MCMEMBER_REC_FLOW_LABEL |
2641                     IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2642
2643         mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2644                                                 id_priv->id.port_num, &rec,
2645                                                 comp_mask, GFP_KERNEL,
2646                                                 cma_ib_mc_handler, mc);
2647         if (IS_ERR(mc->multicast.ib))
2648                 return PTR_ERR(mc->multicast.ib);
2649
2650         return 0;
2651 }
2652
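/**
 * rdma_join_multicast - Join a multicast group.
 * @id: Communication identifier with a bound or resolved address.
 * @addr: Multicast address identifying the group.
 * @context: User context returned with multicast events.
 *
 * The join completes asynchronously; the event handler receives
 * RDMA_CM_EVENT_MULTICAST_JOIN on success or
 * RDMA_CM_EVENT_MULTICAST_ERROR on failure.
 */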
2653 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2654                         void *context)
2655 {
2656         struct rdma_id_private *id_priv;
2657         struct cma_multicast *mc;
2658         int ret;
2659
2660         id_priv = container_of(id, struct rdma_id_private, id);
2661         if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2662             !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2663                 return -EINVAL;
2664
2665         mc = kmalloc(sizeof *mc, GFP_KERNEL);
2666         if (!mc)
2667                 return -ENOMEM;
2668
2669         memcpy(&mc->addr, addr, ip_addr_size(addr));
2670         mc->context = context;
2671         mc->id_priv = id_priv;
2672
2673         spin_lock(&id_priv->lock);
2674         list_add(&mc->list, &id_priv->mc_list);
2675         spin_unlock(&id_priv->lock);
2676
2677         switch (rdma_node_get_transport(id->device->node_type)) {
2678         case RDMA_TRANSPORT_IB:
2679                 ret = cma_join_ib_multicast(id_priv, mc);
2680                 break;
2681         default:
2682                 ret = -ENOSYS;
2683                 break;
2684         }
2685
2686         if (ret) {
2687                 spin_lock_irq(&id_priv->lock);
2688                 list_del(&mc->list);
2689                 spin_unlock_irq(&id_priv->lock);
2690                 kfree(mc);
2691         }
2692         return ret;
2693 }
2694 EXPORT_SYMBOL(rdma_join_multicast);
2695
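/**
 * rdma_leave_multicast - Leave a multicast group.
 * @id: Communication identifier used to join the group.
 * @addr: Multicast address of the group to leave.
 *
 * Detaches any attached QP from the group and frees the SA multicast
 * membership.
 */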
2696 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2697 {
2698         struct rdma_id_private *id_priv;
2699         struct cma_multicast *mc;
2700
2701         id_priv = container_of(id, struct rdma_id_private, id);
2702         spin_lock_irq(&id_priv->lock);
2703         list_for_each_entry(mc, &id_priv->mc_list, list) {
2704                 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2705                         list_del(&mc->list);
2706                         spin_unlock_irq(&id_priv->lock);
2707
2708                         if (id->qp)
2709                                 ib_detach_mcast(id->qp,
2710                                                 &mc->multicast.ib->rec.mgid,
2711                                                 mc->multicast.ib->rec.mlid);
2712                         ib_sa_free_multicast(mc->multicast.ib);
2713                         kfree(mc);
2714                         return;
2715                 }
2716         }
2717         spin_unlock_irq(&id_priv->lock);
2718 }
2719 EXPORT_SYMBOL(rdma_leave_multicast);
2720
2721 static void cma_add_one(struct ib_device *device)
2722 {
2723         struct cma_device *cma_dev;
2724         struct rdma_id_private *id_priv;
2725
2726         cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
2727         if (!cma_dev)
2728                 return;
2729
2730         cma_dev->device = device;
2731
2732         init_completion(&cma_dev->comp);
2733         atomic_set(&cma_dev->refcount, 1);
2734         INIT_LIST_HEAD(&cma_dev->id_list);
2735         ib_set_client_data(device, &cma_client, cma_dev);
2736
2737         mutex_lock(&lock);
2738         list_add_tail(&cma_dev->list, &dev_list);
2739         list_for_each_entry(id_priv, &listen_any_list, list)
2740                 cma_listen_on_dev(id_priv, cma_dev);
2741         mutex_unlock(&lock);
2742 }
2743
2744 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2745 {
2746         struct rdma_cm_event event;
2747         enum cma_state state;
2748
2749         /* Record that we want to remove the device */
2750         state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
2751         if (state == CMA_DESTROYING)
2752                 return 0;
2753
2754         cma_cancel_operation(id_priv, state);
2755         wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
2756
2757         /* Check for destruction from another callback. */
2758         if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2759                 return 0;
2760
2761         memset(&event, 0, sizeof event);
2762         event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2763         return id_priv->id.event_handler(&id_priv->id, &event);
2764 }
2765
2766 static void cma_process_remove(struct cma_device *cma_dev)
2767 {
2768         struct rdma_id_private *id_priv;
2769         int ret;
2770
2771         mutex_lock(&lock);
2772         while (!list_empty(&cma_dev->id_list)) {
2773                 id_priv = list_entry(cma_dev->id_list.next,
2774                                      struct rdma_id_private, list);
2775
2776                 list_del(&id_priv->listen_list);
2777                 list_del_init(&id_priv->list);
2778                 atomic_inc(&id_priv->refcount);
2779                 mutex_unlock(&lock);
2780
2781                 ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
2782                 cma_deref_id(id_priv);
2783                 if (ret)
2784                         rdma_destroy_id(&id_priv->id);
2785
2786                 mutex_lock(&lock);
2787         }
2788         mutex_unlock(&lock);
2789
2790         cma_deref_dev(cma_dev);
2791         wait_for_completion(&cma_dev->comp);
2792 }
2793
2794 static void cma_remove_one(struct ib_device *device)
2795 {
2796         struct cma_device *cma_dev;
2797
2798         cma_dev = ib_get_client_data(device, &cma_client);
2799         if (!cma_dev)
2800                 return;
2801
2802         mutex_lock(&lock);
2803         list_del(&cma_dev->list);
2804         mutex_unlock(&lock);
2805
2806         cma_process_remove(cma_dev);
2807         kfree(cma_dev);
2808 }
2809
2810 static int cma_init(void)
2811 {
2812         int ret, low, high, remaining;
2813
2814         get_random_bytes(&next_port, sizeof next_port);
2815         inet_get_local_port_range(&low, &high);
2816         remaining = (high - low) + 1;
2817         next_port = ((unsigned int) next_port % remaining) + low;
2818
2819         cma_wq = create_singlethread_workqueue("rdma_cm");
2820         if (!cma_wq)
2821                 return -ENOMEM;
2822
2823         ib_sa_register_client(&sa_client);
2824         rdma_addr_register_client(&addr_client);
2825
2826         ret = ib_register_client(&cma_client);
2827         if (ret)
2828                 goto err;
2829         return 0;
2830
2831 err:
2832         rdma_addr_unregister_client(&addr_client);
2833         ib_sa_unregister_client(&sa_client);
2834         destroy_workqueue(cma_wq);
2835         return ret;
2836 }
2837
2838 static void cma_cleanup(void)
2839 {
2840         ib_unregister_client(&cma_client);
2841         rdma_addr_unregister_client(&addr_client);
2842         ib_sa_unregister_client(&sa_client);
2843         destroy_workqueue(cma_wq);
2844         idr_destroy(&sdp_ps);
2845         idr_destroy(&tcp_ps);
2846         idr_destroy(&udp_ps);
2847         idr_destroy(&ipoib_ps);
2848 }
2849
2850 module_init(cma_init);
2851 module_exit(cma_cleanup);