drivers/infiniband/core/cm.c
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);

static struct ib_client cm_client = {
	.name   = "cm",
	.add    = cm_add_one,
	.remove = cm_remove_one
};

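/*
 * Module-global CM state: the per-device list, the RB trees used to match
 * incoming MADs to listens and connections, and the idr that maps local
 * communication IDs back to their cm_id_private structures.
 */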
static struct ib_cm {
	spinlock_t lock;
	struct list_head device_list;
	rwlock_t device_lock;
	struct rb_root listen_service_table;
	u64 listen_service_id;
	/* struct rb_root peer_service_table; todo: fix peer to peer */
	struct rb_root remote_qp_table;
	struct rb_root remote_id_table;
	struct rb_root remote_sidr_table;
	struct idr local_id_table;
	__be32 random_id_operand;
	struct list_head timewait_list;
	struct workqueue_struct *wq;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
	CM_REQ_COUNTER,
	CM_MRA_COUNTER,
	CM_REJ_COUNTER,
	CM_REP_COUNTER,
	CM_RTU_COUNTER,
	CM_DREQ_COUNTER,
	CM_DREP_COUNTER,
	CM_SIDR_REQ_COUNTER,
	CM_SIDR_REP_COUNTER,
	CM_LAP_COUNTER,
	CM_APR_COUNTER,
	CM_ATTR_COUNT,
	CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
	CM_XMIT,
	CM_XMIT_RETRIES,
	CM_RECV,
	CM_RECV_DUPLICATES,
	CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
				     [sizeof("cm_rx_duplicates")] = {
	"cm_tx_msgs", "cm_tx_retries",
	"cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
	struct kobject obj;
	atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
	struct attribute attr;
	int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
	.attr = { .name = __stringify(_name), .mode = 0444 }, \
	.index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
	&cm_req_counter_attr.attr,
	&cm_mra_counter_attr.attr,
	&cm_rej_counter_attr.attr,
	&cm_rep_counter_attr.attr,
	&cm_rtu_counter_attr.attr,
	&cm_dreq_counter_attr.attr,
	&cm_drep_counter_attr.attr,
	&cm_sidr_req_counter_attr.attr,
	&cm_sidr_rep_counter_attr.attr,
	&cm_lap_counter_attr.attr,
	&cm_apr_counter_attr.attr,
	NULL
};

struct cm_port {
	struct cm_device *cm_dev;
	struct ib_mad_agent *mad_agent;
	struct kobject port_obj;
	u8 port_num;
	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
	struct list_head list;
	struct ib_device *ib_device;
	struct device *device;
	u8 ack_delay;
	struct cm_port *port[0];
};

struct cm_av {
	struct cm_port *port;
	union ib_gid dgid;
	struct ib_ah_attr ah_attr;
	u16 pkey_index;
	u8 timeout;
};

struct cm_work {
	struct delayed_work work;
	struct list_head list;
	struct cm_port *port;
	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
	__be32 local_id;			/* Established / timewait */
	__be32 remote_id;
	struct ib_cm_event cm_event;
	struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
	struct cm_work work;			/* Must be first. */
	struct list_head list;
	struct rb_node remote_qp_node;
	struct rb_node remote_id_node;
	__be64 remote_ca_guid;
	__be32 remote_qpn;
	u8 inserted_remote_qp;
	u8 inserted_remote_id;
};

struct cm_id_private {
	struct ib_cm_id id;

	struct rb_node service_node;
	struct rb_node sidr_id_node;
	spinlock_t lock;	/* Do not acquire inside cm.lock */
	struct completion comp;
	atomic_t refcount;

	struct ib_mad_send_buf *msg;
	struct cm_timewait_info *timewait_info;
	/* todo: use alternate port on send failure */
	struct cm_av av;
	struct cm_av alt_av;
	struct ib_cm_compare_data *compare_data;

	void *private_data;
	__be64 tid;
	__be32 local_qpn;
	__be32 remote_qpn;
	enum ib_qp_type qp_type;
	__be32 sq_psn;
	__be32 rq_psn;
	int timeout_ms;
	enum ib_mtu path_mtu;
	__be16 pkey;
	u8 private_data_len;
	u8 max_cm_retries;
	u8 peer_to_peer;
	u8 responder_resources;
	u8 initiator_depth;
	u8 retry_count;
	u8 rnr_retry_count;
	u8 service_timeout;
	u8 target_ack_delay;

	struct list_head work_list;
	atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
	if (atomic_dec_and_test(&cm_id_priv->refcount))
		complete(&cm_id_priv->comp);
}

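/*
 * Allocate a MAD send buffer and address handle for an outbound CM message.
 * Takes a reference on the cm_id (stored in context[0]) that is dropped by
 * cm_free_msg() once the send completes.
 */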
static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
			struct ib_mad_send_buf **msg)
{
	struct ib_mad_agent *mad_agent;
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;

	mad_agent = cm_id_priv->av.port->mad_agent;
	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
			       cm_id_priv->av.pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		return PTR_ERR(m);
	}

	/* Timeout set by caller if response is expected. */
	m->ah = ah;
	m->retries = cm_id_priv->max_cm_retries;

	atomic_inc(&cm_id_priv->refcount);
	m->context[0] = cm_id_priv;
	*msg = m;
	return 0;
}

static int cm_alloc_response_msg(struct cm_port *port,
				 struct ib_mad_recv_wc *mad_recv_wc,
				 struct ib_mad_send_buf **msg)
{
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;

	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
				  mad_recv_wc->recv_buf.grh, port->port_num);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		return PTR_ERR(m);
	}
	m->ah = ah;
	*msg = m;
	return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
	ib_destroy_ah(msg->ah);
	if (msg->context[0])
		cm_deref_id(msg->context[0]);
	ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
				   u8 private_data_len)
{
	void *data;

	if (!private_data || !private_data_len)
		return NULL;

	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
	if (!data)
		return ERR_PTR(-ENOMEM);

	return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
				void *private_data, u8 private_data_len)
{
	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
		kfree(cm_id_priv->private_data);

	cm_id_priv->private_data = private_data;
	cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
				    struct ib_grh *grh, struct cm_av *av)
{
	av->port = port;
	av->pkey_index = wc->pkey_index;
	ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
			   grh, &av->ah_attr);
}

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
	struct cm_device *cm_dev;
	struct cm_port *port = NULL;
	unsigned long flags;
	int ret;
	u8 p;

	read_lock_irqsave(&cm.device_lock, flags);
	list_for_each_entry(cm_dev, &cm.device_list, list) {
		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
					&p, NULL)) {
			port = cm_dev->port[p-1];
			break;
		}
	}
	read_unlock_irqrestore(&cm.device_lock, flags);

	if (!port)
		return -EINVAL;

	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
				  be16_to_cpu(path->pkey), &av->pkey_index);
	if (ret)
		return ret;

	av->port = port;
	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
			     &av->ah_attr);
	av->timeout = path->packet_life_time + 1;
	return 0;
}

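/*
 * Local communication IDs come from an idr and are XORed with a random
 * operand (cm.random_id_operand, chosen at module load), so the IDs that
 * appear on the wire are not trivially predictable.
 */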
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;
	int ret, id;
	static int next_id;

	do {
		spin_lock_irqsave(&cm.lock, flags);
		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
					next_id, &id);
		if (!ret)
			next_id = ((unsigned) id + 1) & MAX_ID_MASK;
		spin_unlock_irqrestore(&cm.lock, flags);
	} while ((ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL));

	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
	return ret;
}

static void cm_free_id(__be32 local_id)
{
	spin_lock_irq(&cm.lock);
	idr_remove(&cm.local_id_table,
		   (__force int) (local_id ^ cm.random_id_operand));
	spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	cm_id_priv = idr_find(&cm.local_id_table,
			      (__force int) (local_id ^ cm.random_id_operand));
	if (cm_id_priv) {
		if (cm_id_priv->id.remote_id == remote_id)
			atomic_inc(&cm_id_priv->refcount);
		else
			cm_id_priv = NULL;
	}

	return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	spin_lock_irq(&cm.lock);
	cm_id_priv = cm_get_id(local_id, remote_id);
	spin_unlock_irq(&cm.lock);

	return cm_id_priv;
}

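/*
 * Private-data comparisons for listens are masked: each side's data is
 * ANDed with the other side's mask before the memcmp, so only the bits a
 * listener cares about participate in matching.
 */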
static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
{
	int i;

	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
					     ((unsigned long *) mask)[i];
}

static int cm_compare_data(struct ib_cm_compare_data *src_data,
			   struct ib_cm_compare_data *dst_data)
{
	u8 src[IB_CM_COMPARE_SIZE];
	u8 dst[IB_CM_COMPARE_SIZE];

	if (!src_data || !dst_data)
		return 0;

	cm_mask_copy(src, src_data->data, dst_data->mask);
	cm_mask_copy(dst, dst_data->data, src_data->mask);
	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
}

static int cm_compare_private_data(u8 *private_data,
				   struct ib_cm_compare_data *dst_data)
{
	u8 src[IB_CM_COMPARE_SIZE];

	if (!dst_data)
		return 0;

	cm_mask_copy(src, private_data, dst_data->mask);
	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
	return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
	return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
	return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
	return (__force u64) a > (__force u64) b;
}

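/*
 * The listen table is an RB tree ordered by device, then service ID, then
 * compare data.  Insertion returns the existing entry, if any, whose masked
 * service ID and compare data overlap the new listen.
 */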
static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
	struct rb_node **link = &cm.listen_service_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	__be64 service_id = cm_id_priv->id.service_id;
	__be64 service_mask = cm_id_priv->id.service_mask;
	int data_cmp;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  service_node);
		data_cmp = cm_compare_data(cm_id_priv->compare_data,
					   cur_cm_id_priv->compare_data);
		if ((cur_cm_id_priv->id.service_mask & service_id) ==
		    (service_mask & cur_cm_id_priv->id.service_id) &&
		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
		    !data_cmp)
			return cur_cm_id_priv;

		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
			link = &(*link)->rb_left;
		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
			link = &(*link)->rb_right;
		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
			link = &(*link)->rb_left;
		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
			link = &(*link)->rb_right;
		else if (data_cmp < 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}
	rb_link_node(&cm_id_priv->service_node, parent, link);
	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
	return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
					     __be64 service_id,
					     u8 *private_data)
{
	struct rb_node *node = cm.listen_service_table.rb_node;
	struct cm_id_private *cm_id_priv;
	int data_cmp;

	while (node) {
		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
		data_cmp = cm_compare_private_data(private_data,
						   cm_id_priv->compare_data);
		if ((cm_id_priv->id.service_mask & service_id) ==
		     cm_id_priv->id.service_id &&
		    (cm_id_priv->id.device == device) && !data_cmp)
			return cm_id_priv;

		if (device < cm_id_priv->id.device)
			node = node->rb_left;
		else if (device > cm_id_priv->id.device)
			node = node->rb_right;
		else if (be64_lt(service_id, cm_id_priv->id.service_id))
			node = node->rb_left;
		else if (be64_gt(service_id, cm_id_priv->id.service_id))
			node = node->rb_right;
		else if (data_cmp < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
						     *timewait_info)
{
	struct rb_node **link = &cm.remote_id_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_id = timewait_info->work.remote_id;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_id_node);
		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
			link = &(*link)->rb_right;
		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_left;
		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_id = 1;
	rb_link_node(&timewait_info->remote_id_node, parent, link);
	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
	return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
						   __be32 remote_id)
{
	struct rb_node *node = cm.remote_id_table.rb_node;
	struct cm_timewait_info *timewait_info;

	while (node) {
		timewait_info = rb_entry(node, struct cm_timewait_info,
					 remote_id_node);
		if (be32_lt(remote_id, timewait_info->work.remote_id))
			node = node->rb_left;
		else if (be32_gt(remote_id, timewait_info->work.remote_id))
			node = node->rb_right;
		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
			node = node->rb_left;
		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
			node = node->rb_right;
		else
			return timewait_info;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
						      *timewait_info)
{
	struct rb_node **link = &cm.remote_qp_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_qpn = timewait_info->remote_qpn;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_qp_node);
		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
			link = &(*link)->rb_right;
		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_left;
		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_qp = 1;
	rb_link_node(&timewait_info->remote_qp_node, parent, link);
	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
	return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
						    *cm_id_priv)
{
	struct rb_node **link = &cm.remote_sidr_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	union ib_gid *port_gid = &cm_id_priv->av.dgid;
	__be32 remote_id = cm_id_priv->id.remote_id;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  sidr_id_node);
		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
			link = &(*link)->rb_right;
		else {
			int cmp;
			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
				     sizeof *port_gid);
			if (cmp < 0)
				link = &(*link)->rb_left;
			else if (cmp > 0)
				link = &(*link)->rb_right;
			else
				return cur_cm_id_priv;
		}
	}
	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
	return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
			       enum ib_cm_sidr_status status)
{
	struct ib_cm_sidr_rep_param param;

	memset(&param, 0, sizeof param);
	param.status = status;
	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
				 ib_cm_handler cm_handler,
				 void *context)
{
	struct cm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->id.state = IB_CM_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.remote_cm_qpn = 1;
	ret = cm_alloc_id(cm_id_priv);
	if (ret)
		goto error;

	spin_lock_init(&cm_id_priv->lock);
	init_completion(&cm_id_priv->comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	atomic_set(&cm_id_priv->work_count, -1);
	atomic_set(&cm_id_priv->refcount, 1);
	return &cm_id_priv->id;

error:
	kfree(cm_id_priv);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
	struct cm_work *work;

	if (list_empty(&cm_id_priv->work_list))
		return NULL;

	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
	list_del(&work->list);
	return work;
}

static void cm_free_work(struct cm_work *work)
{
	if (work->mad_recv_wc)
		ib_free_recv_mad(work->mad_recv_wc);
	kfree(work);
}

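/*
 * For example (hypothetical value): iba_time = 16 gives 1 << 8 = 256 ms,
 * approximating the exact 4.096 us * 2^16 ~ 268 ms.
 */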
static inline int cm_convert_to_ms(int iba_time)
{
	/* approximate conversion to ms from 4.096us x 2^iba_time */
	return 1 << max(iba_time - 8, 0);
}

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
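/* e.g. ca_ack_delay = 15 and packet_life_time = 14 yield 15, rounded up to 16 */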
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
	int ack_timeout = packet_life_time + 1;

	if (ack_timeout >= ca_ack_delay)
		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
	else
		ack_timeout = ca_ack_delay +
			      (ack_timeout >= (ca_ack_delay - 1));

	return min(31, ack_timeout);
}

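/*
 * Drop the remote ID and remote QPN entries for this connection from the
 * timewait RB trees, if they were inserted.  Called with cm.lock held.
 */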
static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
	if (timewait_info->inserted_remote_id) {
		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
		timewait_info->inserted_remote_id = 0;
	}

	if (timewait_info->inserted_remote_qp) {
		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
		timewait_info->inserted_remote_qp = 0;
	}
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
	struct cm_timewait_info *timewait_info;

	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
	if (!timewait_info)
		return ERR_PTR(-ENOMEM);

	timewait_info->work.local_id = local_id;
	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
	return timewait_info;
}

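/*
 * Move the connection into timewait: queue delayed work to run after the
 * timewait period (derived from the packet lifetime via av.timeout) and
 * hand ownership of timewait_info to that work item.
 */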
static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
	int wait_time;
	unsigned long flags;

	spin_lock_irqsave(&cm.lock, flags);
	cm_cleanup_timewait(cm_id_priv->timewait_info);
	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
	spin_unlock_irqrestore(&cm.lock, flags);

	/*
	 * The cm_id could be destroyed by the user before we exit timewait.
	 * To protect against this, we search for the cm_id after exiting
	 * timewait before notifying the user that we've exited timewait.
	 */
	cm_id_priv->id.state = IB_CM_TIMEWAIT;
	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
			   msecs_to_jiffies(wait_time));
	cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;

	cm_id_priv->id.state = IB_CM_IDLE;
	if (cm_id_priv->timewait_info) {
		spin_lock_irqsave(&cm.lock, flags);
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irqrestore(&cm.lock, flags);
		kfree(cm_id_priv->timewait_info);
		cm_id_priv->timewait_info = NULL;
	}
}

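/*
 * Tear down a cm_id from whatever state it is in, sending the REJ, DREQ,
 * or DREP needed to leave the connection cleanly, then wait for all
 * references to drop before freeing the structure.
 */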
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
	struct cm_id_private *cm_id_priv;
	struct cm_work *work;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
	spin_lock_irq(&cm_id_priv->lock);
	switch (cm_id->state) {
	case IB_CM_LISTEN:
		cm_id->state = IB_CM_IDLE;
		spin_unlock_irq(&cm_id_priv->lock);
		spin_lock_irq(&cm.lock);
		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
		spin_unlock_irq(&cm.lock);
		break;
	case IB_CM_SIDR_REQ_SENT:
		cm_id->state = IB_CM_IDLE;
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	case IB_CM_SIDR_REQ_RCVD:
		spin_unlock_irq(&cm_id_priv->lock);
		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
		spin_lock_irq(&cm.lock);
		if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
			rb_erase(&cm_id_priv->sidr_id_node,
				 &cm.remote_sidr_table);
		spin_unlock_irq(&cm.lock);
		break;
	case IB_CM_REQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
			       &cm_id_priv->id.device->node_guid,
			       sizeof cm_id_priv->id.device->node_guid,
			       NULL, 0);
		break;
	case IB_CM_REQ_RCVD:
		if (err == -ENOMEM) {
			/* Do not reject to allow future retries. */
			cm_reset_to_idle(cm_id_priv);
			spin_unlock_irq(&cm_id_priv->lock);
		} else {
			spin_unlock_irq(&cm_id_priv->lock);
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		}
		break;
	case IB_CM_MRA_REQ_RCVD:
	case IB_CM_REP_SENT:
	case IB_CM_MRA_REP_RCVD:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		/* Fall through */
	case IB_CM_MRA_REQ_SENT:
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
			       NULL, 0, NULL, 0);
		break;
	case IB_CM_ESTABLISHED:
		spin_unlock_irq(&cm_id_priv->lock);
		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
			break;
		ib_send_cm_dreq(cm_id, NULL, 0);
		goto retest;
	case IB_CM_DREQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		cm_enter_timewait(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	case IB_CM_DREQ_RCVD:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	default:
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	}

	cm_free_id(cm_id->local_id);
	cm_deref_id(cm_id_priv);
	wait_for_completion(&cm_id_priv->comp);
	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
		cm_free_work(work);
	kfree(cm_id_priv->compare_data);
	kfree(cm_id_priv->private_data);
	kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
	cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
		 struct ib_cm_compare_data *compare_data)
{
	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
	unsigned long flags;
	int ret = 0;

	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
	service_id &= service_mask;
	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
		return -EINVAL;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	if (cm_id->state != IB_CM_IDLE)
		return -EINVAL;

	if (compare_data) {
		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
						   GFP_KERNEL);
		if (!cm_id_priv->compare_data)
			return -ENOMEM;
		cm_mask_copy(cm_id_priv->compare_data->data,
			     compare_data->data, compare_data->mask);
		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
		       IB_CM_COMPARE_SIZE);
	}

	cm_id->state = IB_CM_LISTEN;

	spin_lock_irqsave(&cm.lock, flags);
	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
		cm_id->service_mask = ~cpu_to_be64(0);
	} else {
		cm_id->service_id = service_id;
		cm_id->service_mask = service_mask;
	}
	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
	spin_unlock_irqrestore(&cm.lock, flags);

	if (cur_cm_id_priv) {
		cm_id->state = IB_CM_IDLE;
		kfree(cm_id_priv->compare_data);
		cm_id_priv->compare_data = NULL;
		ret = -EBUSY;
	}
	return ret;
}
EXPORT_SYMBOL(ib_cm_listen);

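/*
 * Transaction IDs carry the MAD agent's hi_tid in the upper 32 bits and
 * the local communication ID plus a 2-bit message sequence in the lower
 * 32, so replies can be demultiplexed back to the right cm_id.
 */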
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
			  enum cm_msg_sequence msg_seq)
{
	u64 hi_tid, low_tid;

	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
			  (msg_seq << 30));
	return cpu_to_be64(hi_tid | low_tid);
}

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
			      __be16 attr_id, __be64 tid)
{
	hdr->base_version  = IB_MGMT_BASE_VERSION;
	hdr->mgmt_class    = IB_MGMT_CLASS_CM;
	hdr->class_version = IB_CM_CLASS_VERSION;
	hdr->method        = IB_MGMT_METHOD_SEND;
	hdr->attr_id       = attr_id;
	hdr->tid           = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
			  struct cm_id_private *cm_id_priv,
			  struct ib_cm_req_param *param)
{
	struct ib_sa_path_rec *pri_path = param->primary_path;
	struct ib_sa_path_rec *alt_path = param->alternate_path;

	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

	req_msg->local_comm_id = cm_id_priv->id.local_id;
	req_msg->service_id = param->service_id;
	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
	cm_req_set_init_depth(req_msg, param->initiator_depth);
	cm_req_set_remote_resp_timeout(req_msg,
				       param->remote_cm_response_timeout);
	cm_req_set_qp_type(req_msg, param->qp_type);
	cm_req_set_flow_ctrl(req_msg, param->flow_control);
	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
	cm_req_set_local_resp_timeout(req_msg,
				      param->local_cm_response_timeout);
	req_msg->pkey = param->primary_path->pkey;
	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);

	if (param->qp_type != IB_QPT_XRC_INI) {
		cm_req_set_resp_res(req_msg, param->responder_resources);
		cm_req_set_retry_count(req_msg, param->retry_count);
		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
		cm_req_set_srq(req_msg, param->srq);
	}

	if (pri_path->hop_limit <= 1) {
		req_msg->primary_local_lid = pri_path->slid;
		req_msg->primary_remote_lid = pri_path->dlid;
	} else {
		/* Work-around until there's a way to obtain remote LID info */
		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
	}
	req_msg->primary_local_gid = pri_path->sgid;
	req_msg->primary_remote_gid = pri_path->dgid;
	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
	req_msg->primary_traffic_class = pri_path->traffic_class;
	req_msg->primary_hop_limit = pri_path->hop_limit;
	cm_req_set_primary_sl(req_msg, pri_path->sl);
	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
	cm_req_set_primary_local_ack_timeout(req_msg,
		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
			       pri_path->packet_life_time));

	if (alt_path) {
		if (alt_path->hop_limit <= 1) {
			req_msg->alt_local_lid = alt_path->slid;
			req_msg->alt_remote_lid = alt_path->dlid;
		} else {
			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
		}
		req_msg->alt_local_gid = alt_path->sgid;
		req_msg->alt_remote_gid = alt_path->dgid;
		cm_req_set_alt_flow_label(req_msg,
					  alt_path->flow_label);
		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
		req_msg->alt_traffic_class = alt_path->traffic_class;
		req_msg->alt_hop_limit = alt_path->hop_limit;
		cm_req_set_alt_sl(req_msg, alt_path->sl);
		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
		cm_req_set_alt_local_ack_timeout(req_msg,
			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
				       alt_path->packet_life_time));
	}

	if (param->private_data && param->private_data_len)
		memcpy(req_msg->private_data, param->private_data,
		       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
	/* peer-to-peer not supported */
	if (param->peer_to_peer)
		return -EINVAL;

	if (!param->primary_path)
		return -EINVAL;

	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
	    param->qp_type != IB_QPT_XRC_INI)
		return -EINVAL;

	if (param->private_data &&
	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
		return -EINVAL;

	if (param->alternate_path &&
	    (param->alternate_path->pkey != param->primary_path->pkey ||
	     param->alternate_path->mtu != param->primary_path->mtu))
		return -EINVAL;

	return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
		   struct ib_cm_req_param *param)
{
	struct cm_id_private *cm_id_priv;
	struct cm_req_msg *req_msg;
	unsigned long flags;
	int ret;

	ret = cm_validate_req_param(param);
	if (ret)
		return ret;

	/* Verify that we're not in timewait. */
	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_IDLE) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = -EINVAL;
		goto out;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
							    id.local_id);
	if (IS_ERR(cm_id_priv->timewait_info)) {
		ret = PTR_ERR(cm_id_priv->timewait_info);
		goto out;
	}

	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
	if (ret)
		goto error1;
	if (param->alternate_path) {
		ret = cm_init_av_by_path(param->alternate_path,
					 &cm_id_priv->alt_av);
		if (ret)
			goto error1;
	}
	cm_id->service_id = param->service_id;
	cm_id->service_mask = ~cpu_to_be64(0);
	cm_id_priv->timeout_ms = cm_convert_to_ms(
				    param->primary_path->packet_life_time) * 2 +
				 cm_convert_to_ms(
				    param->remote_cm_response_timeout);
	cm_id_priv->max_cm_retries = param->max_cm_retries;
	cm_id_priv->initiator_depth = param->initiator_depth;
	cm_id_priv->responder_resources = param->responder_resources;
	cm_id_priv->retry_count = param->retry_count;
	cm_id_priv->path_mtu = param->primary_path->mtu;
	cm_id_priv->pkey = param->primary_path->pkey;
	cm_id_priv->qp_type = param->qp_type;

	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
	if (ret)
		goto error1;

	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
	cm_format_req(req_msg, cm_id_priv, param);
	cm_id_priv->tid = req_msg->hdr.tid;
	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
	if (ret) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		goto error2;
	}
	BUG_ON(cm_id->state != IB_CM_IDLE);
	cm_id->state = IB_CM_REQ_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return 0;

error2:	cm_free_msg(cm_id_priv->msg);
error1:	kfree(cm_id_priv->timewait_info);
out:	return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

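/*
 * Build and send a REJ directly from a received MAD, e.g. when no cm_id
 * is available to own the reply.
 */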
static int cm_issue_rej(struct cm_port *port,
			struct ib_mad_recv_wc *mad_recv_wc,
			enum ib_cm_rej_reason reason,
			enum cm_msg_response msg_rejected,
			void *ari, u8 ari_length)
{
	struct ib_mad_send_buf *msg = NULL;
	struct cm_rej_msg *rej_msg, *rcv_msg;
	int ret;

	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
	if (ret)
		return ret;

	/* We just need common CM header information.  Cast to any message. */
	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
	rej_msg = (struct cm_rej_msg *) msg->mad;

	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
	rej_msg->reason = cpu_to_be16(reason);

	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		cm_free_msg(msg);

	return ret;
}

static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
				    __be32 local_qpn, __be32 remote_qpn)
{
	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
		((local_ca_guid == remote_ca_guid) &&
		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}

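/*
 * Reconstruct path records from a received REQ.  The REQ describes the
 * path from the sender's point of view, so local and remote fields are
 * swapped when filling in the passive side's view.
 */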
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
				     struct ib_sa_path_rec *primary_path,
				     struct ib_sa_path_rec *alt_path)
{
	memset(primary_path, 0, sizeof *primary_path);
	primary_path->dgid = req_msg->primary_local_gid;
	primary_path->sgid = req_msg->primary_remote_gid;
	primary_path->dlid = req_msg->primary_local_lid;
	primary_path->slid = req_msg->primary_remote_lid;
	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
	primary_path->hop_limit = req_msg->primary_hop_limit;
	primary_path->traffic_class = req_msg->primary_traffic_class;
	primary_path->reversible = 1;
	primary_path->pkey = req_msg->pkey;
	primary_path->sl = cm_req_get_primary_sl(req_msg);
	primary_path->mtu_selector = IB_SA_EQ;
	primary_path->mtu = cm_req_get_path_mtu(req_msg);
	primary_path->rate_selector = IB_SA_EQ;
	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
	primary_path->packet_life_time_selector = IB_SA_EQ;
	primary_path->packet_life_time =
		cm_req_get_primary_local_ack_timeout(req_msg);
	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);

	if (req_msg->alt_local_lid) {
		memset(alt_path, 0, sizeof *alt_path);
		alt_path->dgid = req_msg->alt_local_gid;
		alt_path->sgid = req_msg->alt_remote_gid;
		alt_path->dlid = req_msg->alt_local_lid;
		alt_path->slid = req_msg->alt_remote_lid;
		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
		alt_path->hop_limit = req_msg->alt_hop_limit;
		alt_path->traffic_class = req_msg->alt_traffic_class;
		alt_path->reversible = 1;
		alt_path->pkey = req_msg->pkey;
		alt_path->sl = cm_req_get_alt_sl(req_msg);
		alt_path->mtu_selector = IB_SA_EQ;
		alt_path->mtu = cm_req_get_path_mtu(req_msg);
		alt_path->rate_selector = IB_SA_EQ;
		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
		alt_path->packet_life_time_selector = IB_SA_EQ;
		alt_path->packet_life_time =
			cm_req_get_alt_local_ack_timeout(req_msg);
		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
	}
}

static void cm_format_req_event(struct cm_work *work,
				struct cm_id_private *cm_id_priv,
				struct ib_cm_id *listen_id)
{
	struct cm_req_msg *req_msg;
	struct ib_cm_req_event_param *param;

	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
	param = &work->cm_event.param.req_rcvd;
	param->listen_id = listen_id;
	param->port = cm_id_priv->av.port->port_num;
	param->primary_path = &work->path[0];
	if (req_msg->alt_local_lid)
		param->alternate_path = &work->path[1];
	else
		param->alternate_path = NULL;
	param->remote_ca_guid = req_msg->local_ca_guid;
	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
	param->qp_type = cm_req_get_qp_type(req_msg);
	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
	param->responder_resources = cm_req_get_init_depth(req_msg);
	param->initiator_depth = cm_req_get_resp_res(req_msg);
	param->local_cm_response_timeout =
					cm_req_get_remote_resp_timeout(req_msg);
	param->flow_control = cm_req_get_flow_ctrl(req_msg);
	param->remote_cm_response_timeout =
					cm_req_get_local_resp_timeout(req_msg);
	param->retry_count = cm_req_get_retry_count(req_msg);
	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
	param->srq = cm_req_get_srq(req_msg);
	work->cm_event.private_data = &req_msg->private_data;
}

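/*
 * Deliver an event to the consumer's callback, then drain any events that
 * queued up behind it.  A nonzero handler return destroys the cm_id.
 */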
static void cm_process_work(struct cm_id_private *cm_id_priv,
			    struct cm_work *work)
{
	int ret;

	/* We will typically only have the current event to report. */
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
	cm_free_work(work);

	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
		spin_lock_irq(&cm_id_priv->lock);
		work = cm_dequeue_work(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		BUG_ON(!work);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
						&work->cm_event);
		cm_free_work(work);
	}
	cm_deref_id(cm_id_priv);
	if (ret)
		cm_destroy_id(&cm_id_priv->id, ret);
}

static void cm_format_mra(struct cm_mra_msg *mra_msg,
			  struct cm_id_private *cm_id_priv,
			  enum cm_msg_response msg_mraed, u8 service_timeout,
			  const void *private_data, u8 private_data_len)
{
	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
	mra_msg->local_comm_id = cm_id_priv->id.local_id;
	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
	cm_mra_set_service_timeout(mra_msg, service_timeout);

	if (private_data && private_data_len)
		memcpy(mra_msg->private_data, private_data, private_data_len);
}

static void cm_format_rej(struct cm_rej_msg *rej_msg,
			  struct cm_id_private *cm_id_priv,
			  enum ib_cm_rej_reason reason,
			  void *ari,
			  u8 ari_length,
			  const void *private_data,
			  u8 private_data_len)
{
	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;

	switch (cm_id_priv->id.state) {
	case IB_CM_REQ_RCVD:
		rej_msg->local_comm_id = 0;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
		break;
	case IB_CM_MRA_REQ_SENT:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
		break;
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
		break;
	default:
		rej_msg->local_comm_id = cm_id_priv->id.local_id;
		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
		break;
	}

	rej_msg->reason = cpu_to_be16(reason);
	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	if (private_data && private_data_len)
		memcpy(rej_msg->private_data, private_data, private_data_len);
}

1396 static void cm_dup_req_handler(struct cm_work *work,
1397                                struct cm_id_private *cm_id_priv)
1398 {
1399         struct ib_mad_send_buf *msg = NULL;
1400         int ret;
1401
1402         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1403                         counter[CM_REQ_COUNTER]);
1404
1405         /* Quick, unlocked check to discard duplicate REQs; the state is rechecked under the lock below. */
1406         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1407                 return;
1408
1409         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1410         if (ret)
1411                 return;
1412
1413         spin_lock_irq(&cm_id_priv->lock);
1414         switch (cm_id_priv->id.state) {
1415         case IB_CM_MRA_REQ_SENT:
1416                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1417                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1418                               cm_id_priv->private_data,
1419                               cm_id_priv->private_data_len);
1420                 break;
1421         case IB_CM_TIMEWAIT:
1422                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1423                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1424                 break;
1425         default:
1426                 goto unlock;
1427         }
1428         spin_unlock_irq(&cm_id_priv->lock);
1429
1430         ret = ib_post_send_mad(msg, NULL);
1431         if (ret)
1432                 goto free;
1433         return;
1434
1435 unlock: spin_unlock_irq(&cm_id_priv->lock);
1436 free:   cm_free_msg(msg);
1437 }
1438
1439 static struct cm_id_private *cm_match_req(struct cm_work *work,
1440                                            struct cm_id_private *cm_id_priv)
1441 {
1442         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1443         struct cm_timewait_info *timewait_info;
1444         struct cm_req_msg *req_msg;
1445
1446         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1447
1448         /* Check for possible duplicate REQ. */
1449         spin_lock_irq(&cm.lock);
1450         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1451         if (timewait_info) {
1452                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1453                                            timewait_info->work.remote_id);
1454                 spin_unlock_irq(&cm.lock);
1455                 if (cur_cm_id_priv) {
1456                         cm_dup_req_handler(work, cur_cm_id_priv);
1457                         cm_deref_id(cur_cm_id_priv);
1458                 }
1459                 return NULL;
1460         }
1461
1462         /* Check for stale connections. */
1463         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1464         if (timewait_info) {
1465                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1466                 spin_unlock_irq(&cm.lock);
1467                 cm_issue_rej(work->port, work->mad_recv_wc,
1468                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1469                              NULL, 0);
1470                 return NULL;
1471         }
1472
1473         /* Find matching listen request. */
1474         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1475                                            req_msg->service_id,
1476                                            req_msg->private_data);
1477         if (!listen_cm_id_priv) {
1478                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1479                 spin_unlock_irq(&cm.lock);
1480                 cm_issue_rej(work->port, work->mad_recv_wc,
1481                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1482                              NULL, 0);
1483                 goto out;
1484         }
1485         atomic_inc(&listen_cm_id_priv->refcount);
1486         atomic_inc(&cm_id_priv->refcount);
1487         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1488         atomic_inc(&cm_id_priv->work_count);
1489         spin_unlock_irq(&cm.lock);
1490 out:
1491         return listen_cm_id_priv;
1492 }
1493
1494 /*
1495  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1496  * we need to override the LID/SL data in the REQ with the LID information
1497  * in the work completion.
1498  */
1499 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1500 {
1501         if (!cm_req_get_primary_subnet_local(req_msg)) {
1502                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1503                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1504                         cm_req_set_primary_sl(req_msg, wc->sl);
1505                 }
1506
1507                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1508                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1509         }
1510
1511         if (!cm_req_get_alt_subnet_local(req_msg)) {
1512                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1513                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1514                         cm_req_set_alt_sl(req_msg, wc->sl);
1515                 }
1516
1517                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1518                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1519         }
1520 }
1521
1522 static int cm_req_handler(struct cm_work *work)
1523 {
1524         struct ib_cm_id *cm_id;
1525         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1526         struct cm_req_msg *req_msg;
1527         int ret;
1528
1529         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1530
1531         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1532         if (IS_ERR(cm_id))
1533                 return PTR_ERR(cm_id);
1534
1535         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1536         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1537         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1538                                 work->mad_recv_wc->recv_buf.grh,
1539                                 &cm_id_priv->av);
1540         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1541                                                             id.local_id);
1542         if (IS_ERR(cm_id_priv->timewait_info)) {
1543                 ret = PTR_ERR(cm_id_priv->timewait_info);
1544                 goto destroy;
1545         }
1546         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1547         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1548         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1549
1550         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1551         if (!listen_cm_id_priv) {
1552                 ret = -EINVAL;
1553                 kfree(cm_id_priv->timewait_info);
                     cm_id_priv->timewait_info = NULL;
1554                 goto destroy;
1555         }
1556
1557         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1558         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1559         cm_id_priv->id.service_id = req_msg->service_id;
1560         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
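             /* A fully set mask binds this ID to the exact service ID. */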
1561
1562         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1563         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1564         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1565         if (ret) {
1566                 ib_get_cached_gid(work->port->cm_dev->ib_device,
1567                                   work->port->port_num, 0, &work->path[0].sgid);
1568                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1569                                &work->path[0].sgid, sizeof work->path[0].sgid,
1570                                NULL, 0);
1571                 goto rejected;
1572         }
1573         if (req_msg->alt_local_lid) {
1574                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1575                 if (ret) {
1576                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1577                                        &work->path[0].sgid,
1578                                        sizeof work->path[0].sgid, NULL, 0);
1579                         goto rejected;
1580                 }
1581         }
1582         cm_id_priv->tid = req_msg->hdr.tid;
1583         cm_id_priv->timeout_ms = cm_convert_to_ms(
1584                                         cm_req_get_local_resp_timeout(req_msg));
1585         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1586         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1587         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1588         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1589         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1590         cm_id_priv->pkey = req_msg->pkey;
1591         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1592         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1593         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1594         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1595
1596         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1597         cm_process_work(cm_id_priv, work);
1598         cm_deref_id(listen_cm_id_priv);
1599         return 0;
1600
1601 rejected:
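             /* Drop the reference taken in cm_match_req(). */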
1602         atomic_dec(&cm_id_priv->refcount);
1603         cm_deref_id(listen_cm_id_priv);
1604 destroy:
1605         ib_destroy_cm_id(cm_id);
1606         return ret;
1607 }
1608
1609 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1610                           struct cm_id_private *cm_id_priv,
1611                           struct ib_cm_rep_param *param)
1612 {
1613         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1614         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1615         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1616         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1617         rep_msg->resp_resources = param->responder_resources;
1618         cm_rep_set_target_ack_delay(rep_msg,
1619                                     cm_id_priv->av.port->cm_dev->ack_delay);
1620         cm_rep_set_failover(rep_msg, param->failover_accepted);
1621         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1622         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1623
1624         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1625                 rep_msg->initiator_depth = param->initiator_depth;
1626                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1627                 cm_rep_set_srq(rep_msg, param->srq);
1628                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1629         } else {
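                     /*
                      * XRC target: the QP receives through an SRQ, and qp_num
                      * carries the local EECN rather than a QPN.
                      */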
1630                 cm_rep_set_srq(rep_msg, 1);
1631                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1632         }
1633
1634         if (param->private_data && param->private_data_len)
1635                 memcpy(rep_msg->private_data, param->private_data,
1636                        param->private_data_len);
1637 }
1638
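     /*
      * Illustrative sketch only, not part of this file: a listen-side
      * cm_handler accepting a connection on IB_CM_REQ_RECEIVED might
      * reply along these lines, where qp, psn and event are the caller's
      * QP, starting PSN and the received event (all hypothetical names):
      *
      *     struct ib_cm_rep_param rep = {
      *             .qp_num                 = qp->qp_num,
      *             .starting_psn           = psn,
      *             .responder_resources    = event->param.req_rcvd.responder_resources,
      *             .initiator_depth        = event->param.req_rcvd.initiator_depth,
      *             .rnr_retry_count        = 7,
      *     };
      *     ret = ib_send_cm_rep(cm_id, &rep);
      */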
1639 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1640                    struct ib_cm_rep_param *param)
1641 {
1642         struct cm_id_private *cm_id_priv;
1643         struct ib_mad_send_buf *msg;
1644         struct cm_rep_msg *rep_msg;
1645         unsigned long flags;
1646         int ret;
1647
1648         if (param->private_data &&
1649             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1650                 return -EINVAL;
1651
1652         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1653         spin_lock_irqsave(&cm_id_priv->lock, flags);
1654         if (cm_id->state != IB_CM_REQ_RCVD &&
1655             cm_id->state != IB_CM_MRA_REQ_SENT) {
1656                 ret = -EINVAL;
1657                 goto out;
1658         }
1659
1660         ret = cm_alloc_msg(cm_id_priv, &msg);
1661         if (ret)
1662                 goto out;
1663
1664         rep_msg = (struct cm_rep_msg *) msg->mad;
1665         cm_format_rep(rep_msg, cm_id_priv, param);
1666         msg->timeout_ms = cm_id_priv->timeout_ms;
1667         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1668
1669         ret = ib_post_send_mad(msg, NULL);
1670         if (ret) {
1671                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1672                 cm_free_msg(msg);
1673                 return ret;
1674         }
1675
1676         cm_id->state = IB_CM_REP_SENT;
1677         cm_id_priv->msg = msg;
1678         cm_id_priv->initiator_depth = param->initiator_depth;
1679         cm_id_priv->responder_resources = param->responder_resources;
1680         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
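             /* QPNs are 24 bits wide, hence the mask. */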
1681         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1682
1683 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1684         return ret;
1685 }
1686 EXPORT_SYMBOL(ib_send_cm_rep);
1687
1688 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1689                           struct cm_id_private *cm_id_priv,
1690                           const void *private_data,
1691                           u8 private_data_len)
1692 {
1693         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1694         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1695         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1696
1697         if (private_data && private_data_len)
1698                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1699 }
1700
1701 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1702                    const void *private_data,
1703                    u8 private_data_len)
1704 {
1705         struct cm_id_private *cm_id_priv;
1706         struct ib_mad_send_buf *msg;
1707         unsigned long flags;
1708         void *data;
1709         int ret;
1710
1711         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1712                 return -EINVAL;
1713
1714         data = cm_copy_private_data(private_data, private_data_len);
1715         if (IS_ERR(data))
1716                 return PTR_ERR(data);
1717
1718         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1719         spin_lock_irqsave(&cm_id_priv->lock, flags);
1720         if (cm_id->state != IB_CM_REP_RCVD &&
1721             cm_id->state != IB_CM_MRA_REP_SENT) {
1722                 ret = -EINVAL;
1723                 goto error;
1724         }
1725
1726         ret = cm_alloc_msg(cm_id_priv, &msg);
1727         if (ret)
1728                 goto error;
1729
1730         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1731                       private_data, private_data_len);
1732
1733         ret = ib_post_send_mad(msg, NULL);
1734         if (ret) {
1735                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1736                 cm_free_msg(msg);
1737                 kfree(data);
1738                 return ret;
1739         }
1740
1741         cm_id->state = IB_CM_ESTABLISHED;
1742         cm_set_private_data(cm_id_priv, data, private_data_len);
1743         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1744         return 0;
1745
1746 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1747         kfree(data);
1748         return ret;
1749 }
1750 EXPORT_SYMBOL(ib_send_cm_rtu);
1751
1752 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1753 {
1754         struct cm_rep_msg *rep_msg;
1755         struct ib_cm_rep_event_param *param;
1756
1757         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1758         param = &work->cm_event.param.rep_rcvd;
1759         param->remote_ca_guid = rep_msg->local_ca_guid;
1760         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1761         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1762         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
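             /* As in the REQ, depth and resources swap viewpoints here. */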
1763         param->responder_resources = rep_msg->initiator_depth;
1764         param->initiator_depth = rep_msg->resp_resources;
1765         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1766         param->failover_accepted = cm_rep_get_failover(rep_msg);
1767         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1768         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1769         param->srq = cm_rep_get_srq(rep_msg);
1770         work->cm_event.private_data = &rep_msg->private_data;
1771 }
1772
1773 static void cm_dup_rep_handler(struct cm_work *work)
1774 {
1775         struct cm_id_private *cm_id_priv;
1776         struct cm_rep_msg *rep_msg;
1777         struct ib_mad_send_buf *msg = NULL;
1778         int ret;
1779
1780         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1781         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1782                                    rep_msg->local_comm_id);
1783         if (!cm_id_priv)
1784                 return;
1785
1786         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1787                         counter[CM_REP_COUNTER]);
1788         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1789         if (ret)
1790                 goto deref;
1791
1792         spin_lock_irq(&cm_id_priv->lock);
1793         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1794                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1795                               cm_id_priv->private_data,
1796                               cm_id_priv->private_data_len);
1797         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1798                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1799                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1800                               cm_id_priv->private_data,
1801                               cm_id_priv->private_data_len);
1802         else
1803                 goto unlock;
1804         spin_unlock_irq(&cm_id_priv->lock);
1805
1806         ret = ib_post_send_mad(msg, NULL);
1807         if (ret)
1808                 goto free;
1809         goto deref;
1810
1811 unlock: spin_unlock_irq(&cm_id_priv->lock);
1812 free:   cm_free_msg(msg);
1813 deref:  cm_deref_id(cm_id_priv);
1814 }
1815
1816 static int cm_rep_handler(struct cm_work *work)
1817 {
1818         struct cm_id_private *cm_id_priv;
1819         struct cm_rep_msg *rep_msg;
1820         int ret;
1821
1822         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
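             /*
              * A zero remote ID only matches an ID whose remote side is
              * still unbound, i.e. this is the first REP seen for the
              * connection; anything else is handled as a duplicate.
              */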
1823         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1824         if (!cm_id_priv) {
1825                 cm_dup_rep_handler(work);
1826                 return -EINVAL;
1827         }
1828
1829         cm_format_rep_event(work, cm_id_priv->qp_type);
1830
1831         spin_lock_irq(&cm_id_priv->lock);
1832         switch (cm_id_priv->id.state) {
1833         case IB_CM_REQ_SENT:
1834         case IB_CM_MRA_REQ_RCVD:
1835                 break;
1836         default:
1837                 spin_unlock_irq(&cm_id_priv->lock);
1838                 ret = -EINVAL;
1839                 goto error;
1840         }
1841
1842         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1843         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1844         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1845
1846         spin_lock(&cm.lock);
1847         /* Check for duplicate REP. */
1848         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1849                 spin_unlock(&cm.lock);
1850                 spin_unlock_irq(&cm_id_priv->lock);
1851                 ret = -EINVAL;
1852                 goto error;
1853         }
1854         /* Check for a stale connection. */
1855         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1856                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1857                          &cm.remote_id_table);
1858                 cm_id_priv->timewait_info->inserted_remote_id = 0;
1859                 spin_unlock(&cm.lock);
1860                 spin_unlock_irq(&cm_id_priv->lock);
1861                 cm_issue_rej(work->port, work->mad_recv_wc,
1862                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1863                              NULL, 0);
1864                 ret = -EINVAL;
1865                 goto error;
1866         }
1867         spin_unlock(&cm.lock);
1868
1869         cm_id_priv->id.state = IB_CM_REP_RCVD;
1870         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1871         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1872         cm_id_priv->initiator_depth = rep_msg->resp_resources;
1873         cm_id_priv->responder_resources = rep_msg->initiator_depth;
1874         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1875         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1876         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1877         cm_id_priv->av.timeout =
1878                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1879                                        cm_id_priv->av.timeout - 1);
1880         cm_id_priv->alt_av.timeout =
1881                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1882                                        cm_id_priv->alt_av.timeout - 1);
1883
1884         /* todo: handle peer_to_peer */
1885
1886         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1887         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1888         if (!ret)
1889                 list_add_tail(&work->list, &cm_id_priv->work_list);
1890         spin_unlock_irq(&cm_id_priv->lock);
1891
1892         if (ret)
1893                 cm_process_work(cm_id_priv, work);
1894         else
1895                 cm_deref_id(cm_id_priv);
1896         return 0;
1897
1898 error:
1899         cm_deref_id(cm_id_priv);
1900         return ret;
1901 }
1902
1903 static int cm_establish_handler(struct cm_work *work)
1904 {
1905         struct cm_id_private *cm_id_priv;
1906         int ret;
1907
1908         /* See comment in cm_establish about lookup. */
1909         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1910         if (!cm_id_priv)
1911                 return -EINVAL;
1912
1913         spin_lock_irq(&cm_id_priv->lock);
1914         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1915                 spin_unlock_irq(&cm_id_priv->lock);
1916                 goto out;
1917         }
1918
1919         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1920         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1921         if (!ret)
1922                 list_add_tail(&work->list, &cm_id_priv->work_list);
1923         spin_unlock_irq(&cm_id_priv->lock);
1924
1925         if (ret)
1926                 cm_process_work(cm_id_priv, work);
1927         else
1928                 cm_deref_id(cm_id_priv);
1929         return 0;
1930 out:
1931         cm_deref_id(cm_id_priv);
1932         return -EINVAL;
1933 }
1934
1935 static int cm_rtu_handler(struct cm_work *work)
1936 {
1937         struct cm_id_private *cm_id_priv;
1938         struct cm_rtu_msg *rtu_msg;
1939         int ret;
1940
1941         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1942         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1943                                    rtu_msg->local_comm_id);
1944         if (!cm_id_priv)
1945                 return -EINVAL;
1946
1947         work->cm_event.private_data = &rtu_msg->private_data;
1948
1949         spin_lock_irq(&cm_id_priv->lock);
1950         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1951             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1952                 spin_unlock_irq(&cm_id_priv->lock);
1953                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1954                                 counter[CM_RTU_COUNTER]);
1955                 goto out;
1956         }
1957         cm_id_priv->id.state = IB_CM_ESTABLISHED;
1958
1959         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1960         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1961         if (!ret)
1962                 list_add_tail(&work->list, &cm_id_priv->work_list);
1963         spin_unlock_irq(&cm_id_priv->lock);
1964
1965         if (ret)
1966                 cm_process_work(cm_id_priv, work);
1967         else
1968                 cm_deref_id(cm_id_priv);
1969         return 0;
1970 out:
1971         cm_deref_id(cm_id_priv);
1972         return -EINVAL;
1973 }
1974
1975 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1976                           struct cm_id_private *cm_id_priv,
1977                           const void *private_data,
1978                           u8 private_data_len)
1979 {
1980         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1981                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1982         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1983         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
1984         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
1985
1986         if (private_data && private_data_len)
1987                 memcpy(dreq_msg->private_data, private_data, private_data_len);
1988 }
1989
1990 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1991                     const void *private_data,
1992                     u8 private_data_len)
1993 {
1994         struct cm_id_private *cm_id_priv;
1995         struct ib_mad_send_buf *msg;
1996         unsigned long flags;
1997         int ret;
1998
1999         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2000                 return -EINVAL;
2001
2002         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2003         spin_lock_irqsave(&cm_id_priv->lock, flags);
2004         if (cm_id->state != IB_CM_ESTABLISHED) {
2005                 ret = -EINVAL;
2006                 goto out;
2007         }
2008
2009         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2010             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2011                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2012
2013         ret = cm_alloc_msg(cm_id_priv, &msg);
2014         if (ret) {
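                     /* Tear the connection down even if no DREQ can go out. */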
2015                 cm_enter_timewait(cm_id_priv);
2016                 goto out;
2017         }
2018
2019         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2020                        private_data, private_data_len);
2021         msg->timeout_ms = cm_id_priv->timeout_ms;
2022         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2023
2024         ret = ib_post_send_mad(msg, NULL);
2025         if (ret) {
2026                 cm_enter_timewait(cm_id_priv);
2027                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2028                 cm_free_msg(msg);
2029                 return ret;
2030         }
2031
2032         cm_id->state = IB_CM_DREQ_SENT;
2033         cm_id_priv->msg = msg;
2034 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2035         return ret;
2036 }
2037 EXPORT_SYMBOL(ib_send_cm_dreq);
2038
2039 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2040                           struct cm_id_private *cm_id_priv,
2041                           const void *private_data,
2042                           u8 private_data_len)
2043 {
2044         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2045         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2046         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2047
2048         if (private_data && private_data_len)
2049                 memcpy(drep_msg->private_data, private_data, private_data_len);
2050 }
2051
2052 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2053                     const void *private_data,
2054                     u8 private_data_len)
2055 {
2056         struct cm_id_private *cm_id_priv;
2057         struct ib_mad_send_buf *msg;
2058         unsigned long flags;
2059         void *data;
2060         int ret;
2061
2062         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2063                 return -EINVAL;
2064
2065         data = cm_copy_private_data(private_data, private_data_len);
2066         if (IS_ERR(data))
2067                 return PTR_ERR(data);
2068
2069         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2070         spin_lock_irqsave(&cm_id_priv->lock, flags);
2071         if (cm_id->state != IB_CM_DREQ_RCVD) {
2072                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2073                 kfree(data);
2074                 return -EINVAL;
2075         }
2076
2077         cm_set_private_data(cm_id_priv, data, private_data_len);
2078         cm_enter_timewait(cm_id_priv);
2079
2080         ret = cm_alloc_msg(cm_id_priv, &msg);
2081         if (ret)
2082                 goto out;
2083
2084         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2085                        private_data, private_data_len);
2086
2087         ret = ib_post_send_mad(msg, NULL);
2088         if (ret) {
2089                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2090                 cm_free_msg(msg);
2091                 return ret;
2092         }
2093
2094 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2095         return ret;
2096 }
2097 EXPORT_SYMBOL(ib_send_cm_drep);
2098
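     /*
      * Send a DREP for a DREQ that does not match any live connection so
      * that the remote peer can complete its disconnect.
      */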
2099 static int cm_issue_drep(struct cm_port *port,
2100                          struct ib_mad_recv_wc *mad_recv_wc)
2101 {
2102         struct ib_mad_send_buf *msg = NULL;
2103         struct cm_dreq_msg *dreq_msg;
2104         struct cm_drep_msg *drep_msg;
2105         int ret;
2106
2107         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2108         if (ret)
2109                 return ret;
2110
2111         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2112         drep_msg = (struct cm_drep_msg *) msg->mad;
2113
2114         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2115         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2116         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2117
2118         ret = ib_post_send_mad(msg, NULL);
2119         if (ret)
2120                 cm_free_msg(msg);
2121
2122         return ret;
2123 }
2124
2125 static int cm_dreq_handler(struct cm_work *work)
2126 {
2127         struct cm_id_private *cm_id_priv;
2128         struct cm_dreq_msg *dreq_msg;
2129         struct ib_mad_send_buf *msg = NULL;
2130         int ret;
2131
2132         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2133         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2134                                    dreq_msg->local_comm_id);
2135         if (!cm_id_priv) {
2136                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2137                                 counter[CM_DREQ_COUNTER]);
2138                 cm_issue_drep(work->port, work->mad_recv_wc);
2139                 return -EINVAL;
2140         }
2141
2142         work->cm_event.private_data = &dreq_msg->private_data;
2143
2144         spin_lock_irq(&cm_id_priv->lock);
2145         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2146                 goto unlock;
2147
2148         switch (cm_id_priv->id.state) {
2149         case IB_CM_REP_SENT:
2150         case IB_CM_DREQ_SENT:
2151                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2152                 break;
2153         case IB_CM_ESTABLISHED:
2154                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2155                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2156                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2157                 break;
2158         case IB_CM_MRA_REP_RCVD:
2159                 break;
2160         case IB_CM_TIMEWAIT:
2161                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2162                                 counter[CM_DREQ_COUNTER]);
2163                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2164                         goto unlock;
2165
2166                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2167                                cm_id_priv->private_data,
2168                                cm_id_priv->private_data_len);
2169                 spin_unlock_irq(&cm_id_priv->lock);
2170
2171                 if (ib_post_send_mad(msg, NULL))
2172                         cm_free_msg(msg);
2173                 goto deref;
2174         case IB_CM_DREQ_RCVD:
2175                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2176                                 counter[CM_DREQ_COUNTER]);
2177                 goto unlock;
2178         default:
2179                 goto unlock;
2180         }
2181         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2182         cm_id_priv->tid = dreq_msg->hdr.tid;
2183         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2184         if (!ret)
2185                 list_add_tail(&work->list, &cm_id_priv->work_list);
2186         spin_unlock_irq(&cm_id_priv->lock);
2187
2188         if (ret)
2189                 cm_process_work(cm_id_priv, work);
2190         else
2191                 cm_deref_id(cm_id_priv);
2192         return 0;
2193
2194 unlock: spin_unlock_irq(&cm_id_priv->lock);
2195 deref:  cm_deref_id(cm_id_priv);
2196         return -EINVAL;
2197 }
2198
2199 static int cm_drep_handler(struct cm_work *work)
2200 {
2201         struct cm_id_private *cm_id_priv;
2202         struct cm_drep_msg *drep_msg;
2203         int ret;
2204
2205         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2206         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2207                                    drep_msg->local_comm_id);
2208         if (!cm_id_priv)
2209                 return -EINVAL;
2210
2211         work->cm_event.private_data = &drep_msg->private_data;
2212
2213         spin_lock_irq(&cm_id_priv->lock);
2214         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2215             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2216                 spin_unlock_irq(&cm_id_priv->lock);
2217                 goto out;
2218         }
2219         cm_enter_timewait(cm_id_priv);
2220
2221         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2222         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2223         if (!ret)
2224                 list_add_tail(&work->list, &cm_id_priv->work_list);
2225         spin_unlock_irq(&cm_id_priv->lock);
2226
2227         if (ret)
2228                 cm_process_work(cm_id_priv, work);
2229         else
2230                 cm_deref_id(cm_id_priv);
2231         return 0;
2232 out:
2233         cm_deref_id(cm_id_priv);
2234         return -EINVAL;
2235 }
2236
2237 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2238                    enum ib_cm_rej_reason reason,
2239                    void *ari,
2240                    u8 ari_length,
2241                    const void *private_data,
2242                    u8 private_data_len)
2243 {
2244         struct cm_id_private *cm_id_priv;
2245         struct ib_mad_send_buf *msg;
2246         unsigned long flags;
2247         int ret;
2248
2249         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2250             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2251                 return -EINVAL;
2252
2253         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2254
2255         spin_lock_irqsave(&cm_id_priv->lock, flags);
2256         switch (cm_id->state) {
2257         case IB_CM_REQ_SENT:
2258         case IB_CM_MRA_REQ_RCVD:
2259         case IB_CM_REQ_RCVD:
2260         case IB_CM_MRA_REQ_SENT:
2261         case IB_CM_REP_RCVD:
2262         case IB_CM_MRA_REP_SENT:
2263                 ret = cm_alloc_msg(cm_id_priv, &msg);
2264                 if (!ret)
2265                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2266                                       cm_id_priv, reason, ari, ari_length,
2267                                       private_data, private_data_len);
2268
2269                 cm_reset_to_idle(cm_id_priv);
2270                 break;
2271         case IB_CM_REP_SENT:
2272         case IB_CM_MRA_REP_RCVD:
2273                 ret = cm_alloc_msg(cm_id_priv, &msg);
2274                 if (!ret)
2275                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2276                                       cm_id_priv, reason, ari, ari_length,
2277                                       private_data, private_data_len);
2278
2279                 cm_enter_timewait(cm_id_priv);
2280                 break;
2281         default:
2282                 ret = -EINVAL;
2283                 goto out;
2284         }
2285
2286         if (ret)
2287                 goto out;
2288
2289         ret = ib_post_send_mad(msg, NULL);
2290         if (ret)
2291                 cm_free_msg(msg);
2292
2293 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2294         return ret;
2295 }
2296 EXPORT_SYMBOL(ib_send_cm_rej);
2297
2298 static void cm_format_rej_event(struct cm_work *work)
2299 {
2300         struct cm_rej_msg *rej_msg;
2301         struct ib_cm_rej_event_param *param;
2302
2303         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2304         param = &work->cm_event.param.rej_rcvd;
2305         param->ari = rej_msg->ari;
2306         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2307         param->reason = __be16_to_cpu(rej_msg->reason);
2308         work->cm_event.private_data = &rej_msg->private_data;
2309 }
2310
2311 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2312 {
2313         struct cm_timewait_info *timewait_info;
2314         struct cm_id_private *cm_id_priv;
2315         __be32 remote_id;
2316
2317         remote_id = rej_msg->local_comm_id;
2318
2319         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2320                 spin_lock_irq(&cm.lock);
2321                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2322                                                   remote_id);
2323                 if (!timewait_info) {
2324                         spin_unlock_irq(&cm.lock);
2325                         return NULL;
2326                 }
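                     /*
                      * Local IDs are handed out XOR'd with a random operand;
                      * undo that to recover the idr key.
                      */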
2327                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2328                                       (timewait_info->work.local_id ^
2329                                        cm.random_id_operand));
2330                 if (cm_id_priv) {
2331                         if (cm_id_priv->id.remote_id == remote_id)
2332                                 atomic_inc(&cm_id_priv->refcount);
2333                         else
2334                                 cm_id_priv = NULL;
2335                 }
2336                 spin_unlock_irq(&cm.lock);
2337         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2338                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2339         else
2340                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2341
2342         return cm_id_priv;
2343 }
2344
2345 static int cm_rej_handler(struct cm_work *work)
2346 {
2347         struct cm_id_private *cm_id_priv;
2348         struct cm_rej_msg *rej_msg;
2349         int ret;
2350
2351         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2352         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2353         if (!cm_id_priv)
2354                 return -EINVAL;
2355
2356         cm_format_rej_event(work);
2357
2358         spin_lock_irq(&cm_id_priv->lock);
2359         switch (cm_id_priv->id.state) {
2360         case IB_CM_REQ_SENT:
2361         case IB_CM_MRA_REQ_RCVD:
2362         case IB_CM_REP_SENT:
2363         case IB_CM_MRA_REP_RCVD:
2364                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2365                 /* fall through */
2366         case IB_CM_REQ_RCVD:
2367         case IB_CM_MRA_REQ_SENT:
2368                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2369                         cm_enter_timewait(cm_id_priv);
2370                 else
2371                         cm_reset_to_idle(cm_id_priv);
2372                 break;
2373         case IB_CM_DREQ_SENT:
2374                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2375                 /* fall through */
2376         case IB_CM_REP_RCVD:
2377         case IB_CM_MRA_REP_SENT:
2378                 cm_enter_timewait(cm_id_priv);
2379                 break;
2380         case IB_CM_ESTABLISHED:
2381                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2382                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2383                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2384                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2385                                               cm_id_priv->msg);
2386                         cm_enter_timewait(cm_id_priv);
2387                         break;
2388                 }
2389                 /* fall through */
2390         default:
2391                 spin_unlock_irq(&cm_id_priv->lock);
2392                 ret = -EINVAL;
2393                 goto out;
2394         }
2395
2396         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2397         if (!ret)
2398                 list_add_tail(&work->list, &cm_id_priv->work_list);
2399         spin_unlock_irq(&cm_id_priv->lock);
2400
2401         if (ret)
2402                 cm_process_work(cm_id_priv, work);
2403         else
2404                 cm_deref_id(cm_id_priv);
2405         return 0;
2406 out:
2407         cm_deref_id(cm_id_priv);
2408         return -EINVAL;
2409 }
2410
2411 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2412                    u8 service_timeout,
2413                    const void *private_data,
2414                    u8 private_data_len)
2415 {
2416         struct cm_id_private *cm_id_priv;
2417         struct ib_mad_send_buf *msg;
2418         enum ib_cm_state cm_state;
2419         enum ib_cm_lap_state lap_state;
2420         enum cm_msg_response msg_response;
2421         void *data;
2422         unsigned long flags;
2423         int ret;
2424
2425         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2426                 return -EINVAL;
2427
2428         data = cm_copy_private_data(private_data, private_data_len);
2429         if (IS_ERR(data))
2430                 return PTR_ERR(data);
2431
2432         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2433
2434         spin_lock_irqsave(&cm_id_priv->lock, flags);
2435         switch (cm_id_priv->id.state) {
2436         case IB_CM_REQ_RCVD:
2437                 cm_state = IB_CM_MRA_REQ_SENT;
2438                 lap_state = cm_id->lap_state;
2439                 msg_response = CM_MSG_RESPONSE_REQ;
2440                 break;
2441         case IB_CM_REP_RCVD:
2442                 cm_state = IB_CM_MRA_REP_SENT;
2443                 lap_state = cm_id->lap_state;
2444                 msg_response = CM_MSG_RESPONSE_REP;
2445                 break;
2446         case IB_CM_ESTABLISHED:
2447                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2448                         cm_state = cm_id->state;
2449                         lap_state = IB_CM_MRA_LAP_SENT;
2450                         msg_response = CM_MSG_RESPONSE_OTHER;
2451                         break;
2452                 }
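                     /* fall through */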
2453         default:
2454                 ret = -EINVAL;
2455                 goto error1;
2456         }
2457
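             /*
              * With IB_CM_MRA_FLAG_DELAY set, no MRA is sent now; one goes
              * out only if a duplicate message is received later.
              */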
2458         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2459                 ret = cm_alloc_msg(cm_id_priv, &msg);
2460                 if (ret)
2461                         goto error1;
2462
2463                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2464                               msg_response, service_timeout,
2465                               private_data, private_data_len);
2466                 ret = ib_post_send_mad(msg, NULL);
2467                 if (ret)
2468                         goto error2;
2469         }
2470
2471         cm_id->state = cm_state;
2472         cm_id->lap_state = lap_state;
2473         cm_id_priv->service_timeout = service_timeout;
2474         cm_set_private_data(cm_id_priv, data, private_data_len);
2475         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2476         return 0;
2477
2478 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2479         kfree(data);
2480         return ret;
2481
2482 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2483         kfree(data);
2484         cm_free_msg(msg);
2485         return ret;
2486 }
2487 EXPORT_SYMBOL(ib_send_cm_mra);
2488
2489 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2490 {
2491         switch (cm_mra_get_msg_mraed(mra_msg)) {
2492         case CM_MSG_RESPONSE_REQ:
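                     /* The remote ID is still unknown when our REQ is MRA'd. */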
2493                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2494         case CM_MSG_RESPONSE_REP:
2495         case CM_MSG_RESPONSE_OTHER:
2496                 return cm_acquire_id(mra_msg->remote_comm_id,
2497                                      mra_msg->local_comm_id);
2498         default:
2499                 return NULL;
2500         }
2501 }
2502
2503 static int cm_mra_handler(struct cm_work *work)
2504 {
2505         struct cm_id_private *cm_id_priv;
2506         struct cm_mra_msg *mra_msg;
2507         int timeout, ret;
2508
2509         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2510         cm_id_priv = cm_acquire_mraed_id(mra_msg);
2511         if (!cm_id_priv)
2512                 return -EINVAL;
2513
2514         work->cm_event.private_data = &mra_msg->private_data;
2515         work->cm_event.param.mra_rcvd.service_timeout =
2516                                         cm_mra_get_service_timeout(mra_msg);
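             /* Extend the MAD timeout by the service time plus the path's ACK timeout. */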
2517         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2518                   cm_convert_to_ms(cm_id_priv->av.timeout);
2519
2520         spin_lock_irq(&cm_id_priv->lock);
2521         switch (cm_id_priv->id.state) {
2522         case IB_CM_REQ_SENT:
2523                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2524                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2525                                   cm_id_priv->msg, timeout))
2526                         goto out;
2527                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2528                 break;
2529         case IB_CM_REP_SENT:
2530                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2531                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2532                                   cm_id_priv->msg, timeout))
2533                         goto out;
2534                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2535                 break;
2536         case IB_CM_ESTABLISHED:
2537                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2538                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2539                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2540                                   cm_id_priv->msg, timeout)) {
2541                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2542                                 atomic_long_inc(&work->port->
2543                                                 counter_group[CM_RECV_DUPLICATES].
2544                                                 counter[CM_MRA_COUNTER]);
2545                         goto out;
2546                 }
2547                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2548                 break;
2549         case IB_CM_MRA_REQ_RCVD:
2550         case IB_CM_MRA_REP_RCVD:
2551                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2552                                 counter[CM_MRA_COUNTER]);
2553                 /* fall through */
2554         default:
2555                 goto out;
2556         }
2557
2558         cm_id_priv->msg->context[1] = (void *) (unsigned long)
2559                                       cm_id_priv->id.state;
2560         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2561         if (!ret)
2562                 list_add_tail(&work->list, &cm_id_priv->work_list);
2563         spin_unlock_irq(&cm_id_priv->lock);
2564
2565         if (ret)
2566                 cm_process_work(cm_id_priv, work);
2567         else
2568                 cm_deref_id(cm_id_priv);
2569         return 0;
2570 out:
2571         spin_unlock_irq(&cm_id_priv->lock);
2572         cm_deref_id(cm_id_priv);
2573         return -EINVAL;
2574 }
2575
2576 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2577                           struct cm_id_private *cm_id_priv,
2578                           struct ib_sa_path_rec *alternate_path,
2579                           const void *private_data,
2580                           u8 private_data_len)
2581 {
2582         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2583                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2584         lap_msg->local_comm_id = cm_id_priv->id.local_id;
2585         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2586         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2587         /* todo: need remote CM response timeout */
2588         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2589         lap_msg->alt_local_lid = alternate_path->slid;
2590         lap_msg->alt_remote_lid = alternate_path->dlid;
2591         lap_msg->alt_local_gid = alternate_path->sgid;
2592         lap_msg->alt_remote_gid = alternate_path->dgid;
2593         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2594         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2595         lap_msg->alt_hop_limit = alternate_path->hop_limit;
2596         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2597         cm_lap_set_sl(lap_msg, alternate_path->sl);
2598         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2599         cm_lap_set_local_ack_timeout(lap_msg,
2600                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2601                                alternate_path->packet_life_time));
2602
2603         if (private_data && private_data_len)
2604                 memcpy(lap_msg->private_data, private_data, private_data_len);
2605 }
2606
2607 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2608                    struct ib_sa_path_rec *alternate_path,
2609                    const void *private_data,
2610                    u8 private_data_len)
2611 {
2612         struct cm_id_private *cm_id_priv;
2613         struct ib_mad_send_buf *msg;
2614         unsigned long flags;
2615         int ret;
2616
2617         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2618                 return -EINVAL;
2619
2620         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2621         spin_lock_irqsave(&cm_id_priv->lock, flags);
2622         if (cm_id->state != IB_CM_ESTABLISHED ||
2623             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2624              cm_id->lap_state != IB_CM_LAP_IDLE)) {
2625                 ret = -EINVAL;
2626                 goto out;
2627         }
2628
2629         ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2630         if (ret)
2631                 goto out;
2632         cm_id_priv->alt_av.timeout =
2633                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2634                                        cm_id_priv->alt_av.timeout - 1);
2635
2636         ret = cm_alloc_msg(cm_id_priv, &msg);
2637         if (ret)
2638                 goto out;
2639
2640         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2641                       alternate_path, private_data, private_data_len);
2642         msg->timeout_ms = cm_id_priv->timeout_ms;
2643         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2644
2645         ret = ib_post_send_mad(msg, NULL);
2646         if (ret) {
2647                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2648                 cm_free_msg(msg);
2649                 return ret;
2650         }
2651
2652         cm_id->lap_state = IB_CM_LAP_SENT;
2653         cm_id_priv->msg = msg;
2654
2655 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2656         return ret;
2657 }
2658 EXPORT_SYMBOL(ib_send_cm_lap);
2659
2660 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2661                                     struct ib_sa_path_rec *path,
2662                                     struct cm_lap_msg *lap_msg)
2663 {
2664         memset(path, 0, sizeof *path);
2665         path->dgid = lap_msg->alt_local_gid;
2666         path->sgid = lap_msg->alt_remote_gid;
2667         path->dlid = lap_msg->alt_local_lid;
2668         path->slid = lap_msg->alt_remote_lid;
2669         path->flow_label = cm_lap_get_flow_label(lap_msg);
2670         path->hop_limit = lap_msg->alt_hop_limit;
2671         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2672         path->reversible = 1;
2673         path->pkey = cm_id_priv->pkey;
2674         path->sl = cm_lap_get_sl(lap_msg);
2675         path->mtu_selector = IB_SA_EQ;
2676         path->mtu = cm_id_priv->path_mtu;
2677         path->rate_selector = IB_SA_EQ;
2678         path->rate = cm_lap_get_packet_rate(lap_msg);
2679         path->packet_life_time_selector = IB_SA_EQ;
2680         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
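             /*
              * The LAP's ACK timeout is roughly the packet life time plus
              * one (for the round trip); take the increment back out,
              * clamping at zero.
              */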
2681         path->packet_life_time -= (path->packet_life_time > 0);
2682 }
2683
2684 static int cm_lap_handler(struct cm_work *work)
2685 {
2686         struct cm_id_private *cm_id_priv;
2687         struct cm_lap_msg *lap_msg;
2688         struct ib_cm_lap_event_param *param;
2689         struct ib_mad_send_buf *msg = NULL;
2690         int ret;
2691
2692         /* todo: verify LAP request and send reject APR if invalid. */
2693         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2694         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2695                                    lap_msg->local_comm_id);
2696         if (!cm_id_priv)
2697                 return -EINVAL;
2698
2699         param = &work->cm_event.param.lap_rcvd;
2700         param->alternate_path = &work->path[0];
2701         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2702         work->cm_event.private_data = &lap_msg->private_data;
2703
2704         spin_lock_irq(&cm_id_priv->lock);
2705         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2706                 goto unlock;
2707
2708         switch (cm_id_priv->id.lap_state) {
2709         case IB_CM_LAP_UNINIT:
2710         case IB_CM_LAP_IDLE:
2711                 break;
2712         case IB_CM_MRA_LAP_SENT:
2713                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2714                                 counter[CM_LAP_COUNTER]);
2715                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2716                         goto unlock;
2717
2718                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2719                               CM_MSG_RESPONSE_OTHER,
2720                               cm_id_priv->service_timeout,
2721                               cm_id_priv->private_data,
2722                               cm_id_priv->private_data_len);
2723                 spin_unlock_irq(&cm_id_priv->lock);
2724
2725                 if (ib_post_send_mad(msg, NULL))
2726                         cm_free_msg(msg);
2727                 goto deref;
2728         case IB_CM_LAP_RCVD:
2729                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2730                                 counter[CM_LAP_COUNTER]);
2731                 goto unlock;
2732         default:
2733                 goto unlock;
2734         }
2735
2736         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2737         cm_id_priv->tid = lap_msg->hdr.tid;
2738         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2739                                 work->mad_recv_wc->recv_buf.grh,
2740                                 &cm_id_priv->av);
2741         cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
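        /*
         * work_count starts at -1, so reaching zero means nothing else is
         * pending and the event can be processed directly; otherwise queue
         * it behind the work already in flight.
         */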
2742         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2743         if (!ret)
2744                 list_add_tail(&work->list, &cm_id_priv->work_list);
2745         spin_unlock_irq(&cm_id_priv->lock);
2746
2747         if (ret)
2748                 cm_process_work(cm_id_priv, work);
2749         else
2750                 cm_deref_id(cm_id_priv);
2751         return 0;
2752
2753 unlock: spin_unlock_irq(&cm_id_priv->lock);
2754 deref:  cm_deref_id(cm_id_priv);
2755         return -EINVAL;
2756 }
2757
2758 static void cm_format_apr(struct cm_apr_msg *apr_msg,
2759                           struct cm_id_private *cm_id_priv,
2760                           enum ib_cm_apr_status status,
2761                           void *info,
2762                           u8 info_length,
2763                           const void *private_data,
2764                           u8 private_data_len)
2765 {
2766         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2767         apr_msg->local_comm_id = cm_id_priv->id.local_id;
2768         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2769         apr_msg->ap_status = (u8) status;
2770
2771         if (info && info_length) {
2772                 apr_msg->info_length = info_length;
2773                 memcpy(apr_msg->info, info, info_length);
2774         }
2775
2776         if (private_data && private_data_len)
2777                 memcpy(apr_msg->private_data, private_data, private_data_len);
2778 }
2779
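/*
 * Send an APR accepting or rejecting the alternate path from a received
 * LAP.  Only valid on an established connection with a LAP outstanding
 * (lap_state LAP_RCVD or MRA_LAP_SENT).
 */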
2780 int ib_send_cm_apr(struct ib_cm_id *cm_id,
2781                    enum ib_cm_apr_status status,
2782                    void *info,
2783                    u8 info_length,
2784                    const void *private_data,
2785                    u8 private_data_len)
2786 {
2787         struct cm_id_private *cm_id_priv;
2788         struct ib_mad_send_buf *msg;
2789         unsigned long flags;
2790         int ret;
2791
2792         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2793             (info && info_length > IB_CM_APR_INFO_LENGTH))
2794                 return -EINVAL;
2795
2796         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2797         spin_lock_irqsave(&cm_id_priv->lock, flags);
2798         if (cm_id->state != IB_CM_ESTABLISHED ||
2799             (cm_id->lap_state != IB_CM_LAP_RCVD &&
2800              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2801                 ret = -EINVAL;
2802                 goto out;
2803         }
2804
2805         ret = cm_alloc_msg(cm_id_priv, &msg);
2806         if (ret)
2807                 goto out;
2808
2809         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2810                       info, info_length, private_data, private_data_len);
2811         ret = ib_post_send_mad(msg, NULL);
2812         if (ret) {
2813                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2814                 cm_free_msg(msg);
2815                 return ret;
2816         }
2817
2818         cm_id->lap_state = IB_CM_LAP_IDLE;
2819 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2820         return ret;
2821 }
2822 EXPORT_SYMBOL(ib_send_cm_apr);
2823
2824 static int cm_apr_handler(struct cm_work *work)
2825 {
2826         struct cm_id_private *cm_id_priv;
2827         struct cm_apr_msg *apr_msg;
2828         int ret;
2829
2830         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2831         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2832                                    apr_msg->local_comm_id);
2833         if (!cm_id_priv)
2834                 return -EINVAL; /* Unmatched reply. */
2835
2836         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2837         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2838         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2839         work->cm_event.private_data = &apr_msg->private_data;
2840
2841         spin_lock_irq(&cm_id_priv->lock);
2842         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2843             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2844              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2845                 spin_unlock_irq(&cm_id_priv->lock);
2846                 goto out;
2847         }
2848         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2849         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2850         cm_id_priv->msg = NULL;
2851
2852         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2853         if (!ret)
2854                 list_add_tail(&work->list, &cm_id_priv->work_list);
2855         spin_unlock_irq(&cm_id_priv->lock);
2856
2857         if (ret)
2858                 cm_process_work(cm_id_priv, work);
2859         else
2860                 cm_deref_id(cm_id_priv);
2861         return 0;
2862 out:
2863         cm_deref_id(cm_id_priv);
2864         return -EINVAL;
2865 }
2866
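/*
 * The timewait work item embeds its cm_work as the first member, so the
 * cast below is safe.  When the timewait period expires, the cm_id moves
 * back to IB_CM_IDLE and the consumer sees IB_CM_TIMEWAIT_EXIT.
 */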
2867 static int cm_timewait_handler(struct cm_work *work)
2868 {
2869         struct cm_timewait_info *timewait_info;
2870         struct cm_id_private *cm_id_priv;
2871         int ret;
2872
2873         timewait_info = (struct cm_timewait_info *)work;
2874         spin_lock_irq(&cm.lock);
2875         list_del(&timewait_info->list);
2876         spin_unlock_irq(&cm.lock);
2877
2878         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2879                                    timewait_info->work.remote_id);
2880         if (!cm_id_priv)
2881                 return -EINVAL;
2882
2883         spin_lock_irq(&cm_id_priv->lock);
2884         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2885             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2886                 spin_unlock_irq(&cm_id_priv->lock);
2887                 goto out;
2888         }
2889         cm_id_priv->id.state = IB_CM_IDLE;
2890         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2891         if (!ret)
2892                 list_add_tail(&work->list, &cm_id_priv->work_list);
2893         spin_unlock_irq(&cm_id_priv->lock);
2894
2895         if (ret)
2896                 cm_process_work(cm_id_priv, work);
2897         else
2898                 cm_deref_id(cm_id_priv);
2899         return 0;
2900 out:
2901         cm_deref_id(cm_id_priv);
2902         return -EINVAL;
2903 }
2904
2905 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2906                                struct cm_id_private *cm_id_priv,
2907                                struct ib_cm_sidr_req_param *param)
2908 {
2909         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2910                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2911         sidr_req_msg->request_id = cm_id_priv->id.local_id;
2912         sidr_req_msg->pkey = param->path->pkey;
2913         sidr_req_msg->service_id = param->service_id;
2914
2915         if (param->private_data && param->private_data_len)
2916                 memcpy(sidr_req_msg->private_data, param->private_data,
2917                        param->private_data_len);
2918 }
2919
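/*
 * Start service ID resolution (SIDR): ask the remote CM to resolve a
 * service ID on the given path to a QPN and Q_Key, without bringing up
 * a full connection.
 */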
2920 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2921                         struct ib_cm_sidr_req_param *param)
2922 {
2923         struct cm_id_private *cm_id_priv;
2924         struct ib_mad_send_buf *msg;
2925         unsigned long flags;
2926         int ret;
2927
2928         if (!param->path || (param->private_data &&
2929              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2930                 return -EINVAL;
2931
2932         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2933         ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2934         if (ret)
2935                 goto out;
2936
2937         cm_id->service_id = param->service_id;
2938         cm_id->service_mask = ~cpu_to_be64(0);
2939         cm_id_priv->timeout_ms = param->timeout_ms;
2940         cm_id_priv->max_cm_retries = param->max_cm_retries;
2941         ret = cm_alloc_msg(cm_id_priv, &msg);
2942         if (ret)
2943                 goto out;
2944
2945         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2946                            param);
2947         msg->timeout_ms = cm_id_priv->timeout_ms;
2948         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2949
2950         spin_lock_irqsave(&cm_id_priv->lock, flags);
2951         if (cm_id->state == IB_CM_IDLE)
2952                 ret = ib_post_send_mad(msg, NULL);
2953         else
2954                 ret = -EINVAL;
2955
2956         if (ret) {
2957                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2958                 cm_free_msg(msg);
2959                 goto out;
2960         }
2961         cm_id->state = IB_CM_SIDR_REQ_SENT;
2962         cm_id_priv->msg = msg;
2963         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2964 out:
2965         return ret;
2966 }
2967 EXPORT_SYMBOL(ib_send_cm_sidr_req);
2968
2969 static void cm_format_sidr_req_event(struct cm_work *work,
2970                                      struct ib_cm_id *listen_id)
2971 {
2972         struct cm_sidr_req_msg *sidr_req_msg;
2973         struct ib_cm_sidr_req_event_param *param;
2974
2975         sidr_req_msg = (struct cm_sidr_req_msg *)
2976                                 work->mad_recv_wc->recv_buf.mad;
2977         param = &work->cm_event.param.sidr_req_rcvd;
2978         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2979         param->listen_id = listen_id;
2980         param->port = work->port->port_num;
2981         work->cm_event.private_data = &sidr_req_msg->private_data;
2982 }
2983
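/*
 * Handle a received SIDR REQ: create a cm_id for it, use the sender's
 * LID (stashed in the dgid subnet prefix) plus request ID to detect
 * duplicates, then dispatch to the matching listener or reject with
 * IB_SIDR_UNSUPPORTED.
 */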
2984 static int cm_sidr_req_handler(struct cm_work *work)
2985 {
2986         struct ib_cm_id *cm_id;
2987         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2988         struct cm_sidr_req_msg *sidr_req_msg;
2989         struct ib_wc *wc;
2990
2991         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
2992         if (IS_ERR(cm_id))
2993                 return PTR_ERR(cm_id);
2994         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2995
2996         /* Record SGID/SLID and request ID for lookup. */
2997         sidr_req_msg = (struct cm_sidr_req_msg *)
2998                                 work->mad_recv_wc->recv_buf.mad;
2999         wc = work->mad_recv_wc->wc;
3000         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3001         cm_id_priv->av.dgid.global.interface_id = 0;
3002         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3003                                 work->mad_recv_wc->recv_buf.grh,
3004                                 &cm_id_priv->av);
3005         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3006         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3007         atomic_inc(&cm_id_priv->work_count);
3008
3009         spin_lock_irq(&cm.lock);
3010         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3011         if (cur_cm_id_priv) {
3012                 spin_unlock_irq(&cm.lock);
3013                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3014                                 counter[CM_SIDR_REQ_COUNTER]);
3015                 goto out; /* Duplicate message. */
3016         }
3017         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3018         cur_cm_id_priv = cm_find_listen(cm_id->device,
3019                                         sidr_req_msg->service_id,
3020                                         sidr_req_msg->private_data);
3021         if (!cur_cm_id_priv) {
3022                 spin_unlock_irq(&cm.lock);
3023                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3024                 goto out; /* No match. */
3025         }
3026         atomic_inc(&cur_cm_id_priv->refcount);
3027         atomic_inc(&cm_id_priv->refcount);
3028         spin_unlock_irq(&cm.lock);
3029
3030         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3031         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3032         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3033         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3034
3035         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
3036         cm_process_work(cm_id_priv, work);
3037         cm_deref_id(cur_cm_id_priv);
3038         return 0;
3039 out:
3040         ib_destroy_cm_id(&cm_id_priv->id);
3041         return -EINVAL;
3042 }
3043
3044 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3045                                struct cm_id_private *cm_id_priv,
3046                                struct ib_cm_sidr_rep_param *param)
3047 {
3048         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3049                           cm_id_priv->tid);
3050         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3051         sidr_rep_msg->status = param->status;
3052         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3053         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3054         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3055
3056         if (param->info && param->info_length)
3057                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3058
3059         if (param->private_data && param->private_data_len)
3060                 memcpy(sidr_rep_msg->private_data, param->private_data,
3061                        param->private_data_len);
3062 }
3063
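/*
 * Answer a SIDR REQ.  On success the cm_id returns to IB_CM_IDLE and is
 * removed from the remote SIDR table, so a reused request ID cannot
 * match this stale entry.
 */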
3064 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3065                         struct ib_cm_sidr_rep_param *param)
3066 {
3067         struct cm_id_private *cm_id_priv;
3068         struct ib_mad_send_buf *msg;
3069         unsigned long flags;
3070         int ret;
3071
3072         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3073             (param->private_data &&
3074              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3075                 return -EINVAL;
3076
3077         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3078         spin_lock_irqsave(&cm_id_priv->lock, flags);
3079         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3080                 ret = -EINVAL;
3081                 goto error;
3082         }
3083
3084         ret = cm_alloc_msg(cm_id_priv, &msg);
3085         if (ret)
3086                 goto error;
3087
3088         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3089                            param);
3090         ret = ib_post_send_mad(msg, NULL);
3091         if (ret) {
3092                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3093                 cm_free_msg(msg);
3094                 return ret;
3095         }
3096         cm_id->state = IB_CM_IDLE;
3097         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3098
3099         spin_lock_irqsave(&cm.lock, flags);
3100         if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3101                 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3102                 RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3103         }
3104         spin_unlock_irqrestore(&cm.lock, flags);
3105         return 0;
3106
3107 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3108         return ret;
3109 }
3110 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3111
3112 static void cm_format_sidr_rep_event(struct cm_work *work)
3113 {
3114         struct cm_sidr_rep_msg *sidr_rep_msg;
3115         struct ib_cm_sidr_rep_event_param *param;
3116
3117         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3118                                 work->mad_recv_wc->recv_buf.mad;
3119         param = &work->cm_event.param.sidr_rep_rcvd;
3120         param->status = sidr_rep_msg->status;
3121         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3122         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3123         param->info = &sidr_rep_msg->info;
3124         param->info_len = sidr_rep_msg->info_length;
3125         work->cm_event.private_data = &sidr_rep_msg->private_data;
3126 }
3127
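/*
 * Handle the SIDR REP completing a SIDR REQ we sent: cancel the
 * outstanding request MAD and report the result to the consumer.
 */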
3128 static int cm_sidr_rep_handler(struct cm_work *work)
3129 {
3130         struct cm_sidr_rep_msg *sidr_rep_msg;
3131         struct cm_id_private *cm_id_priv;
3132
3133         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3134                                 work->mad_recv_wc->recv_buf.mad;
3135         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3136         if (!cm_id_priv)
3137                 return -EINVAL; /* Unmatched reply. */
3138
3139         spin_lock_irq(&cm_id_priv->lock);
3140         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3141                 spin_unlock_irq(&cm_id_priv->lock);
3142                 goto out;
3143         }
3144         cm_id_priv->id.state = IB_CM_IDLE;
3145         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3146         spin_unlock_irq(&cm_id_priv->lock);
3147
3148         cm_format_sidr_rep_event(work);
3149         cm_process_work(cm_id_priv, work);
3150         return 0;
3151 out:
3152         cm_deref_id(cm_id_priv);
3153         return -EINVAL;
3154 }
3155
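/*
 * A send completed in error.  If the message is still the active one for
 * its cm_id and the cm_id is still in the state recorded at send time,
 * map the failure to the matching *_ERROR event; otherwise discard it.
 */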
3156 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3157                                   enum ib_wc_status wc_status)
3158 {
3159         struct cm_id_private *cm_id_priv;
3160         struct ib_cm_event cm_event;
3161         enum ib_cm_state state;
3162         int ret;
3163
3164         memset(&cm_event, 0, sizeof cm_event);
3165         cm_id_priv = msg->context[0];
3166
3167         /* Discard old sends or ones without a response. */
3168         spin_lock_irq(&cm_id_priv->lock);
3169         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3170         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3171                 goto discard;
3172
3173         switch (state) {
3174         case IB_CM_REQ_SENT:
3175         case IB_CM_MRA_REQ_RCVD:
3176                 cm_reset_to_idle(cm_id_priv);
3177                 cm_event.event = IB_CM_REQ_ERROR;
3178                 break;
3179         case IB_CM_REP_SENT:
3180         case IB_CM_MRA_REP_RCVD:
3181                 cm_reset_to_idle(cm_id_priv);
3182                 cm_event.event = IB_CM_REP_ERROR;
3183                 break;
3184         case IB_CM_DREQ_SENT:
3185                 cm_enter_timewait(cm_id_priv);
3186                 cm_event.event = IB_CM_DREQ_ERROR;
3187                 break;
3188         case IB_CM_SIDR_REQ_SENT:
3189                 cm_id_priv->id.state = IB_CM_IDLE;
3190                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3191                 break;
3192         default:
3193                 goto discard;
3194         }
3195         spin_unlock_irq(&cm_id_priv->lock);
3196         cm_event.param.send_status = wc_status;
3197
3198         /* No other events can occur on the cm_id at this point. */
3199         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3200         cm_free_msg(msg);
3201         if (ret)
3202                 ib_destroy_cm_id(&cm_id_priv->id);
3203         return;
3204 discard:
3205         spin_unlock_irq(&cm_id_priv->lock);
3206         cm_free_msg(msg);
3207 }
3208
3209 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3210                             struct ib_mad_send_wc *mad_send_wc)
3211 {
3212         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3213         struct cm_port *port;
3214         u16 attr_index;
3215
3216         port = mad_agent->context;
3217         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3218                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3219
3220         /*
3221          * If the send was in response to a received message (context[0] is
3222          * not set to a cm_id) and is not a REJ, then it is a send that was
3223          * retried manually, so count it as a single retry.
3224          */
3225         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3226                 msg->retries = 1;
3227
3228         atomic_long_add(1 + msg->retries,
3229                         &port->counter_group[CM_XMIT].counter[attr_index]);
3230         if (msg->retries)
3231                 atomic_long_add(msg->retries,
3232                                 &port->counter_group[CM_XMIT_RETRIES].
3233                                 counter[attr_index]);
3234
3235         switch (mad_send_wc->status) {
3236         case IB_WC_SUCCESS:
3237         case IB_WC_WR_FLUSH_ERR:
3238                 cm_free_msg(msg);
3239                 break;
3240         default:
3241                 if (msg->context[0] && msg->context[1])
3242                         cm_process_send_error(msg, mad_send_wc->status);
3243                 else
3244                         cm_free_msg(msg);
3245                 break;
3246         }
3247 }
3248
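/*
 * Workqueue entry point: dispatch a queued receive or internal event to
 * its handler.  A handler that returns an error did not consume the
 * work item, so free it here.
 */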
3249 static void cm_work_handler(struct work_struct *_work)
3250 {
3251         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3252         int ret;
3253
3254         switch (work->cm_event.event) {
3255         case IB_CM_REQ_RECEIVED:
3256                 ret = cm_req_handler(work);
3257                 break;
3258         case IB_CM_MRA_RECEIVED:
3259                 ret = cm_mra_handler(work);
3260                 break;
3261         case IB_CM_REJ_RECEIVED:
3262                 ret = cm_rej_handler(work);
3263                 break;
3264         case IB_CM_REP_RECEIVED:
3265                 ret = cm_rep_handler(work);
3266                 break;
3267         case IB_CM_RTU_RECEIVED:
3268                 ret = cm_rtu_handler(work);
3269                 break;
3270         case IB_CM_USER_ESTABLISHED:
3271                 ret = cm_establish_handler(work);
3272                 break;
3273         case IB_CM_DREQ_RECEIVED:
3274                 ret = cm_dreq_handler(work);
3275                 break;
3276         case IB_CM_DREP_RECEIVED:
3277                 ret = cm_drep_handler(work);
3278                 break;
3279         case IB_CM_SIDR_REQ_RECEIVED:
3280                 ret = cm_sidr_req_handler(work);
3281                 break;
3282         case IB_CM_SIDR_REP_RECEIVED:
3283                 ret = cm_sidr_rep_handler(work);
3284                 break;
3285         case IB_CM_LAP_RECEIVED:
3286                 ret = cm_lap_handler(work);
3287                 break;
3288         case IB_CM_APR_RECEIVED:
3289                 ret = cm_apr_handler(work);
3290                 break;
3291         case IB_CM_TIMEWAIT_EXIT:
3292                 ret = cm_timewait_handler(work);
3293                 break;
3294         default:
3295                 ret = -EINVAL;
3296                 break;
3297         }
3298         if (ret)
3299                 cm_free_work(work);
3300 }
3301
3302 static int cm_establish(struct ib_cm_id *cm_id)
3303 {
3304         struct cm_id_private *cm_id_priv;
3305         struct cm_work *work;
3306         unsigned long flags;
3307         int ret = 0;
3308
3309         work = kmalloc(sizeof *work, GFP_ATOMIC);
3310         if (!work)
3311                 return -ENOMEM;
3312
3313         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3314         spin_lock_irqsave(&cm_id_priv->lock, flags);
3315         switch (cm_id->state) {
3317         case IB_CM_REP_SENT:
3318         case IB_CM_MRA_REP_RCVD:
3319                 cm_id->state = IB_CM_ESTABLISHED;
3320                 break;
3321         case IB_CM_ESTABLISHED:
3322                 ret = -EISCONN;
3323                 break;
3324         default:
3325                 ret = -EINVAL;
3326                 break;
3327         }
3328         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3329
3330         if (ret) {
3331                 kfree(work);
3332                 goto out;
3333         }
3334
3335         /*
3336          * The CM worker thread may try to destroy the cm_id before it
3337          * can execute this work item.  To prevent potential deadlock,
3338          * we need to find the cm_id once we're in the context of the
3339          * worker thread, rather than holding a reference on it.
3340          */
3341         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3342         work->local_id = cm_id->local_id;
3343         work->remote_id = cm_id->remote_id;
3344         work->mad_recv_wc = NULL;
3345         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3346         queue_delayed_work(cm.wq, &work->work, 0);
3347 out:
3348         return ret;
3349 }
3350
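/*
 * Path migration (APM) has completed: promote the alternate AV to be
 * the primary so that subsequent CM MADs follow the new path.
 */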
3351 static int cm_migrate(struct ib_cm_id *cm_id)
3352 {
3353         struct cm_id_private *cm_id_priv;
3354         unsigned long flags;
3355         int ret = 0;
3356
3357         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3358         spin_lock_irqsave(&cm_id_priv->lock, flags);
3359         if (cm_id->state == IB_CM_ESTABLISHED &&
3360             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3361              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3362                 cm_id->lap_state = IB_CM_LAP_IDLE;
3363                 cm_id_priv->av = cm_id_priv->alt_av;
3364         } else
3365                 ret = -EINVAL;
3366         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3367
3368         return ret;
3369 }
3370
3371 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3372 {
3373         int ret;
3374
3375         switch (event) {
3376         case IB_EVENT_COMM_EST:
3377                 ret = cm_establish(cm_id);
3378                 break;
3379         case IB_EVENT_PATH_MIG:
3380                 ret = cm_migrate(cm_id);
3381                 break;
3382         default:
3383                 ret = -EINVAL;
3384         }
3385         return ret;
3386 }
3387 EXPORT_SYMBOL(ib_cm_notify);
3388
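/*
 * MAD-layer receive callback: map the attribute ID to a CM event, bump
 * the receive counter, and queue a work item sized with room for any
 * path records the handler will extract (two for a REQ with an
 * alternate path, one for a LAP).
 */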
3389 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3390                             struct ib_mad_recv_wc *mad_recv_wc)
3391 {
3392         struct cm_port *port = mad_agent->context;
3393         struct cm_work *work;
3394         enum ib_cm_event_type event;
3395         u16 attr_id;
3396         int paths = 0;
3397
3398         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3399         case CM_REQ_ATTR_ID:
3400                 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3401                                                     alt_local_lid != 0);
3402                 event = IB_CM_REQ_RECEIVED;
3403                 break;
3404         case CM_MRA_ATTR_ID:
3405                 event = IB_CM_MRA_RECEIVED;
3406                 break;
3407         case CM_REJ_ATTR_ID:
3408                 event = IB_CM_REJ_RECEIVED;
3409                 break;
3410         case CM_REP_ATTR_ID:
3411                 event = IB_CM_REP_RECEIVED;
3412                 break;
3413         case CM_RTU_ATTR_ID:
3414                 event = IB_CM_RTU_RECEIVED;
3415                 break;
3416         case CM_DREQ_ATTR_ID:
3417                 event = IB_CM_DREQ_RECEIVED;
3418                 break;
3419         case CM_DREP_ATTR_ID:
3420                 event = IB_CM_DREP_RECEIVED;
3421                 break;
3422         case CM_SIDR_REQ_ATTR_ID:
3423                 event = IB_CM_SIDR_REQ_RECEIVED;
3424                 break;
3425         case CM_SIDR_REP_ATTR_ID:
3426                 event = IB_CM_SIDR_REP_RECEIVED;
3427                 break;
3428         case CM_LAP_ATTR_ID:
3429                 paths = 1;
3430                 event = IB_CM_LAP_RECEIVED;
3431                 break;
3432         case CM_APR_ATTR_ID:
3433                 event = IB_CM_APR_RECEIVED;
3434                 break;
3435         default:
3436                 ib_free_recv_mad(mad_recv_wc);
3437                 return;
3438         }
3439
3440         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3441         atomic_long_inc(&port->counter_group[CM_RECV].
3442                         counter[attr_id - CM_ATTR_ID_OFFSET]);
3443
3444         work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3445                        GFP_KERNEL);
3446         if (!work) {
3447                 ib_free_recv_mad(mad_recv_wc);
3448                 return;
3449         }
3450
3451         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3452         work->cm_event.event = event;
3453         work->mad_recv_wc = mad_recv_wc;
3454         work->port = port;
3455         queue_delayed_work(cm.wq, &work->work, 0);
3456 }
3457
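/*
 * The three helpers below fill in QP attributes for the INIT, RTR and
 * RTS transitions based on the connection state; ib_cm_init_qp_attr()
 * selects among them by the requested QP state.
 */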
3458 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3459                                 struct ib_qp_attr *qp_attr,
3460                                 int *qp_attr_mask)
3461 {
3462         unsigned long flags;
3463         int ret;
3464
3465         spin_lock_irqsave(&cm_id_priv->lock, flags);
3466         switch (cm_id_priv->id.state) {
3467         case IB_CM_REQ_SENT:
3468         case IB_CM_MRA_REQ_RCVD:
3469         case IB_CM_REQ_RCVD:
3470         case IB_CM_MRA_REQ_SENT:
3471         case IB_CM_REP_RCVD:
3472         case IB_CM_MRA_REP_SENT:
3473         case IB_CM_REP_SENT:
3474         case IB_CM_MRA_REP_RCVD:
3475         case IB_CM_ESTABLISHED:
3476                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3477                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
3478                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3479                 if (cm_id_priv->responder_resources)
3480                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3481                                                     IB_ACCESS_REMOTE_ATOMIC;
3482                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3483                 qp_attr->port_num = cm_id_priv->av.port->port_num;
3484                 ret = 0;
3485                 break;
3486         default:
3487                 ret = -EINVAL;
3488                 break;
3489         }
3490         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3491         return ret;
3492 }
3493
3494 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3495                                struct ib_qp_attr *qp_attr,
3496                                int *qp_attr_mask)
3497 {
3498         unsigned long flags;
3499         int ret;
3500
3501         spin_lock_irqsave(&cm_id_priv->lock, flags);
3502         switch (cm_id_priv->id.state) {
3503         case IB_CM_REQ_RCVD:
3504         case IB_CM_MRA_REQ_SENT:
3505         case IB_CM_REP_RCVD:
3506         case IB_CM_MRA_REP_SENT:
3507         case IB_CM_REP_SENT:
3508         case IB_CM_MRA_REP_RCVD:
3509         case IB_CM_ESTABLISHED:
3510                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3511                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3512                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3513                 qp_attr->path_mtu = cm_id_priv->path_mtu;
3514                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3515                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3516                 if (cm_id_priv->qp_type == IB_QPT_RC ||
3517                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3518                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3519                                          IB_QP_MIN_RNR_TIMER;
3520                         qp_attr->max_dest_rd_atomic =
3521                                         cm_id_priv->responder_resources;
3522                         qp_attr->min_rnr_timer = 0;
3523                 }
3524                 if (cm_id_priv->alt_av.ah_attr.dlid) {
3525                         *qp_attr_mask |= IB_QP_ALT_PATH;
3526                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3527                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3528                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3529                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3530                 }
3531                 ret = 0;
3532                 break;
3533         default:
3534                 ret = -EINVAL;
3535                 break;
3536         }
3537         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3538         return ret;
3539 }
3540
3541 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3542                                struct ib_qp_attr *qp_attr,
3543                                int *qp_attr_mask)
3544 {
3545         unsigned long flags;
3546         int ret;
3547
3548         spin_lock_irqsave(&cm_id_priv->lock, flags);
3549         switch (cm_id_priv->id.state) {
3550         /* Allow transition to RTS before sending REP */
3551         case IB_CM_REQ_RCVD:
3552         case IB_CM_MRA_REQ_SENT:
3553
3554         case IB_CM_REP_RCVD:
3555         case IB_CM_MRA_REP_SENT:
3556         case IB_CM_REP_SENT:
3557         case IB_CM_MRA_REP_RCVD:
3558         case IB_CM_ESTABLISHED:
3559                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3560                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3561                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3562                         switch (cm_id_priv->qp_type) {
3563                         case IB_QPT_RC:
3564                         case IB_QPT_XRC_INI:
3565                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3566                                                  IB_QP_MAX_QP_RD_ATOMIC;
3567                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
3568                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3569                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3570                                 /* fall through */
3571                         case IB_QPT_XRC_TGT:
3572                                 *qp_attr_mask |= IB_QP_TIMEOUT;
3573                                 qp_attr->timeout = cm_id_priv->av.timeout;
3574                                 break;
3575                         default:
3576                                 break;
3577                         }
3578                         if (cm_id_priv->alt_av.ah_attr.dlid) {
3579                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3580                                 qp_attr->path_mig_state = IB_MIG_REARM;
3581                         }
3582                 } else {
3583                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3584                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3585                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3586                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3587                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3588                         qp_attr->path_mig_state = IB_MIG_REARM;
3589                 }
3590                 ret = 0;
3591                 break;
3592         default:
3593                 ret = -EINVAL;
3594                 break;
3595         }
3596         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3597         return ret;
3598 }
3599
3600 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3601                        struct ib_qp_attr *qp_attr,
3602                        int *qp_attr_mask)
3603 {
3604         struct cm_id_private *cm_id_priv;
3605         int ret;
3606
3607         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3608         switch (qp_attr->qp_state) {
3609         case IB_QPS_INIT:
3610                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3611                 break;
3612         case IB_QPS_RTR:
3613                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3614                 break;
3615         case IB_QPS_RTS:
3616                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3617                 break;
3618         default:
3619                 ret = -EINVAL;
3620                 break;
3621         }
3622         return ret;
3623 }
3624 EXPORT_SYMBOL(ib_cm_init_qp_attr);
3625
3626 static void cm_get_ack_delay(struct cm_device *cm_dev)
3627 {
3628         struct ib_device_attr attr;
3629
3630         if (ib_query_device(cm_dev->ib_device, &attr))
3631                 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3632         else
3633                 cm_dev->ack_delay = attr.local_ca_ack_delay;
3634 }
3635
3636 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3637                                char *buf)
3638 {
3639         struct cm_counter_group *group;
3640         struct cm_counter_attribute *cm_attr;
3641
3642         group = container_of(obj, struct cm_counter_group, obj);
3643         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3644
3645         return sprintf(buf, "%ld\n",
3646                        atomic_long_read(&group->counter[cm_attr->index]));
3647 }
3648
3649 static const struct sysfs_ops cm_counter_ops = {
3650         .show = cm_show_counter
3651 };
3652
3653 static struct kobj_type cm_counter_obj_type = {
3654         .sysfs_ops = &cm_counter_ops,
3655         .default_attrs = cm_counter_default_attrs
3656 };
3657
3658 static void cm_release_port_obj(struct kobject *obj)
3659 {
3660         struct cm_port *cm_port;
3661
3662         cm_port = container_of(obj, struct cm_port, port_obj);
3663         kfree(cm_port);
3664 }
3665
3666 static struct kobj_type cm_port_obj_type = {
3667         .release = cm_release_port_obj
3668 };
3669
3670 static char *cm_devnode(struct device *dev, mode_t *mode)
3671 {
3672         if (mode)
3673                 *mode = 0666;
3674         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
3675 }
3676
3677 struct class cm_class = {
3678         .owner   = THIS_MODULE,
3679         .name    = "infiniband_cm",
3680         .devnode = cm_devnode,
3681 };
3682 EXPORT_SYMBOL(cm_class);
3683
3684 static int cm_create_port_fs(struct cm_port *port)
3685 {
3686         int i, ret;
3687
3688         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3689                                    &port->cm_dev->device->kobj,
3690                                    "%d", port->port_num);
3691         if (ret) {
3692                 kobject_put(&port->port_obj); /* release frees port */
3693                 return ret;
3694         }
3695
3696         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3697                 ret = kobject_init_and_add(&port->counter_group[i].obj,
3698                                            &cm_counter_obj_type,
3699                                            &port->port_obj,
3700                                            "%s", counter_group_names[i]);
3701                 if (ret)
3702                         goto error;
3703         }
3704
3705         return 0;
3706
3707 error:
3708         while (i--)
3709                 kobject_put(&port->counter_group[i].obj);
3710         kobject_put(&port->port_obj);
3711         return ret;
3713 }
3714
3715 static void cm_remove_port_fs(struct cm_port *port)
3716 {
3717         int i;
3718
3719         for (i = 0; i < CM_COUNTER_GROUPS; i++)
3720                 kobject_put(&port->counter_group[i].obj);
3721
3722         kobject_put(&port->port_obj);
3723 }
3724
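/*
 * Per-device initialization: register a CM MAD agent on each port's GSI
 * QP, create the sysfs counter groups, and set IB_PORT_CM_SUP in the
 * port capability mask.  A failure unwinds the ports set up so far.
 */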
3725 static void cm_add_one(struct ib_device *ib_device)
3726 {
3727         struct cm_device *cm_dev;
3728         struct cm_port *port;
3729         struct ib_mad_reg_req reg_req = {
3730                 .mgmt_class = IB_MGMT_CLASS_CM,
3731                 .mgmt_class_version = IB_CM_CLASS_VERSION
3732         };
3733         struct ib_port_modify port_modify = {
3734                 .set_port_cap_mask = IB_PORT_CM_SUP
3735         };
3736         unsigned long flags;
3737         int ret;
3738         u8 i;
3739
3740         if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB)
3741                 return;
3742
3743         cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3744                          ib_device->phys_port_cnt, GFP_KERNEL);
3745         if (!cm_dev)
3746                 return;
3747
3748         cm_dev->ib_device = ib_device;
3749         cm_get_ack_delay(cm_dev);
3750
3751         cm_dev->device = device_create(&cm_class, &ib_device->dev,
3752                                        MKDEV(0, 0), NULL,
3753                                        "%s", ib_device->name);
3754         if (IS_ERR(cm_dev->device)) {
3755                 kfree(cm_dev);
3756                 return;
3757         }
3758
3759         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3760         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3761                 port = kzalloc(sizeof *port, GFP_KERNEL);
3762                 if (!port)
3763                         goto error1;
3764
3765                 cm_dev->port[i-1] = port;
3766                 port->cm_dev = cm_dev;
3767                 port->port_num = i;
3768
3769                 ret = cm_create_port_fs(port);
3770                 if (ret)
3771                         goto error1;
3772
3773                 port->mad_agent = ib_register_mad_agent(ib_device, i,
3774                                                         IB_QPT_GSI,
3775                                                         &reg_req,
3776                                                         0,
3777                                                         cm_send_handler,
3778                                                         cm_recv_handler,
3779                                                         port);
3780                 if (IS_ERR(port->mad_agent))
3781                         goto error2;
3782
3783                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
3784                 if (ret)
3785                         goto error3;
3786         }
3787         ib_set_client_data(ib_device, &cm_client, cm_dev);
3788
3789         write_lock_irqsave(&cm.device_lock, flags);
3790         list_add_tail(&cm_dev->list, &cm.device_list);
3791         write_unlock_irqrestore(&cm.device_lock, flags);
3792         return;
3793
3794 error3:
3795         ib_unregister_mad_agent(port->mad_agent);
3796 error2:
3797         cm_remove_port_fs(port);
3798 error1:
3799         port_modify.set_port_cap_mask = 0;
3800         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3801         while (--i) {
3802                 port = cm_dev->port[i-1];
3803                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3804                 ib_unregister_mad_agent(port->mad_agent);
3805                 cm_remove_port_fs(port);
3806         }
3807         device_unregister(cm_dev->device);
3808         kfree(cm_dev);
3809 }
3810
3811 static void cm_remove_one(struct ib_device *ib_device)
3812 {
3813         struct cm_device *cm_dev;
3814         struct cm_port *port;
3815         struct ib_port_modify port_modify = {
3816                 .clr_port_cap_mask = IB_PORT_CM_SUP
3817         };
3818         unsigned long flags;
3819         int i;
3820
3821         cm_dev = ib_get_client_data(ib_device, &cm_client);
3822         if (!cm_dev)
3823                 return;
3824
3825         write_lock_irqsave(&cm.device_lock, flags);
3826         list_del(&cm_dev->list);
3827         write_unlock_irqrestore(&cm.device_lock, flags);
3828
3829         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3830                 port = cm_dev->port[i-1];
3831                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3832                 ib_unregister_mad_agent(port->mad_agent);
3833                 flush_workqueue(cm.wq);
3834                 cm_remove_port_fs(port);
3835         }
3836         device_unregister(cm_dev->device);
3837         kfree(cm_dev);
3838 }
3839
3840 static int __init ib_cm_init(void)
3841 {
3842         int ret;
3843
3844         memset(&cm, 0, sizeof cm);
3845         INIT_LIST_HEAD(&cm.device_list);
3846         rwlock_init(&cm.device_lock);
3847         spin_lock_init(&cm.lock);
3848         cm.listen_service_table = RB_ROOT;
3849         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3850         cm.remote_id_table = RB_ROOT;
3851         cm.remote_qp_table = RB_ROOT;
3852         cm.remote_sidr_table = RB_ROOT;
3853         idr_init(&cm.local_id_table);
3854         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3855         idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3856         INIT_LIST_HEAD(&cm.timewait_list);
3857
3858         ret = class_register(&cm_class);
3859         if (ret)
3860                 return ret;
3861
3862         cm.wq = create_workqueue("ib_cm");
3863         if (!cm.wq) {
3864                 ret = -ENOMEM;
3865                 goto error1;
3866         }
3867
3868         ret = ib_register_client(&cm_client);
3869         if (ret)
3870                 goto error2;
3871
3872         return 0;
3873 error2:
3874         destroy_workqueue(cm.wq);
3875 error1:
3876         class_unregister(&cm_class);
3877         return ret;
3878 }
3879
3880 static void __exit ib_cm_cleanup(void)
3881 {
3882         struct cm_timewait_info *timewait_info, *tmp;
3883
3884         spin_lock_irq(&cm.lock);
3885         list_for_each_entry(timewait_info, &cm.timewait_list, list)
3886                 cancel_delayed_work(&timewait_info->work.work);
3887         spin_unlock_irq(&cm.lock);
3888
3889         ib_unregister_client(&cm_client);
3890         destroy_workqueue(cm.wq);
3891
3892         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
3893                 list_del(&timewait_info->list);
3894                 kfree(timewait_info);
3895         }
3896
3897         class_unregister(&cm_class);
3898         idr_destroy(&cm.local_id_table);
3899 }
3900
3901 module_init(ib_cm_init);
3902 module_exit(ib_cm_cleanup);
3903