/* GNU Linux-libre 4.19.286-gnu1: drivers/infiniband/core/cm.c */
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static const char * const ibcm_rej_reason_strs[] = {
        [IB_CM_REJ_NO_QP]                       = "no QP",
        [IB_CM_REJ_NO_EEC]                      = "no EEC",
        [IB_CM_REJ_NO_RESOURCES]                = "no resources",
        [IB_CM_REJ_TIMEOUT]                     = "timeout",
        [IB_CM_REJ_UNSUPPORTED]                 = "unsupported",
        [IB_CM_REJ_INVALID_COMM_ID]             = "invalid comm ID",
        [IB_CM_REJ_INVALID_COMM_INSTANCE]       = "invalid comm instance",
        [IB_CM_REJ_INVALID_SERVICE_ID]          = "invalid service ID",
        [IB_CM_REJ_INVALID_TRANSPORT_TYPE]      = "invalid transport type",
        [IB_CM_REJ_STALE_CONN]                  = "stale conn",
        [IB_CM_REJ_RDC_NOT_EXIST]               = "RDC not exist",
        [IB_CM_REJ_INVALID_GID]                 = "invalid GID",
        [IB_CM_REJ_INVALID_LID]                 = "invalid LID",
        [IB_CM_REJ_INVALID_SL]                  = "invalid SL",
        [IB_CM_REJ_INVALID_TRAFFIC_CLASS]       = "invalid traffic class",
        [IB_CM_REJ_INVALID_HOP_LIMIT]           = "invalid hop limit",
        [IB_CM_REJ_INVALID_PACKET_RATE]         = "invalid packet rate",
        [IB_CM_REJ_INVALID_ALT_GID]             = "invalid alt GID",
        [IB_CM_REJ_INVALID_ALT_LID]             = "invalid alt LID",
        [IB_CM_REJ_INVALID_ALT_SL]              = "invalid alt SL",
        [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]   = "invalid alt traffic class",
        [IB_CM_REJ_INVALID_ALT_HOP_LIMIT]       = "invalid alt hop limit",
        [IB_CM_REJ_INVALID_ALT_PACKET_RATE]     = "invalid alt packet rate",
        [IB_CM_REJ_PORT_CM_REDIRECT]            = "port CM redirect",
        [IB_CM_REJ_PORT_REDIRECT]               = "port redirect",
        [IB_CM_REJ_INVALID_MTU]                 = "invalid MTU",
        [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
        [IB_CM_REJ_CONSUMER_DEFINED]            = "consumer defined",
        [IB_CM_REJ_INVALID_RNR_RETRY]           = "invalid RNR retry",
        [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]     = "duplicate local comm ID",
        [IB_CM_REJ_INVALID_CLASS_VERSION]       = "invalid class version",
        [IB_CM_REJ_INVALID_FLOW_LABEL]          = "invalid flow label",
        [IB_CM_REJ_INVALID_ALT_FLOW_LABEL]      = "invalid alt flow label",
};

const char *__attribute_const__ ibcm_reject_msg(int reason)
{
        size_t index = reason;

        if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
            ibcm_rej_reason_strs[index])
                return ibcm_rej_reason_strs[index];
        else
                return "unrecognized reason";
}
EXPORT_SYMBOL(ibcm_reject_msg);
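
/*
 * Illustrative only (not part of the original file): a consumer handling an
 * IB_CM_REJ_RECEIVED event might log the reason string like this, where
 * 'param' is a hypothetical pointer to the event's ib_cm_rej_event_param:
 *
 *      pr_debug("REJ %d: %s\n", param->reason, ibcm_reject_msg(param->reason));
 */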

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
        /* Sync on cm change port state */
        spinlock_t state_lock;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

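/* The inner dimension is sized by the longest group name, "cm_rx_duplicates". */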
static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct list_head cm_priv_prim_list;
        struct list_head cm_priv_altr_list;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        int going_down;
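        /* Zero-length array: sized at allocation, one slot per device port. */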
        struct cm_port *port[0];
};

struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct rdma_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;
        /* Number of clients sharing this ib_cm_id. Only valid for listeners.
         * Protected by the cm.lock spinlock. */
        int listen_sharecount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head prim_list;
        struct list_head altr_list;
        /* Indicates that the send port mad is registered and av is set */
        int prim_send_port_not_ready;
        int altr_send_port_not_ready;

        struct list_head work_list;
        atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;
        struct cm_av *av;
        unsigned long flags, flags2;
        int ret = 0;

        /* Don't let the port be released until the agent is done with it. */
        spin_lock_irqsave(&cm.state_lock, flags2);
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_id_priv->prim_send_port_not_ready)
                av = &cm_id_priv->av;
        else if (!cm_id_priv->altr_send_port_not_ready &&
                 (cm_id_priv->alt_av.port))
                av = &cm_id_priv->alt_av;
        else {
                pr_info("%s: not valid CM id\n", __func__);
                ret = -ENODEV;
                spin_unlock_irqrestore(&cm.lock, flags);
                goto out;
        }
        spin_unlock_irqrestore(&cm.lock, flags);
        /* Make sure the selected av's port hasn't released its MAD agent yet */
        mad_agent = av->port->mad_agent;
        if (!mad_agent) {
                pr_info("%s: not a valid MAD agent\n", __func__);
                ret = -ENODEV;
                goto out;
        }
        ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
        if (IS_ERR(ah)) {
                ret = PTR_ERR(ah);
                goto out;
        }

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               av->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                rdma_destroy_ah(ah);
                ret = PTR_ERR(m);
                goto out;
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;

out:
        spin_unlock_irqrestore(&cm.state_lock, flags2);
        return ret;
}

static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
                                                           struct ib_mad_recv_wc *mad_recv_wc)
{
        return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                                  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                                  GFP_ATOMIC,
                                  IB_MGMT_BASE_VERSION);
}

static int cm_create_response_msg_ah(struct cm_port *port,
                                     struct ib_mad_recv_wc *mad_recv_wc,
                                     struct ib_mad_send_buf *msg)
{
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        msg->ah = ah;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        if (msg->ah)
                rdma_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        int ret;

        m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
        if (IS_ERR(m))
                return PTR_ERR(m);

        ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
        if (ret) {
                cm_free_msg(m);
                return ret;
        }

        *msg = m;
        return 0;
}

static void * cm_copy_private_data(const void *private_data,
                                   u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                 void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
                              struct ib_grh *grh, struct cm_av *av)
{
        struct rdma_ah_attr new_ah_attr;
        int ret;

        av->port = port;
        av->pkey_index = wc->pkey_index;

        /*
         * av->ah_attr might already have been initialized from a past wc,
         * either while processing an incoming connect request or while
         * sending one out.  So initialize a new ah_attr on the stack: if
         * initialization fails, the old ah_attr is still usable for sending
         * responses; if it succeeds, the new ah_attr overwrites the old one.
         */
        ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
                                      port->port_num, wc,
                                      grh, &new_ah_attr);
        if (ret)
                return ret;

        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
}

static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                   struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
                                       port->port_num, wc,
                                       grh, &av->ah_attr);
}

static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
                                  struct cm_av *av,
                                  struct cm_port *port)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cm.lock, flags);

        if (&cm_id_priv->av == av)
                list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
        else if (&cm_id_priv->alt_av == av)
                list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
        else
                ret = -EINVAL;

        spin_unlock_irqrestore(&cm.lock, flags);
        return ret;
}

static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;

        if (attr) {
                read_lock_irqsave(&cm.device_lock, flags);
                list_for_each_entry(cm_dev, &cm.device_list, list) {
                        if (cm_dev->ib_device == attr->device) {
                                port = cm_dev->port[attr->port_num - 1];
                                break;
                        }
                }
                read_unlock_irqrestore(&cm.device_lock, flags);
        } else {
                /* The SGID attribute can be NULL in the following cases:
                 * (a) alternative path
                 * (b) IB link layer without GRH
                 * (c) LAP send messages
                 */
                read_lock_irqsave(&cm.device_lock, flags);
                list_for_each_entry(cm_dev, &cm.device_list, list) {
                        attr = rdma_find_gid(cm_dev->ib_device,
                                             &path->sgid,
                                             sa_conv_pathrec_to_gid_type(path),
                                             NULL);
                        if (!IS_ERR(attr)) {
                                port = cm_dev->port[attr->port_num - 1];
                                break;
                        }
                }
                read_unlock_irqrestore(&cm.device_lock, flags);
                if (port)
                        rdma_put_gid_attr(attr);
        }
        return port;
}

static int cm_init_av_by_path(struct sa_path_rec *path,
                              const struct ib_gid_attr *sgid_attr,
                              struct cm_av *av,
                              struct cm_id_private *cm_id_priv)
{
        struct rdma_ah_attr new_ah_attr;
        struct cm_device *cm_dev;
        struct cm_port *port;
        int ret;

        port = get_cm_port_from_path(path, sgid_attr);
        if (!port)
                return -EINVAL;
        cm_dev = port->cm_dev;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;

        /*
         * av->ah_attr might already have been initialized from a wc or
         * during request processing, and may still hold a reference to an
         * sgid_attr.  So initialize a new ah_attr on the stack: if
         * initialization fails, the old ah_attr remains valid for sending
         * an error response; if it succeeds, the new ah_attr overwrites
         * the old one.
         */
        ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
                                        &new_ah_attr, sgid_attr);
        if (ret)
                return ret;

        av->timeout = path->packet_life_time + 1;

        ret = add_cm_id_to_port_list(cm_id_priv, av, port);
        if (ret) {
                rdma_destroy_ah_attr(&new_ah_attr);
                return ret;
        }
        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
}

static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

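        /* Mix in a random operand so local comm IDs aren't a predictable sequence. */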
        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

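/*
 * A listener matches an incoming service ID when the two agree on every
 * bit set in service_mask.  For example, a listener with service_id 0x0100
 * and service_mask ~0xff matches incoming IDs 0x0100 through 0x01ff.
 */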
static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device))
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else
                        node = node->rb_right;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
                                                   __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
                                                      *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
                                                    *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;
                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        INIT_LIST_HEAD(&cm_id_priv->prim_list);
        INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);    /* propagate the cm_alloc_id() error */
}
EXPORT_SYMBOL(ib_create_cm_id);

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
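        /* e.g. iba_time = 20: 4.096us * 2^20 ~= 4.3s; 1 << (20 - 8) = 4096 ms */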
        return 1 << max(iba_time - 8, 0);
}

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
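 *
 * Example: ca_ack_delay = 16 and packet_life_time = 16 give an initial
 * ack_timeout of 17; since ca_ack_delay >= ack_timeout - 1, it is rounded
 * up to 18, i.e. 4.096us x 2^18, roughly 1.07s.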
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

        /* Check if the device started its remove_one */
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
        spin_unlock_irqrestore(&cm.lock, flags);

        cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                spin_unlock_irq(&cm_id_priv->lock);

                spin_lock_irq(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        cm_deref_id(cm_id_priv);
                        spin_unlock_irq(&cm.lock);
                        return;
                }
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                spin_lock_irq(&cm.lock);
                if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
                        rb_erase(&cm_id_priv->sidr_id_node,
                                 &cm.remote_sidr_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        spin_lock_irq(&cm_id_priv->lock);
        spin_lock(&cm.lock);
        /* Required for cleanup paths related to cm_req_handler() */
        if (cm_id_priv->timewait_info) {
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
        if (!list_empty(&cm_id_priv->altr_list) &&
            (!cm_id_priv->altr_send_port_not_ready))
                list_del(&cm_id_priv->altr_list);
        if (!list_empty(&cm_id_priv->prim_list) &&
            (!cm_id_priv->prim_send_port_not_ready))
                list_del(&cm_id_priv->prim_list);
        spin_unlock(&cm.lock);
        spin_unlock_irq(&cm_id_priv->lock);

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);

        rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
        rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to the service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
                          __be64 service_mask)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        cm_id->state = IB_CM_LISTEN;
        ++cm_id_priv->listen_sharecount;

        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                --cm_id_priv->listen_sharecount;
                ret = -EBUSY;
        }
        return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm.lock, flags);
        ret = __ib_cm_listen(cm_id, service_id, service_mask);
        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
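
/*
 * Illustrative only: a caller wanting an exact-match listen can pass a
 * zero mask, which the semantics above turn into a full mask:
 *
 *      ret = ib_cm_listen(cm_id, cpu_to_be64(0x1000ULL), 0);
 */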

/**
 * Create a new listening ib_cm_id and listen on the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 * be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_id *cm_id;
        unsigned long flags;
        int err = 0;

        /* Create an ID in advance, since the creation may sleep */
        cm_id = ib_create_cm_id(device, cm_handler, NULL);
        if (IS_ERR(cm_id))
                return cm_id;

        spin_lock_irqsave(&cm.lock, flags);

        if (service_id == IB_CM_ASSIGN_SERVICE_ID)
                goto new_id;

        /* Find an existing ID */
        cm_id_priv = cm_find_listen(device, service_id);
        if (cm_id_priv) {
                if (cm_id->cm_handler != cm_handler || cm_id->context) {
                        /* Sharing an ib_cm_id with different handlers is not
                         * supported */
                        spin_unlock_irqrestore(&cm.lock, flags);
                        ib_destroy_cm_id(cm_id);
                        return ERR_PTR(-EINVAL);
                }
                atomic_inc(&cm_id_priv->refcount);
                ++cm_id_priv->listen_sharecount;
                spin_unlock_irqrestore(&cm.lock, flags);

                ib_destroy_cm_id(cm_id);
                cm_id = &cm_id_priv->id;
                return cm_id;
        }

new_id:
        /* Use newly created ID */
        err = __ib_cm_listen(cm_id, service_id, 0);

        spin_unlock_irqrestore(&cm.lock, flags);

        if (err) {
                ib_destroy_cm_id(cm_id);
                return ERR_PTR(err);
        }
        return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);

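/*
 * Build the 64-bit transaction ID: the owning MAD agent's hi_tid in the
 * upper 32 bits and the local comm ID in the lower 32, so replies can be
 * demultiplexed back to this agent and cm_id.
 */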
1268 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
1269 {
1270         u64 hi_tid, low_tid;
1271
1272         hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1273         low_tid  = (u64)cm_id_priv->id.local_id;
1274         return cpu_to_be64(hi_tid | low_tid);
1275 }
1276
1277 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1278                               __be16 attr_id, __be64 tid)
1279 {
1280         hdr->base_version  = IB_MGMT_BASE_VERSION;
1281         hdr->mgmt_class    = IB_MGMT_CLASS_CM;
1282         hdr->class_version = IB_CM_CLASS_VERSION;
1283         hdr->method        = IB_MGMT_METHOD_SEND;
1284         hdr->attr_id       = attr_id;
1285         hdr->tid           = tid;
1286 }
1287
1288 static void cm_format_req(struct cm_req_msg *req_msg,
1289                           struct cm_id_private *cm_id_priv,
1290                           struct ib_cm_req_param *param)
1291 {
1292         struct sa_path_rec *pri_path = param->primary_path;
1293         struct sa_path_rec *alt_path = param->alternate_path;
1294         bool pri_ext = false;
1295
1296         if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
1297                 pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
1298                                               pri_path->opa.slid);
1299
1300         cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1301                           cm_form_tid(cm_id_priv));
1302
1303         req_msg->local_comm_id = cm_id_priv->id.local_id;
1304         req_msg->service_id = param->service_id;
1305         req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1306         cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1307         cm_req_set_init_depth(req_msg, param->initiator_depth);
1308         cm_req_set_remote_resp_timeout(req_msg,
1309                                        param->remote_cm_response_timeout);
1310         cm_req_set_qp_type(req_msg, param->qp_type);
1311         cm_req_set_flow_ctrl(req_msg, param->flow_control);
1312         cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1313         cm_req_set_local_resp_timeout(req_msg,
1314                                       param->local_cm_response_timeout);
1315         req_msg->pkey = param->primary_path->pkey;
1316         cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1317         cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1318
1319         if (param->qp_type != IB_QPT_XRC_INI) {
1320                 cm_req_set_resp_res(req_msg, param->responder_resources);
1321                 cm_req_set_retry_count(req_msg, param->retry_count);
1322                 cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1323                 cm_req_set_srq(req_msg, param->srq);
1324         }
1325
1326         req_msg->primary_local_gid = pri_path->sgid;
1327         req_msg->primary_remote_gid = pri_path->dgid;
1328         if (pri_ext) {
1329                 req_msg->primary_local_gid.global.interface_id
1330                         = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
1331                 req_msg->primary_remote_gid.global.interface_id
1332                         = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
1333         }
1334         if (pri_path->hop_limit <= 1) {
1335                 req_msg->primary_local_lid = pri_ext ? 0 :
1336                         htons(ntohl(sa_path_get_slid(pri_path)));
1337                 req_msg->primary_remote_lid = pri_ext ? 0 :
1338                         htons(ntohl(sa_path_get_dlid(pri_path)));
1339         } else {
1340                 /* Work-around until there's a way to obtain remote LID info */
1341                 req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1342                 req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1343         }
1344         cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1345         cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1346         req_msg->primary_traffic_class = pri_path->traffic_class;
1347         req_msg->primary_hop_limit = pri_path->hop_limit;
1348         cm_req_set_primary_sl(req_msg, pri_path->sl);
1349         cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1350         cm_req_set_primary_local_ack_timeout(req_msg,
1351                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1352                                pri_path->packet_life_time));
1353
1354         if (alt_path) {
1355                 bool alt_ext = false;
1356
1357                 if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
1358                         alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
1359                                                       alt_path->opa.slid);
1360
1361                 req_msg->alt_local_gid = alt_path->sgid;
1362                 req_msg->alt_remote_gid = alt_path->dgid;
1363                 if (alt_ext) {
1364                         req_msg->alt_local_gid.global.interface_id
1365                                 = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
1366                         req_msg->alt_remote_gid.global.interface_id
1367                                 = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
1368                 }
1369                 if (alt_path->hop_limit <= 1) {
1370                         req_msg->alt_local_lid = alt_ext ? 0 :
1371                                 htons(ntohl(sa_path_get_slid(alt_path)));
1372                         req_msg->alt_remote_lid = alt_ext ? 0 :
1373                                 htons(ntohl(sa_path_get_dlid(alt_path)));
1374                 } else {
1375                         req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1376                         req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1377                 }
1378                 cm_req_set_alt_flow_label(req_msg,
1379                                           alt_path->flow_label);
1380                 cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1381                 req_msg->alt_traffic_class = alt_path->traffic_class;
1382                 req_msg->alt_hop_limit = alt_path->hop_limit;
1383                 cm_req_set_alt_sl(req_msg, alt_path->sl);
1384                 cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1385                 cm_req_set_alt_local_ack_timeout(req_msg,
1386                         cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1387                                        alt_path->packet_life_time));
1388         }
1389
1390         if (param->private_data && param->private_data_len)
1391                 memcpy(req_msg->private_data, param->private_data,
1392                        param->private_data_len);
1393 }
1394
1395 static int cm_validate_req_param(struct ib_cm_req_param *param)
1396 {
1397         /* peer-to-peer not supported */
1398         if (param->peer_to_peer)
1399                 return -EINVAL;
1400
1401         if (!param->primary_path)
1402                 return -EINVAL;
1403
1404         if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1405             param->qp_type != IB_QPT_XRC_INI)
1406                 return -EINVAL;
1407
1408         if (param->private_data &&
1409             param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1410                 return -EINVAL;
1411
1412         if (param->alternate_path &&
1413             (param->alternate_path->pkey != param->primary_path->pkey ||
1414              param->alternate_path->mtu != param->primary_path->mtu))
1415                 return -EINVAL;
1416
1417         return 0;
1418 }
1419
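     /*
      * A minimal active-side caller sketch (illustrative only: qp, path_rec
      * and my_service_id are placeholders; the path record is assumed to
      * have been resolved elsewhere, e.g. via ib_sa_path_rec_get(), the QP
      * to be an RC QP, and the numeric values are just examples):
      *
      *     struct ib_cm_req_param param = {};
      *
      *     param.primary_path        = &path_rec;
      *     param.service_id          = cpu_to_be64(my_service_id);
      *     param.qp_num              = qp->qp_num;
      *     param.qp_type             = qp->qp_type;
      *     param.responder_resources = 4;
      *     param.initiator_depth     = 4;
      *     param.remote_cm_response_timeout = 20;
      *     param.retry_count         = 7;
      *     param.rnr_retry_count     = 7;
      *     param.max_cm_retries      = 15;
      *     ret = ib_send_cm_req(cm_id, &param);
      */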
1420 int ib_send_cm_req(struct ib_cm_id *cm_id,
1421                    struct ib_cm_req_param *param)
1422 {
1423         struct cm_id_private *cm_id_priv;
1424         struct cm_req_msg *req_msg;
1425         unsigned long flags;
1426         int ret;
1427
1428         ret = cm_validate_req_param(param);
1429         if (ret)
1430                 return ret;
1431
1432         /* Verify that we're not in timewait. */
1433         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1434         spin_lock_irqsave(&cm_id_priv->lock, flags);
1435         if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
1436                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1437                 ret = -EINVAL;
1438                 goto out;
1439         }
1440         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1441
1442         cm_id_priv->timewait_info =
1443                 cm_create_timewait_info(cm_id_priv->id.local_id);
1444         if (IS_ERR(cm_id_priv->timewait_info)) {
1445                 ret = PTR_ERR(cm_id_priv->timewait_info);
1446                 cm_id_priv->timewait_info = NULL;
1447                 goto out;
1448         }
1449
1450         ret = cm_init_av_by_path(param->primary_path,
1451                                  param->ppath_sgid_attr, &cm_id_priv->av,
1452                                  cm_id_priv);
1453         if (ret)
1454                 goto out;
1455         if (param->alternate_path) {
1456                 ret = cm_init_av_by_path(param->alternate_path, NULL,
1457                                          &cm_id_priv->alt_av, cm_id_priv);
1458                 if (ret)
1459                         goto out;
1460         }
1461         cm_id->service_id = param->service_id;
1462         cm_id->service_mask = ~cpu_to_be64(0);
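             /*
              * The REQ timeout covers a round trip on the wire (twice the
              * path's packet life time) plus the remote peer's CM response
              * time.  Both values travel as IBTA-style 4.096us * 2^n
              * exponents, which cm_convert_to_ms() expands to milliseconds.
              */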
1463         cm_id_priv->timeout_ms = cm_convert_to_ms(
1464                                     param->primary_path->packet_life_time) * 2 +
1465                                  cm_convert_to_ms(
1466                                     param->remote_cm_response_timeout);
1467         cm_id_priv->max_cm_retries = param->max_cm_retries;
1468         cm_id_priv->initiator_depth = param->initiator_depth;
1469         cm_id_priv->responder_resources = param->responder_resources;
1470         cm_id_priv->retry_count = param->retry_count;
1471         cm_id_priv->path_mtu = param->primary_path->mtu;
1472         cm_id_priv->pkey = param->primary_path->pkey;
1473         cm_id_priv->qp_type = param->qp_type;
1474
1475         ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1476         if (ret)
1477                 goto out;
1478
1479         req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1480         cm_format_req(req_msg, cm_id_priv, param);
1481         cm_id_priv->tid = req_msg->hdr.tid;
1482         cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1483         cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1484
1485         cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1486         cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1487
1488         spin_lock_irqsave(&cm_id_priv->lock, flags);
1489         ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1490         if (ret) {
1491                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1492                 goto error2;
1493         }
1494         BUG_ON(cm_id->state != IB_CM_IDLE);
1495         cm_id->state = IB_CM_REQ_SENT;
1496         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1497         return 0;
1498
1499 error2: cm_free_msg(cm_id_priv->msg);
1500 out:    return ret;
1501 }
1502 EXPORT_SYMBOL(ib_send_cm_req);
1503
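     /*
      * Build and post a REJ directly from a received MAD when no cm_id can
      * be matched: the reply reuses the incoming transaction ID and swaps
      * the communication IDs.
      */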
1504 static int cm_issue_rej(struct cm_port *port,
1505                         struct ib_mad_recv_wc *mad_recv_wc,
1506                         enum ib_cm_rej_reason reason,
1507                         enum cm_msg_response msg_rejected,
1508                         void *ari, u8 ari_length)
1509 {
1510         struct ib_mad_send_buf *msg = NULL;
1511         struct cm_rej_msg *rej_msg, *rcv_msg;
1512         int ret;
1513
1514         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1515         if (ret)
1516                 return ret;
1517
1518         /* We just need common CM header information.  Cast to any message. */
1519         rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1520         rej_msg = (struct cm_rej_msg *) msg->mad;
1521
1522         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1523         rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1524         rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1525         cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1526         rej_msg->reason = cpu_to_be16(reason);
1527
1528         if (ari && ari_length) {
1529                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1530                 memcpy(rej_msg->ari, ari, ari_length);
1531         }
1532
1533         ret = ib_post_send_mad(msg, NULL);
1534         if (ret)
1535                 cm_free_msg(msg);
1536
1537         return ret;
1538 }
1539
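     /*
      * Peer-to-peer tiebreak: the side with the numerically larger CA GUID
      * becomes the active peer; equal GUIDs fall back to comparing QPNs.
      */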
1540 static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1541                                     __be32 local_qpn, __be32 remote_qpn)
1542 {
1543         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1544                 ((local_ca_guid == remote_ca_guid) &&
1545                  (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1546 }
1547
1548 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
1549 {
1550         return ((req_msg->alt_local_lid) ||
1551                 (ib_is_opa_gid(&req_msg->alt_local_gid)));
1552 }
1553
1554 static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
1555                                  struct sa_path_rec *path, union ib_gid *gid)
1556 {
1557         if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
1558                 path->rec_type = SA_PATH_REC_TYPE_OPA;
1559         else
1560                 path->rec_type = SA_PATH_REC_TYPE_IB;
1561 }
1562
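     /*
      * The REQ was composed by the remote peer, so its "local" fields
      * describe that peer: the sender's local LID becomes our DLID, and
      * its remote LID becomes our SLID.
      */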
1563 static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
1564                                         struct sa_path_rec *primary_path,
1565                                         struct sa_path_rec *alt_path)
1566 {
1567         u32 lid;
1568
1569         if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1570                 sa_path_set_dlid(primary_path,
1571                                  ntohs(req_msg->primary_local_lid));
1572                 sa_path_set_slid(primary_path,
1573                                  ntohs(req_msg->primary_remote_lid));
1574         } else {
1575                 lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
1576                 sa_path_set_dlid(primary_path, lid);
1577
1578                 lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
1579                 sa_path_set_slid(primary_path, lid);
1580         }
1581
1582         if (!cm_req_has_alt_path(req_msg))
1583                 return;
1584
1585         if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1586                 sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
1587                 sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
1588         } else {
1589                 lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
1590                 sa_path_set_dlid(alt_path, lid);
1591
1592                 lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
1593                 sa_path_set_slid(alt_path, lid);
1594         }
1595 }
1596
1597 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1598                                      struct sa_path_rec *primary_path,
1599                                      struct sa_path_rec *alt_path)
1600 {
1601         primary_path->dgid = req_msg->primary_local_gid;
1602         primary_path->sgid = req_msg->primary_remote_gid;
1603         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1604         primary_path->hop_limit = req_msg->primary_hop_limit;
1605         primary_path->traffic_class = req_msg->primary_traffic_class;
1606         primary_path->reversible = 1;
1607         primary_path->pkey = req_msg->pkey;
1608         primary_path->sl = cm_req_get_primary_sl(req_msg);
1609         primary_path->mtu_selector = IB_SA_EQ;
1610         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1611         primary_path->rate_selector = IB_SA_EQ;
1612         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1613         primary_path->packet_life_time_selector = IB_SA_EQ;
1614         primary_path->packet_life_time =
1615                 cm_req_get_primary_local_ack_timeout(req_msg);
1616         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1617         primary_path->service_id = req_msg->service_id;
1618         if (sa_path_is_roce(primary_path))
1619                 primary_path->roce.route_resolved = false;
1620
1621         if (cm_req_has_alt_path(req_msg)) {
1622                 alt_path->dgid = req_msg->alt_local_gid;
1623                 alt_path->sgid = req_msg->alt_remote_gid;
1624                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1625                 alt_path->hop_limit = req_msg->alt_hop_limit;
1626                 alt_path->traffic_class = req_msg->alt_traffic_class;
1627                 alt_path->reversible = 1;
1628                 alt_path->pkey = req_msg->pkey;
1629                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1630                 alt_path->mtu_selector = IB_SA_EQ;
1631                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1632                 alt_path->rate_selector = IB_SA_EQ;
1633                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1634                 alt_path->packet_life_time_selector = IB_SA_EQ;
1635                 alt_path->packet_life_time =
1636                         cm_req_get_alt_local_ack_timeout(req_msg);
1637                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1638                 alt_path->service_id = req_msg->service_id;
1639
1640                 if (sa_path_is_roce(alt_path))
1641                         alt_path->roce.route_resolved = false;
1642         }
1643         cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
1644 }
1645
1646 static u16 cm_get_bth_pkey(struct cm_work *work)
1647 {
1648         struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1649         u8 port_num = work->port->port_num;
1650         u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1651         u16 pkey;
1652         int ret;
1653
1654         ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1655         if (ret) {
1656                 dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1657                                      port_num, pkey_index, ret);
1658                 return 0;
1659         }
1660
1661         return pkey;
1662 }
1663
1664 /**
1665  * cm_opa_to_ib_sgid - Convert an OPA SGID to an IB SGID
1666  * @work: Work completion
1667  * @path: Path record
1668  *
1669  * ULPs (such as IPoIB) do not understand OPA GIDs and will reject them,
1670  * as the local_gid will not match the sgid.  Therefore, change the
1671  * pathrec's SGID to an IB SGID.
1672  */
1673 static void cm_opa_to_ib_sgid(struct cm_work *work,
1674                               struct sa_path_rec *path)
1675 {
1676         struct ib_device *dev = work->port->cm_dev->ib_device;
1677         u8 port_num = work->port->port_num;
1678
1679         if (rdma_cap_opa_ah(dev, port_num) &&
1680             (ib_is_opa_gid(&path->sgid))) {
1681                 union ib_gid sgid;
1682
1683                 if (rdma_query_gid(dev, port_num, 0, &sgid)) {
1684                         dev_warn(&dev->dev,
1685                                  "Error updating sgid in CM request\n");
1686                         return;
1687                 }
1688
1689                 path->sgid = sgid;
1690         }
1691 }
1692
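     /*
      * Translate a received REQ into the event reported to the listener.
      * Note the perspective swap: the peer's initiator depth becomes our
      * responder_resources, and its responder resources our
      * initiator_depth.
      */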
1693 static void cm_format_req_event(struct cm_work *work,
1694                                 struct cm_id_private *cm_id_priv,
1695                                 struct ib_cm_id *listen_id)
1696 {
1697         struct cm_req_msg *req_msg;
1698         struct ib_cm_req_event_param *param;
1699
1700         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1701         param = &work->cm_event.param.req_rcvd;
1702         param->listen_id = listen_id;
1703         param->bth_pkey = cm_get_bth_pkey(work);
1704         param->port = cm_id_priv->av.port->port_num;
1705         param->primary_path = &work->path[0];
1706         cm_opa_to_ib_sgid(work, param->primary_path);
1707         if (cm_req_has_alt_path(req_msg)) {
1708                 param->alternate_path = &work->path[1];
1709                 cm_opa_to_ib_sgid(work, param->alternate_path);
1710         } else {
1711                 param->alternate_path = NULL;
1712         }
1713         param->remote_ca_guid = req_msg->local_ca_guid;
1714         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1715         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1716         param->qp_type = cm_req_get_qp_type(req_msg);
1717         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1718         param->responder_resources = cm_req_get_init_depth(req_msg);
1719         param->initiator_depth = cm_req_get_resp_res(req_msg);
1720         param->local_cm_response_timeout =
1721                                         cm_req_get_remote_resp_timeout(req_msg);
1722         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1723         param->remote_cm_response_timeout =
1724                                         cm_req_get_local_resp_timeout(req_msg);
1725         param->retry_count = cm_req_get_retry_count(req_msg);
1726         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1727         param->srq = cm_req_get_srq(req_msg);
1728         param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
1729         work->cm_event.private_data = &req_msg->private_data;
1730 }
1731
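     /*
      * Deliver the current event, then drain any work queued on the cm_id
      * while the handler ran.  A nonzero return from the consumer's
      * handler destroys the cm_id.
      */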
1732 static void cm_process_work(struct cm_id_private *cm_id_priv,
1733                             struct cm_work *work)
1734 {
1735         int ret;
1736
1737         /* We will typically only have the current event to report. */
1738         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1739         cm_free_work(work);
1740
1741         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1742                 spin_lock_irq(&cm_id_priv->lock);
1743                 work = cm_dequeue_work(cm_id_priv);
1744                 spin_unlock_irq(&cm_id_priv->lock);
1745                 if (!work)
1746                         return;
1747
1748                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1749                                                 &work->cm_event);
1750                 cm_free_work(work);
1751         }
1752         cm_deref_id(cm_id_priv);
1753         if (ret)
1754                 cm_destroy_id(&cm_id_priv->id, ret);
1755 }
1756
1757 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1758                           struct cm_id_private *cm_id_priv,
1759                           enum cm_msg_response msg_mraed, u8 service_timeout,
1760                           const void *private_data, u8 private_data_len)
1761 {
1762         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1763         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1764         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1765         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1766         cm_mra_set_service_timeout(mra_msg, service_timeout);
1767
1768         if (private_data && private_data_len)
1769                 memcpy(mra_msg->private_data, private_data, private_data_len);
1770 }
1771
1772 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1773                           struct cm_id_private *cm_id_priv,
1774                           enum ib_cm_rej_reason reason,
1775                           void *ari,
1776                           u8 ari_length,
1777                           const void *private_data,
1778                           u8 private_data_len)
1779 {
1780         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1781         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1782
1783         switch (cm_id_priv->id.state) {
1784         case IB_CM_REQ_RCVD:
1785                 rej_msg->local_comm_id = 0;
1786                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1787                 break;
1788         case IB_CM_MRA_REQ_SENT:
1789                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1790                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1791                 break;
1792         case IB_CM_REP_RCVD:
1793         case IB_CM_MRA_REP_SENT:
1794                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1795                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1796                 break;
1797         default:
1798                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1799                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1800                 break;
1801         }
1802
1803         rej_msg->reason = cpu_to_be16(reason);
1804         if (ari && ari_length) {
1805                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1806                 memcpy(rej_msg->ari, ari, ari_length);
1807         }
1808
1809         if (private_data && private_data_len)
1810                 memcpy(rej_msg->private_data, private_data, private_data_len);
1811 }
1812
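     /*
      * Answer a duplicate REQ by replaying our last response: an MRA if we
      * had MRA'd the original, or a stale-connection REJ if the connection
      * is already in timewait.  If the original REQ is still being
      * processed (or the state doesn't match), the duplicate is dropped.
      */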
1813 static void cm_dup_req_handler(struct cm_work *work,
1814                                struct cm_id_private *cm_id_priv)
1815 {
1816         struct ib_mad_send_buf *msg = NULL;
1817         int ret;
1818
1819         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1820                         counter[CM_REQ_COUNTER]);
1821
1822         /* Quick state check to discard duplicate REQs. */
1823         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1824                 return;
1825
1826         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1827         if (ret)
1828                 return;
1829
1830         spin_lock_irq(&cm_id_priv->lock);
1831         switch (cm_id_priv->id.state) {
1832         case IB_CM_MRA_REQ_SENT:
1833                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1834                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1835                               cm_id_priv->private_data,
1836                               cm_id_priv->private_data_len);
1837                 break;
1838         case IB_CM_TIMEWAIT:
1839                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1840                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1841                 break;
1842         default:
1843                 goto unlock;
1844         }
1845         spin_unlock_irq(&cm_id_priv->lock);
1846
1847         ret = ib_post_send_mad(msg, NULL);
1848         if (ret)
1849                 goto free;
1850         return;
1851
1852 unlock: spin_unlock_irq(&cm_id_priv->lock);
1853 free:   cm_free_msg(msg);
1854 }
1855
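     /*
      * Match an incoming REQ against existing state: discard duplicates
      * (keyed by remote comm ID), reject and tear down stale connections
      * (keyed by remote QPN), then look up the listener for the requested
      * service ID.
      */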
1856 static struct cm_id_private *cm_match_req(struct cm_work *work,
1857                                           struct cm_id_private *cm_id_priv)
1858 {
1859         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1860         struct cm_timewait_info *timewait_info;
1861         struct cm_req_msg *req_msg;
1862         struct ib_cm_id *cm_id;
1863
1864         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1865
1866         /* Check for possible duplicate REQ. */
1867         spin_lock_irq(&cm.lock);
1868         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1869         if (timewait_info) {
1870                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1871                                            timewait_info->work.remote_id);
1872                 spin_unlock_irq(&cm.lock);
1873                 if (cur_cm_id_priv) {
1874                         cm_dup_req_handler(work, cur_cm_id_priv);
1875                         cm_deref_id(cur_cm_id_priv);
1876                 }
1877                 return NULL;
1878         }
1879
1880         /* Check for stale connections. */
1881         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1882         if (timewait_info) {
1883                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1884                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1885                                            timewait_info->work.remote_id);
1886
1887                 spin_unlock_irq(&cm.lock);
1888                 cm_issue_rej(work->port, work->mad_recv_wc,
1889                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1890                              NULL, 0);
1891                 if (cur_cm_id_priv) {
1892                         cm_id = &cur_cm_id_priv->id;
1893                         ib_send_cm_dreq(cm_id, NULL, 0);
1894                         cm_deref_id(cur_cm_id_priv);
1895                 }
1896                 return NULL;
1897         }
1898
1899         /* Find matching listen request. */
1900         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1901                                            req_msg->service_id);
1902         if (!listen_cm_id_priv) {
1903                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1904                 spin_unlock_irq(&cm.lock);
1905                 cm_issue_rej(work->port, work->mad_recv_wc,
1906                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1907                              NULL, 0);
1908                 goto out;
1909         }
1910         atomic_inc(&listen_cm_id_priv->refcount);
1911         atomic_inc(&cm_id_priv->refcount);
1912         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1913         atomic_inc(&cm_id_priv->work_count);
1914         spin_unlock_irq(&cm.lock);
1915 out:
1916         return listen_cm_id_priv;
1917 }
1918
1919 /*
1920  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1921  * we need to override the LID/SL data in the REQ with the LID information
1922  * in the work completion.
1923  */
1924 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1925 {
1926         if (!cm_req_get_primary_subnet_local(req_msg)) {
1927                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1928                         req_msg->primary_local_lid = ib_lid_be16(wc->slid);
1929                         cm_req_set_primary_sl(req_msg, wc->sl);
1930                 }
1931
1932                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1933                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1934         }
1935
1936         if (!cm_req_get_alt_subnet_local(req_msg)) {
1937                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1938                         req_msg->alt_local_lid = ib_lid_be16(wc->slid);
1939                         cm_req_set_alt_sl(req_msg, wc->sl);
1940                 }
1941
1942                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1943                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1944         }
1945 }
1946
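     /*
      * Passive-side entry point for a new REQ: create a cm_id, resolve the
      * address vectors from the received paths, match a listener, and hand
      * the IB_CM_REQ_RECEIVED event to its handler via cm_process_work().
      */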
1947 static int cm_req_handler(struct cm_work *work)
1948 {
1949         struct ib_cm_id *cm_id;
1950         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1951         struct cm_req_msg *req_msg;
1952         const struct ib_global_route *grh;
1953         const struct ib_gid_attr *gid_attr;
1954         int ret;
1955
1956         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1957
1958         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1959         if (IS_ERR(cm_id))
1960                 return PTR_ERR(cm_id);
1961
1962         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1963         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1964         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1965                                       work->mad_recv_wc->recv_buf.grh,
1966                                       &cm_id_priv->av);
1967         if (ret)
1968                 goto destroy;
1969         cm_id_priv->timewait_info =
1970                 cm_create_timewait_info(cm_id_priv->id.local_id);
1971         if (IS_ERR(cm_id_priv->timewait_info)) {
1972                 ret = PTR_ERR(cm_id_priv->timewait_info);
1973                 cm_id_priv->timewait_info = NULL;
1974                 goto destroy;
1975         }
1976         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1977         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1978         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1979
1980         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1981         if (!listen_cm_id_priv) {
1982                 pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
1983                          be32_to_cpu(cm_id->local_id));
1984                 ret = -EINVAL;
1985                 goto destroy;
1986         }
1987
1988         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1989         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1990         cm_id_priv->id.service_id = req_msg->service_id;
1991         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1992
1993         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1994
1995         memset(&work->path[0], 0, sizeof(work->path[0]));
1996         if (cm_req_has_alt_path(req_msg))
1997                 memset(&work->path[1], 0, sizeof(work->path[1]));
1998         grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
1999         gid_attr = grh->sgid_attr;
2000
2001         if (gid_attr && gid_attr->ndev) {
2002                 work->path[0].rec_type =
2003                         sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
2004         } else {
2005                 /* No GID attribute or a NULL ndev means this is not RoCE. */
2006                 cm_path_set_rec_type(work->port->cm_dev->ib_device,
2007                                      work->port->port_num,
2008                                      &work->path[0],
2009                                      &req_msg->primary_local_gid);
2010         }
2011         if (cm_req_has_alt_path(req_msg))
2012                 work->path[1].rec_type = work->path[0].rec_type;
2013         cm_format_paths_from_req(req_msg, &work->path[0],
2014                                  &work->path[1]);
2015         if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
2016                 sa_path_set_dmac(&work->path[0],
2017                                  cm_id_priv->av.ah_attr.roce.dmac);
2018         work->path[0].hop_limit = grh->hop_limit;
2019         ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
2020                                  cm_id_priv);
2021         if (ret) {
2022                 int err;
2023
2024                 err = rdma_query_gid(work->port->cm_dev->ib_device,
2025                                      work->port->port_num, 0,
2026                                      &work->path[0].sgid);
2027                 if (err)
2028                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2029                                        NULL, 0, NULL, 0);
2030                 else
2031                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2032                                        &work->path[0].sgid,
2033                                        sizeof(work->path[0].sgid),
2034                                        NULL, 0);
2035                 goto rejected;
2036         }
2037         if (cm_req_has_alt_path(req_msg)) {
2038                 ret = cm_init_av_by_path(&work->path[1], NULL,
2039                                          &cm_id_priv->alt_av, cm_id_priv);
2040                 if (ret) {
2041                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
2042                                        &work->path[0].sgid,
2043                                        sizeof(work->path[0].sgid), NULL, 0);
2044                         goto rejected;
2045                 }
2046         }
2047         cm_id_priv->tid = req_msg->hdr.tid;
2048         cm_id_priv->timeout_ms = cm_convert_to_ms(
2049                                         cm_req_get_local_resp_timeout(req_msg));
2050         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
2051         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
2052         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
2053         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
2054         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
2055         cm_id_priv->pkey = req_msg->pkey;
2056         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
2057         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
2058         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
2059         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
2060
2061         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
2062         cm_process_work(cm_id_priv, work);
2063         cm_deref_id(listen_cm_id_priv);
2064         return 0;
2065
2066 rejected:
2067         atomic_dec(&cm_id_priv->refcount);
2068         cm_deref_id(listen_cm_id_priv);
2069 destroy:
2070         ib_destroy_cm_id(cm_id);
2071         return ret;
2072 }
2073
2074 static void cm_format_rep(struct cm_rep_msg *rep_msg,
2075                           struct cm_id_private *cm_id_priv,
2076                           struct ib_cm_rep_param *param)
2077 {
2078         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
2079         rep_msg->local_comm_id = cm_id_priv->id.local_id;
2080         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2081         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
2082         rep_msg->resp_resources = param->responder_resources;
2083         cm_rep_set_target_ack_delay(rep_msg,
2084                                     cm_id_priv->av.port->cm_dev->ack_delay);
2085         cm_rep_set_failover(rep_msg, param->failover_accepted);
2086         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
2087         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
2088
2089         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
2090                 rep_msg->initiator_depth = param->initiator_depth;
2091                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
2092                 cm_rep_set_srq(rep_msg, param->srq);
2093                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
2094         } else {
2095                 cm_rep_set_srq(rep_msg, 1);
2096                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
2097         }
2098
2099         if (param->private_data && param->private_data_len)
2100                 memcpy(rep_msg->private_data, param->private_data,
2101                        param->private_data_len);
2102 }
2103
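     /*
      * Typical passive-side usage, from a cm_handler that just received an
      * IB_CM_REQ_RECEIVED event (illustrative sketch only: qp is a
      * placeholder for an already-created RC QP, req points at the event's
      * param.req_rcvd, any 24-bit value works as the starting PSN, and the
      * resource values are usually clamped against the device's limits):
      *
      *     struct ib_cm_rep_param rep = {};
      *
      *     rep.qp_num              = qp->qp_num;
      *     rep.starting_psn        = qp->qp_num;
      *     rep.responder_resources = req->responder_resources;
      *     rep.initiator_depth     = req->initiator_depth;
      *     rep.rnr_retry_count     = req->rnr_retry_count;
      *     ret = ib_send_cm_rep(cm_id, &rep);
      */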
2104 int ib_send_cm_rep(struct ib_cm_id *cm_id,
2105                    struct ib_cm_rep_param *param)
2106 {
2107         struct cm_id_private *cm_id_priv;
2108         struct ib_mad_send_buf *msg;
2109         struct cm_rep_msg *rep_msg;
2110         unsigned long flags;
2111         int ret;
2112
2113         if (param->private_data &&
2114             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
2115                 return -EINVAL;
2116
2117         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2118         spin_lock_irqsave(&cm_id_priv->lock, flags);
2119         if (cm_id->state != IB_CM_REQ_RCVD &&
2120             cm_id->state != IB_CM_MRA_REQ_SENT) {
2121                 pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
2122                          be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2123                 ret = -EINVAL;
2124                 goto out;
2125         }
2126
2127         ret = cm_alloc_msg(cm_id_priv, &msg);
2128         if (ret)
2129                 goto out;
2130
2131         rep_msg = (struct cm_rep_msg *) msg->mad;
2132         cm_format_rep(rep_msg, cm_id_priv, param);
2133         msg->timeout_ms = cm_id_priv->timeout_ms;
2134         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
2135
2136         ret = ib_post_send_mad(msg, NULL);
2137         if (ret) {
2138                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2139                 cm_free_msg(msg);
2140                 return ret;
2141         }
2142
2143         cm_id->state = IB_CM_REP_SENT;
2144         cm_id_priv->msg = msg;
2145         cm_id_priv->initiator_depth = param->initiator_depth;
2146         cm_id_priv->responder_resources = param->responder_resources;
2147         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
2148         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
2149
2150 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2151         return ret;
2152 }
2153 EXPORT_SYMBOL(ib_send_cm_rep);
2154
2155 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
2156                           struct cm_id_private *cm_id_priv,
2157                           const void *private_data,
2158                           u8 private_data_len)
2159 {
2160         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
2161         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
2162         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
2163
2164         if (private_data && private_data_len)
2165                 memcpy(rtu_msg->private_data, private_data, private_data_len);
2166 }
2167
2168 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
2169                    const void *private_data,
2170                    u8 private_data_len)
2171 {
2172         struct cm_id_private *cm_id_priv;
2173         struct ib_mad_send_buf *msg;
2174         unsigned long flags;
2175         void *data;
2176         int ret;
2177
2178         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
2179                 return -EINVAL;
2180
2181         data = cm_copy_private_data(private_data, private_data_len);
2182         if (IS_ERR(data))
2183                 return PTR_ERR(data);
2184
2185         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2186         spin_lock_irqsave(&cm_id_priv->lock, flags);
2187         if (cm_id->state != IB_CM_REP_RCVD &&
2188             cm_id->state != IB_CM_MRA_REP_SENT) {
2189                 pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
2190                          be32_to_cpu(cm_id->local_id), cm_id->state);
2191                 ret = -EINVAL;
2192                 goto error;
2193         }
2194
2195         ret = cm_alloc_msg(cm_id_priv, &msg);
2196         if (ret)
2197                 goto error;
2198
2199         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2200                       private_data, private_data_len);
2201
2202         ret = ib_post_send_mad(msg, NULL);
2203         if (ret) {
2204                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2205                 cm_free_msg(msg);
2206                 kfree(data);
2207                 return ret;
2208         }
2209
2210         cm_id->state = IB_CM_ESTABLISHED;
2211         cm_set_private_data(cm_id_priv, data, private_data_len);
2212         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2213         return 0;
2214
2215 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2216         kfree(data);
2217         return ret;
2218 }
2219 EXPORT_SYMBOL(ib_send_cm_rtu);
2220
2221 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
2222 {
2223         struct cm_rep_msg *rep_msg;
2224         struct ib_cm_rep_event_param *param;
2225
2226         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2227         param = &work->cm_event.param.rep_rcvd;
2228         param->remote_ca_guid = rep_msg->local_ca_guid;
2229         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
2230         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
2231         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
2232         param->responder_resources = rep_msg->initiator_depth;
2233         param->initiator_depth = rep_msg->resp_resources;
2234         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2235         param->failover_accepted = cm_rep_get_failover(rep_msg);
2236         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
2237         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2238         param->srq = cm_rep_get_srq(rep_msg);
2239         work->cm_event.private_data = &rep_msg->private_data;
2240 }
2241
2242 static void cm_dup_rep_handler(struct cm_work *work)
2243 {
2244         struct cm_id_private *cm_id_priv;
2245         struct cm_rep_msg *rep_msg;
2246         struct ib_mad_send_buf *msg = NULL;
2247         int ret;
2248
2249         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
2250         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
2251                                    rep_msg->local_comm_id);
2252         if (!cm_id_priv)
2253                 return;
2254
2255         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2256                         counter[CM_REP_COUNTER]);
2257         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
2258         if (ret)
2259                 goto deref;
2260
2261         spin_lock_irq(&cm_id_priv->lock);
2262         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
2263                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2264                               cm_id_priv->private_data,
2265                               cm_id_priv->private_data_len);
2266         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
2267                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2268                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
2269                               cm_id_priv->private_data,
2270                               cm_id_priv->private_data_len);
2271         else
2272                 goto unlock;
2273         spin_unlock_irq(&cm_id_priv->lock);
2274
2275         ret = ib_post_send_mad(msg, NULL);
2276         if (ret)
2277                 goto free;
2278         goto deref;
2279
2280 unlock: spin_unlock_irq(&cm_id_priv->lock);
2281 free:   cm_free_msg(msg);
2282 deref:  cm_deref_id(cm_id_priv);
2283 }
2284
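     /*
      * Handle a REP on the active side: duplicates and stale connections
      * are detected through the timewait tables (mirroring cm_match_req()),
      * the outstanding REQ MAD is cancelled, and the connection moves to
      * IB_CM_REP_RCVD before the event is handed to the consumer.
      */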
2285 static int cm_rep_handler(struct cm_work *work)
2286 {
2287         struct cm_id_private *cm_id_priv;
2288         struct cm_rep_msg *rep_msg;
2289         int ret;
2290         struct cm_id_private *cur_cm_id_priv;
2291         struct ib_cm_id *cm_id;
2292         struct cm_timewait_info *timewait_info;
2293
2294         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2295         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2296         if (!cm_id_priv) {
2297                 cm_dup_rep_handler(work);
2298                 pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
2299                          be32_to_cpu(rep_msg->remote_comm_id));
2300                 return -EINVAL;
2301         }
2302
2303         cm_format_rep_event(work, cm_id_priv->qp_type);
2304
2305         spin_lock_irq(&cm_id_priv->lock);
2306         switch (cm_id_priv->id.state) {
2307         case IB_CM_REQ_SENT:
2308         case IB_CM_MRA_REQ_RCVD:
2309                 break;
2310         default:
2311                 spin_unlock_irq(&cm_id_priv->lock);
2312                 ret = -EINVAL;
2313                 pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
2314                          __func__, cm_id_priv->id.state,
2315                          be32_to_cpu(rep_msg->local_comm_id),
2316                          be32_to_cpu(rep_msg->remote_comm_id));
2317                 goto error;
2318         }
2319
2320         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
2321         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
2322         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2323
2324         spin_lock(&cm.lock);
2325         /* Check for duplicate REP. */
2326         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2327                 spin_unlock(&cm.lock);
2328                 spin_unlock_irq(&cm_id_priv->lock);
2329                 ret = -EINVAL;
2330                 pr_debug("%s: Failed to insert remote id %d\n", __func__,
2331                          be32_to_cpu(rep_msg->remote_comm_id));
2332                 goto error;
2333         }
2334         /* Check for a stale connection. */
2335         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
2336         if (timewait_info) {
2337                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2338                          &cm.remote_id_table);
2339                 cm_id_priv->timewait_info->inserted_remote_id = 0;
2340                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
2341                                            timewait_info->work.remote_id);
2342
2343                 spin_unlock(&cm.lock);
2344                 spin_unlock_irq(&cm_id_priv->lock);
2345                 cm_issue_rej(work->port, work->mad_recv_wc,
2346                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2347                              NULL, 0);
2348                 ret = -EINVAL;
2349                 pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
2350                          __func__, be32_to_cpu(rep_msg->local_comm_id),
2351                          be32_to_cpu(rep_msg->remote_comm_id));
2352
2353                 if (cur_cm_id_priv) {
2354                         cm_id = &cur_cm_id_priv->id;
2355                         ib_send_cm_dreq(cm_id, NULL, 0);
2356                         cm_deref_id(cur_cm_id_priv);
2357                 }
2358
2359                 goto error;
2360         }
2361         spin_unlock(&cm.lock);
2362
2363         cm_id_priv->id.state = IB_CM_REP_RCVD;
2364         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2365         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2366         cm_id_priv->initiator_depth = rep_msg->resp_resources;
2367         cm_id_priv->responder_resources = rep_msg->initiator_depth;
2368         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2369         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2370         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2371         cm_id_priv->av.timeout =
2372                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2373                                        cm_id_priv->av.timeout - 1);
2374         cm_id_priv->alt_av.timeout =
2375                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2376                                        cm_id_priv->alt_av.timeout - 1);
2377
2378         /* todo: handle peer_to_peer */
2379
2380         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2381         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2382         if (!ret)
2383                 list_add_tail(&work->list, &cm_id_priv->work_list);
2384         spin_unlock_irq(&cm_id_priv->lock);
2385
2386         if (ret)
2387                 cm_process_work(cm_id_priv, work);
2388         else
2389                 cm_deref_id(cm_id_priv);
2390         return 0;
2391
2392 error:
2393         cm_deref_id(cm_id_priv);
2394         return ret;
2395 }
2396
2397 static int cm_establish_handler(struct cm_work *work)
2398 {
2399         struct cm_id_private *cm_id_priv;
2400         int ret;
2401
2402         /* See comment in cm_establish about lookup. */
2403         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2404         if (!cm_id_priv)
2405                 return -EINVAL;
2406
2407         spin_lock_irq(&cm_id_priv->lock);
2408         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2409                 spin_unlock_irq(&cm_id_priv->lock);
2410                 goto out;
2411         }
2412
2413         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2414         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2415         if (!ret)
2416                 list_add_tail(&work->list, &cm_id_priv->work_list);
2417         spin_unlock_irq(&cm_id_priv->lock);
2418
2419         if (ret)
2420                 cm_process_work(cm_id_priv, work);
2421         else
2422                 cm_deref_id(cm_id_priv);
2423         return 0;
2424 out:
2425         cm_deref_id(cm_id_priv);
2426         return -EINVAL;
2427 }
2428
2429 static int cm_rtu_handler(struct cm_work *work)
2430 {
2431         struct cm_id_private *cm_id_priv;
2432         struct cm_rtu_msg *rtu_msg;
2433         int ret;
2434
2435         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2436         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2437                                    rtu_msg->local_comm_id);
2438         if (!cm_id_priv)
2439                 return -EINVAL;
2440
2441         work->cm_event.private_data = &rtu_msg->private_data;
2442
2443         spin_lock_irq(&cm_id_priv->lock);
2444         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2445             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2446                 spin_unlock_irq(&cm_id_priv->lock);
2447                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2448                                 counter[CM_RTU_COUNTER]);
2449                 goto out;
2450         }
2451         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2452
2453         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2454         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2455         if (!ret)
2456                 list_add_tail(&work->list, &cm_id_priv->work_list);
2457         spin_unlock_irq(&cm_id_priv->lock);
2458
2459         if (ret)
2460                 cm_process_work(cm_id_priv, work);
2461         else
2462                 cm_deref_id(cm_id_priv);
2463         return 0;
2464 out:
2465         cm_deref_id(cm_id_priv);
2466         return -EINVAL;
2467 }
2468
2469 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2470                           struct cm_id_private *cm_id_priv,
2471                           const void *private_data,
2472                           u8 private_data_len)
2473 {
2474         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2475                           cm_form_tid(cm_id_priv));
2476         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2477         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2478         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2479
2480         if (private_data && private_data_len)
2481                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2482 }
2483
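     /*
      * Note the failure handling below: if the DREQ cannot even be
      * allocated or posted, the connection still enters timewait, so the
      * local side always completes its half of the disconnect.
      */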
2484 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2485                     const void *private_data,
2486                     u8 private_data_len)
2487 {
2488         struct cm_id_private *cm_id_priv;
2489         struct ib_mad_send_buf *msg;
2490         unsigned long flags;
2491         int ret;
2492
2493         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2494                 return -EINVAL;
2495
2496         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2497         spin_lock_irqsave(&cm_id_priv->lock, flags);
2498         if (cm_id->state != IB_CM_ESTABLISHED) {
2499                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2500                          be32_to_cpu(cm_id->local_id), cm_id->state);
2501                 ret = -EINVAL;
2502                 goto out;
2503         }
2504
2505         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2506             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2507                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2508
2509         ret = cm_alloc_msg(cm_id_priv, &msg);
2510         if (ret) {
2511                 cm_enter_timewait(cm_id_priv);
2512                 goto out;
2513         }
2514
2515         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2516                        private_data, private_data_len);
2517         msg->timeout_ms = cm_id_priv->timeout_ms;
2518         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2519
2520         ret = ib_post_send_mad(msg, NULL);
2521         if (ret) {
2522                 cm_enter_timewait(cm_id_priv);
2523                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2524                 cm_free_msg(msg);
2525                 return ret;
2526         }
2527
2528         cm_id->state = IB_CM_DREQ_SENT;
2529         cm_id_priv->msg = msg;
2530 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2531         return ret;
2532 }
2533 EXPORT_SYMBOL(ib_send_cm_dreq);
2534
2535 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2536                           struct cm_id_private *cm_id_priv,
2537                           const void *private_data,
2538                           u8 private_data_len)
2539 {
2540         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2541         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2542         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2543
2544         if (private_data && private_data_len)
2545                 memcpy(drep_msg->private_data, private_data, private_data_len);
2546 }
2547
2548 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2549                     const void *private_data,
2550                     u8 private_data_len)
2551 {
2552         struct cm_id_private *cm_id_priv;
2553         struct ib_mad_send_buf *msg;
2554         unsigned long flags;
2555         void *data;
2556         int ret;
2557
2558         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2559                 return -EINVAL;
2560
2561         data = cm_copy_private_data(private_data, private_data_len);
2562         if (IS_ERR(data))
2563                 return PTR_ERR(data);
2564
2565         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2566         spin_lock_irqsave(&cm_id_priv->lock, flags);
2567         if (cm_id->state != IB_CM_DREQ_RCVD) {
2568                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2569                 kfree(data);
2570                 pr_debug("%s: local_id %d, cm_id->state(%d) != IB_CM_DREQ_RCVD\n",
2571                          __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
2572                 return -EINVAL;
2573         }
2574
2575         cm_set_private_data(cm_id_priv, data, private_data_len);
2576         cm_enter_timewait(cm_id_priv);
2577
2578         ret = cm_alloc_msg(cm_id_priv, &msg);
2579         if (ret)
2580                 goto out;
2581
2582         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2583                        private_data, private_data_len);
2584
2585         ret = ib_post_send_mad(msg, NULL);
2586         if (ret) {
2587                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2588                 cm_free_msg(msg);
2589                 return ret;
2590         }
2591
2592 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2593         return ret;
2594 }
2595 EXPORT_SYMBOL(ib_send_cm_drep);
2596
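     /*
      * Answer a DREQ for a connection we no longer track with a bare DREP,
      * so the remote peer can complete its disconnect instead of retrying.
      */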
2597 static int cm_issue_drep(struct cm_port *port,
2598                          struct ib_mad_recv_wc *mad_recv_wc)
2599 {
2600         struct ib_mad_send_buf *msg = NULL;
2601         struct cm_dreq_msg *dreq_msg;
2602         struct cm_drep_msg *drep_msg;
2603         int ret;
2604
2605         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2606         if (ret)
2607                 return ret;
2608
2609         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2610         drep_msg = (struct cm_drep_msg *) msg->mad;
2611
2612         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2613         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2614         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2615
2616         ret = ib_post_send_mad(msg, NULL);
2617         if (ret)
2618                 cm_free_msg(msg);
2619
2620         return ret;
2621 }
2622
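     /*
      * Handle a received DREQ according to connection state: cancel any
      * outstanding REP/DREQ/LAP MAD, replay a DREP for duplicates seen in
      * timewait, and otherwise move to IB_CM_DREQ_RCVD and report the
      * event.
      */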
2623 static int cm_dreq_handler(struct cm_work *work)
2624 {
2625         struct cm_id_private *cm_id_priv;
2626         struct cm_dreq_msg *dreq_msg;
2627         struct ib_mad_send_buf *msg = NULL;
2628         int ret;
2629
2630         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2631         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2632                                    dreq_msg->local_comm_id);
2633         if (!cm_id_priv) {
2634                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2635                                 counter[CM_DREQ_COUNTER]);
2636                 cm_issue_drep(work->port, work->mad_recv_wc);
2637                 pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
2638                          __func__, be32_to_cpu(dreq_msg->local_comm_id),
2639                          be32_to_cpu(dreq_msg->remote_comm_id));
2640                 return -EINVAL;
2641         }
2642
2643         work->cm_event.private_data = &dreq_msg->private_data;
2644
2645         spin_lock_irq(&cm_id_priv->lock);
2646         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2647                 goto unlock;
2648
2649         switch (cm_id_priv->id.state) {
2650         case IB_CM_REP_SENT:
2651         case IB_CM_DREQ_SENT:
2652                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2653                 break;
2654         case IB_CM_ESTABLISHED:
2655                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2656                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2657                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2658                 break;
2659         case IB_CM_MRA_REP_RCVD:
2660                 break;
2661         case IB_CM_TIMEWAIT:
2662                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2663                                 counter[CM_DREQ_COUNTER]);
2664                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2665                 if (IS_ERR(msg))
2666                         goto unlock;
2667
2668                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2669                                cm_id_priv->private_data,
2670                                cm_id_priv->private_data_len);
2671                 spin_unlock_irq(&cm_id_priv->lock);
2672
2673                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2674                     ib_post_send_mad(msg, NULL))
2675                         cm_free_msg(msg);
2676                 goto deref;
2677         case IB_CM_DREQ_RCVD:
2678                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2679                                 counter[CM_DREQ_COUNTER]);
2680                 goto unlock;
2681         default:
2682                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2683                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2684                          cm_id_priv->id.state);
2685                 goto unlock;
2686         }
2687         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2688         cm_id_priv->tid = dreq_msg->hdr.tid;
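        /*
         * work_count is initialized to -1, so atomic_inc_and_test()
         * returns true only for the first outstanding work item; that
         * item is processed directly below.  Concurrent items are
         * queued on work_list and run as earlier handlers complete,
         * serializing event delivery for this cm_id.
         */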
2689         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2690         if (!ret)
2691                 list_add_tail(&work->list, &cm_id_priv->work_list);
2692         spin_unlock_irq(&cm_id_priv->lock);
2693
2694         if (ret)
2695                 cm_process_work(cm_id_priv, work);
2696         else
2697                 cm_deref_id(cm_id_priv);
2698         return 0;
2699
2700 unlock: spin_unlock_irq(&cm_id_priv->lock);
2701 deref:  cm_deref_id(cm_id_priv);
2702         return -EINVAL;
2703 }
2704
2705 static int cm_drep_handler(struct cm_work *work)
2706 {
2707         struct cm_id_private *cm_id_priv;
2708         struct cm_drep_msg *drep_msg;
2709         int ret;
2710
2711         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2712         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2713                                    drep_msg->local_comm_id);
2714         if (!cm_id_priv)
2715                 return -EINVAL;
2716
2717         work->cm_event.private_data = &drep_msg->private_data;
2718
2719         spin_lock_irq(&cm_id_priv->lock);
2720         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2721             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2722                 spin_unlock_irq(&cm_id_priv->lock);
2723                 goto out;
2724         }
2725         cm_enter_timewait(cm_id_priv);
2726
2727         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2728         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2729         if (!ret)
2730                 list_add_tail(&work->list, &cm_id_priv->work_list);
2731         spin_unlock_irq(&cm_id_priv->lock);
2732
2733         if (ret)
2734                 cm_process_work(cm_id_priv, work);
2735         else
2736                 cm_deref_id(cm_id_priv);
2737         return 0;
2738 out:
2739         cm_deref_id(cm_id_priv);
2740         return -EINVAL;
2741 }
2742
2743 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2744                    enum ib_cm_rej_reason reason,
2745                    void *ari,
2746                    u8 ari_length,
2747                    const void *private_data,
2748                    u8 private_data_len)
2749 {
2750         struct cm_id_private *cm_id_priv;
2751         struct ib_mad_send_buf *msg;
2752         unsigned long flags;
2753         int ret;
2754
2755         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2756             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2757                 return -EINVAL;
2758
2759         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2760
2761         spin_lock_irqsave(&cm_id_priv->lock, flags);
2762         switch (cm_id->state) {
2763         case IB_CM_REQ_SENT:
2764         case IB_CM_MRA_REQ_RCVD:
2765         case IB_CM_REQ_RCVD:
2766         case IB_CM_MRA_REQ_SENT:
2767         case IB_CM_REP_RCVD:
2768         case IB_CM_MRA_REP_SENT:
2769                 ret = cm_alloc_msg(cm_id_priv, &msg);
2770                 if (!ret)
2771                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2772                                       cm_id_priv, reason, ari, ari_length,
2773                                       private_data, private_data_len);
2774
2775                 cm_reset_to_idle(cm_id_priv);
2776                 break;
2777         case IB_CM_REP_SENT:
2778         case IB_CM_MRA_REP_RCVD:
2779                 ret = cm_alloc_msg(cm_id_priv, &msg);
2780                 if (!ret)
2781                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2782                                       cm_id_priv, reason, ari, ari_length,
2783                                       private_data, private_data_len);
2784
2785                 cm_enter_timewait(cm_id_priv);
2786                 break;
2787         default:
2788                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2789                          be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2790                 ret = -EINVAL;
2791                 goto out;
2792         }
2793
2794         if (ret)
2795                 goto out;
2796
2797         ret = ib_post_send_mad(msg, NULL);
2798         if (ret)
2799                 cm_free_msg(msg);
2800
2801 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2802         return ret;
2803 }
2804 EXPORT_SYMBOL(ib_send_cm_rej);
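
/*
 * Illustrative sketch, not part of this file: a listener that cannot
 * accept an incoming REQ rejects it from its cm_handler.  "can_accept"
 * is a placeholder condition and the reason code is only an example.
 *
 *	if (event->event == IB_CM_REQ_RECEIVED && !can_accept)
 *		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
 *			       NULL, 0, NULL, 0);
 */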
2805
2806 static void cm_format_rej_event(struct cm_work *work)
2807 {
2808         struct cm_rej_msg *rej_msg;
2809         struct ib_cm_rej_event_param *param;
2810
2811         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2812         param = &work->cm_event.param.rej_rcvd;
2813         param->ari = rej_msg->ari;
2814         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2815         param->reason = __be16_to_cpu(rej_msg->reason);
2816         work->cm_event.private_data = &rej_msg->private_data;
2817 }
2818
2819 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2820 {
2821         struct cm_timewait_info *timewait_info;
2822         struct cm_id_private *cm_id_priv;
2823         __be32 remote_id;
2824
2825         remote_id = rej_msg->local_comm_id;
2826
2827         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2828                 spin_lock_irq(&cm.lock);
2829                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2830                                                   remote_id);
2831                 if (!timewait_info) {
2832                         spin_unlock_irq(&cm.lock);
2833                         return NULL;
2834                 }
2835                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2836                                       (timewait_info->work.local_id ^
2837                                        cm.random_id_operand));
2838                 if (cm_id_priv) {
2839                         if (cm_id_priv->id.remote_id == remote_id)
2840                                 atomic_inc(&cm_id_priv->refcount);
2841                         else
2842                                 cm_id_priv = NULL;
2843                 }
2844                 spin_unlock_irq(&cm.lock);
2845         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2846                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2847         else
2848                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2849
2850         return cm_id_priv;
2851 }
2852
2853 static int cm_rej_handler(struct cm_work *work)
2854 {
2855         struct cm_id_private *cm_id_priv;
2856         struct cm_rej_msg *rej_msg;
2857         int ret;
2858
2859         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2860         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2861         if (!cm_id_priv)
2862                 return -EINVAL;
2863
2864         cm_format_rej_event(work);
2865
2866         spin_lock_irq(&cm_id_priv->lock);
2867         switch (cm_id_priv->id.state) {
2868         case IB_CM_REQ_SENT:
2869         case IB_CM_MRA_REQ_RCVD:
2870         case IB_CM_REP_SENT:
2871         case IB_CM_MRA_REP_RCVD:
2872                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2873                 /* fall through */
2874         case IB_CM_REQ_RCVD:
2875         case IB_CM_MRA_REQ_SENT:
2876                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2877                         cm_enter_timewait(cm_id_priv);
2878                 else
2879                         cm_reset_to_idle(cm_id_priv);
2880                 break;
2881         case IB_CM_DREQ_SENT:
2882                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2883                 /* fall through */
2884         case IB_CM_REP_RCVD:
2885         case IB_CM_MRA_REP_SENT:
2886                 cm_enter_timewait(cm_id_priv);
2887                 break;
2888         case IB_CM_ESTABLISHED:
2889                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2890                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2891                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2892                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2893                                               cm_id_priv->msg);
2894                         cm_enter_timewait(cm_id_priv);
2895                         break;
2896                 }
2897                 /* fall through */
2898         default:
2899                 spin_unlock_irq(&cm_id_priv->lock);
2900                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2901                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2902                          cm_id_priv->id.state);
2903                 ret = -EINVAL;
2904                 goto out;
2905         }
2906
2907         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2908         if (!ret)
2909                 list_add_tail(&work->list, &cm_id_priv->work_list);
2910         spin_unlock_irq(&cm_id_priv->lock);
2911
2912         if (ret)
2913                 cm_process_work(cm_id_priv, work);
2914         else
2915                 cm_deref_id(cm_id_priv);
2916         return 0;
2917 out:
2918         cm_deref_id(cm_id_priv);
2919         return -EINVAL;
2920 }
2921
2922 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2923                    u8 service_timeout,
2924                    const void *private_data,
2925                    u8 private_data_len)
2926 {
2927         struct cm_id_private *cm_id_priv;
2928         struct ib_mad_send_buf *msg;
2929         enum ib_cm_state cm_state;
2930         enum ib_cm_lap_state lap_state;
2931         enum cm_msg_response msg_response;
2932         void *data;
2933         unsigned long flags;
2934         int ret;
2935
2936         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2937                 return -EINVAL;
2938
2939         data = cm_copy_private_data(private_data, private_data_len);
2940         if (IS_ERR(data))
2941                 return PTR_ERR(data);
2942
2943         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2944
2945         spin_lock_irqsave(&cm_id_priv->lock, flags);
2946         switch (cm_id_priv->id.state) {
2947         case IB_CM_REQ_RCVD:
2948                 cm_state = IB_CM_MRA_REQ_SENT;
2949                 lap_state = cm_id->lap_state;
2950                 msg_response = CM_MSG_RESPONSE_REQ;
2951                 break;
2952         case IB_CM_REP_RCVD:
2953                 cm_state = IB_CM_MRA_REP_SENT;
2954                 lap_state = cm_id->lap_state;
2955                 msg_response = CM_MSG_RESPONSE_REP;
2956                 break;
2957         case IB_CM_ESTABLISHED:
2958                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2959                         cm_state = cm_id->state;
2960                         lap_state = IB_CM_MRA_LAP_SENT;
2961                         msg_response = CM_MSG_RESPONSE_OTHER;
2962                         break;
2963                 }
2964                 /* fall through */
2965         default:
2966                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2967                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2968                          cm_id_priv->id.state);
2969                 ret = -EINVAL;
2970                 goto error1;
2971         }
2972
2973         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2974                 ret = cm_alloc_msg(cm_id_priv, &msg);
2975                 if (ret)
2976                         goto error1;
2977
2978                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2979                               msg_response, service_timeout,
2980                               private_data, private_data_len);
2981                 ret = ib_post_send_mad(msg, NULL);
2982                 if (ret)
2983                         goto error2;
2984         }
2985
2986         cm_id->state = cm_state;
2987         cm_id->lap_state = lap_state;
2988         cm_id_priv->service_timeout = service_timeout;
2989         cm_set_private_data(cm_id_priv, data, private_data_len);
2990         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2991         return 0;
2992
2993 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2994         kfree(data);
2995         return ret;
2996
2997 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2998         kfree(data);
2999         cm_free_msg(msg);
3000         return ret;
3001 }
3002 EXPORT_SYMBOL(ib_send_cm_mra);
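
/*
 * Illustrative sketch, not part of this file: a receiver that needs
 * more time to service a REQ sends an MRA.  The service timeout is a
 * 5-bit exponent (roughly 4.096us * 2^t); the value 16 and the
 * "still_busy" condition are placeholders.  OR-ing in
 * IB_CM_MRA_FLAG_DELAY records the timeout without posting an MRA
 * immediately.
 *
 *	if (event->event == IB_CM_REQ_RECEIVED && still_busy)
 *		ib_send_cm_mra(cm_id, 16, NULL, 0);
 */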
3003
3004 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
3005 {
3006         switch (cm_mra_get_msg_mraed(mra_msg)) {
3007         case CM_MSG_RESPONSE_REQ:
3008                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
3009         case CM_MSG_RESPONSE_REP:
3010         case CM_MSG_RESPONSE_OTHER:
3011                 return cm_acquire_id(mra_msg->remote_comm_id,
3012                                      mra_msg->local_comm_id);
3013         default:
3014                 return NULL;
3015         }
3016 }
3017
3018 static int cm_mra_handler(struct cm_work *work)
3019 {
3020         struct cm_id_private *cm_id_priv;
3021         struct cm_mra_msg *mra_msg;
3022         int timeout, ret;
3023
3024         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
3025         cm_id_priv = cm_acquire_mraed_id(mra_msg);
3026         if (!cm_id_priv)
3027                 return -EINVAL;
3028
3029         work->cm_event.private_data = &mra_msg->private_data;
3030         work->cm_event.param.mra_rcvd.service_timeout =
3031                                         cm_mra_get_service_timeout(mra_msg);
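        /*
         * Both values are IB timeout exponents (roughly 4.096us * 2^t):
         * extend the send timeout by the peer's advertised service time
         * plus the ack timeout already computed for this address vector.
         */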
3032         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
3033                   cm_convert_to_ms(cm_id_priv->av.timeout);
3034
3035         spin_lock_irq(&cm_id_priv->lock);
3036         switch (cm_id_priv->id.state) {
3037         case IB_CM_REQ_SENT:
3038                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
3039                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3040                                   cm_id_priv->msg, timeout))
3041                         goto out;
3042                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
3043                 break;
3044         case IB_CM_REP_SENT:
3045                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
3046                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3047                                   cm_id_priv->msg, timeout))
3048                         goto out;
3049                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
3050                 break;
3051         case IB_CM_ESTABLISHED:
3052                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
3053                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
3054                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3055                                   cm_id_priv->msg, timeout)) {
3056                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
3057                                 atomic_long_inc(&work->port->
3058                                                 counter_group[CM_RECV_DUPLICATES].
3059                                                 counter[CM_MRA_COUNTER]);
3060                         goto out;
3061                 }
3062                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
3063                 break;
3064         case IB_CM_MRA_REQ_RCVD:
3065         case IB_CM_MRA_REP_RCVD:
3066                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3067                                 counter[CM_MRA_COUNTER]);
3068                 /* fall through */
3069         default:
3070                 pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
3071                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
3072                          cm_id_priv->id.state);
3073                 goto out;
3074         }
3075
3076         cm_id_priv->msg->context[1] = (void *) (unsigned long)
3077                                       cm_id_priv->id.state;
3078         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3079         if (!ret)
3080                 list_add_tail(&work->list, &cm_id_priv->work_list);
3081         spin_unlock_irq(&cm_id_priv->lock);
3082
3083         if (ret)
3084                 cm_process_work(cm_id_priv, work);
3085         else
3086                 cm_deref_id(cm_id_priv);
3087         return 0;
3088 out:
3089         spin_unlock_irq(&cm_id_priv->lock);
3090         cm_deref_id(cm_id_priv);
3091         return -EINVAL;
3092 }
3093
3094 static void cm_format_lap(struct cm_lap_msg *lap_msg,
3095                           struct cm_id_private *cm_id_priv,
3096                           struct sa_path_rec *alternate_path,
3097                           const void *private_data,
3098                           u8 private_data_len)
3099 {
3100         bool alt_ext = false;
3101
3102         if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
3103                 alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
3104                                               alternate_path->opa.slid);
3105         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
3106                           cm_form_tid(cm_id_priv));
3107         lap_msg->local_comm_id = cm_id_priv->id.local_id;
3108         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
3109         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
3110         /* todo: need remote CM response timeout */
3111         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
3112         lap_msg->alt_local_lid =
3113                 htons(ntohl(sa_path_get_slid(alternate_path)));
3114         lap_msg->alt_remote_lid =
3115                 htons(ntohl(sa_path_get_dlid(alternate_path)));
3116         lap_msg->alt_local_gid = alternate_path->sgid;
3117         lap_msg->alt_remote_gid = alternate_path->dgid;
3118         if (alt_ext) {
3119                 lap_msg->alt_local_gid.global.interface_id
3120                         = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
3121                 lap_msg->alt_remote_gid.global.interface_id
3122                         = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
3123         }
3124         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
3125         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
3126         lap_msg->alt_hop_limit = alternate_path->hop_limit;
3127         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
3128         cm_lap_set_sl(lap_msg, alternate_path->sl);
3129         cm_lap_set_subnet_local(lap_msg, 1); /* subnet-local alternate paths only */
3130         cm_lap_set_local_ack_timeout(lap_msg,
3131                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
3132                                alternate_path->packet_life_time));
3133
3134         if (private_data && private_data_len)
3135                 memcpy(lap_msg->private_data, private_data, private_data_len);
3136 }
3137
3138 int ib_send_cm_lap(struct ib_cm_id *cm_id,
3139                    struct sa_path_rec *alternate_path,
3140                    const void *private_data,
3141                    u8 private_data_len)
3142 {
3143         struct cm_id_private *cm_id_priv;
3144         struct ib_mad_send_buf *msg;
3145         unsigned long flags;
3146         int ret;
3147
3148         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
3149                 return -EINVAL;
3150
3151         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3152         spin_lock_irqsave(&cm_id_priv->lock, flags);
3153         if (cm_id->state != IB_CM_ESTABLISHED ||
3154             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
3155              cm_id->lap_state != IB_CM_LAP_IDLE)) {
3156                 ret = -EINVAL;
3157                 goto out;
3158         }
3159
3160         ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
3161                                  cm_id_priv);
3162         if (ret)
3163                 goto out;
3164         cm_id_priv->alt_av.timeout =
3165                         cm_ack_timeout(cm_id_priv->target_ack_delay,
3166                                        cm_id_priv->alt_av.timeout - 1);
3167
3168         ret = cm_alloc_msg(cm_id_priv, &msg);
3169         if (ret)
3170                 goto out;
3171
3172         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
3173                       alternate_path, private_data, private_data_len);
3174         msg->timeout_ms = cm_id_priv->timeout_ms;
3175         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
3176
3177         ret = ib_post_send_mad(msg, NULL);
3178         if (ret) {
3179                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3180                 cm_free_msg(msg);
3181                 return ret;
3182         }
3183
3184         cm_id->lap_state = IB_CM_LAP_SENT;
3185         cm_id_priv->msg = msg;
3186
3187 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3188         return ret;
3189 }
3190 EXPORT_SYMBOL(ib_send_cm_lap);
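
/*
 * Illustrative sketch, not part of this file: once the connection is
 * established, the active side may load an alternate path obtained
 * from an SA path query.  "alt_rec" is a placeholder for a previously
 * resolved struct sa_path_rec.
 *
 *	ret = ib_send_cm_lap(cm_id, &alt_rec, NULL, 0);
 */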
3191
3192 static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
3193                                         struct sa_path_rec *path)
3194 {
3195         u32 lid;
3196
3197         if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
3198                 sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
3199                 sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
3200         } else {
3201                 lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
3202                 sa_path_set_dlid(path, lid);
3203
3204                 lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
3205                 sa_path_set_slid(path, lid);
3206         }
3207 }
3208
3209 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
3210                                     struct sa_path_rec *path,
3211                                     struct cm_lap_msg *lap_msg)
3212 {
3213         path->dgid = lap_msg->alt_local_gid;
3214         path->sgid = lap_msg->alt_remote_gid;
3215         path->flow_label = cm_lap_get_flow_label(lap_msg);
3216         path->hop_limit = lap_msg->alt_hop_limit;
3217         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
3218         path->reversible = 1;
3219         path->pkey = cm_id_priv->pkey;
3220         path->sl = cm_lap_get_sl(lap_msg);
3221         path->mtu_selector = IB_SA_EQ;
3222         path->mtu = cm_id_priv->path_mtu;
3223         path->rate_selector = IB_SA_EQ;
3224         path->rate = cm_lap_get_packet_rate(lap_msg);
3225         path->packet_life_time_selector = IB_SA_EQ;
3226         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
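        /* The LAP carries an ack timeout, i.e. packet life time + 1. */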
3227         path->packet_life_time -= (path->packet_life_time > 0);
3228         cm_format_path_lid_from_lap(lap_msg, path);
3229 }
3230
3231 static int cm_lap_handler(struct cm_work *work)
3232 {
3233         struct cm_id_private *cm_id_priv;
3234         struct cm_lap_msg *lap_msg;
3235         struct ib_cm_lap_event_param *param;
3236         struct ib_mad_send_buf *msg = NULL;
3237         int ret;
3238
3239         /* Alternate path messages are currently not supported for the
3240          * RoCE link layer.
3241          */
3242         if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3243                                work->port->port_num))
3244                 return -EINVAL;
3245
3246         /* todo: verify LAP request and send reject APR if invalid. */
3247         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
3248         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
3249                                    lap_msg->local_comm_id);
3250         if (!cm_id_priv)
3251                 return -EINVAL;
3252
3253         param = &work->cm_event.param.lap_rcvd;
3254         memset(&work->path[0], 0, sizeof(work->path[0]));
3255         cm_path_set_rec_type(work->port->cm_dev->ib_device,
3256                              work->port->port_num,
3257                              &work->path[0],
3258                              &lap_msg->alt_local_gid);
3259         param->alternate_path = &work->path[0];
3260         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
3261         work->cm_event.private_data = &lap_msg->private_data;
3262
3263         spin_lock_irq(&cm_id_priv->lock);
3264         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
3265                 goto unlock;
3266
3267         switch (cm_id_priv->id.lap_state) {
3268         case IB_CM_LAP_UNINIT:
3269         case IB_CM_LAP_IDLE:
3270                 break;
3271         case IB_CM_MRA_LAP_SENT:
3272                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3273                                 counter[CM_LAP_COUNTER]);
3274                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3275                 if (IS_ERR(msg))
3276                         goto unlock;
3277
3278                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3279                               CM_MSG_RESPONSE_OTHER,
3280                               cm_id_priv->service_timeout,
3281                               cm_id_priv->private_data,
3282                               cm_id_priv->private_data_len);
3283                 spin_unlock_irq(&cm_id_priv->lock);
3284
3285                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3286                     ib_post_send_mad(msg, NULL))
3287                         cm_free_msg(msg);
3288                 goto deref;
3289         case IB_CM_LAP_RCVD:
3290                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3291                                 counter[CM_LAP_COUNTER]);
3292                 goto unlock;
3293         default:
3294                 goto unlock;
3295         }
3296
3297         ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
3298                                  work->mad_recv_wc->recv_buf.grh,
3299                                  &cm_id_priv->av);
3300         if (ret)
3301                 goto unlock;
3302
3303         ret = cm_init_av_by_path(param->alternate_path, NULL,
3304                                  &cm_id_priv->alt_av, cm_id_priv);
3305         if (ret)
3306                 goto unlock;
3307
3308         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3309         cm_id_priv->tid = lap_msg->hdr.tid;
3310         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3311         if (!ret)
3312                 list_add_tail(&work->list, &cm_id_priv->work_list);
3313         spin_unlock_irq(&cm_id_priv->lock);
3314
3315         if (ret)
3316                 cm_process_work(cm_id_priv, work);
3317         else
3318                 cm_deref_id(cm_id_priv);
3319         return 0;
3320
3321 unlock: spin_unlock_irq(&cm_id_priv->lock);
3322 deref:  cm_deref_id(cm_id_priv);
3323         return -EINVAL;
3324 }
3325
3326 static void cm_format_apr(struct cm_apr_msg *apr_msg,
3327                           struct cm_id_private *cm_id_priv,
3328                           enum ib_cm_apr_status status,
3329                           void *info,
3330                           u8 info_length,
3331                           const void *private_data,
3332                           u8 private_data_len)
3333 {
3334         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
3335         apr_msg->local_comm_id = cm_id_priv->id.local_id;
3336         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
3337         apr_msg->ap_status = (u8) status;
3338
3339         if (info && info_length) {
3340                 apr_msg->info_length = info_length;
3341                 memcpy(apr_msg->info, info, info_length);
3342         }
3343
3344         if (private_data && private_data_len)
3345                 memcpy(apr_msg->private_data, private_data, private_data_len);
3346 }
3347
3348 int ib_send_cm_apr(struct ib_cm_id *cm_id,
3349                    enum ib_cm_apr_status status,
3350                    void *info,
3351                    u8 info_length,
3352                    const void *private_data,
3353                    u8 private_data_len)
3354 {
3355         struct cm_id_private *cm_id_priv;
3356         struct ib_mad_send_buf *msg;
3357         unsigned long flags;
3358         int ret;
3359
3360         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
3361             (info && info_length > IB_CM_APR_INFO_LENGTH))
3362                 return -EINVAL;
3363
3364         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3365         spin_lock_irqsave(&cm_id_priv->lock, flags);
3366         if (cm_id->state != IB_CM_ESTABLISHED ||
3367             (cm_id->lap_state != IB_CM_LAP_RCVD &&
3368              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
3369                 ret = -EINVAL;
3370                 goto out;
3371         }
3372
3373         ret = cm_alloc_msg(cm_id_priv, &msg);
3374         if (ret)
3375                 goto out;
3376
3377         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
3378                       info, info_length, private_data, private_data_len);
3379         ret = ib_post_send_mad(msg, NULL);
3380         if (ret) {
3381                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3382                 cm_free_msg(msg);
3383                 return ret;
3384         }
3385
3386         cm_id->lap_state = IB_CM_LAP_IDLE;
3387 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3388         return ret;
3389 }
3390 EXPORT_SYMBOL(ib_send_cm_apr);
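
/*
 * Illustrative sketch, not part of this file: the passive side accepts
 * a received LAP from its cm_handler by sending an APR.
 *
 *	if (event->event == IB_CM_LAP_RECEIVED)
 *		ib_send_cm_apr(cm_id, IB_CM_APR_SUCCESS,
 *			       NULL, 0, NULL, 0);
 */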
3391
3392 static int cm_apr_handler(struct cm_work *work)
3393 {
3394         struct cm_id_private *cm_id_priv;
3395         struct cm_apr_msg *apr_msg;
3396         int ret;
3397
3398         /* Alternate path messages are currently not supported for the
3399          * RoCE link layer.
3400          */
3401         if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3402                                work->port->port_num))
3403                 return -EINVAL;
3404
3405         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3406         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
3407                                    apr_msg->local_comm_id);
3408         if (!cm_id_priv)
3409                 return -EINVAL; /* Unmatched reply. */
3410
3411         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
3412         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
3413         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
3414         work->cm_event.private_data = &apr_msg->private_data;
3415
3416         spin_lock_irq(&cm_id_priv->lock);
3417         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3418             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3419              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3420                 spin_unlock_irq(&cm_id_priv->lock);
3421                 goto out;
3422         }
3423         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3424         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3425         cm_id_priv->msg = NULL;
3426
3427         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3428         if (!ret)
3429                 list_add_tail(&work->list, &cm_id_priv->work_list);
3430         spin_unlock_irq(&cm_id_priv->lock);
3431
3432         if (ret)
3433                 cm_process_work(cm_id_priv, work);
3434         else
3435                 cm_deref_id(cm_id_priv);
3436         return 0;
3437 out:
3438         cm_deref_id(cm_id_priv);
3439         return -EINVAL;
3440 }
3441
3442 static int cm_timewait_handler(struct cm_work *work)
3443 {
3444         struct cm_timewait_info *timewait_info;
3445         struct cm_id_private *cm_id_priv;
3446         int ret;
3447
3448         timewait_info = (struct cm_timewait_info *)work;
3449         spin_lock_irq(&cm.lock);
3450         list_del(&timewait_info->list);
3451         spin_unlock_irq(&cm.lock);
3452
3453         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3454                                    timewait_info->work.remote_id);
3455         if (!cm_id_priv)
3456                 return -EINVAL;
3457
3458         spin_lock_irq(&cm_id_priv->lock);
3459         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3460             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3461                 spin_unlock_irq(&cm_id_priv->lock);
3462                 goto out;
3463         }
3464         cm_id_priv->id.state = IB_CM_IDLE;
3465         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3466         if (!ret)
3467                 list_add_tail(&work->list, &cm_id_priv->work_list);
3468         spin_unlock_irq(&cm_id_priv->lock);
3469
3470         if (ret)
3471                 cm_process_work(cm_id_priv, work);
3472         else
3473                 cm_deref_id(cm_id_priv);
3474         return 0;
3475 out:
3476         cm_deref_id(cm_id_priv);
3477         return -EINVAL;
3478 }
3479
3480 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3481                                struct cm_id_private *cm_id_priv,
3482                                struct ib_cm_sidr_req_param *param)
3483 {
3484         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3485                           cm_form_tid(cm_id_priv));
3486         sidr_req_msg->request_id = cm_id_priv->id.local_id;
3487         sidr_req_msg->pkey = param->path->pkey;
3488         sidr_req_msg->service_id = param->service_id;
3489
3490         if (param->private_data && param->private_data_len)
3491                 memcpy(sidr_req_msg->private_data, param->private_data,
3492                        param->private_data_len);
3493 }
3494
3495 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3496                         struct ib_cm_sidr_req_param *param)
3497 {
3498         struct cm_id_private *cm_id_priv;
3499         struct ib_mad_send_buf *msg;
3500         unsigned long flags;
3501         int ret;
3502
3503         if (!param->path || (param->private_data &&
3504              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3505                 return -EINVAL;
3506
3507         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3508         ret = cm_init_av_by_path(param->path, param->sgid_attr,
3509                                  &cm_id_priv->av,
3510                                  cm_id_priv);
3511         if (ret)
3512                 goto out;
3513
3514         cm_id->service_id = param->service_id;
3515         cm_id->service_mask = ~cpu_to_be64(0);
3516         cm_id_priv->timeout_ms = param->timeout_ms;
3517         cm_id_priv->max_cm_retries = param->max_cm_retries;
3518         ret = cm_alloc_msg(cm_id_priv, &msg);
3519         if (ret)
3520                 goto out;
3521
3522         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3523                            param);
3524         msg->timeout_ms = cm_id_priv->timeout_ms;
3525         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3526
3527         spin_lock_irqsave(&cm_id_priv->lock, flags);
3528         if (cm_id->state == IB_CM_IDLE)
3529                 ret = ib_post_send_mad(msg, NULL);
3530         else
3531                 ret = -EINVAL;
3532
3533         if (ret) {
3534                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3535                 cm_free_msg(msg);
3536                 goto out;
3537         }
3538         cm_id->state = IB_CM_SIDR_REQ_SENT;
3539         cm_id_priv->msg = msg;
3540         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3541 out:
3542         return ret;
3543 }
3544 EXPORT_SYMBOL(ib_send_cm_sidr_req);
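
/*
 * Illustrative sketch, not part of this file: resolving a service ID
 * to a QPN/Q_Key.  "rec" stands in for a resolved path record, and the
 * service ID, timeout and retry count are arbitrary examples.
 *
 *	struct ib_cm_sidr_req_param param = {
 *		.path		= &rec,
 *		.service_id	= cpu_to_be64(0x1234),
 *		.timeout_ms	= 1000,
 *		.max_cm_retries	= 3,
 *	};
 *	ret = ib_send_cm_sidr_req(cm_id, &param);
 */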
3545
3546 static void cm_format_sidr_req_event(struct cm_work *work,
3547                                      const struct cm_id_private *rx_cm_id,
3548                                      struct ib_cm_id *listen_id)
3549 {
3550         struct cm_sidr_req_msg *sidr_req_msg;
3551         struct ib_cm_sidr_req_event_param *param;
3552
3553         sidr_req_msg = (struct cm_sidr_req_msg *)
3554                                 work->mad_recv_wc->recv_buf.mad;
3555         param = &work->cm_event.param.sidr_req_rcvd;
3556         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3557         param->listen_id = listen_id;
3558         param->service_id = sidr_req_msg->service_id;
3559         param->bth_pkey = cm_get_bth_pkey(work);
3560         param->port = work->port->port_num;
3561         param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
3562         work->cm_event.private_data = &sidr_req_msg->private_data;
3563 }
3564
3565 static int cm_sidr_req_handler(struct cm_work *work)
3566 {
3567         struct ib_cm_id *cm_id;
3568         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3569         struct cm_sidr_req_msg *sidr_req_msg;
3570         struct ib_wc *wc;
3571         int ret;
3572
3573         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3574         if (IS_ERR(cm_id))
3575                 return PTR_ERR(cm_id);
3576         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3577
3578         /* Record SGID/SLID and request ID for lookup. */
3579         sidr_req_msg = (struct cm_sidr_req_msg *)
3580                                 work->mad_recv_wc->recv_buf.mad;
3581         wc = work->mad_recv_wc->wc;
3582         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3583         cm_id_priv->av.dgid.global.interface_id = 0;
3584         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3585                                       work->mad_recv_wc->recv_buf.grh,
3586                                       &cm_id_priv->av);
3587         if (ret)
3588                 goto out;
3589
3590         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3591         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3592         atomic_inc(&cm_id_priv->work_count);
3593
3594         spin_lock_irq(&cm.lock);
3595         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3596         if (cur_cm_id_priv) {
3597                 spin_unlock_irq(&cm.lock);
3598                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3599                                 counter[CM_SIDR_REQ_COUNTER]);
3600                 goto out; /* Duplicate message. */
3601         }
3602         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3603         cur_cm_id_priv = cm_find_listen(cm_id->device,
3604                                         sidr_req_msg->service_id);
3605         if (!cur_cm_id_priv) {
3606                 spin_unlock_irq(&cm.lock);
3607                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3608                 goto out; /* No match. */
3609         }
3610         atomic_inc(&cur_cm_id_priv->refcount);
3611         atomic_inc(&cm_id_priv->refcount);
3612         spin_unlock_irq(&cm.lock);
3613
3614         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3615         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3616         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3617         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3618
3619         cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3620         cm_process_work(cm_id_priv, work);
3621         cm_deref_id(cur_cm_id_priv);
3622         return 0;
3623 out:
3624         ib_destroy_cm_id(&cm_id_priv->id);
3625         return -EINVAL;
3626 }
3627
3628 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3629                                struct cm_id_private *cm_id_priv,
3630                                struct ib_cm_sidr_rep_param *param)
3631 {
3632         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3633                           cm_id_priv->tid);
3634         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3635         sidr_rep_msg->status = param->status;
3636         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3637         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3638         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3639
3640         if (param->info && param->info_length)
3641                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3642
3643         if (param->private_data && param->private_data_len)
3644                 memcpy(sidr_rep_msg->private_data, param->private_data,
3645                        param->private_data_len);
3646 }
3647
3648 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3649                         struct ib_cm_sidr_rep_param *param)
3650 {
3651         struct cm_id_private *cm_id_priv;
3652         struct ib_mad_send_buf *msg;
3653         unsigned long flags;
3654         int ret;
3655
3656         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3657             (param->private_data &&
3658              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3659                 return -EINVAL;
3660
3661         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3662         spin_lock_irqsave(&cm_id_priv->lock, flags);
3663         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3664                 ret = -EINVAL;
3665                 goto error;
3666         }
3667
3668         ret = cm_alloc_msg(cm_id_priv, &msg);
3669         if (ret)
3670                 goto error;
3671
3672         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3673                            param);
3674         ret = ib_post_send_mad(msg, NULL);
3675         if (ret) {
3676                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3677                 cm_free_msg(msg);
3678                 return ret;
3679         }
3680         cm_id->state = IB_CM_IDLE;
3681         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3682
3683         spin_lock_irqsave(&cm.lock, flags);
3684         if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3685                 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3686                 RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3687         }
3688         spin_unlock_irqrestore(&cm.lock, flags);
3689         return 0;
3690
3691 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3692         return ret;
3693 }
3694 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
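
/*
 * Illustrative sketch, not part of this file: a service answering a
 * SIDR REQ from its cm_handler reports the QPN and Q_Key the client
 * should address.  "my_qp" and "MY_QKEY" are placeholders.
 *
 *	struct ib_cm_sidr_rep_param param = {
 *		.qp_num	= my_qp->qp_num,
 *		.qkey	= MY_QKEY,
 *		.status	= IB_SIDR_SUCCESS,
 *	};
 *	ret = ib_send_cm_sidr_rep(cm_id, &param);
 */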
3695
3696 static void cm_format_sidr_rep_event(struct cm_work *work,
3697                                      const struct cm_id_private *cm_id_priv)
3698 {
3699         struct cm_sidr_rep_msg *sidr_rep_msg;
3700         struct ib_cm_sidr_rep_event_param *param;
3701
3702         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3703                                 work->mad_recv_wc->recv_buf.mad;
3704         param = &work->cm_event.param.sidr_rep_rcvd;
3705         param->status = sidr_rep_msg->status;
3706         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3707         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3708         param->info = &sidr_rep_msg->info;
3709         param->info_len = sidr_rep_msg->info_length;
3710         param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
3711         work->cm_event.private_data = &sidr_rep_msg->private_data;
3712 }
3713
3714 static int cm_sidr_rep_handler(struct cm_work *work)
3715 {
3716         struct cm_sidr_rep_msg *sidr_rep_msg;
3717         struct cm_id_private *cm_id_priv;
3718
3719         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3720                                 work->mad_recv_wc->recv_buf.mad;
3721         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3722         if (!cm_id_priv)
3723                 return -EINVAL; /* Unmatched reply. */
3724
3725         spin_lock_irq(&cm_id_priv->lock);
3726         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3727                 spin_unlock_irq(&cm_id_priv->lock);
3728                 goto out;
3729         }
3730         cm_id_priv->id.state = IB_CM_IDLE;
3731         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3732         spin_unlock_irq(&cm_id_priv->lock);
3733
3734         cm_format_sidr_rep_event(work, cm_id_priv);
3735         cm_process_work(cm_id_priv, work);
3736         return 0;
3737 out:
3738         cm_deref_id(cm_id_priv);
3739         return -EINVAL;
3740 }
3741
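/*
 * Called for a MAD that completed in error (typically after all send
 * retries were exhausted): map the state that was waiting on this send
 * to the matching *_ERROR event and report it to the consumer.
 */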
3742 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3743                                   enum ib_wc_status wc_status)
3744 {
3745         struct cm_id_private *cm_id_priv;
3746         struct ib_cm_event cm_event;
3747         enum ib_cm_state state;
3748         int ret;
3749
3750         memset(&cm_event, 0, sizeof cm_event);
3751         cm_id_priv = msg->context[0];
3752
3753         /* Discard old sends or ones without a response. */
3754         spin_lock_irq(&cm_id_priv->lock);
3755         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3756         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3757                 goto discard;
3758
3759         pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
3760                              state, ib_wc_status_msg(wc_status));
3761         switch (state) {
3762         case IB_CM_REQ_SENT:
3763         case IB_CM_MRA_REQ_RCVD:
3764                 cm_reset_to_idle(cm_id_priv);
3765                 cm_event.event = IB_CM_REQ_ERROR;
3766                 break;
3767         case IB_CM_REP_SENT:
3768         case IB_CM_MRA_REP_RCVD:
3769                 cm_reset_to_idle(cm_id_priv);
3770                 cm_event.event = IB_CM_REP_ERROR;
3771                 break;
3772         case IB_CM_DREQ_SENT:
3773                 cm_enter_timewait(cm_id_priv);
3774                 cm_event.event = IB_CM_DREQ_ERROR;
3775                 break;
3776         case IB_CM_SIDR_REQ_SENT:
3777                 cm_id_priv->id.state = IB_CM_IDLE;
3778                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3779                 break;
3780         default:
3781                 goto discard;
3782         }
3783         spin_unlock_irq(&cm_id_priv->lock);
3784         cm_event.param.send_status = wc_status;
3785
3786         /* No other events can occur on the cm_id at this point. */
3787         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3788         cm_free_msg(msg);
3789         if (ret)
3790                 ib_destroy_cm_id(&cm_id_priv->id);
3791         return;
3792 discard:
3793         spin_unlock_irq(&cm_id_priv->lock);
3794         cm_free_msg(msg);
3795 }
3796
3797 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3798                             struct ib_mad_send_wc *mad_send_wc)
3799 {
3800         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3801         struct cm_port *port;
3802         u16 attr_index;
3803
3804         port = mad_agent->context;
3805         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3806                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3807
3808         /*
3809          * If the send was in response to a received message (context[0] is not
3810          * set to a cm_id), and is not a REJ, then it is a send that was
3811          * manually retried.
3812          */
3813         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3814                 msg->retries = 1;
3815
3816         atomic_long_add(1 + msg->retries,
3817                         &port->counter_group[CM_XMIT].counter[attr_index]);
3818         if (msg->retries)
3819                 atomic_long_add(msg->retries,
3820                                 &port->counter_group[CM_XMIT_RETRIES].
3821                                 counter[attr_index]);
3822
3823         switch (mad_send_wc->status) {
3824         case IB_WC_SUCCESS:
3825         case IB_WC_WR_FLUSH_ERR:
3826                 cm_free_msg(msg);
3827                 break;
3828         default:
3829                 if (msg->context[0] && msg->context[1])
3830                         cm_process_send_error(msg, mad_send_wc->status);
3831                 else
3832                         cm_free_msg(msg);
3833                 break;
3834         }
3835 }
3836
3837 static void cm_work_handler(struct work_struct *_work)
3838 {
3839         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3840         int ret;
3841
3842         switch (work->cm_event.event) {
3843         case IB_CM_REQ_RECEIVED:
3844                 ret = cm_req_handler(work);
3845                 break;
3846         case IB_CM_MRA_RECEIVED:
3847                 ret = cm_mra_handler(work);
3848                 break;
3849         case IB_CM_REJ_RECEIVED:
3850                 ret = cm_rej_handler(work);
3851                 break;
3852         case IB_CM_REP_RECEIVED:
3853                 ret = cm_rep_handler(work);
3854                 break;
3855         case IB_CM_RTU_RECEIVED:
3856                 ret = cm_rtu_handler(work);
3857                 break;
3858         case IB_CM_USER_ESTABLISHED:
3859                 ret = cm_establish_handler(work);
3860                 break;
3861         case IB_CM_DREQ_RECEIVED:
3862                 ret = cm_dreq_handler(work);
3863                 break;
3864         case IB_CM_DREP_RECEIVED:
3865                 ret = cm_drep_handler(work);
3866                 break;
3867         case IB_CM_SIDR_REQ_RECEIVED:
3868                 ret = cm_sidr_req_handler(work);
3869                 break;
3870         case IB_CM_SIDR_REP_RECEIVED:
3871                 ret = cm_sidr_rep_handler(work);
3872                 break;
3873         case IB_CM_LAP_RECEIVED:
3874                 ret = cm_lap_handler(work);
3875                 break;
3876         case IB_CM_APR_RECEIVED:
3877                 ret = cm_apr_handler(work);
3878                 break;
3879         case IB_CM_TIMEWAIT_EXIT:
3880                 ret = cm_timewait_handler(work);
3881                 break;
3882         default:
3883                 pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
3884                 ret = -EINVAL;
3885                 break;
3886         }
3887         if (ret)
3888                 cm_free_work(work);
3889 }
3890
3891 static int cm_establish(struct ib_cm_id *cm_id)
3892 {
3893         struct cm_id_private *cm_id_priv;
3894         struct cm_work *work;
3895         unsigned long flags;
3896         int ret = 0;
3897         struct cm_device *cm_dev;
3898
3899         cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3900         if (!cm_dev)
3901                 return -ENODEV;
3902
3903         work = kmalloc(sizeof *work, GFP_ATOMIC);
3904         if (!work)
3905                 return -ENOMEM;
3906
3907         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3908         spin_lock_irqsave(&cm_id_priv->lock, flags);
3909         switch (cm_id->state) {
3911         case IB_CM_REP_SENT:
3912         case IB_CM_MRA_REP_RCVD:
3913                 cm_id->state = IB_CM_ESTABLISHED;
3914                 break;
3915         case IB_CM_ESTABLISHED:
3916                 ret = -EISCONN;
3917                 break;
3918         default:
3919                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
3920                          be32_to_cpu(cm_id->local_id), cm_id->state);
3921                 ret = -EINVAL;
3922                 break;
3923         }
3924         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3925
3926         if (ret) {
3927                 kfree(work);
3928                 goto out;
3929         }
3930
3931         /*
3932          * The CM worker thread may try to destroy the cm_id before it
3933          * can execute this work item.  To prevent potential deadlock,
3934          * we need to find the cm_id once we're in the context of the
3935          * worker thread, rather than holding a reference on it.
3936          */
3937         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3938         work->local_id = cm_id->local_id;
3939         work->remote_id = cm_id->remote_id;
3940         work->mad_recv_wc = NULL;
3941         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3942
3943         /* Check if the device started its remove_one */
3944         spin_lock_irqsave(&cm.lock, flags);
3945         if (!cm_dev->going_down) {
3946                 queue_delayed_work(cm.wq, &work->work, 0);
3947         } else {
3948                 kfree(work);
3949                 ret = -ENODEV;
3950         }
3951         spin_unlock_irqrestore(&cm.lock, flags);
3952
3953 out:
3954         return ret;
3955 }
3956
3957 static int cm_migrate(struct ib_cm_id *cm_id)
3958 {
3959         struct cm_id_private *cm_id_priv;
3960         struct cm_av tmp_av;
3961         unsigned long flags;
3962         int tmp_send_port_not_ready;
3963         int ret = 0;
3964
3965         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3966         spin_lock_irqsave(&cm_id_priv->lock, flags);
3967         if (cm_id->state == IB_CM_ESTABLISHED &&
3968             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3969              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3970                 cm_id->lap_state = IB_CM_LAP_IDLE;
3971                 /* Swap address vector */
3972                 tmp_av = cm_id_priv->av;
3973                 cm_id_priv->av = cm_id_priv->alt_av;
3974                 cm_id_priv->alt_av = tmp_av;
3975                 /* Swap port send ready state */
3976                 tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3977                 cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3978                 cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3979         } else
3980                 ret = -EINVAL;
3981         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3982
3983         return ret;
3984 }
3985
3986 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3987 {
3988         int ret;
3989
3990         switch (event) {
3991         case IB_EVENT_COMM_EST:
3992                 ret = cm_establish(cm_id);
3993                 break;
3994         case IB_EVENT_PATH_MIG:
3995                 ret = cm_migrate(cm_id);
3996                 break;
3997         default:
3998                 ret = -EINVAL;
3999         }
4000         return ret;
4001 }
4002 EXPORT_SYMBOL(ib_cm_notify);
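
/*
 * Illustrative sketch, not part of this file: a consumer's QP async
 * event handler forwards communication-established notifications so
 * the CM can move a passive connection to IB_CM_ESTABLISHED before the
 * RTU arrives.  The handler name is an assumption.
 *
 *	static void my_qp_event_handler(struct ib_event *event, void *ctx)
 *	{
 *		struct ib_cm_id *cm_id = ctx;
 *
 *		if (event->event == IB_EVENT_COMM_EST)
 *			ib_cm_notify(cm_id, IB_EVENT_COMM_EST);
 *	}
 */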
4003
4004 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
4005                             struct ib_mad_send_buf *send_buf,
4006                             struct ib_mad_recv_wc *mad_recv_wc)
4007 {
4008         struct cm_port *port = mad_agent->context;
4009         struct cm_work *work;
4010         enum ib_cm_event_type event;
4011         bool alt_path = false;
4012         u16 attr_id;
4013         int paths = 0;
4014         int going_down = 0;
4015
4016         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
4017         case CM_REQ_ATTR_ID:
4018                 alt_path = cm_req_has_alt_path((struct cm_req_msg *)
4019                                                 mad_recv_wc->recv_buf.mad);
4020                 paths = 1 + (alt_path != 0);
4021                 event = IB_CM_REQ_RECEIVED;
4022                 break;
4023         case CM_MRA_ATTR_ID:
4024                 event = IB_CM_MRA_RECEIVED;
4025                 break;
4026         case CM_REJ_ATTR_ID:
4027                 event = IB_CM_REJ_RECEIVED;
4028                 break;
4029         case CM_REP_ATTR_ID:
4030                 event = IB_CM_REP_RECEIVED;
4031                 break;
4032         case CM_RTU_ATTR_ID:
4033                 event = IB_CM_RTU_RECEIVED;
4034                 break;
4035         case CM_DREQ_ATTR_ID:
4036                 event = IB_CM_DREQ_RECEIVED;
4037                 break;
4038         case CM_DREP_ATTR_ID:
4039                 event = IB_CM_DREP_RECEIVED;
4040                 break;
4041         case CM_SIDR_REQ_ATTR_ID:
4042                 event = IB_CM_SIDR_REQ_RECEIVED;
4043                 break;
4044         case CM_SIDR_REP_ATTR_ID:
4045                 event = IB_CM_SIDR_REP_RECEIVED;
4046                 break;
4047         case CM_LAP_ATTR_ID:
4048                 paths = 1;
4049                 event = IB_CM_LAP_RECEIVED;
4050                 break;
4051         case CM_APR_ATTR_ID:
4052                 event = IB_CM_APR_RECEIVED;
4053                 break;
4054         default:
4055                 ib_free_recv_mad(mad_recv_wc);
4056                 return;
4057         }
4058
4059         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
4060         atomic_long_inc(&port->counter_group[CM_RECV].
4061                         counter[attr_id - CM_ATTR_ID_OFFSET]);
4062
4063         work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
4064                        GFP_KERNEL);
4065         if (!work) {
4066                 ib_free_recv_mad(mad_recv_wc);
4067                 return;
4068         }
4069
4070         INIT_DELAYED_WORK(&work->work, cm_work_handler);
4071         work->cm_event.event = event;
4072         work->mad_recv_wc = mad_recv_wc;
4073         work->port = port;
4074
4075         /* Check if the device has started its remove_one */
4076         spin_lock_irq(&cm.lock);
4077         if (!port->cm_dev->going_down)
4078                 queue_delayed_work(cm.wq, &work->work, 0);
4079         else
4080                 going_down = 1;
4081         spin_unlock_irq(&cm.lock);
4082
4083         if (going_down) {
4084                 kfree(work);
4085                 ib_free_recv_mad(mad_recv_wc);
4086         }
4087 }
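
/*
 * cm_recv_handler() sizes each work item for the trailing array of path
 * records (0, 1 or 2, depending on the MAD).  The same flexible-array
 * idiom in isolation, written with struct_size() as cm_add_one() below
 * already does; "struct example_work" and its helper are hypothetical.
 */
#if 0
struct example_work {
	struct delayed_work work;
	struct sa_path_rec path[];	/* flexible trailing array */
};

static struct example_work *example_alloc_work(int paths)
{
	struct example_work *w;

	/* struct_size() guards the size computation against overflow */
	w = kmalloc(struct_size(w, path, paths), GFP_KERNEL);
	return w;
}
#endif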
4088
4089 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
4090                                 struct ib_qp_attr *qp_attr,
4091                                 int *qp_attr_mask)
4092 {
4093         unsigned long flags;
4094         int ret;
4095
4096         spin_lock_irqsave(&cm_id_priv->lock, flags);
4097         switch (cm_id_priv->id.state) {
4098         case IB_CM_REQ_SENT:
4099         case IB_CM_MRA_REQ_RCVD:
4100         case IB_CM_REQ_RCVD:
4101         case IB_CM_MRA_REQ_SENT:
4102         case IB_CM_REP_RCVD:
4103         case IB_CM_MRA_REP_SENT:
4104         case IB_CM_REP_SENT:
4105         case IB_CM_MRA_REP_RCVD:
4106         case IB_CM_ESTABLISHED:
4107                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
4108                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
4109                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
4110                 if (cm_id_priv->responder_resources)
4111                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
4112                                                     IB_ACCESS_REMOTE_ATOMIC;
4113                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
4114                 qp_attr->port_num = cm_id_priv->av.port->port_num;
4115                 ret = 0;
4116                 break;
4117         default:
4118                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4119                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
4120                          cm_id_priv->id.state);
4121                 ret = -EINVAL;
4122                 break;
4123         }
4124         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4125         return ret;
4126 }
4127
4128 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
4129                                struct ib_qp_attr *qp_attr,
4130                                int *qp_attr_mask)
4131 {
4132         unsigned long flags;
4133         int ret;
4134
4135         spin_lock_irqsave(&cm_id_priv->lock, flags);
4136         switch (cm_id_priv->id.state) {
4137         case IB_CM_REQ_RCVD:
4138         case IB_CM_MRA_REQ_SENT:
4139         case IB_CM_REP_RCVD:
4140         case IB_CM_MRA_REP_SENT:
4141         case IB_CM_REP_SENT:
4142         case IB_CM_MRA_REP_RCVD:
4143         case IB_CM_ESTABLISHED:
4144                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
4145                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
4146                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
4147                 qp_attr->path_mtu = cm_id_priv->path_mtu;
4148                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
4149                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
4150                 if (cm_id_priv->qp_type == IB_QPT_RC ||
4151                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
4152                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
4153                                          IB_QP_MIN_RNR_TIMER;
4154                         qp_attr->max_dest_rd_atomic =
4155                                         cm_id_priv->responder_resources;
4156                         qp_attr->min_rnr_timer = 0;
4157                 }
4158                 if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4159                         *qp_attr_mask |= IB_QP_ALT_PATH;
4160                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4161                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4162                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4163                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4164                 }
4165                 ret = 0;
4166                 break;
4167         default:
4168                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4169                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
4170                          cm_id_priv->id.state);
4171                 ret = -EINVAL;
4172                 break;
4173         }
4174         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4175         return ret;
4176 }
4177
4178 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
4179                                struct ib_qp_attr *qp_attr,
4180                                int *qp_attr_mask)
4181 {
4182         unsigned long flags;
4183         int ret;
4184
4185         spin_lock_irqsave(&cm_id_priv->lock, flags);
4186         switch (cm_id_priv->id.state) {
4187         /* Allow transition to RTS before sending REP */
4188         case IB_CM_REQ_RCVD:
4189         case IB_CM_MRA_REQ_SENT:
4190
4191         case IB_CM_REP_RCVD:
4192         case IB_CM_MRA_REP_SENT:
4193         case IB_CM_REP_SENT:
4194         case IB_CM_MRA_REP_RCVD:
4195         case IB_CM_ESTABLISHED:
4196                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
4197                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
4198                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
4199                         switch (cm_id_priv->qp_type) {
4200                         case IB_QPT_RC:
4201                         case IB_QPT_XRC_INI:
4202                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
4203                                                  IB_QP_MAX_QP_RD_ATOMIC;
4204                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
4205                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
4206                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
4207                                 /* fall through */
4208                         case IB_QPT_XRC_TGT:
4209                                 *qp_attr_mask |= IB_QP_TIMEOUT;
4210                                 qp_attr->timeout = cm_id_priv->av.timeout;
4211                                 break;
4212                         default:
4213                                 break;
4214                         }
4215                         if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4216                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
4217                                 qp_attr->path_mig_state = IB_MIG_REARM;
4218                         }
4219                 } else {
4220                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
4221                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4222                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4223                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4224                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4225                         qp_attr->path_mig_state = IB_MIG_REARM;
4226                 }
4227                 ret = 0;
4228                 break;
4229         default:
4230                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4231                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
4232                          cm_id_priv->id.state);
4233                 ret = -EINVAL;
4234                 break;
4235         }
4236         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4237         return ret;
4238 }
4239
4240 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
4241                        struct ib_qp_attr *qp_attr,
4242                        int *qp_attr_mask)
4243 {
4244         struct cm_id_private *cm_id_priv;
4245         int ret;
4246
4247         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
4248         switch (qp_attr->qp_state) {
4249         case IB_QPS_INIT:
4250                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
4251                 break;
4252         case IB_QPS_RTR:
4253                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
4254                 break;
4255         case IB_QPS_RTS:
4256                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
4257                 break;
4258         default:
4259                 ret = -EINVAL;
4260                 break;
4261         }
4262         return ret;
4263 }
4264 EXPORT_SYMBOL(ib_cm_init_qp_attr);
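
/*
 * A minimal sketch (not taken from any ULP) of the intended calling
 * pattern: for each QP state transition the consumer asks the CM for
 * the matching attributes and applies them with ib_modify_qp().  The
 * wrapper name is hypothetical; both calls are real APIs.
 */
#if 0
static int example_modify_qp(struct ib_cm_id *cm_id, struct ib_qp *qp,
			     enum ib_qp_state state)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask;
	int ret;

	qp_attr.qp_state = state;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;
	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

/*
 * Typically driven as the connection progresses:
 *	example_modify_qp(cm_id, qp, IB_QPS_INIT);
 *	example_modify_qp(cm_id, qp, IB_QPS_RTR);
 *	example_modify_qp(cm_id, qp, IB_QPS_RTS);
 */
#endif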
4265
4266 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
4267                                char *buf)
4268 {
4269         struct cm_counter_group *group;
4270         struct cm_counter_attribute *cm_attr;
4271
4272         group = container_of(obj, struct cm_counter_group, obj);
4273         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
4274
4275         return sprintf(buf, "%ld\n",
4276                        atomic_long_read(&group->counter[cm_attr->index]));
4277 }
4278
4279 static const struct sysfs_ops cm_counter_ops = {
4280         .show = cm_show_counter
4281 };
4282
4283 static struct kobj_type cm_counter_obj_type = {
4284         .sysfs_ops = &cm_counter_ops,
4285         .default_attrs = cm_counter_default_attrs
4286 };
4287
4288 static void cm_release_port_obj(struct kobject *obj)
4289 {
4290         struct cm_port *cm_port;
4291
4292         cm_port = container_of(obj, struct cm_port, port_obj);
4293         kfree(cm_port);
4294 }
4295
4296 static struct kobj_type cm_port_obj_type = {
4297         .release = cm_release_port_obj
4298 };
4299
4300 static char *cm_devnode(struct device *dev, umode_t *mode)
4301 {
4302         if (mode)
4303                 *mode = 0666;
4304         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
4305 }
4306
4307 struct class cm_class = {
4308         .owner   = THIS_MODULE,
4309         .name    = "infiniband_cm",
4310         .devnode = cm_devnode,
4311 };
4312 EXPORT_SYMBOL(cm_class);
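
/*
 * cm_devnode() places every device registered under cm_class below
 * /dev/infiniband/ with mode 0666.  A hedged sketch of how a consumer
 * module might hang a node off this class; the devt and name here are
 * hypothetical.
 */
#if 0
static struct device *example_create_cm_node(dev_t devt)
{
	/* appears as /dev/infiniband/example0, world read/write */
	return device_create(&cm_class, NULL, devt, NULL, "example0");
}
#endif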
4313
4314 static int cm_create_port_fs(struct cm_port *port)
4315 {
4316         int i, ret;
4317
4318         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
4319                                    &port->cm_dev->device->kobj,
4320                                    "%d", port->port_num);
4321         if (ret) {
4322                 kfree(port);
4323                 return ret;
4324         }
4325
4326         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
4327                 ret = kobject_init_and_add(&port->counter_group[i].obj,
4328                                            &cm_counter_obj_type,
4329                                            &port->port_obj,
4330                                            "%s", counter_group_names[i]);
4331                 if (ret)
4332                         goto error;
4333         }
4334
4335         return 0;
4336
4337 error:
4338         while (i--)
4339                 kobject_put(&port->counter_group[i].obj);
4340         kobject_put(&port->port_obj);
4341         return ret;
4342
4343 }
4344
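/*
 * Note: dropping the last reference on port_obj below ends up in
 * cm_release_port_obj(), which frees the cm_port itself, so the
 * caller must not touch *port after cm_remove_port_fs() returns.
 */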
4345 static void cm_remove_port_fs(struct cm_port *port)
4346 {
4347         int i;
4348
4349         for (i = 0; i < CM_COUNTER_GROUPS; i++)
4350                 kobject_put(&port->counter_group[i].obj);
4351
4352         kobject_put(&port->port_obj);
4353 }
4354
4355 static void cm_add_one(struct ib_device *ib_device)
4356 {
4357         struct cm_device *cm_dev;
4358         struct cm_port *port;
4359         struct ib_mad_reg_req reg_req = {
4360                 .mgmt_class = IB_MGMT_CLASS_CM,
4361                 .mgmt_class_version = IB_CM_CLASS_VERSION,
4362         };
4363         struct ib_port_modify port_modify = {
4364                 .set_port_cap_mask = IB_PORT_CM_SUP
4365         };
4366         unsigned long flags;
4367         int ret;
4368         int count = 0;
4369         u8 i;
4370
4371         cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
4372                          GFP_KERNEL);
4373         if (!cm_dev)
4374                 return;
4375
4376         cm_dev->ib_device = ib_device;
4377         cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
4378         cm_dev->going_down = 0;
4379         cm_dev->device = device_create(&cm_class, &ib_device->dev,
4380                                        MKDEV(0, 0), NULL,
4381                                        "%s", ib_device->name);
4382         if (IS_ERR(cm_dev->device)) {
4383                 kfree(cm_dev);
4384                 return;
4385         }
4386
4387         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
4388         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4389                 if (!rdma_cap_ib_cm(ib_device, i))
4390                         continue;
4391
4392                 port = kzalloc(sizeof *port, GFP_KERNEL);
4393                 if (!port)
4394                         goto error1;
4395
4396                 cm_dev->port[i-1] = port;
4397                 port->cm_dev = cm_dev;
4398                 port->port_num = i;
4399
4400                 INIT_LIST_HEAD(&port->cm_priv_prim_list);
4401                 INIT_LIST_HEAD(&port->cm_priv_altr_list);
4402
4403                 ret = cm_create_port_fs(port);
4404                 if (ret)
4405                         goto error1;
4406
4407                 port->mad_agent = ib_register_mad_agent(ib_device, i,
4408                                                         IB_QPT_GSI,
4409                                                         &reg_req,
4410                                                         0,
4411                                                         cm_send_handler,
4412                                                         cm_recv_handler,
4413                                                         port,
4414                                                         0);
4415                 if (IS_ERR(port->mad_agent))
4416                         goto error2;
4417
4418                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
4419                 if (ret)
4420                         goto error3;
4421
4422                 count++;
4423         }
4424
4425         if (!count)
4426                 goto free;
4427
4428         ib_set_client_data(ib_device, &cm_client, cm_dev);
4429
4430         write_lock_irqsave(&cm.device_lock, flags);
4431         list_add_tail(&cm_dev->list, &cm.device_list);
4432         write_unlock_irqrestore(&cm.device_lock, flags);
4433         return;
4434
4435 error3:
4436         ib_unregister_mad_agent(port->mad_agent);
4437 error2:
4438         cm_remove_port_fs(port);
4439 error1:
4440         port_modify.set_port_cap_mask = 0;
4441         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
4442         while (--i) {
4443                 if (!rdma_cap_ib_cm(ib_device, i))
4444                         continue;
4445
4446                 port = cm_dev->port[i-1];
4447                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4448                 ib_unregister_mad_agent(port->mad_agent);
4449                 cm_remove_port_fs(port);
4450         }
4451 free:
4452         device_unregister(cm_dev->device);
4453         kfree(cm_dev);
4454 }
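
/*
 * The "while (--i)" unwind above tears down only ports 1..i-1: port i
 * itself failed part-way and was already cleaned up via the error
 * labels.  The same idiom in isolation, with hypothetical helpers:
 */
#if 0
struct example_dev;
static int example_setup_port(struct example_dev *dev, u8 i);
static void example_teardown_port(struct example_dev *dev, u8 i);

static int example_setup_all(struct example_dev *dev, u8 nports)
{
	u8 i;

	for (i = 1; i <= nports; i++)
		if (example_setup_port(dev, i))
			goto unwind;
	return 0;
unwind:
	while (--i)			/* ports 1..i-1 hold resources */
		example_teardown_port(dev, i);
	return -ENODEV;
}
#endif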
4455
4456 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4457 {
4458         struct cm_device *cm_dev = client_data;
4459         struct cm_port *port;
4460         struct cm_id_private *cm_id_priv;
4461         struct ib_mad_agent *cur_mad_agent;
4462         struct ib_port_modify port_modify = {
4463                 .clr_port_cap_mask = IB_PORT_CM_SUP
4464         };
4465         unsigned long flags;
4466         int i;
4467
4468         if (!cm_dev)
4469                 return;
4470
4471         write_lock_irqsave(&cm.device_lock, flags);
4472         list_del(&cm_dev->list);
4473         write_unlock_irqrestore(&cm.device_lock, flags);
4474
4475         spin_lock_irq(&cm.lock);
4476         cm_dev->going_down = 1;
4477         spin_unlock_irq(&cm.lock);
4478
4479         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4480                 if (!rdma_cap_ib_cm(ib_device, i))
4481                         continue;
4482
4483                 port = cm_dev->port[i-1];
4484                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4485                 /* Mark all the cm_ids on this port as not ready to send */
4486                 spin_lock_irq(&cm.lock);
4487                 list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4488                         cm_id_priv->altr_send_port_not_ready = 1;
4489                 list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4490                         cm_id_priv->prim_send_port_not_ready = 1;
4491                 spin_unlock_irq(&cm.lock);
4492                 /*
4493                  * Flush the workqueue after going_down has been set; this
4494                  * ensures that no new work can be queued by the recv
4495                  * handler, after which it is safe to unregister the mad agent.
4496                  */
4497                 flush_workqueue(cm.wq);
4498                 spin_lock_irq(&cm.state_lock);
4499                 cur_mad_agent = port->mad_agent;
4500                 port->mad_agent = NULL;
4501                 spin_unlock_irq(&cm.state_lock);
4502                 ib_unregister_mad_agent(cur_mad_agent);
4503                 cm_remove_port_fs(port);
4504         }
4505
4506         device_unregister(cm_dev->device);
4507         kfree(cm_dev);
4508 }
4509
4510 static int __init ib_cm_init(void)
4511 {
4512         int ret;
4513
4514         memset(&cm, 0, sizeof cm);
4515         INIT_LIST_HEAD(&cm.device_list);
4516         rwlock_init(&cm.device_lock);
4517         spin_lock_init(&cm.lock);
4518         spin_lock_init(&cm.state_lock);
4519         cm.listen_service_table = RB_ROOT;
4520         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4521         cm.remote_id_table = RB_ROOT;
4522         cm.remote_qp_table = RB_ROOT;
4523         cm.remote_sidr_table = RB_ROOT;
4524         idr_init(&cm.local_id_table);
4525         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4526         INIT_LIST_HEAD(&cm.timewait_list);
4527
4528         ret = class_register(&cm_class);
4529         if (ret) {
4530                 /* propagate class_register()'s error code */
4531                 goto error1;
4532         }
4533
4534         cm.wq = alloc_workqueue("ib_cm", 0, 1);
4535         if (!cm.wq) {
4536                 ret = -ENOMEM;
4537                 goto error2;
4538         }
4539
4540         ret = ib_register_client(&cm_client);
4541         if (ret)
4542                 goto error3;
4543
4544         return 0;
4545 error3:
4546         destroy_workqueue(cm.wq);
4547 error2:
4548         class_unregister(&cm_class);
4549 error1:
4550         idr_destroy(&cm.local_id_table);
4551         return ret;
4552 }
4553
4554 static void __exit ib_cm_cleanup(void)
4555 {
4556         struct cm_timewait_info *timewait_info, *tmp;
4557
4558         spin_lock_irq(&cm.lock);
4559         list_for_each_entry(timewait_info, &cm.timewait_list, list)
4560                 cancel_delayed_work(&timewait_info->work.work);
4561         spin_unlock_irq(&cm.lock);
4562
4563         ib_unregister_client(&cm_client);
4564         destroy_workqueue(cm.wq);
4565
4566         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4567                 list_del(&timewait_info->list);
4568                 kfree(timewait_info);
4569         }
4570
4571         class_unregister(&cm_class);
4572         idr_destroy(&cm.local_id_table);
4573 }
4574
4575 module_init(ib_cm_init);
4576 module_exit(ib_cm_cleanup);
4577