GNU Linux-libre 4.4.288-gnu1
drivers/infiniband/core/uverbs_cmd.c
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
4  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
5  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/file.h>
37 #include <linux/fs.h>
38 #include <linux/slab.h>
39 #include <linux/sched.h>
40
41 #include <asm/uaccess.h>
42
43 #include "uverbs.h"
44 #include "core_priv.h"
45
46 struct uverbs_lock_class {
47         struct lock_class_key   key;
48         char                    name[16];
49 };
50
51 static struct uverbs_lock_class pd_lock_class   = { .name = "PD-uobj" };
52 static struct uverbs_lock_class mr_lock_class   = { .name = "MR-uobj" };
53 static struct uverbs_lock_class mw_lock_class   = { .name = "MW-uobj" };
54 static struct uverbs_lock_class cq_lock_class   = { .name = "CQ-uobj" };
55 static struct uverbs_lock_class qp_lock_class   = { .name = "QP-uobj" };
56 static struct uverbs_lock_class ah_lock_class   = { .name = "AH-uobj" };
57 static struct uverbs_lock_class srq_lock_class  = { .name = "SRQ-uobj" };
58 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
59 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
60
61 /*
62  * The ib_uobject locking scheme is as follows:
63  *
64  * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
65  *   needs to be held during all idr write operations.  When an object is
66  *   looked up, a reference must be taken on the object's kref before
67  *   dropping this lock.  For read operations, rcu_read_lock() and
68  *   rcu_read_unlock() are used instead, but similarly the kref
69  *   reference must be grabbed before the rcu_read_unlock().
70  *
71  * - Each object also has an rwsem.  This rwsem must be held for
72  *   reading while an operation that uses the object is performed.
73  *   For example, while registering an MR, the associated PD's
74  *   uobject.mutex must be held for reading.  The rwsem must be held
75  *   for writing while initializing or destroying an object.
76  *
77  * - In addition, each object has a "live" flag.  If this flag is not
78  *   set, then lookups of the object will fail even if it is found in
79  *   the idr.  This handles a reader that blocks and does not acquire
80  *   the rwsem until after the object is destroyed.  The destroy
81  *   operation will set the live flag to 0 and then drop the rwsem;
82  *   this will allow the reader to acquire the rwsem, see that the
83  *   live flag is 0, and then drop the rwsem and its reference to
84  *   the object.  The underlying storage will not be freed until the last
85  *   reference to the object is dropped.
86  */
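/*
 * A typical command handler therefore looks an object up with one of the
 * idr_read_*()/idr_write_*() helpers below, uses it, and releases both
 * the rwsem and the kref through the matching put_*() helper, e.g.:
 *
 *	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
 *	if (!pd)
 *		return -EINVAL;
 *	...use pd...
 *	put_pd_read(pd);
 */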
87
88 static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
89                       struct ib_ucontext *context, struct uverbs_lock_class *c)
90 {
91         uobj->user_handle = user_handle;
92         uobj->context     = context;
93         kref_init(&uobj->ref);
94         init_rwsem(&uobj->mutex);
95         lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
96         uobj->live        = 0;
97 }
98
99 static void release_uobj(struct kref *kref)
100 {
101         kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
102 }
103
104 static void put_uobj(struct ib_uobject *uobj)
105 {
106         kref_put(&uobj->ref, release_uobj);
107 }
108
109 static void put_uobj_read(struct ib_uobject *uobj)
110 {
111         up_read(&uobj->mutex);
112         put_uobj(uobj);
113 }
114
115 static void put_uobj_write(struct ib_uobject *uobj)
116 {
117         up_write(&uobj->mutex);
118         put_uobj(uobj);
119 }
120
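/*
 * Allocate an idr entry for @uobj.  The allocation itself must happen
 * under ib_uverbs_idr_lock, so the idr is preloaded with GFP_KERNEL
 * first and the locked idr_alloc() uses GFP_NOWAIT.  On success the new
 * id is stored in uobj->id.
 */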
121 static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
122 {
123         int ret;
124
125         idr_preload(GFP_KERNEL);
126         spin_lock(&ib_uverbs_idr_lock);
127
128         ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
129         if (ret >= 0)
130                 uobj->id = ret;
131
132         spin_unlock(&ib_uverbs_idr_lock);
133         idr_preload_end();
134
135         return ret < 0 ? ret : 0;
136 }
137
138 void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
139 {
140         spin_lock(&ib_uverbs_idr_lock);
141         idr_remove(idr, uobj->id);
142         spin_unlock(&ib_uverbs_idr_lock);
143 }
144
145 static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
146                                          struct ib_ucontext *context)
147 {
148         struct ib_uobject *uobj;
149
150         rcu_read_lock();
151         uobj = idr_find(idr, id);
152         if (uobj) {
153                 if (uobj->context == context)
154                         kref_get(&uobj->ref);
155                 else
156                         uobj = NULL;
157         }
158         rcu_read_unlock();
159
160         return uobj;
161 }
162
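/*
 * Look up a live uobject and return it with its rwsem held for reading
 * and a kref reference taken.  Returns NULL if the id is not found, the
 * object belongs to a different ucontext, or the object is no longer
 * live.  The caller must release it with put_uobj_read().
 */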
163 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
164                                         struct ib_ucontext *context, int nested)
165 {
166         struct ib_uobject *uobj;
167
168         uobj = __idr_get_uobj(idr, id, context);
169         if (!uobj)
170                 return NULL;
171
172         if (nested)
173                 down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
174         else
175                 down_read(&uobj->mutex);
176         if (!uobj->live) {
177                 put_uobj_read(uobj);
178                 return NULL;
179         }
180
181         return uobj;
182 }
183
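/*
 * Same as idr_read_uobj(), but the rwsem is taken for writing; used on
 * destroy paths.  The caller must release the object with
 * put_uobj_write().
 */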
184 static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
185                                          struct ib_ucontext *context)
186 {
187         struct ib_uobject *uobj;
188
189         uobj = __idr_get_uobj(idr, id, context);
190         if (!uobj)
191                 return NULL;
192
193         down_write(&uobj->mutex);
194         if (!uobj->live) {
195                 put_uobj_write(uobj);
196                 return NULL;
197         }
198
199         return uobj;
200 }
201
202 static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
203                           int nested)
204 {
205         struct ib_uobject *uobj;
206
207         uobj = idr_read_uobj(idr, id, context, nested);
208         return uobj ? uobj->object : NULL;
209 }
210
211 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
212 {
213         return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
214 }
215
216 static void put_pd_read(struct ib_pd *pd)
217 {
218         put_uobj_read(pd->uobject);
219 }
220
221 static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
222 {
223         return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
224 }
225
226 static void put_cq_read(struct ib_cq *cq)
227 {
228         put_uobj_read(cq->uobject);
229 }
230
231 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
232 {
233         return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
234 }
235
236 static void put_ah_read(struct ib_ah *ah)
237 {
238         put_uobj_read(ah->uobject);
239 }
240
241 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
242 {
243         return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
244 }
245
246 static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
247 {
248         struct ib_uobject *uobj;
249
250         uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
251         return uobj ? uobj->object : NULL;
252 }
253
254 static void put_qp_read(struct ib_qp *qp)
255 {
256         put_uobj_read(qp->uobject);
257 }
258
259 static void put_qp_write(struct ib_qp *qp)
260 {
261         put_uobj_write(qp->uobject);
262 }
263
264 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
265 {
266         return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
267 }
268
269 static void put_srq_read(struct ib_srq *srq)
270 {
271         put_uobj_read(srq->uobject);
272 }
273
274 static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
275                                      struct ib_uobject **uobj)
276 {
277         *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
278         return *uobj ? (*uobj)->object : NULL;
279 }
280
281 static void put_xrcd_read(struct ib_uobject *uobj)
282 {
283         put_uobj_read(uobj);
284 }
285
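/*
 * GET_CONTEXT must be the first command issued on a uverbs file: it
 * allocates the ib_ucontext through the driver, sets up the per-context
 * object lists, and installs a new fd for the async event file.  A
 * second call on the same file fails with -EINVAL.
 */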
286 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
287                               struct ib_device *ib_dev,
288                               const char __user *buf,
289                               int in_len, int out_len)
290 {
291         struct ib_uverbs_get_context      cmd;
292         struct ib_uverbs_get_context_resp resp;
293         struct ib_udata                   udata;
294 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
295         struct ib_device_attr             dev_attr;
296 #endif
297         struct ib_ucontext               *ucontext;
298         struct file                      *filp;
299         int ret;
300
301         if (out_len < sizeof resp)
302                 return -ENOSPC;
303
304         if (copy_from_user(&cmd, buf, sizeof cmd))
305                 return -EFAULT;
306
307         mutex_lock(&file->mutex);
308
309         if (file->ucontext) {
310                 ret = -EINVAL;
311                 goto err;
312         }
313
314         INIT_UDATA(&udata, buf + sizeof cmd,
315                    (unsigned long) cmd.response + sizeof resp,
316                    in_len - sizeof cmd, out_len - sizeof resp);
317
318         ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
319         if (IS_ERR(ucontext)) {
320                 ret = PTR_ERR(ucontext);
321                 goto err;
322         }
323
324         ucontext->device = ib_dev;
325         INIT_LIST_HEAD(&ucontext->pd_list);
326         INIT_LIST_HEAD(&ucontext->mr_list);
327         INIT_LIST_HEAD(&ucontext->mw_list);
328         INIT_LIST_HEAD(&ucontext->cq_list);
329         INIT_LIST_HEAD(&ucontext->qp_list);
330         INIT_LIST_HEAD(&ucontext->srq_list);
331         INIT_LIST_HEAD(&ucontext->ah_list);
332         INIT_LIST_HEAD(&ucontext->xrcd_list);
333         INIT_LIST_HEAD(&ucontext->rule_list);
334         rcu_read_lock();
335         ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
336         rcu_read_unlock();
337         ucontext->closing = 0;
338
339 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
340         ucontext->umem_tree = RB_ROOT;
341         init_rwsem(&ucontext->umem_rwsem);
342         ucontext->odp_mrs_count = 0;
343         INIT_LIST_HEAD(&ucontext->no_private_counters);
344
345         ret = ib_query_device(ib_dev, &dev_attr);
346         if (ret)
347                 goto err_free;
348         if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
349                 ucontext->invalidate_range = NULL;
350
351 #endif
352
353         resp.num_comp_vectors = file->device->num_comp_vectors;
354
355         ret = get_unused_fd_flags(O_CLOEXEC);
356         if (ret < 0)
357                 goto err_free;
358         resp.async_fd = ret;
359
360         filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
361         if (IS_ERR(filp)) {
362                 ret = PTR_ERR(filp);
363                 goto err_fd;
364         }
365
366         if (copy_to_user((void __user *) (unsigned long) cmd.response,
367                          &resp, sizeof resp)) {
368                 ret = -EFAULT;
369                 goto err_file;
370         }
371
372         file->ucontext = ucontext;
373
374         fd_install(resp.async_fd, filp);
375
376         mutex_unlock(&file->mutex);
377
378         return in_len;
379
380 err_file:
381         ib_uverbs_free_async_event_file(file);
382         fput(filp);
383
384 err_fd:
385         put_unused_fd(resp.async_fd);
386
387 err_free:
388         put_pid(ucontext->tgid);
389         ib_dev->dealloc_ucontext(ucontext);
390
391 err:
392         mutex_unlock(&file->mutex);
393         return ret;
394 }
395
396 static void copy_query_dev_fields(struct ib_uverbs_file *file,
397                                   struct ib_device *ib_dev,
398                                   struct ib_uverbs_query_device_resp *resp,
399                                   struct ib_device_attr *attr)
400 {
401         resp->fw_ver            = attr->fw_ver;
402         resp->node_guid         = ib_dev->node_guid;
403         resp->sys_image_guid    = attr->sys_image_guid;
404         resp->max_mr_size       = attr->max_mr_size;
405         resp->page_size_cap     = attr->page_size_cap;
406         resp->vendor_id         = attr->vendor_id;
407         resp->vendor_part_id    = attr->vendor_part_id;
408         resp->hw_ver            = attr->hw_ver;
409         resp->max_qp            = attr->max_qp;
410         resp->max_qp_wr         = attr->max_qp_wr;
411         resp->device_cap_flags  = attr->device_cap_flags;
412         resp->max_sge           = attr->max_sge;
413         resp->max_sge_rd        = attr->max_sge_rd;
414         resp->max_cq            = attr->max_cq;
415         resp->max_cqe           = attr->max_cqe;
416         resp->max_mr            = attr->max_mr;
417         resp->max_pd            = attr->max_pd;
418         resp->max_qp_rd_atom    = attr->max_qp_rd_atom;
419         resp->max_ee_rd_atom    = attr->max_ee_rd_atom;
420         resp->max_res_rd_atom   = attr->max_res_rd_atom;
421         resp->max_qp_init_rd_atom       = attr->max_qp_init_rd_atom;
422         resp->max_ee_init_rd_atom       = attr->max_ee_init_rd_atom;
423         resp->atomic_cap                = attr->atomic_cap;
424         resp->max_ee                    = attr->max_ee;
425         resp->max_rdd                   = attr->max_rdd;
426         resp->max_mw                    = attr->max_mw;
427         resp->max_raw_ipv6_qp           = attr->max_raw_ipv6_qp;
428         resp->max_raw_ethy_qp           = attr->max_raw_ethy_qp;
429         resp->max_mcast_grp             = attr->max_mcast_grp;
430         resp->max_mcast_qp_attach       = attr->max_mcast_qp_attach;
431         resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
432         resp->max_ah                    = attr->max_ah;
433         resp->max_fmr                   = attr->max_fmr;
434         resp->max_map_per_fmr           = attr->max_map_per_fmr;
435         resp->max_srq                   = attr->max_srq;
436         resp->max_srq_wr                = attr->max_srq_wr;
437         resp->max_srq_sge               = attr->max_srq_sge;
438         resp->max_pkeys                 = attr->max_pkeys;
439         resp->local_ca_ack_delay        = attr->local_ca_ack_delay;
440         resp->phys_port_cnt             = ib_dev->phys_port_cnt;
441 }
442
443 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
444                                struct ib_device *ib_dev,
445                                const char __user *buf,
446                                int in_len, int out_len)
447 {
448         struct ib_uverbs_query_device      cmd;
449         struct ib_uverbs_query_device_resp resp;
450         struct ib_device_attr              attr;
451         int                                ret;
452
453         if (out_len < sizeof resp)
454                 return -ENOSPC;
455
456         if (copy_from_user(&cmd, buf, sizeof cmd))
457                 return -EFAULT;
458
459         ret = ib_query_device(ib_dev, &attr);
460         if (ret)
461                 return ret;
462
463         memset(&resp, 0, sizeof resp);
464         copy_query_dev_fields(file, ib_dev, &resp, &attr);
465
466         if (copy_to_user((void __user *) (unsigned long) cmd.response,
467                          &resp, sizeof resp))
468                 return -EFAULT;
469
470         return in_len;
471 }
472
473 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
474                              struct ib_device *ib_dev,
475                              const char __user *buf,
476                              int in_len, int out_len)
477 {
478         struct ib_uverbs_query_port      cmd;
479         struct ib_uverbs_query_port_resp resp;
480         struct ib_port_attr              attr;
481         int                              ret;
482
483         if (out_len < sizeof resp)
484                 return -ENOSPC;
485
486         if (copy_from_user(&cmd, buf, sizeof cmd))
487                 return -EFAULT;
488
489         ret = ib_query_port(ib_dev, cmd.port_num, &attr);
490         if (ret)
491                 return ret;
492
493         memset(&resp, 0, sizeof resp);
494
495         resp.state           = attr.state;
496         resp.max_mtu         = attr.max_mtu;
497         resp.active_mtu      = attr.active_mtu;
498         resp.gid_tbl_len     = attr.gid_tbl_len;
499         resp.port_cap_flags  = attr.port_cap_flags;
500         resp.max_msg_sz      = attr.max_msg_sz;
501         resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
502         resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
503         resp.pkey_tbl_len    = attr.pkey_tbl_len;
504         resp.lid             = attr.lid;
505         resp.sm_lid          = attr.sm_lid;
506         resp.lmc             = attr.lmc;
507         resp.max_vl_num      = attr.max_vl_num;
508         resp.sm_sl           = attr.sm_sl;
509         resp.subnet_timeout  = attr.subnet_timeout;
510         resp.init_type_reply = attr.init_type_reply;
511         resp.active_width    = attr.active_width;
512         resp.active_speed    = attr.active_speed;
513         resp.phys_state      = attr.phys_state;
514         resp.link_layer      = rdma_port_get_link_layer(ib_dev,
515                                                         cmd.port_num);
516
517         if (copy_to_user((void __user *) (unsigned long) cmd.response,
518                          &resp, sizeof resp))
519                 return -EFAULT;
520
521         return in_len;
522 }
523
524 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
525                            struct ib_device *ib_dev,
526                            const char __user *buf,
527                            int in_len, int out_len)
528 {
529         struct ib_uverbs_alloc_pd      cmd;
530         struct ib_uverbs_alloc_pd_resp resp;
531         struct ib_udata                udata;
532         struct ib_uobject             *uobj;
533         struct ib_pd                  *pd;
534         int                            ret;
535
536         if (out_len < sizeof resp)
537                 return -ENOSPC;
538
539         if (copy_from_user(&cmd, buf, sizeof cmd))
540                 return -EFAULT;
541
542         INIT_UDATA(&udata, buf + sizeof cmd,
543                    (unsigned long) cmd.response + sizeof resp,
544                    in_len - sizeof cmd, out_len - sizeof resp);
545
546         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
547         if (!uobj)
548                 return -ENOMEM;
549
550         init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
551         down_write(&uobj->mutex);
552
553         pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
554         if (IS_ERR(pd)) {
555                 ret = PTR_ERR(pd);
556                 goto err;
557         }
558
559         pd->device  = ib_dev;
560         pd->uobject = uobj;
561         pd->local_mr = NULL;
562         atomic_set(&pd->usecnt, 0);
563
564         uobj->object = pd;
565         ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
566         if (ret)
567                 goto err_idr;
568
569         memset(&resp, 0, sizeof resp);
570         resp.pd_handle = uobj->id;
571
572         if (copy_to_user((void __user *) (unsigned long) cmd.response,
573                          &resp, sizeof resp)) {
574                 ret = -EFAULT;
575                 goto err_copy;
576         }
577
578         mutex_lock(&file->mutex);
579         list_add_tail(&uobj->list, &file->ucontext->pd_list);
580         mutex_unlock(&file->mutex);
581
582         uobj->live = 1;
583
584         up_write(&uobj->mutex);
585
586         return in_len;
587
588 err_copy:
589         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
590
591 err_idr:
592         ib_dealloc_pd(pd);
593
594 err:
595         put_uobj_write(uobj);
596         return ret;
597 }
598
599 ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
600                              struct ib_device *ib_dev,
601                              const char __user *buf,
602                              int in_len, int out_len)
603 {
604         struct ib_uverbs_dealloc_pd cmd;
605         struct ib_uobject          *uobj;
606         struct ib_pd               *pd;
607         int                         ret;
608
609         if (copy_from_user(&cmd, buf, sizeof cmd))
610                 return -EFAULT;
611
612         uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
613         if (!uobj)
614                 return -EINVAL;
615         pd = uobj->object;
616
617         if (atomic_read(&pd->usecnt)) {
618                 ret = -EBUSY;
619                 goto err_put;
620         }
621
622         ret = pd->device->dealloc_pd(uobj->object);
623         WARN_ONCE(ret, "InfiniBand HW driver failed dealloc_pd");
624         if (ret)
625                 goto err_put;
626
627         uobj->live = 0;
628         put_uobj_write(uobj);
629
630         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
631
632         mutex_lock(&file->mutex);
633         list_del(&uobj->list);
634         mutex_unlock(&file->mutex);
635
636         put_uobj(uobj);
637
638         return in_len;
639
640 err_put:
641         put_uobj_write(uobj);
642         return ret;
643 }
644
645 struct xrcd_table_entry {
646         struct rb_node  node;
647         struct ib_xrcd *xrcd;
648         struct inode   *inode;
649 };
650
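/*
 * XRCDs opened through the same inode are shared between processes.
 * Each uverbs device therefore keeps an rb-tree keyed by inode that maps
 * to the ib_xrcd; inserting an entry grabs a reference on the inode
 * (igrab), which is dropped again in xrcd_table_delete().
 */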
651 static int xrcd_table_insert(struct ib_uverbs_device *dev,
652                             struct inode *inode,
653                             struct ib_xrcd *xrcd)
654 {
655         struct xrcd_table_entry *entry, *scan;
656         struct rb_node **p = &dev->xrcd_tree.rb_node;
657         struct rb_node *parent = NULL;
658
659         entry = kmalloc(sizeof *entry, GFP_KERNEL);
660         if (!entry)
661                 return -ENOMEM;
662
663         entry->xrcd  = xrcd;
664         entry->inode = inode;
665
666         while (*p) {
667                 parent = *p;
668                 scan = rb_entry(parent, struct xrcd_table_entry, node);
669
670                 if (inode < scan->inode) {
671                         p = &(*p)->rb_left;
672                 } else if (inode > scan->inode) {
673                         p = &(*p)->rb_right;
674                 } else {
675                         kfree(entry);
676                         return -EEXIST;
677                 }
678         }
679
680         rb_link_node(&entry->node, parent, p);
681         rb_insert_color(&entry->node, &dev->xrcd_tree);
682         igrab(inode);
683         return 0;
684 }
685
686 static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
687                                                   struct inode *inode)
688 {
689         struct xrcd_table_entry *entry;
690         struct rb_node *p = dev->xrcd_tree.rb_node;
691
692         while (p) {
693                 entry = rb_entry(p, struct xrcd_table_entry, node);
694
695                 if (inode < entry->inode)
696                         p = p->rb_left;
697                 else if (inode > entry->inode)
698                         p = p->rb_right;
699                 else
700                         return entry;
701         }
702
703         return NULL;
704 }
705
706 static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
707 {
708         struct xrcd_table_entry *entry;
709
710         entry = xrcd_table_search(dev, inode);
711         if (!entry)
712                 return NULL;
713
714         return entry->xrcd;
715 }
716
717 static void xrcd_table_delete(struct ib_uverbs_device *dev,
718                               struct inode *inode)
719 {
720         struct xrcd_table_entry *entry;
721
722         entry = xrcd_table_search(dev, inode);
723         if (entry) {
724                 iput(inode);
725                 rb_erase(&entry->node, &dev->xrcd_tree);
726                 kfree(entry);
727         }
728 }
729
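/*
 * Open (or create) an XRC domain.  If cmd.fd is not -1 it refers to a
 * file whose inode identifies the XRCD to share; O_CREAT and O_EXCL in
 * cmd.oflags behave much like they do for open(2).  With cmd.fd == -1 a
 * private, unshared XRCD is allocated.
 */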
730 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
731                             struct ib_device *ib_dev,
732                             const char __user *buf, int in_len,
733                             int out_len)
734 {
735         struct ib_uverbs_open_xrcd      cmd;
736         struct ib_uverbs_open_xrcd_resp resp;
737         struct ib_udata                 udata;
738         struct ib_uxrcd_object         *obj;
739         struct ib_xrcd                 *xrcd = NULL;
740         struct fd                       f = {NULL, 0};
741         struct inode                   *inode = NULL;
742         int                             ret = 0;
743         int                             new_xrcd = 0;
744
745         if (out_len < sizeof resp)
746                 return -ENOSPC;
747
748         if (copy_from_user(&cmd, buf, sizeof cmd))
749                 return -EFAULT;
750
751         INIT_UDATA(&udata, buf + sizeof cmd,
752                    (unsigned long) cmd.response + sizeof resp,
753                    in_len - sizeof cmd, out_len - sizeof resp);
754
755         mutex_lock(&file->device->xrcd_tree_mutex);
756
757         if (cmd.fd != -1) {
758                 /* resolve the fd to an inode and look for an existing XRCD */
759                 f = fdget(cmd.fd);
760                 if (!f.file) {
761                         ret = -EBADF;
762                         goto err_tree_mutex_unlock;
763                 }
764
765                 inode = file_inode(f.file);
766                 xrcd = find_xrcd(file->device, inode);
767                 if (!xrcd && !(cmd.oflags & O_CREAT)) {
768                         /* no XRCD for this inode and O_CREAT was not requested */
769                         ret = -EAGAIN;
770                         goto err_tree_mutex_unlock;
771                 }
772
773                 if (xrcd && cmd.oflags & O_EXCL) {
774                         ret = -EINVAL;
775                         goto err_tree_mutex_unlock;
776                 }
777         }
778
779         obj = kmalloc(sizeof *obj, GFP_KERNEL);
780         if (!obj) {
781                 ret = -ENOMEM;
782                 goto err_tree_mutex_unlock;
783         }
784
785         init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
786
787         down_write(&obj->uobject.mutex);
788
789         if (!xrcd) {
790                 xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
791                 if (IS_ERR(xrcd)) {
792                         ret = PTR_ERR(xrcd);
793                         goto err;
794                 }
795
796                 xrcd->inode   = inode;
797                 xrcd->device  = ib_dev;
798                 atomic_set(&xrcd->usecnt, 0);
799                 mutex_init(&xrcd->tgt_qp_mutex);
800                 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
801                 new_xrcd = 1;
802         }
803
804         atomic_set(&obj->refcnt, 0);
805         obj->uobject.object = xrcd;
806         ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
807         if (ret)
808                 goto err_idr;
809
810         memset(&resp, 0, sizeof resp);
811         resp.xrcd_handle = obj->uobject.id;
812
813         if (inode) {
814                 if (new_xrcd) {
815                         /* create new inode/xrcd table entry */
816                         ret = xrcd_table_insert(file->device, inode, xrcd);
817                         if (ret)
818                                 goto err_insert_xrcd;
819                 }
820                 atomic_inc(&xrcd->usecnt);
821         }
822
823         if (copy_to_user((void __user *) (unsigned long) cmd.response,
824                          &resp, sizeof resp)) {
825                 ret = -EFAULT;
826                 goto err_copy;
827         }
828
829         if (f.file)
830                 fdput(f);
831
832         mutex_lock(&file->mutex);
833         list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
834         mutex_unlock(&file->mutex);
835
836         obj->uobject.live = 1;
837         up_write(&obj->uobject.mutex);
838
839         mutex_unlock(&file->device->xrcd_tree_mutex);
840         return in_len;
841
842 err_copy:
843         if (inode) {
844                 if (new_xrcd)
845                         xrcd_table_delete(file->device, inode);
846                 atomic_dec(&xrcd->usecnt);
847         }
848
849 err_insert_xrcd:
850         idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
851
852 err_idr:
853         ib_dealloc_xrcd(xrcd);
854
855 err:
856         put_uobj_write(&obj->uobject);
857
858 err_tree_mutex_unlock:
859         if (f.file)
860                 fdput(f);
861
862         mutex_unlock(&file->device->xrcd_tree_mutex);
863
864         return ret;
865 }
866
867 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
868                              struct ib_device *ib_dev,
869                              const char __user *buf, int in_len,
870                              int out_len)
871 {
872         struct ib_uverbs_close_xrcd cmd;
873         struct ib_uobject           *uobj;
874         struct ib_xrcd              *xrcd = NULL;
875         struct inode                *inode = NULL;
876         struct ib_uxrcd_object      *obj;
877         int                         live;
878         int                         ret = 0;
879
880         if (copy_from_user(&cmd, buf, sizeof cmd))
881                 return -EFAULT;
882
883         mutex_lock(&file->device->xrcd_tree_mutex);
884         uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
885         if (!uobj) {
886                 ret = -EINVAL;
887                 goto out;
888         }
889
890         xrcd  = uobj->object;
891         inode = xrcd->inode;
892         obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
893         if (atomic_read(&obj->refcnt)) {
894                 put_uobj_write(uobj);
895                 ret = -EBUSY;
896                 goto out;
897         }
898
899         if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
900                 ret = ib_dealloc_xrcd(uobj->object);
901                 if (!ret)
902                         uobj->live = 0;
903         }
904
905         live = uobj->live;
906         if (inode && ret)
907                 atomic_inc(&xrcd->usecnt);
908
909         put_uobj_write(uobj);
910
911         if (ret)
912                 goto out;
913
914         if (inode && !live)
915                 xrcd_table_delete(file->device, inode);
916
917         idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
918         mutex_lock(&file->mutex);
919         list_del(&uobj->list);
920         mutex_unlock(&file->mutex);
921
922         put_uobj(uobj);
923         ret = in_len;
924
925 out:
926         mutex_unlock(&file->device->xrcd_tree_mutex);
927         return ret;
928 }
929
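/*
 * Drop one usecnt reference on an XRCD during uverbs cleanup; when the
 * last user goes away the XRCD is deallocated and, if it was backed by
 * an inode, its table entry is removed as well.
 */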
930 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
931                             struct ib_xrcd *xrcd)
932 {
933         struct inode *inode;
934
935         inode = xrcd->inode;
936         if (inode && !atomic_dec_and_test(&xrcd->usecnt))
937                 return;
938
939         ib_dealloc_xrcd(xrcd);
940
941         if (inode)
942                 xrcd_table_delete(dev, inode);
943 }
944
945 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
946                          struct ib_device *ib_dev,
947                          const char __user *buf, int in_len,
948                          int out_len)
949 {
950         struct ib_uverbs_reg_mr      cmd;
951         struct ib_uverbs_reg_mr_resp resp;
952         struct ib_udata              udata;
953         struct ib_uobject           *uobj;
954         struct ib_pd                *pd;
955         struct ib_mr                *mr;
956         int                          ret;
957
958         if (out_len < sizeof resp)
959                 return -ENOSPC;
960
961         if (copy_from_user(&cmd, buf, sizeof cmd))
962                 return -EFAULT;
963
964         INIT_UDATA(&udata, buf + sizeof cmd,
965                    (unsigned long) cmd.response + sizeof resp,
966                    in_len - sizeof cmd, out_len - sizeof resp);
967
968         if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
969                 return -EINVAL;
970
971         ret = ib_check_mr_access(cmd.access_flags);
972         if (ret)
973                 return ret;
974
975         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
976         if (!uobj)
977                 return -ENOMEM;
978
979         init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
980         down_write(&uobj->mutex);
981
982         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
983         if (!pd) {
984                 ret = -EINVAL;
985                 goto err_free;
986         }
987
988         if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
989                 struct ib_device_attr attr;
990
991                 ret = ib_query_device(pd->device, &attr);
992                 if (ret || !(attr.device_cap_flags &
993                                 IB_DEVICE_ON_DEMAND_PAGING)) {
994                         pr_debug("ODP support not available\n");
995                         ret = -EINVAL;
996                         goto err_put;
997                 }
998         }
999
1000         mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
1001                                      cmd.access_flags, &udata);
1002         if (IS_ERR(mr)) {
1003                 ret = PTR_ERR(mr);
1004                 goto err_put;
1005         }
1006
1007         mr->device  = pd->device;
1008         mr->pd      = pd;
1009         mr->uobject = uobj;
1010         atomic_inc(&pd->usecnt);
1011         atomic_set(&mr->usecnt, 0);
1012
1013         uobj->object = mr;
1014         ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
1015         if (ret)
1016                 goto err_unreg;
1017
1018         memset(&resp, 0, sizeof resp);
1019         resp.lkey      = mr->lkey;
1020         resp.rkey      = mr->rkey;
1021         resp.mr_handle = uobj->id;
1022
1023         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1024                          &resp, sizeof resp)) {
1025                 ret = -EFAULT;
1026                 goto err_copy;
1027         }
1028
1029         put_pd_read(pd);
1030
1031         mutex_lock(&file->mutex);
1032         list_add_tail(&uobj->list, &file->ucontext->mr_list);
1033         mutex_unlock(&file->mutex);
1034
1035         uobj->live = 1;
1036
1037         up_write(&uobj->mutex);
1038
1039         return in_len;
1040
1041 err_copy:
1042         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1043
1044 err_unreg:
1045         ib_dereg_mr(mr);
1046
1047 err_put:
1048         put_pd_read(pd);
1049
1050 err_free:
1051         put_uobj_write(uobj);
1052         return ret;
1053 }
1054
1055 ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
1056                            struct ib_device *ib_dev,
1057                            const char __user *buf, int in_len,
1058                            int out_len)
1059 {
1060         struct ib_uverbs_rereg_mr      cmd;
1061         struct ib_uverbs_rereg_mr_resp resp;
1062         struct ib_udata              udata;
1063         struct ib_pd                *pd = NULL;
1064         struct ib_mr                *mr;
1065         struct ib_pd                *old_pd;
1066         int                          ret;
1067         struct ib_uobject           *uobj;
1068
1069         if (out_len < sizeof(resp))
1070                 return -ENOSPC;
1071
1072         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1073                 return -EFAULT;
1074
1075         INIT_UDATA(&udata, buf + sizeof(cmd),
1076                    (unsigned long) cmd.response + sizeof(resp),
1077                    in_len - sizeof(cmd), out_len - sizeof(resp));
1078
1079         if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
1080                 return -EINVAL;
1081
1082         if ((cmd.flags & IB_MR_REREG_TRANS) &&
1083             (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
1084              (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
1085                         return -EINVAL;
1086
1087         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
1088                               file->ucontext);
1089
1090         if (!uobj)
1091                 return -EINVAL;
1092
1093         mr = uobj->object;
1094
1095         if (cmd.flags & IB_MR_REREG_ACCESS) {
1096                 ret = ib_check_mr_access(cmd.access_flags);
1097                 if (ret)
1098                         goto put_uobjs;
1099         }
1100
1101         if (cmd.flags & IB_MR_REREG_PD) {
1102                 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1103                 if (!pd) {
1104                         ret = -EINVAL;
1105                         goto put_uobjs;
1106                 }
1107         }
1108
1109         if (atomic_read(&mr->usecnt)) {
1110                 ret = -EBUSY;
1111                 goto put_uobj_pd;
1112         }
1113
1114         old_pd = mr->pd;
1115         ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
1116                                         cmd.length, cmd.hca_va,
1117                                         cmd.access_flags, pd, &udata);
1118         if (!ret) {
1119                 if (cmd.flags & IB_MR_REREG_PD) {
1120                         atomic_inc(&pd->usecnt);
1121                         mr->pd = pd;
1122                         atomic_dec(&old_pd->usecnt);
1123                 }
1124         } else {
1125                 goto put_uobj_pd;
1126         }
1127
1128         memset(&resp, 0, sizeof(resp));
1129         resp.lkey      = mr->lkey;
1130         resp.rkey      = mr->rkey;
1131
1132         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1133                          &resp, sizeof(resp)))
1134                 ret = -EFAULT;
1135         else
1136                 ret = in_len;
1137
1138 put_uobj_pd:
1139         if (cmd.flags & IB_MR_REREG_PD)
1140                 put_pd_read(pd);
1141
1142 put_uobjs:
1143
1144         put_uobj_write(mr->uobject);
1145
1146         return ret;
1147 }
1148
1149 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
1150                            struct ib_device *ib_dev,
1151                            const char __user *buf, int in_len,
1152                            int out_len)
1153 {
1154         struct ib_uverbs_dereg_mr cmd;
1155         struct ib_mr             *mr;
1156         struct ib_uobject        *uobj;
1157         int                       ret = -EINVAL;
1158
1159         if (copy_from_user(&cmd, buf, sizeof cmd))
1160                 return -EFAULT;
1161
1162         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
1163         if (!uobj)
1164                 return -EINVAL;
1165
1166         mr = uobj->object;
1167
1168         ret = ib_dereg_mr(mr);
1169         if (!ret)
1170                 uobj->live = 0;
1171
1172         put_uobj_write(uobj);
1173
1174         if (ret)
1175                 return ret;
1176
1177         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1178
1179         mutex_lock(&file->mutex);
1180         list_del(&uobj->list);
1181         mutex_unlock(&file->mutex);
1182
1183         put_uobj(uobj);
1184
1185         return in_len;
1186 }
1187
1188 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
1189                            struct ib_device *ib_dev,
1190                            const char __user *buf, int in_len,
1191                            int out_len)
1192 {
1193         struct ib_uverbs_alloc_mw      cmd;
1194         struct ib_uverbs_alloc_mw_resp resp;
1195         struct ib_uobject             *uobj;
1196         struct ib_pd                  *pd;
1197         struct ib_mw                  *mw;
1198         int                            ret;
1199
1200         if (out_len < sizeof(resp))
1201                 return -ENOSPC;
1202
1203         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1204                 return -EFAULT;
1205
1206         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
1207         if (!uobj)
1208                 return -ENOMEM;
1209
1210         init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
1211         down_write(&uobj->mutex);
1212
1213         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1214         if (!pd) {
1215                 ret = -EINVAL;
1216                 goto err_free;
1217         }
1218
1219         mw = pd->device->alloc_mw(pd, cmd.mw_type);
1220         if (IS_ERR(mw)) {
1221                 ret = PTR_ERR(mw);
1222                 goto err_put;
1223         }
1224
1225         mw->device  = pd->device;
1226         mw->pd      = pd;
1227         mw->uobject = uobj;
1228         atomic_inc(&pd->usecnt);
1229
1230         uobj->object = mw;
1231         ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
1232         if (ret)
1233                 goto err_unalloc;
1234
1235         memset(&resp, 0, sizeof(resp));
1236         resp.rkey      = mw->rkey;
1237         resp.mw_handle = uobj->id;
1238
1239         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1240                          &resp, sizeof(resp))) {
1241                 ret = -EFAULT;
1242                 goto err_copy;
1243         }
1244
1245         put_pd_read(pd);
1246
1247         mutex_lock(&file->mutex);
1248         list_add_tail(&uobj->list, &file->ucontext->mw_list);
1249         mutex_unlock(&file->mutex);
1250
1251         uobj->live = 1;
1252
1253         up_write(&uobj->mutex);
1254
1255         return in_len;
1256
1257 err_copy:
1258         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1259
1260 err_unalloc:
1261         ib_dealloc_mw(mw);
1262
1263 err_put:
1264         put_pd_read(pd);
1265
1266 err_free:
1267         put_uobj_write(uobj);
1268         return ret;
1269 }
1270
1271 ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
1272                              struct ib_device *ib_dev,
1273                              const char __user *buf, int in_len,
1274                              int out_len)
1275 {
1276         struct ib_uverbs_dealloc_mw cmd;
1277         struct ib_mw               *mw;
1278         struct ib_uobject          *uobj;
1279         int                         ret = -EINVAL;
1280
1281         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1282                 return -EFAULT;
1283
1284         uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
1285         if (!uobj)
1286                 return -EINVAL;
1287
1288         mw = uobj->object;
1289
1290         ret = ib_dealloc_mw(mw);
1291         if (!ret)
1292                 uobj->live = 0;
1293
1294         put_uobj_write(uobj);
1295
1296         if (ret)
1297                 return ret;
1298
1299         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1300
1301         mutex_lock(&file->mutex);
1302         list_del(&uobj->list);
1303         mutex_unlock(&file->mutex);
1304
1305         put_uobj(uobj);
1306
1307         return in_len;
1308 }
1309
1310 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
1311                                       struct ib_device *ib_dev,
1312                                       const char __user *buf, int in_len,
1313                                       int out_len)
1314 {
1315         struct ib_uverbs_create_comp_channel       cmd;
1316         struct ib_uverbs_create_comp_channel_resp  resp;
1317         struct file                               *filp;
1318         int ret;
1319
1320         if (out_len < sizeof resp)
1321                 return -ENOSPC;
1322
1323         if (copy_from_user(&cmd, buf, sizeof cmd))
1324                 return -EFAULT;
1325
1326         ret = get_unused_fd_flags(O_CLOEXEC);
1327         if (ret < 0)
1328                 return ret;
1329         resp.fd = ret;
1330
1331         filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
1332         if (IS_ERR(filp)) {
1333                 put_unused_fd(resp.fd);
1334                 return PTR_ERR(filp);
1335         }
1336
1337         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1338                          &resp, sizeof resp)) {
1339                 put_unused_fd(resp.fd);
1340                 fput(filp);
1341                 return -EFAULT;
1342         }
1343
1344         fd_install(resp.fd, filp);
1345         return in_len;
1346 }
1347
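/*
 * Common CQ creation path shared by ib_uverbs_create_cq() and
 * ib_uverbs_ex_create_cq().  @cmd_sz says how much of the extended
 * command the caller actually supplied, and @cb copies the response back
 * to userspace in whichever layout that caller expects.
 */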
1348 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1349                                         struct ib_device *ib_dev,
1350                                        struct ib_udata *ucore,
1351                                        struct ib_udata *uhw,
1352                                        struct ib_uverbs_ex_create_cq *cmd,
1353                                        size_t cmd_sz,
1354                                        int (*cb)(struct ib_uverbs_file *file,
1355                                                  struct ib_ucq_object *obj,
1356                                                  struct ib_uverbs_ex_create_cq_resp *resp,
1357                                                  struct ib_udata *udata,
1358                                                  void *context),
1359                                        void *context)
1360 {
1361         struct ib_ucq_object           *obj;
1362         struct ib_uverbs_event_file    *ev_file = NULL;
1363         struct ib_cq                   *cq;
1364         int                             ret;
1365         struct ib_uverbs_ex_create_cq_resp resp;
1366         struct ib_cq_init_attr attr = {};
1367
1368         if (cmd->comp_vector >= file->device->num_comp_vectors)
1369                 return ERR_PTR(-EINVAL);
1370
1371         obj = kmalloc(sizeof *obj, GFP_KERNEL);
1372         if (!obj)
1373                 return ERR_PTR(-ENOMEM);
1374
1375         init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
1376         down_write(&obj->uobject.mutex);
1377
1378         if (cmd->comp_channel >= 0) {
1379                 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
1380                 if (!ev_file) {
1381                         ret = -EINVAL;
1382                         goto err;
1383                 }
1384         }
1385
1386         obj->uverbs_file           = file;
1387         obj->comp_events_reported  = 0;
1388         obj->async_events_reported = 0;
1389         INIT_LIST_HEAD(&obj->comp_list);
1390         INIT_LIST_HEAD(&obj->async_list);
1391
1392         attr.cqe = cmd->cqe;
1393         attr.comp_vector = cmd->comp_vector;
1394
1395         if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
1396                 attr.flags = cmd->flags;
1397
1398         cq = ib_dev->create_cq(ib_dev, &attr,
1399                                              file->ucontext, uhw);
1400         if (IS_ERR(cq)) {
1401                 ret = PTR_ERR(cq);
1402                 goto err_file;
1403         }
1404
1405         cq->device        = ib_dev;
1406         cq->uobject       = &obj->uobject;
1407         cq->comp_handler  = ib_uverbs_comp_handler;
1408         cq->event_handler = ib_uverbs_cq_event_handler;
1409         cq->cq_context    = ev_file;
1410         atomic_set(&cq->usecnt, 0);
1411
1412         obj->uobject.object = cq;
1413         ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1414         if (ret)
1415                 goto err_free;
1416
1417         memset(&resp, 0, sizeof resp);
1418         resp.base.cq_handle = obj->uobject.id;
1419         resp.base.cqe       = cq->cqe;
1420
1421         resp.response_length = offsetof(typeof(resp), response_length) +
1422                 sizeof(resp.response_length);
1423
1424         ret = cb(file, obj, &resp, ucore, context);
1425         if (ret)
1426                 goto err_cb;
1427
1428         mutex_lock(&file->mutex);
1429         list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
1430         mutex_unlock(&file->mutex);
1431
1432         obj->uobject.live = 1;
1433
1434         up_write(&obj->uobject.mutex);
1435
1436         return obj;
1437
1438 err_cb:
1439         idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1440
1441 err_free:
1442         ib_destroy_cq(cq);
1443
1444 err_file:
1445         if (ev_file)
1446                 ib_uverbs_release_ucq(file, ev_file, obj);
1447
1448 err:
1449         put_uobj_write(&obj->uobject);
1450
1451         return ERR_PTR(ret);
1452 }
1453
1454 static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
1455                                   struct ib_ucq_object *obj,
1456                                   struct ib_uverbs_ex_create_cq_resp *resp,
1457                                   struct ib_udata *ucore, void *context)
1458 {
1459         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1460                 return -EFAULT;
1461
1462         return 0;
1463 }
1464
1465 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1466                             struct ib_device *ib_dev,
1467                             const char __user *buf, int in_len,
1468                             int out_len)
1469 {
1470         struct ib_uverbs_create_cq      cmd;
1471         struct ib_uverbs_ex_create_cq   cmd_ex;
1472         struct ib_uverbs_create_cq_resp resp;
1473         struct ib_udata                 ucore;
1474         struct ib_udata                 uhw;
1475         struct ib_ucq_object           *obj;
1476
1477         if (out_len < sizeof(resp))
1478                 return -ENOSPC;
1479
1480         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1481                 return -EFAULT;
1482
1483         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
1484
1485         INIT_UDATA(&uhw, buf + sizeof(cmd),
1486                    (unsigned long)cmd.response + sizeof(resp),
1487                    in_len - sizeof(cmd), out_len - sizeof(resp));
1488
1489         memset(&cmd_ex, 0, sizeof(cmd_ex));
1490         cmd_ex.user_handle = cmd.user_handle;
1491         cmd_ex.cqe = cmd.cqe;
1492         cmd_ex.comp_vector = cmd.comp_vector;
1493         cmd_ex.comp_channel = cmd.comp_channel;
1494
1495         obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
1496                         offsetof(typeof(cmd_ex), comp_channel) +
1497                         sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
1498                         NULL);
1499
1500         if (IS_ERR(obj))
1501                 return PTR_ERR(obj);
1502
1503         return in_len;
1504 }
1505
1506 static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
1507                                      struct ib_ucq_object *obj,
1508                                      struct ib_uverbs_ex_create_cq_resp *resp,
1509                                      struct ib_udata *ucore, void *context)
1510 {
1511         if (ib_copy_to_udata(ucore, resp, resp->response_length))
1512                 return -EFAULT;
1513
1514         return 0;
1515 }
1516
1517 int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1518                          struct ib_device *ib_dev,
1519                            struct ib_udata *ucore,
1520                            struct ib_udata *uhw)
1521 {
1522         struct ib_uverbs_ex_create_cq_resp resp;
1523         struct ib_uverbs_ex_create_cq  cmd;
1524         struct ib_ucq_object           *obj;
1525         int err;
1526
1527         if (ucore->inlen < sizeof(cmd))
1528                 return -EINVAL;
1529
1530         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
1531         if (err)
1532                 return err;
1533
1534         if (cmd.comp_mask)
1535                 return -EINVAL;
1536
1537         if (cmd.reserved)
1538                 return -EINVAL;
1539
1540         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
1541                              sizeof(resp.response_length)))
1542                 return -ENOSPC;
1543
1544         obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
1545                         min(ucore->inlen, sizeof(cmd)),
1546                         ib_uverbs_ex_create_cq_cb, NULL);
1547
1548         if (IS_ERR(obj))
1549                 return PTR_ERR(obj);
1550
1551         return 0;
1552 }
1553
1554 ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
1555                             struct ib_device *ib_dev,
1556                             const char __user *buf, int in_len,
1557                             int out_len)
1558 {
1559         struct ib_uverbs_resize_cq      cmd;
1560         struct ib_uverbs_resize_cq_resp resp;
1561         struct ib_udata                 udata;
1562         struct ib_cq                    *cq;
1563         int                             ret = -EINVAL;
1564
1565         if (copy_from_user(&cmd, buf, sizeof cmd))
1566                 return -EFAULT;
1567
1568         INIT_UDATA(&udata, buf + sizeof cmd,
1569                    (unsigned long) cmd.response + sizeof resp,
1570                    in_len - sizeof cmd, out_len - sizeof resp);
1571
1572         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1573         if (!cq)
1574                 return -EINVAL;
1575
1576         ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
1577         if (ret)
1578                 goto out;
1579
1580         resp.cqe = cq->cqe;
1581
1582         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1583                          &resp, sizeof resp.cqe))
1584                 ret = -EFAULT;
1585
1586 out:
1587         put_cq_read(cq);
1588
1589         return ret ? ret : in_len;
1590 }
1591
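/*
 * Translate a kernel struct ib_wc into the userspace ABI layout
 * (struct ib_uverbs_wc) and copy it out.
 */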
1592 static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
1593 {
1594         struct ib_uverbs_wc tmp;
1595
1596         tmp.wr_id               = wc->wr_id;
1597         tmp.status              = wc->status;
1598         tmp.opcode              = wc->opcode;
1599         tmp.vendor_err          = wc->vendor_err;
1600         tmp.byte_len            = wc->byte_len;
1601         tmp.ex.imm_data         = (__u32 __force) wc->ex.imm_data;
1602         tmp.qp_num              = wc->qp->qp_num;
1603         tmp.src_qp              = wc->src_qp;
1604         tmp.wc_flags            = wc->wc_flags;
1605         tmp.pkey_index          = wc->pkey_index;
1606         tmp.slid                = wc->slid;
1607         tmp.sl                  = wc->sl;
1608         tmp.dlid_path_bits      = wc->dlid_path_bits;
1609         tmp.port_num            = wc->port_num;
1610         tmp.reserved            = 0;
1611
1612         if (copy_to_user(dest, &tmp, sizeof tmp))
1613                 return -EFAULT;
1614
1615         return 0;
1616 }
1617
1618 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1619                           struct ib_device *ib_dev,
1620                           const char __user *buf, int in_len,
1621                           int out_len)
1622 {
1623         struct ib_uverbs_poll_cq       cmd;
1624         struct ib_uverbs_poll_cq_resp  resp;
1625         u8 __user                     *header_ptr;
1626         u8 __user                     *data_ptr;
1627         struct ib_cq                  *cq;
1628         struct ib_wc                   wc;
1629         int                            ret;
1630
1631         if (copy_from_user(&cmd, buf, sizeof cmd))
1632                 return -EFAULT;
1633
1634         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1635         if (!cq)
1636                 return -EINVAL;
1637
1638         /* we copy a struct ib_uverbs_poll_cq_resp header to user space; the polled completions are written right after it */
1639         header_ptr = (void __user *)(unsigned long) cmd.response;
1640         data_ptr = header_ptr + sizeof resp;
1641
1642         memset(&resp, 0, sizeof resp);
1643         while (resp.count < cmd.ne) {
1644                 ret = ib_poll_cq(cq, 1, &wc);
1645                 if (ret < 0)
1646                         goto out_put;
1647                 if (!ret)
1648                         break;
1649
1650                 ret = copy_wc_to_user(data_ptr, &wc);
1651                 if (ret)
1652                         goto out_put;
1653
1654                 data_ptr += sizeof(struct ib_uverbs_wc);
1655                 ++resp.count;
1656         }
1657
1658         if (copy_to_user(header_ptr, &resp, sizeof resp)) {
1659                 ret = -EFAULT;
1660                 goto out_put;
1661         }
1662
1663         ret = in_len;
1664
1665 out_put:
1666         put_cq_read(cq);
1667         return ret;
1668 }
1669
1670 ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1671                                 struct ib_device *ib_dev,
1672                                 const char __user *buf, int in_len,
1673                                 int out_len)
1674 {
1675         struct ib_uverbs_req_notify_cq cmd;
1676         struct ib_cq                  *cq;
1677
1678         if (copy_from_user(&cmd, buf, sizeof cmd))
1679                 return -EFAULT;
1680
1681         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1682         if (!cq)
1683                 return -EINVAL;
1684
1685         ib_req_notify_cq(cq, cmd.solicited_only ?
1686                          IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
1687
1688         put_cq_read(cq);
1689
1690         return in_len;
1691 }
1692
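/*
 * Destroy a CQ.  The hardware CQ is destroyed while the uobject is held
 * for writing; only on success is the handle removed from the idr and
 * the per-context list, after which the pending completion/async event
 * counts are reported back to user space.
 */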
1693 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1694                              struct ib_device *ib_dev,
1695                              const char __user *buf, int in_len,
1696                              int out_len)
1697 {
1698         struct ib_uverbs_destroy_cq      cmd;
1699         struct ib_uverbs_destroy_cq_resp resp;
1700         struct ib_uobject               *uobj;
1701         struct ib_cq                    *cq;
1702         struct ib_ucq_object            *obj;
1703         struct ib_uverbs_event_file     *ev_file;
1704         int                              ret = -EINVAL;
1705
1706         if (copy_from_user(&cmd, buf, sizeof cmd))
1707                 return -EFAULT;
1708
1709         uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
1710         if (!uobj)
1711                 return -EINVAL;
1712         cq      = uobj->object;
1713         ev_file = cq->cq_context;
1714         obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
1715
1716         ret = ib_destroy_cq(cq);
1717         if (!ret)
1718                 uobj->live = 0;
1719
1720         put_uobj_write(uobj);
1721
1722         if (ret)
1723                 return ret;
1724
1725         idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
1726
1727         mutex_lock(&file->mutex);
1728         list_del(&uobj->list);
1729         mutex_unlock(&file->mutex);
1730
1731         ib_uverbs_release_ucq(file, ev_file, obj);
1732
1733         memset(&resp, 0, sizeof resp);
1734         resp.comp_events_reported  = obj->comp_events_reported;
1735         resp.async_events_reported = obj->async_events_reported;
1736
1737         put_uobj(uobj);
1738
1739         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1740                          &resp, sizeof resp))
1741                 return -EFAULT;
1742
1743         return in_len;
1744 }
1745
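/*
 * Common QP creation path shared by ib_uverbs_create_qp() and
 * ib_uverbs_ex_create_qp().  cmd_sz says how much of the extended
 * command the caller actually supplied: fields beyond it keep their
 * zeroed defaults, and any trailing bytes past sizeof(*cmd) must be
 * zero or the request is rejected.  The cb callback writes the base or
 * extended response back through ucore.
 */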
1746 static int create_qp(struct ib_uverbs_file *file,
1747                      struct ib_udata *ucore,
1748                      struct ib_udata *uhw,
1749                      struct ib_uverbs_ex_create_qp *cmd,
1750                      size_t cmd_sz,
1751                      int (*cb)(struct ib_uverbs_file *file,
1752                                struct ib_uverbs_ex_create_qp_resp *resp,
1753                                struct ib_udata *udata),
1754                      void *context)
1755 {
1756         struct ib_uqp_object            *obj;
1757         struct ib_device                *device;
1758         struct ib_pd                    *pd = NULL;
1759         struct ib_xrcd                  *xrcd = NULL;
1760         struct ib_uobject               *uninitialized_var(xrcd_uobj);
1761         struct ib_cq                    *scq = NULL, *rcq = NULL;
1762         struct ib_srq                   *srq = NULL;
1763         struct ib_qp                    *qp;
1764         char                            *buf;
1765         struct ib_qp_init_attr          attr;
1766         struct ib_uverbs_ex_create_qp_resp resp;
1767         int                             ret;
1768
1769         if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1770                 return -EPERM;
1771
1772         obj = kzalloc(sizeof *obj, GFP_KERNEL);
1773         if (!obj)
1774                 return -ENOMEM;
1775
1776         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1777                   &qp_lock_class);
1778         down_write(&obj->uevent.uobject.mutex);
1779
1780         if (cmd->qp_type == IB_QPT_XRC_TGT) {
1781                 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
1782                                      &xrcd_uobj);
1783                 if (!xrcd) {
1784                         ret = -EINVAL;
1785                         goto err_put;
1786                 }
1787                 device = xrcd->device;
1788         } else {
1789                 if (cmd->qp_type == IB_QPT_XRC_INI) {
1790                         cmd->max_recv_wr = 0;
1791                         cmd->max_recv_sge = 0;
1792                 } else {
1793                         if (cmd->is_srq) {
1794                                 srq = idr_read_srq(cmd->srq_handle,
1795                                                    file->ucontext);
1796                                 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1797                                         ret = -EINVAL;
1798                                         goto err_put;
1799                                 }
1800                         }
1801
1802                         if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1803                                 rcq = idr_read_cq(cmd->recv_cq_handle,
1804                                                   file->ucontext, 0);
1805                                 if (!rcq) {
1806                                         ret = -EINVAL;
1807                                         goto err_put;
1808                                 }
1809                         }
1810                 }
1811
1812                 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1813                 rcq = rcq ?: scq;
1814                 pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
1815                 if (!pd || !scq) {
1816                         ret = -EINVAL;
1817                         goto err_put;
1818                 }
1819
1820                 device = pd->device;
1821         }
1822
1823         attr.event_handler = ib_uverbs_qp_event_handler;
1824         attr.qp_context    = file;
1825         attr.send_cq       = scq;
1826         attr.recv_cq       = rcq;
1827         attr.srq           = srq;
1828         attr.xrcd          = xrcd;
1829         attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
1830                                               IB_SIGNAL_REQ_WR;
1831         attr.qp_type       = cmd->qp_type;
1832         attr.create_flags  = 0;
1833
1834         attr.cap.max_send_wr     = cmd->max_send_wr;
1835         attr.cap.max_recv_wr     = cmd->max_recv_wr;
1836         attr.cap.max_send_sge    = cmd->max_send_sge;
1837         attr.cap.max_recv_sge    = cmd->max_recv_sge;
1838         attr.cap.max_inline_data = cmd->max_inline_data;
1839
1840         obj->uevent.events_reported     = 0;
1841         INIT_LIST_HEAD(&obj->uevent.event_list);
1842         INIT_LIST_HEAD(&obj->mcast_list);
1843
1844         if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
1845                       sizeof(cmd->create_flags))
1846                 attr.create_flags = cmd->create_flags;
1847
1848         if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1849                 ret = -EINVAL;
1850                 goto err_put;
1851         }
1852
1853         buf = (void *)cmd + sizeof(*cmd);
1854         if (cmd_sz > sizeof(*cmd))
1855                 if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
1856                                              cmd_sz - sizeof(*cmd) - 1))) {
1857                         ret = -EINVAL;
1858                         goto err_put;
1859                 }
1860
1861         if (cmd->qp_type == IB_QPT_XRC_TGT)
1862                 qp = ib_create_qp(pd, &attr);
1863         else
1864                 qp = device->create_qp(pd, &attr, uhw);
1865
1866         if (IS_ERR(qp)) {
1867                 ret = PTR_ERR(qp);
1868                 goto err_put;
1869         }
1870
1871         if (cmd->qp_type != IB_QPT_XRC_TGT) {
1872                 qp->real_qp       = qp;
1873                 qp->device        = device;
1874                 qp->pd            = pd;
1875                 qp->send_cq       = attr.send_cq;
1876                 qp->recv_cq       = attr.recv_cq;
1877                 qp->srq           = attr.srq;
1878                 qp->event_handler = attr.event_handler;
1879                 qp->qp_context    = attr.qp_context;
1880                 qp->qp_type       = attr.qp_type;
1881                 atomic_set(&qp->usecnt, 0);
1882                 atomic_inc(&pd->usecnt);
1883                 atomic_inc(&attr.send_cq->usecnt);
1884                 if (attr.recv_cq)
1885                         atomic_inc(&attr.recv_cq->usecnt);
1886                 if (attr.srq)
1887                         atomic_inc(&attr.srq->usecnt);
1888         }
1889         qp->uobject = &obj->uevent.uobject;
1890
1891         obj->uevent.uobject.object = qp;
1892         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1893         if (ret)
1894                 goto err_destroy;
1895
1896         memset(&resp, 0, sizeof resp);
1897         resp.base.qpn             = qp->qp_num;
1898         resp.base.qp_handle       = obj->uevent.uobject.id;
1899         resp.base.max_recv_sge    = attr.cap.max_recv_sge;
1900         resp.base.max_send_sge    = attr.cap.max_send_sge;
1901         resp.base.max_recv_wr     = attr.cap.max_recv_wr;
1902         resp.base.max_send_wr     = attr.cap.max_send_wr;
1903         resp.base.max_inline_data = attr.cap.max_inline_data;
1904
1905         resp.response_length = offsetof(typeof(resp), response_length) +
1906                                sizeof(resp.response_length);
1907
1908         ret = cb(file, &resp, ucore);
1909         if (ret)
1910                 goto err_cb;
1911
1912         if (xrcd) {
1913                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
1914                                           uobject);
1915                 atomic_inc(&obj->uxrcd->refcnt);
1916                 put_xrcd_read(xrcd_uobj);
1917         }
1918
1919         if (pd)
1920                 put_pd_read(pd);
1921         if (scq)
1922                 put_cq_read(scq);
1923         if (rcq && rcq != scq)
1924                 put_cq_read(rcq);
1925         if (srq)
1926                 put_srq_read(srq);
1927
1928         mutex_lock(&file->mutex);
1929         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
1930         mutex_unlock(&file->mutex);
1931
1932         obj->uevent.uobject.live = 1;
1933
1934         up_write(&obj->uevent.uobject.mutex);
1935
1936         return 0;
1937 err_cb:
1938         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1939
1940 err_destroy:
1941         ib_destroy_qp(qp);
1942
1943 err_put:
1944         if (xrcd)
1945                 put_xrcd_read(xrcd_uobj);
1946         if (pd)
1947                 put_pd_read(pd);
1948         if (scq)
1949                 put_cq_read(scq);
1950         if (rcq && rcq != scq)
1951                 put_cq_read(rcq);
1952         if (srq)
1953                 put_srq_read(srq);
1954
1955         put_uobj_write(&obj->uevent.uobject);
1956         return ret;
1957 }
1958
1959 static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
1960                                   struct ib_uverbs_ex_create_qp_resp *resp,
1961                                   struct ib_udata *ucore)
1962 {
1963         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1964                 return -EFAULT;
1965
1966         return 0;
1967 }
1968
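/*
 * Legacy (non-extended) create_qp entry point: the write() payload is
 * repacked into a zeroed struct ib_uverbs_ex_create_qp and handed to
 * create_qp() with cmd_sz covering only the fields up to is_srq.
 */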
1969 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1970                             struct ib_device *ib_dev,
1971                             const char __user *buf, int in_len,
1972                             int out_len)
1973 {
1974         struct ib_uverbs_create_qp      cmd;
1975         struct ib_uverbs_ex_create_qp   cmd_ex;
1976         struct ib_udata                 ucore;
1977         struct ib_udata                 uhw;
1978         ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
1979         int                             err;
1980
1981         if (out_len < resp_size)
1982                 return -ENOSPC;
1983
1984         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1985                 return -EFAULT;
1986
1987         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
1988                    resp_size);
1989         INIT_UDATA(&uhw, buf + sizeof(cmd),
1990                    (unsigned long)cmd.response + resp_size,
1991                    in_len - sizeof(cmd), out_len - resp_size);
1992
1993         memset(&cmd_ex, 0, sizeof(cmd_ex));
1994         cmd_ex.user_handle = cmd.user_handle;
1995         cmd_ex.pd_handle = cmd.pd_handle;
1996         cmd_ex.send_cq_handle = cmd.send_cq_handle;
1997         cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
1998         cmd_ex.srq_handle = cmd.srq_handle;
1999         cmd_ex.max_send_wr = cmd.max_send_wr;
2000         cmd_ex.max_recv_wr = cmd.max_recv_wr;
2001         cmd_ex.max_send_sge = cmd.max_send_sge;
2002         cmd_ex.max_recv_sge = cmd.max_recv_sge;
2003         cmd_ex.max_inline_data = cmd.max_inline_data;
2004         cmd_ex.sq_sig_all = cmd.sq_sig_all;
2005         cmd_ex.qp_type = cmd.qp_type;
2006         cmd_ex.is_srq = cmd.is_srq;
2007
2008         err = create_qp(file, &ucore, &uhw, &cmd_ex,
2009                         offsetof(typeof(cmd_ex), is_srq) +
2010                         sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
2011                         NULL);
2012
2013         if (err)
2014                 return err;
2015
2016         return in_len;
2017 }
2018
2019 static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
2020                                      struct ib_uverbs_ex_create_qp_resp *resp,
2021                                      struct ib_udata *ucore)
2022 {
2023         if (ib_copy_to_udata(ucore, resp, resp->response_length))
2024                 return -EFAULT;
2025
2026         return 0;
2027 }
2028
2029 int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2030                            struct ib_device *ib_dev,
2031                            struct ib_udata *ucore,
2032                            struct ib_udata *uhw)
2033 {
2034         struct ib_uverbs_ex_create_qp_resp resp;
2035         struct ib_uverbs_ex_create_qp cmd = {0};
2036         int err;
2037
2038         if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
2039                             sizeof(cmd.comp_mask)))
2040                 return -EINVAL;
2041
2042         err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
2043         if (err)
2044                 return err;
2045
2046         if (cmd.comp_mask)
2047                 return -EINVAL;
2048
2049         if (cmd.reserved)
2050                 return -EINVAL;
2051
2052         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
2053                              sizeof(resp.response_length)))
2054                 return -ENOSPC;
2055
2056         err = create_qp(file, ucore, uhw, &cmd,
2057                         min(ucore->inlen, sizeof(cmd)),
2058                         ib_uverbs_ex_create_qp_cb, NULL);
2059
2060         if (err)
2061                 return err;
2062
2063         return 0;
2064 }
2065
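/*
 * Open an existing shareable (XRC) QP.  Despite its name, cmd.pd_handle
 * carries the XRCD handle here (it is passed to idr_read_xrcd()); the
 * QP itself is identified by cmd.qpn and opened via ib_open_qp().
 */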
2066 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
2067                           struct ib_device *ib_dev,
2068                           const char __user *buf, int in_len, int out_len)
2069 {
2070         struct ib_uverbs_open_qp        cmd;
2071         struct ib_uverbs_create_qp_resp resp;
2072         struct ib_udata                 udata;
2073         struct ib_uqp_object           *obj;
2074         struct ib_xrcd                 *xrcd;
2075         struct ib_uobject              *uninitialized_var(xrcd_uobj);
2076         struct ib_qp                   *qp;
2077         struct ib_qp_open_attr          attr;
2078         int ret;
2079
2080         if (out_len < sizeof resp)
2081                 return -ENOSPC;
2082
2083         if (copy_from_user(&cmd, buf, sizeof cmd))
2084                 return -EFAULT;
2085
2086         INIT_UDATA(&udata, buf + sizeof cmd,
2087                    (unsigned long) cmd.response + sizeof resp,
2088                    in_len - sizeof cmd, out_len - sizeof resp);
2089
2090         obj = kmalloc(sizeof *obj, GFP_KERNEL);
2091         if (!obj)
2092                 return -ENOMEM;
2093
2094         init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
2095         down_write(&obj->uevent.uobject.mutex);
2096
2097         xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
2098         if (!xrcd) {
2099                 ret = -EINVAL;
2100                 goto err_put;
2101         }
2102
2103         attr.event_handler = ib_uverbs_qp_event_handler;
2104         attr.qp_context    = file;
2105         attr.qp_num        = cmd.qpn;
2106         attr.qp_type       = cmd.qp_type;
2107
2108         obj->uevent.events_reported = 0;
2109         INIT_LIST_HEAD(&obj->uevent.event_list);
2110         INIT_LIST_HEAD(&obj->mcast_list);
2111
2112         qp = ib_open_qp(xrcd, &attr);
2113         if (IS_ERR(qp)) {
2114                 ret = PTR_ERR(qp);
2115                 goto err_put;
2116         }
2117
2118         qp->uobject = &obj->uevent.uobject;
2119
2120         obj->uevent.uobject.object = qp;
2121         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2122         if (ret)
2123                 goto err_destroy;
2124
2125         memset(&resp, 0, sizeof resp);
2126         resp.qpn       = qp->qp_num;
2127         resp.qp_handle = obj->uevent.uobject.id;
2128
2129         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2130                          &resp, sizeof resp)) {
2131                 ret = -EFAULT;
2132                 goto err_remove;
2133         }
2134
2135         obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
2136         atomic_inc(&obj->uxrcd->refcnt);
2137         put_xrcd_read(xrcd_uobj);
2138
2139         mutex_lock(&file->mutex);
2140         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
2141         mutex_unlock(&file->mutex);
2142
2143         obj->uevent.uobject.live = 1;
2144
2145         up_write(&obj->uevent.uobject.mutex);
2146
2147         return in_len;
2148
2149 err_remove:
2150         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2151
2152 err_destroy:
2153         ib_destroy_qp(qp);
2154
2155 err_put:
2156         put_xrcd_read(xrcd_uobj);
2157         put_uobj_write(&obj->uevent.uobject);
2158         return ret;
2159 }
2160
2161 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
2162                            struct ib_device *ib_dev,
2163                            const char __user *buf, int in_len,
2164                            int out_len)
2165 {
2166         struct ib_uverbs_query_qp      cmd;
2167         struct ib_uverbs_query_qp_resp resp;
2168         struct ib_qp                   *qp;
2169         struct ib_qp_attr              *attr;
2170         struct ib_qp_init_attr         *init_attr;
2171         int                            ret;
2172
2173         if (copy_from_user(&cmd, buf, sizeof cmd))
2174                 return -EFAULT;
2175
2176         attr      = kmalloc(sizeof *attr, GFP_KERNEL);
2177         init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
2178         if (!attr || !init_attr) {
2179                 ret = -ENOMEM;
2180                 goto out;
2181         }
2182
2183         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2184         if (!qp) {
2185                 ret = -EINVAL;
2186                 goto out;
2187         }
2188
2189         ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
2190
2191         put_qp_read(qp);
2192
2193         if (ret)
2194                 goto out;
2195
2196         memset(&resp, 0, sizeof resp);
2197
2198         resp.qp_state               = attr->qp_state;
2199         resp.cur_qp_state           = attr->cur_qp_state;
2200         resp.path_mtu               = attr->path_mtu;
2201         resp.path_mig_state         = attr->path_mig_state;
2202         resp.qkey                   = attr->qkey;
2203         resp.rq_psn                 = attr->rq_psn;
2204         resp.sq_psn                 = attr->sq_psn;
2205         resp.dest_qp_num            = attr->dest_qp_num;
2206         resp.qp_access_flags        = attr->qp_access_flags;
2207         resp.pkey_index             = attr->pkey_index;
2208         resp.alt_pkey_index         = attr->alt_pkey_index;
2209         resp.sq_draining            = attr->sq_draining;
2210         resp.max_rd_atomic          = attr->max_rd_atomic;
2211         resp.max_dest_rd_atomic     = attr->max_dest_rd_atomic;
2212         resp.min_rnr_timer          = attr->min_rnr_timer;
2213         resp.port_num               = attr->port_num;
2214         resp.timeout                = attr->timeout;
2215         resp.retry_cnt              = attr->retry_cnt;
2216         resp.rnr_retry              = attr->rnr_retry;
2217         resp.alt_port_num           = attr->alt_port_num;
2218         resp.alt_timeout            = attr->alt_timeout;
2219
2220         memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
2221         resp.dest.flow_label        = attr->ah_attr.grh.flow_label;
2222         resp.dest.sgid_index        = attr->ah_attr.grh.sgid_index;
2223         resp.dest.hop_limit         = attr->ah_attr.grh.hop_limit;
2224         resp.dest.traffic_class     = attr->ah_attr.grh.traffic_class;
2225         resp.dest.dlid              = attr->ah_attr.dlid;
2226         resp.dest.sl                = attr->ah_attr.sl;
2227         resp.dest.src_path_bits     = attr->ah_attr.src_path_bits;
2228         resp.dest.static_rate       = attr->ah_attr.static_rate;
2229         resp.dest.is_global         = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
2230         resp.dest.port_num          = attr->ah_attr.port_num;
2231
2232         memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
2233         resp.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
2234         resp.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
2235         resp.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
2236         resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
2237         resp.alt_dest.dlid          = attr->alt_ah_attr.dlid;
2238         resp.alt_dest.sl            = attr->alt_ah_attr.sl;
2239         resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
2240         resp.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
2241         resp.alt_dest.is_global     = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
2242         resp.alt_dest.port_num      = attr->alt_ah_attr.port_num;
2243
2244         resp.max_send_wr            = init_attr->cap.max_send_wr;
2245         resp.max_recv_wr            = init_attr->cap.max_recv_wr;
2246         resp.max_send_sge           = init_attr->cap.max_send_sge;
2247         resp.max_recv_sge           = init_attr->cap.max_recv_sge;
2248         resp.max_inline_data        = init_attr->cap.max_inline_data;
2249         resp.sq_sig_all             = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
2250
2251         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2252                          &resp, sizeof resp))
2253                 ret = -EFAULT;
2254
2255 out:
2256         kfree(attr);
2257         kfree(init_attr);
2258
2259         return ret ? ret : in_len;
2260 }
2261
2262 /* Strip attribute-mask bits that are ignored for the given QP type */
2263 static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
2264 {
2265         switch (qp_type) {
2266         case IB_QPT_XRC_INI:
2267                 return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
2268         case IB_QPT_XRC_TGT:
2269                 return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
2270                                 IB_QP_RNR_RETRY);
2271         default:
2272                 return mask;
2273         }
2274 }
2275
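/*
 * Modify a QP.  cmd.port_num is range-checked only when IB_QP_PORT is
 * set in the attribute mask.  For a real QP (qp->real_qp == qp) the
 * Ethernet destination MAC is resolved and the driver's modify_qp hook
 * is called directly with the provider udata; otherwise ib_modify_qp()
 * is used.
 */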
2276 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2277                             struct ib_device *ib_dev,
2278                             const char __user *buf, int in_len,
2279                             int out_len)
2280 {
2281         struct ib_uverbs_modify_qp cmd;
2282         struct ib_udata            udata;
2283         struct ib_qp              *qp;
2284         struct ib_qp_attr         *attr;
2285         int                        ret;
2286
2287         if (copy_from_user(&cmd, buf, sizeof cmd))
2288                 return -EFAULT;
2289
2290         if ((cmd.attr_mask & IB_QP_PORT) &&
2291             (cmd.port_num < rdma_start_port(ib_dev) ||
2292              cmd.port_num > rdma_end_port(ib_dev)))
2293                 return -EINVAL;
2294
2295         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
2296                    out_len);
2297
2298         attr = kmalloc(sizeof *attr, GFP_KERNEL);
2299         if (!attr)
2300                 return -ENOMEM;
2301
2302         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2303         if (!qp) {
2304                 ret = -EINVAL;
2305                 goto out;
2306         }
2307
2308         attr->qp_state            = cmd.qp_state;
2309         attr->cur_qp_state        = cmd.cur_qp_state;
2310         attr->path_mtu            = cmd.path_mtu;
2311         attr->path_mig_state      = cmd.path_mig_state;
2312         attr->qkey                = cmd.qkey;
2313         attr->rq_psn              = cmd.rq_psn;
2314         attr->sq_psn              = cmd.sq_psn;
2315         attr->dest_qp_num         = cmd.dest_qp_num;
2316         attr->qp_access_flags     = cmd.qp_access_flags;
2317         attr->pkey_index          = cmd.pkey_index;
2318         attr->alt_pkey_index      = cmd.alt_pkey_index;
2319         attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
2320         attr->max_rd_atomic       = cmd.max_rd_atomic;
2321         attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
2322         attr->min_rnr_timer       = cmd.min_rnr_timer;
2323         attr->port_num            = cmd.port_num;
2324         attr->timeout             = cmd.timeout;
2325         attr->retry_cnt           = cmd.retry_cnt;
2326         attr->rnr_retry           = cmd.rnr_retry;
2327         attr->alt_port_num        = cmd.alt_port_num;
2328         attr->alt_timeout         = cmd.alt_timeout;
2329
2330         memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
2331         attr->ah_attr.grh.flow_label        = cmd.dest.flow_label;
2332         attr->ah_attr.grh.sgid_index        = cmd.dest.sgid_index;
2333         attr->ah_attr.grh.hop_limit         = cmd.dest.hop_limit;
2334         attr->ah_attr.grh.traffic_class     = cmd.dest.traffic_class;
2335         attr->ah_attr.dlid                  = cmd.dest.dlid;
2336         attr->ah_attr.sl                    = cmd.dest.sl;
2337         attr->ah_attr.src_path_bits         = cmd.dest.src_path_bits;
2338         attr->ah_attr.static_rate           = cmd.dest.static_rate;
2339         attr->ah_attr.ah_flags              = cmd.dest.is_global ? IB_AH_GRH : 0;
2340         attr->ah_attr.port_num              = cmd.dest.port_num;
2341
2342         memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
2343         attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
2344         attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
2345         attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
2346         attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
2347         attr->alt_ah_attr.dlid              = cmd.alt_dest.dlid;
2348         attr->alt_ah_attr.sl                = cmd.alt_dest.sl;
2349         attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
2350         attr->alt_ah_attr.static_rate       = cmd.alt_dest.static_rate;
2351         attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
2352         attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
2353
2354         if (qp->real_qp == qp) {
2355                 ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
2356                 if (ret)
2357                         goto release_qp;
2358                 ret = qp->device->modify_qp(qp, attr,
2359                         modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
2360         } else {
2361                 ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
2362         }
2363
2364         if (ret)
2365                 goto release_qp;
2366
2367         ret = in_len;
2368
2369 release_qp:
2370         put_qp_read(qp);
2371
2372 out:
2373         kfree(attr);
2374
2375         return ret;
2376 }
2377
2378 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2379                              struct ib_device *ib_dev,
2380                              const char __user *buf, int in_len,
2381                              int out_len)
2382 {
2383         struct ib_uverbs_destroy_qp      cmd;
2384         struct ib_uverbs_destroy_qp_resp resp;
2385         struct ib_uobject               *uobj;
2386         struct ib_qp                    *qp;
2387         struct ib_uqp_object            *obj;
2388         int                              ret = -EINVAL;
2389
2390         if (copy_from_user(&cmd, buf, sizeof cmd))
2391                 return -EFAULT;
2392
2393         memset(&resp, 0, sizeof resp);
2394
2395         uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
2396         if (!uobj)
2397                 return -EINVAL;
2398         qp  = uobj->object;
2399         obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
2400
2401         if (!list_empty(&obj->mcast_list)) {
2402                 put_uobj_write(uobj);
2403                 return -EBUSY;
2404         }
2405
2406         ret = ib_destroy_qp(qp);
2407         if (!ret)
2408                 uobj->live = 0;
2409
2410         put_uobj_write(uobj);
2411
2412         if (ret)
2413                 return ret;
2414
2415         if (obj->uxrcd)
2416                 atomic_dec(&obj->uxrcd->refcnt);
2417
2418         idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
2419
2420         mutex_lock(&file->mutex);
2421         list_del(&uobj->list);
2422         mutex_unlock(&file->mutex);
2423
2424         ib_uverbs_release_uevent(file, &obj->uevent);
2425
2426         resp.events_reported = obj->uevent.events_reported;
2427
2428         put_uobj(uobj);
2429
2430         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2431                          &resp, sizeof resp))
2432                 return -EFAULT;
2433
2434         return in_len;
2435 }
2436
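/*
 * Allocate a work request plus its scatter/gather array in one chunk.
 * The num_sge check rejects values for which
 * ALIGN(wr_size, sizeof(struct ib_sge)) + num_sge * sizeof(struct ib_sge)
 * would overflow.
 */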
2437 static void *alloc_wr(size_t wr_size, __u32 num_sge)
2438 {
2439         if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
2440                        sizeof (struct ib_sge))
2441                 return NULL;
2442
2443         return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
2444                          num_sge * sizeof (struct ib_sge), GFP_KERNEL);
2445 }
2446
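/*
 * Post send work requests.  The write() payload is laid out as the
 * struct ib_uverbs_post_send header, then cmd.wr_count user WRs of
 * cmd.wqe_size bytes each, then cmd.sge_count struct ib_uverbs_sge
 * entries.  If the post fails, resp.bad_wr counts the WRs up to and
 * including the one the driver rejected.
 */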
2447 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2448                             struct ib_device *ib_dev,
2449                             const char __user *buf, int in_len,
2450                             int out_len)
2451 {
2452         struct ib_uverbs_post_send      cmd;
2453         struct ib_uverbs_post_send_resp resp;
2454         struct ib_uverbs_send_wr       *user_wr;
2455         struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
2456         struct ib_qp                   *qp;
2457         int                             i, sg_ind;
2458         int                             is_ud;
2459         ssize_t                         ret = -EINVAL;
2460         size_t                          next_size;
2461
2462         if (copy_from_user(&cmd, buf, sizeof cmd))
2463                 return -EFAULT;
2464
2465         if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
2466             cmd.sge_count * sizeof (struct ib_uverbs_sge))
2467                 return -EINVAL;
2468
2469         if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
2470                 return -EINVAL;
2471
2472         user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
2473         if (!user_wr)
2474                 return -ENOMEM;
2475
2476         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2477         if (!qp)
2478                 goto out;
2479
2480         is_ud = qp->qp_type == IB_QPT_UD;
2481         sg_ind = 0;
2482         last = NULL;
2483         for (i = 0; i < cmd.wr_count; ++i) {
2484                 if (copy_from_user(user_wr,
2485                                    buf + sizeof cmd + i * cmd.wqe_size,
2486                                    cmd.wqe_size)) {
2487                         ret = -EFAULT;
2488                         goto out_put;
2489                 }
2490
2491                 if (user_wr->num_sge + sg_ind > cmd.sge_count) {
2492                         ret = -EINVAL;
2493                         goto out_put;
2494                 }
2495
2496                 if (is_ud) {
2497                         struct ib_ud_wr *ud;
2498
2499                         if (user_wr->opcode != IB_WR_SEND &&
2500                             user_wr->opcode != IB_WR_SEND_WITH_IMM) {
2501                                 ret = -EINVAL;
2502                                 goto out_put;
2503                         }
2504
2505                         next_size = sizeof(*ud);
2506                         ud = alloc_wr(next_size, user_wr->num_sge);
2507                         if (!ud) {
2508                                 ret = -ENOMEM;
2509                                 goto out_put;
2510                         }
2511
2512                         ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
2513                         if (!ud->ah) {
2514                                 kfree(ud);
2515                                 ret = -EINVAL;
2516                                 goto out_put;
2517                         }
2518                         ud->remote_qpn = user_wr->wr.ud.remote_qpn;
2519                         ud->remote_qkey = user_wr->wr.ud.remote_qkey;
2520
2521                         next = &ud->wr;
2522                 } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2523                            user_wr->opcode == IB_WR_RDMA_WRITE ||
2524                            user_wr->opcode == IB_WR_RDMA_READ) {
2525                         struct ib_rdma_wr *rdma;
2526
2527                         next_size = sizeof(*rdma);
2528                         rdma = alloc_wr(next_size, user_wr->num_sge);
2529                         if (!rdma) {
2530                                 ret = -ENOMEM;
2531                                 goto out_put;
2532                         }
2533
2534                         rdma->remote_addr = user_wr->wr.rdma.remote_addr;
2535                         rdma->rkey = user_wr->wr.rdma.rkey;
2536
2537                         next = &rdma->wr;
2538                 } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2539                            user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2540                         struct ib_atomic_wr *atomic;
2541
2542                         next_size = sizeof(*atomic);
2543                         atomic = alloc_wr(next_size, user_wr->num_sge);
2544                         if (!atomic) {
2545                                 ret = -ENOMEM;
2546                                 goto out_put;
2547                         }
2548
2549                         atomic->remote_addr = user_wr->wr.atomic.remote_addr;
2550                         atomic->compare_add = user_wr->wr.atomic.compare_add;
2551                         atomic->swap = user_wr->wr.atomic.swap;
2552                         atomic->rkey = user_wr->wr.atomic.rkey;
2553
2554                         next = &atomic->wr;
2555                 } else if (user_wr->opcode == IB_WR_SEND ||
2556                            user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2557                            user_wr->opcode == IB_WR_SEND_WITH_INV) {
2558                         next_size = sizeof(*next);
2559                         next = alloc_wr(next_size, user_wr->num_sge);
2560                         if (!next) {
2561                                 ret = -ENOMEM;
2562                                 goto out_put;
2563                         }
2564                 } else {
2565                         ret = -EINVAL;
2566                         goto out_put;
2567                 }
2568
2569                 if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2570                     user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
2571                         next->ex.imm_data =
2572                                         (__be32 __force) user_wr->ex.imm_data;
2573                 } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
2574                         next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
2575                 }
2576
2577                 if (!last)
2578                         wr = next;
2579                 else
2580                         last->next = next;
2581                 last = next;
2582
2583                 next->next       = NULL;
2584                 next->wr_id      = user_wr->wr_id;
2585                 next->num_sge    = user_wr->num_sge;
2586                 next->opcode     = user_wr->opcode;
2587                 next->send_flags = user_wr->send_flags;
2588
2589                 if (next->num_sge) {
2590                         next->sg_list = (void *) next +
2591                                 ALIGN(next_size, sizeof(struct ib_sge));
2592                         if (copy_from_user(next->sg_list,
2593                                            buf + sizeof cmd +
2594                                            cmd.wr_count * cmd.wqe_size +
2595                                            sg_ind * sizeof (struct ib_sge),
2596                                            next->num_sge * sizeof (struct ib_sge))) {
2597                                 ret = -EFAULT;
2598                                 goto out_put;
2599                         }
2600                         sg_ind += next->num_sge;
2601                 } else
2602                         next->sg_list = NULL;
2603         }
2604
2605         resp.bad_wr = 0;
2606         ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
2607         if (ret)
2608                 for (next = wr; next; next = next->next) {
2609                         ++resp.bad_wr;
2610                         if (next == bad_wr)
2611                                 break;
2612                 }
2613
2614         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2615                          &resp, sizeof resp))
2616                 ret = -EFAULT;
2617
2618 out_put:
2619         put_qp_read(qp);
2620
2621         while (wr) {
2622                 if (is_ud && ud_wr(wr)->ah)
2623                         put_ah_read(ud_wr(wr)->ah);
2624                 next = wr->next;
2625                 kfree(wr);
2626                 wr = next;
2627         }
2628
2629 out:
2630         kfree(user_wr);
2631
2632         return ret ? ret : in_len;
2633 }
2634
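/*
 * Unmarshal a chain of receive WRs from user space: wr_count WRs of
 * wqe_size bytes each, followed by sge_count scatter/gather entries.
 * Returns a kmalloc'd singly linked list that the caller must free,
 * or an ERR_PTR() on failure.
 */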
2635 static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
2636                                                     int in_len,
2637                                                     u32 wr_count,
2638                                                     u32 sge_count,
2639                                                     u32 wqe_size)
2640 {
2641         struct ib_uverbs_recv_wr *user_wr;
2642         struct ib_recv_wr        *wr = NULL, *last, *next;
2643         int                       sg_ind;
2644         int                       i;
2645         int                       ret;
2646
2647         if (in_len < wqe_size * wr_count +
2648             sge_count * sizeof (struct ib_uverbs_sge))
2649                 return ERR_PTR(-EINVAL);
2650
2651         if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
2652                 return ERR_PTR(-EINVAL);
2653
2654         user_wr = kmalloc(wqe_size, GFP_KERNEL);
2655         if (!user_wr)
2656                 return ERR_PTR(-ENOMEM);
2657
2658         sg_ind = 0;
2659         last = NULL;
2660         for (i = 0; i < wr_count; ++i) {
2661                 if (copy_from_user(user_wr, buf + i * wqe_size,
2662                                    wqe_size)) {
2663                         ret = -EFAULT;
2664                         goto err;
2665                 }
2666
2667                 if (user_wr->num_sge + sg_ind > sge_count) {
2668                         ret = -EINVAL;
2669                         goto err;
2670                 }
2671
2672                 if (user_wr->num_sge >=
2673                     (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
2674                     sizeof (struct ib_sge)) {
2675                         ret = -EINVAL;
2676                         goto err;
2677                 }
2678
2679                 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
2680                                user_wr->num_sge * sizeof (struct ib_sge),
2681                                GFP_KERNEL);
2682                 if (!next) {
2683                         ret = -ENOMEM;
2684                         goto err;
2685                 }
2686
2687                 if (!last)
2688                         wr = next;
2689                 else
2690                         last->next = next;
2691                 last = next;
2692
2693                 next->next       = NULL;
2694                 next->wr_id      = user_wr->wr_id;
2695                 next->num_sge    = user_wr->num_sge;
2696
2697                 if (next->num_sge) {
2698                         next->sg_list = (void *) next +
2699                                 ALIGN(sizeof *next, sizeof (struct ib_sge));
2700                         if (copy_from_user(next->sg_list,
2701                                            buf + wr_count * wqe_size +
2702                                            sg_ind * sizeof (struct ib_sge),
2703                                            next->num_sge * sizeof (struct ib_sge))) {
2704                                 ret = -EFAULT;
2705                                 goto err;
2706                         }
2707                         sg_ind += next->num_sge;
2708                 } else
2709                         next->sg_list = NULL;
2710         }
2711
2712         kfree(user_wr);
2713         return wr;
2714
2715 err:
2716         kfree(user_wr);
2717
2718         while (wr) {
2719                 next = wr->next;
2720                 kfree(wr);
2721                 wr = next;
2722         }
2723
2724         return ERR_PTR(ret);
2725 }
2726
2727 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2728                             struct ib_device *ib_dev,
2729                             const char __user *buf, int in_len,
2730                             int out_len)
2731 {
2732         struct ib_uverbs_post_recv      cmd;
2733         struct ib_uverbs_post_recv_resp resp;
2734         struct ib_recv_wr              *wr, *next, *bad_wr;
2735         struct ib_qp                   *qp;
2736         ssize_t                         ret = -EINVAL;
2737
2738         if (copy_from_user(&cmd, buf, sizeof cmd))
2739                 return -EFAULT;
2740
2741         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2742                                        in_len - sizeof cmd, cmd.wr_count,
2743                                        cmd.sge_count, cmd.wqe_size);
2744         if (IS_ERR(wr))
2745                 return PTR_ERR(wr);
2746
2747         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2748         if (!qp)
2749                 goto out;
2750
2751         resp.bad_wr = 0;
2752         ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
2753
2754         put_qp_read(qp);
2755
2756         if (ret)
2757                 for (next = wr; next; next = next->next) {
2758                         ++resp.bad_wr;
2759                         if (next == bad_wr)
2760                                 break;
2761                 }
2762
2763         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2764                          &resp, sizeof resp))
2765                 ret = -EFAULT;
2766
2767 out:
2768         while (wr) {
2769                 next = wr->next;
2770                 kfree(wr);
2771                 wr = next;
2772         }
2773
2774         return ret ? ret : in_len;
2775 }
2776
2777 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
2778                                 struct ib_device *ib_dev,
2779                                 const char __user *buf, int in_len,
2780                                 int out_len)
2781 {
2782         struct ib_uverbs_post_srq_recv      cmd;
2783         struct ib_uverbs_post_srq_recv_resp resp;
2784         struct ib_recv_wr                  *wr, *next, *bad_wr;
2785         struct ib_srq                      *srq;
2786         ssize_t                             ret = -EINVAL;
2787
2788         if (copy_from_user(&cmd, buf, sizeof cmd))
2789                 return -EFAULT;
2790
2791         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2792                                        in_len - sizeof cmd, cmd.wr_count,
2793                                        cmd.sge_count, cmd.wqe_size);
2794         if (IS_ERR(wr))
2795                 return PTR_ERR(wr);
2796
2797         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
2798         if (!srq)
2799                 goto out;
2800
2801         resp.bad_wr = 0;
2802         ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
2803
2804         put_srq_read(srq);
2805
2806         if (ret)
2807                 for (next = wr; next; next = next->next) {
2808                         ++resp.bad_wr;
2809                         if (next == bad_wr)
2810                                 break;
2811                 }
2812
2813         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2814                          &resp, sizeof resp))
2815                 ret = -EFAULT;
2816
2817 out:
2818         while (wr) {
2819                 next = wr->next;
2820                 kfree(wr);
2821                 wr = next;
2822         }
2823
2824         return ret ? ret : in_len;
2825 }
2826
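/*
 * Create an address handle from the user-supplied attributes.  IB_AH_GRH
 * is set only when cmd.attr.is_global is nonzero, and attr.dmac is
 * cleared here rather than taken from user space.
 */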
2827 ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2828                             struct ib_device *ib_dev,
2829                             const char __user *buf, int in_len,
2830                             int out_len)
2831 {
2832         struct ib_uverbs_create_ah       cmd;
2833         struct ib_uverbs_create_ah_resp  resp;
2834         struct ib_uobject               *uobj;
2835         struct ib_pd                    *pd;
2836         struct ib_ah                    *ah;
2837         struct ib_ah_attr               attr;
2838         int ret;
2839
2840         if (out_len < sizeof resp)
2841                 return -ENOSPC;
2842
2843         if (copy_from_user(&cmd, buf, sizeof cmd))
2844                 return -EFAULT;
2845
2846         if (cmd.attr.port_num < rdma_start_port(ib_dev) ||
2847             cmd.attr.port_num > rdma_end_port(ib_dev))
2848                 return -EINVAL;
2849
2850         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
2851         if (!uobj)
2852                 return -ENOMEM;
2853
2854         init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
2855         down_write(&uobj->mutex);
2856
2857         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
2858         if (!pd) {
2859                 ret = -EINVAL;
2860                 goto err;
2861         }
2862
2863         attr.dlid              = cmd.attr.dlid;
2864         attr.sl                = cmd.attr.sl;
2865         attr.src_path_bits     = cmd.attr.src_path_bits;
2866         attr.static_rate       = cmd.attr.static_rate;
2867         attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
2868         attr.port_num          = cmd.attr.port_num;
2869         attr.grh.flow_label    = cmd.attr.grh.flow_label;
2870         attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
2871         attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
2872         attr.grh.traffic_class = cmd.attr.grh.traffic_class;
2873         memset(&attr.dmac, 0, sizeof(attr.dmac));
2874         memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
2875
2876         ah = ib_create_ah(pd, &attr);
2877         if (IS_ERR(ah)) {
2878                 ret = PTR_ERR(ah);
2879                 goto err_put;
2880         }
2881
2882         ah->uobject  = uobj;
2883         uobj->object = ah;
2884
2885         ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
2886         if (ret)
2887                 goto err_destroy;
2888
2889         resp.ah_handle = uobj->id;
2890
2891         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2892                          &resp, sizeof resp)) {
2893                 ret = -EFAULT;
2894                 goto err_copy;
2895         }
2896
2897         put_pd_read(pd);
2898
2899         mutex_lock(&file->mutex);
2900         list_add_tail(&uobj->list, &file->ucontext->ah_list);
2901         mutex_unlock(&file->mutex);
2902
2903         uobj->live = 1;
2904
2905         up_write(&uobj->mutex);
2906
2907         return in_len;
2908
2909 err_copy:
2910         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2911
2912 err_destroy:
2913         ib_destroy_ah(ah);
2914
2915 err_put:
2916         put_pd_read(pd);
2917
2918 err:
2919         put_uobj_write(uobj);
2920         return ret;
2921 }
2922
2923 ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
2924                              struct ib_device *ib_dev,
2925                              const char __user *buf, int in_len, int out_len)
2926 {
2927         struct ib_uverbs_destroy_ah cmd;
2928         struct ib_ah               *ah;
2929         struct ib_uobject          *uobj;
2930         int                         ret;
2931
2932         if (copy_from_user(&cmd, buf, sizeof cmd))
2933                 return -EFAULT;
2934
2935         uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
2936         if (!uobj)
2937                 return -EINVAL;
2938         ah = uobj->object;
2939
2940         ret = ib_destroy_ah(ah);
2941         if (!ret)
2942                 uobj->live = 0;
2943
2944         put_uobj_write(uobj);
2945
2946         if (ret)
2947                 return ret;
2948
2949         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2950
2951         mutex_lock(&file->mutex);
2952         list_del(&uobj->list);
2953         mutex_unlock(&file->mutex);
2954
2955         put_uobj(uobj);
2956
2957         return in_len;
2958 }
2959
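/*
 * Attach a QP to a multicast group.  Each attachment is remembered on
 * the per-QP mcast_list so that a duplicate attach is a no-op, detach
 * can drop the entry, and ib_uverbs_destroy_qp() can refuse (-EBUSY)
 * to destroy a QP that still has attachments.
 */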
2960 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2961                                struct ib_device *ib_dev,
2962                                const char __user *buf, int in_len,
2963                                int out_len)
2964 {
2965         struct ib_uverbs_attach_mcast cmd;
2966         struct ib_qp                 *qp;
2967         struct ib_uqp_object         *obj;
2968         struct ib_uverbs_mcast_entry *mcast;
2969         int                           ret;
2970
2971         if (copy_from_user(&cmd, buf, sizeof cmd))
2972                 return -EFAULT;
2973
2974         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
2975         if (!qp)
2976                 return -EINVAL;
2977
2978         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
2979
2980         list_for_each_entry(mcast, &obj->mcast_list, list)
2981                 if (cmd.mlid == mcast->lid &&
2982                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
2983                         ret = 0;
2984                         goto out_put;
2985                 }
2986
2987         mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
2988         if (!mcast) {
2989                 ret = -ENOMEM;
2990                 goto out_put;
2991         }
2992
2993         mcast->lid = cmd.mlid;
2994         memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
2995
2996         ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
2997         if (!ret)
2998                 list_add_tail(&mcast->list, &obj->mcast_list);
2999         else
3000                 kfree(mcast);
3001
3002 out_put:
3003         put_qp_write(qp);
3004
3005         return ret ? ret : in_len;
3006 }
3007
3008 ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
3009                                struct ib_device *ib_dev,
3010                                const char __user *buf, int in_len,
3011                                int out_len)
3012 {
3013         struct ib_uverbs_detach_mcast cmd;
3014         struct ib_uqp_object         *obj;
3015         struct ib_qp                 *qp;
3016         struct ib_uverbs_mcast_entry *mcast;
3017         int                           ret = -EINVAL;
3018
3019         if (copy_from_user(&cmd, buf, sizeof cmd))
3020                 return -EFAULT;
3021
3022         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
3023         if (!qp)
3024                 return -EINVAL;
3025
3026         ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
3027         if (ret)
3028                 goto out_put;
3029
3030         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
3031
3032         list_for_each_entry(mcast, &obj->mcast_list, list)
3033                 if (cmd.mlid == mcast->lid &&
3034                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
3035                         list_del(&mcast->list);
3036                         kfree(mcast);
3037                         break;
3038                 }
3039
3040 out_put:
3041         put_qp_write(qp);
3042
3043         return ret ? ret : in_len;
3044 }
3045
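/*
 * Convert one user-space flow spec into the kernel representation,
 * checking that the reserved field is zero and that the user-supplied
 * size matches the kernel's size for that spec type before copying the
 * value/mask pair.
 */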
3046 static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3047                                 union ib_flow_spec *ib_spec)
3048 {
3049         if (kern_spec->reserved)
3050                 return -EINVAL;
3051
3052         ib_spec->type = kern_spec->type;
3053
3054         switch (ib_spec->type) {
3055         case IB_FLOW_SPEC_ETH:
3056                 ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
3057                 if (ib_spec->eth.size != kern_spec->eth.size)
3058                         return -EINVAL;
3059                 memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
3060                        sizeof(struct ib_flow_eth_filter));
3061                 memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
3062                        sizeof(struct ib_flow_eth_filter));
3063                 break;
3064         case IB_FLOW_SPEC_IPV4:
3065                 ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
3066                 if (ib_spec->ipv4.size != kern_spec->ipv4.size)
3067                         return -EINVAL;
3068                 memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
3069                        sizeof(struct ib_flow_ipv4_filter));
3070                 memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
3071                        sizeof(struct ib_flow_ipv4_filter));
3072                 break;
3073         case IB_FLOW_SPEC_TCP:
3074         case IB_FLOW_SPEC_UDP:
3075                 ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
3076                 if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
3077                         return -EINVAL;
3078                 memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
3079                        sizeof(struct ib_flow_tcp_udp_filter));
3080                 memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
3081                        sizeof(struct ib_flow_tcp_udp_filter));
3082                 break;
3083         default:
3084                 return -EINVAL;
3085         }
3086         return 0;
3087 }
3088
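/*
 * Extended-command handler for flow steering rule creation.  The
 * command header is followed by a variable-length ib_uverbs_flow_attr
 * carrying num_of_specs flow specifications; these are validated and
 * converted before the rule is created with ib_create_flow() and
 * published through the rule idr.
 */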
3089 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3090                              struct ib_device *ib_dev,
3091                              struct ib_udata *ucore,
3092                              struct ib_udata *uhw)
3093 {
3094         struct ib_uverbs_create_flow      cmd;
3095         struct ib_uverbs_create_flow_resp resp;
3096         struct ib_uobject                 *uobj;
3097         struct ib_flow                    *flow_id;
3098         struct ib_uverbs_flow_attr        *kern_flow_attr;
3099         struct ib_flow_attr               *flow_attr;
3100         struct ib_qp                      *qp;
3101         int err = 0;
3102         void *kern_spec;
3103         void *ib_spec;
3104         int i;
3105
3106         if (ucore->inlen < sizeof(cmd))
3107                 return -EINVAL;
3108
3109         if (ucore->outlen < sizeof(resp))
3110                 return -ENOSPC;
3111
3112         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3113         if (err)
3114                 return err;
3115
3116         ucore->inbuf += sizeof(cmd);
3117         ucore->inlen -= sizeof(cmd);
3118
3119         if (cmd.comp_mask)
3120                 return -EINVAL;
3121
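        /*
         * Flow steering always requires CAP_NET_RAW; sniffer rules
         * additionally require CAP_NET_ADMIN.
         */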
3122         if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
3123              !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
3124                 return -EPERM;
3125
3126         if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
3127                 return -EINVAL;
3128
3129         if (cmd.flow_attr.size > ucore->inlen ||
3130             cmd.flow_attr.size >
3131             (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
3132                 return -EINVAL;
3133
3134         if (cmd.flow_attr.reserved[0] ||
3135             cmd.flow_attr.reserved[1])
3136                 return -EINVAL;
3137
3138         if (cmd.flow_attr.num_of_specs) {
3139                 kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
3140                                          GFP_KERNEL);
3141                 if (!kern_flow_attr)
3142                         return -ENOMEM;
3143
3144                 memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
3145                 err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
3146                                          cmd.flow_attr.size);
3147                 if (err)
3148                         goto err_free_attr;
3149         } else {
3150                 kern_flow_attr = &cmd.flow_attr;
3151         }
3152
3153         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
3154         if (!uobj) {
3155                 err = -ENOMEM;
3156                 goto err_free_attr;
3157         }
3158         init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
3159         down_write(&uobj->mutex);
3160
3161         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
3162         if (!qp) {
3163                 err = -EINVAL;
3164                 goto err_uobj;
3165         }
3166
3167         flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
3168         if (!flow_attr) {
3169                 err = -ENOMEM;
3170                 goto err_put;
3171         }
3172
3173         flow_attr->type = kern_flow_attr->type;
3174         flow_attr->priority = kern_flow_attr->priority;
3175         flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
3176         flow_attr->port = kern_flow_attr->port;
3177         flow_attr->flags = kern_flow_attr->flags;
3178         flow_attr->size = sizeof(*flow_attr);
3179
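        /*
         * Walk the flow specs that follow the fixed flow_attr header,
         * converting each one in place.  The loop stops early if the
         * remaining byte count no longer covers a complete spec; any
         * leftover bytes are rejected below.
         */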
3180         kern_spec = kern_flow_attr + 1;
3181         ib_spec = flow_attr + 1;
3182         for (i = 0; i < flow_attr->num_of_specs &&
3183              cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
3184              cmd.flow_attr.size >=
3185              ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
3186                 err = kern_spec_to_ib_spec(kern_spec, ib_spec);
3187                 if (err)
3188                         goto err_free;
3189                 flow_attr->size +=
3190                         ((union ib_flow_spec *) ib_spec)->size;
3191                 cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
3192                 kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
3193                 ib_spec += ((union ib_flow_spec *) ib_spec)->size;
3194         }
3195         if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
3196                 pr_warn("create flow failed, flow %d: %d bytes left from uverbs cmd\n",
3197                         i, cmd.flow_attr.size);
3198                 err = -EINVAL;
3199                 goto err_free;
3200         }
3201         flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
3202         if (IS_ERR(flow_id)) {
3203                 err = PTR_ERR(flow_id);
3204                 goto err_free;
3205         }
3206         flow_id->qp = qp;
3207         flow_id->uobject = uobj;
3208         uobj->object = flow_id;
3209
3210         err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
3211         if (err)
3212                 goto destroy_flow;
3213
3214         memset(&resp, 0, sizeof(resp));
3215         resp.flow_handle = uobj->id;
3216
3217         err = ib_copy_to_udata(ucore,
3218                                &resp, sizeof(resp));
3219         if (err)
3220                 goto err_copy;
3221
3222         put_qp_read(qp);
3223         mutex_lock(&file->mutex);
3224         list_add_tail(&uobj->list, &file->ucontext->rule_list);
3225         mutex_unlock(&file->mutex);
3226
3227         uobj->live = 1;
3228
3229         up_write(&uobj->mutex);
3230         kfree(flow_attr);
3231         if (cmd.flow_attr.num_of_specs)
3232                 kfree(kern_flow_attr);
3233         return 0;
3234 err_copy:
3235         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3236 destroy_flow:
3237         ib_destroy_flow(flow_id);
3238 err_free:
3239         kfree(flow_attr);
3240 err_put:
3241         put_qp_read(qp);
3242 err_uobj:
3243         put_uobj_write(uobj);
3244 err_free_attr:
3245         if (cmd.flow_attr.num_of_specs)
3246                 kfree(kern_flow_attr);
3247         return err;
3248 }
3249
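/*
 * Extended-command handler for flow steering rule destruction: look up
 * the rule by flow_handle, destroy the underlying ib_flow and remove
 * the uobject from the rule idr and the per-context rule list.
 */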
3250 int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
3251                               struct ib_device *ib_dev,
3252                               struct ib_udata *ucore,
3253                               struct ib_udata *uhw)
3254 {
3255         struct ib_uverbs_destroy_flow   cmd;
3256         struct ib_flow                  *flow_id;
3257         struct ib_uobject               *uobj;
3258         int                             ret;
3259
3260         if (ucore->inlen < sizeof(cmd))
3261                 return -EINVAL;
3262
3263         ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3264         if (ret)
3265                 return ret;
3266
3267         if (cmd.comp_mask)
3268                 return -EINVAL;
3269
3270         uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
3271                               file->ucontext);
3272         if (!uobj)
3273                 return -EINVAL;
3274         flow_id = uobj->object;
3275
3276         ret = ib_destroy_flow(flow_id);
3277         if (!ret)
3278                 uobj->live = 0;
3279
3280         put_uobj_write(uobj);
3281
3282         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3283
3284         mutex_lock(&file->mutex);
3285         list_del(&uobj->list);
3286         mutex_unlock(&file->mutex);
3287
3288         put_uobj(uobj);
3289
3290         return ret;
3291 }
3292
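/*
 * Common SRQ creation path shared by the basic and extended create
 * commands.  For IB_SRQT_XRC SRQs the XRCD and completion queue are
 * looked up and reference-counted in addition to the PD.
 */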
3293 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3294                                 struct ib_device *ib_dev,
3295                                 struct ib_uverbs_create_xsrq *cmd,
3296                                 struct ib_udata *udata)
3297 {
3298         struct ib_uverbs_create_srq_resp resp;
3299         struct ib_usrq_object           *obj;
3300         struct ib_pd                    *pd;
3301         struct ib_srq                   *srq;
3302         struct ib_uobject               *uninitialized_var(xrcd_uobj);
3303         struct ib_srq_init_attr          attr;
3304         int ret;
3305
3306         obj = kmalloc(sizeof *obj, GFP_KERNEL);
3307         if (!obj)
3308                 return -ENOMEM;
3309
3310         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
3311         down_write(&obj->uevent.uobject.mutex);
3312
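        /* XRC SRQs additionally require an XRCD and a CQ. */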
3313         if (cmd->srq_type == IB_SRQT_XRC) {
3314                 attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
3315                 if (!attr.ext.xrc.xrcd) {
3316                         ret = -EINVAL;
3317                         goto err;
3318                 }
3319
3320                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
3321                 atomic_inc(&obj->uxrcd->refcnt);
3322
3323                 attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
3324                 if (!attr.ext.xrc.cq) {
3325                         ret = -EINVAL;
3326                         goto err_put_xrcd;
3327                 }
3328         }
3329
3330         pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
3331         if (!pd) {
3332                 ret = -EINVAL;
3333                 goto err_put_cq;
3334         }
3335
3336         attr.event_handler  = ib_uverbs_srq_event_handler;
3337         attr.srq_context    = file;
3338         attr.srq_type       = cmd->srq_type;
3339         attr.attr.max_wr    = cmd->max_wr;
3340         attr.attr.max_sge   = cmd->max_sge;
3341         attr.attr.srq_limit = cmd->srq_limit;
3342
3343         obj->uevent.events_reported = 0;
3344         INIT_LIST_HEAD(&obj->uevent.event_list);
3345
3346         srq = pd->device->create_srq(pd, &attr, udata);
3347         if (IS_ERR(srq)) {
3348                 ret = PTR_ERR(srq);
3349                 goto err_put;
3350         }
3351
3352         srq->device        = pd->device;
3353         srq->pd            = pd;
3354         srq->srq_type      = cmd->srq_type;
3355         srq->uobject       = &obj->uevent.uobject;
3356         srq->event_handler = attr.event_handler;
3357         srq->srq_context   = attr.srq_context;
3358
3359         if (cmd->srq_type == IB_SRQT_XRC) {
3360                 srq->ext.xrc.cq   = attr.ext.xrc.cq;
3361                 srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
3362                 atomic_inc(&attr.ext.xrc.cq->usecnt);
3363                 atomic_inc(&attr.ext.xrc.xrcd->usecnt);
3364         }
3365
3366         atomic_inc(&pd->usecnt);
3367         atomic_set(&srq->usecnt, 0);
3368
3369         obj->uevent.uobject.object = srq;
3370         ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3371         if (ret)
3372                 goto err_destroy;
3373
3374         memset(&resp, 0, sizeof resp);
3375         resp.srq_handle = obj->uevent.uobject.id;
3376         resp.max_wr     = attr.attr.max_wr;
3377         resp.max_sge    = attr.attr.max_sge;
3378         if (cmd->srq_type == IB_SRQT_XRC)
3379                 resp.srqn = srq->ext.xrc.srq_num;
3380
3381         if (copy_to_user((void __user *) (unsigned long) cmd->response,
3382                          &resp, sizeof resp)) {
3383                 ret = -EFAULT;
3384                 goto err_copy;
3385         }
3386
3387         if (cmd->srq_type == IB_SRQT_XRC) {
3388                 put_uobj_read(xrcd_uobj);
3389                 put_cq_read(attr.ext.xrc.cq);
3390         }
3391         put_pd_read(pd);
3392
3393         mutex_lock(&file->mutex);
3394         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
3395         mutex_unlock(&file->mutex);
3396
3397         obj->uevent.uobject.live = 1;
3398
3399         up_write(&obj->uevent.uobject.mutex);
3400
3401         return 0;
3402
3403 err_copy:
3404         idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3405
3406 err_destroy:
3407         ib_destroy_srq(srq);
3408
3409 err_put:
3410         put_pd_read(pd);
3411
3412 err_put_cq:
3413         if (cmd->srq_type == IB_SRQT_XRC)
3414                 put_cq_read(attr.ext.xrc.cq);
3415
3416 err_put_xrcd:
3417         if (cmd->srq_type == IB_SRQT_XRC) {
3418                 atomic_dec(&obj->uxrcd->refcnt);
3419                 put_uobj_read(xrcd_uobj);
3420         }
3421
3422 err:
3423         put_uobj_write(&obj->uevent.uobject);
3424         return ret;
3425 }
3426
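/*
 * Legacy SRQ creation command: repackage the request as an
 * ib_uverbs_create_xsrq of type IB_SRQT_BASIC and forward it to
 * __uverbs_create_xsrq().
 */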
3427 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
3428                              struct ib_device *ib_dev,
3429                              const char __user *buf, int in_len,
3430                              int out_len)
3431 {
3432         struct ib_uverbs_create_srq      cmd;
3433         struct ib_uverbs_create_xsrq     xcmd;
3434         struct ib_uverbs_create_srq_resp resp;
3435         struct ib_udata                  udata;
3436         int ret;
3437
3438         if (out_len < sizeof resp)
3439                 return -ENOSPC;
3440
3441         if (copy_from_user(&cmd, buf, sizeof cmd))
3442                 return -EFAULT;
3443
3444         xcmd.response    = cmd.response;
3445         xcmd.user_handle = cmd.user_handle;
3446         xcmd.srq_type    = IB_SRQT_BASIC;
3447         xcmd.pd_handle   = cmd.pd_handle;
3448         xcmd.max_wr      = cmd.max_wr;
3449         xcmd.max_sge     = cmd.max_sge;
3450         xcmd.srq_limit   = cmd.srq_limit;
3451
3452         INIT_UDATA(&udata, buf + sizeof cmd,
3453                    (unsigned long) cmd.response + sizeof resp,
3454                    in_len - sizeof cmd, out_len - sizeof resp);
3455
3456         ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
3457         if (ret)
3458                 return ret;
3459
3460         return in_len;
3461 }
3462
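/*
 * Extended SRQ creation command (XRC-capable); thin wrapper around
 * __uverbs_create_xsrq().
 */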
3463 ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
3464                               struct ib_device *ib_dev,
3465                               const char __user *buf, int in_len, int out_len)
3466 {
3467         struct ib_uverbs_create_xsrq     cmd;
3468         struct ib_uverbs_create_srq_resp resp;
3469         struct ib_udata                  udata;
3470         int ret;
3471
3472         if (out_len < sizeof resp)
3473                 return -ENOSPC;
3474
3475         if (copy_from_user(&cmd, buf, sizeof cmd))
3476                 return -EFAULT;
3477
3478         INIT_UDATA(&udata, buf + sizeof cmd,
3479                    (unsigned long) cmd.response + sizeof resp,
3480                    in_len - sizeof cmd, out_len - sizeof resp);
3481
3482         ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
3483         if (ret)
3484                 return ret;
3485
3486         return in_len;
3487 }
3488
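/*
 * Modify an SRQ's max_wr / srq_limit attributes as selected by
 * cmd.attr_mask, passing any trailing command bytes to the driver as
 * provider-specific udata.
 */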
3489 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
3490                              struct ib_device *ib_dev,
3491                              const char __user *buf, int in_len,
3492                              int out_len)
3493 {
3494         struct ib_uverbs_modify_srq cmd;
3495         struct ib_udata             udata;
3496         struct ib_srq              *srq;
3497         struct ib_srq_attr          attr;
3498         int                         ret;
3499
3500         if (copy_from_user(&cmd, buf, sizeof cmd))
3501                 return -EFAULT;
3502
3503         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
3504                    out_len);
3505
3506         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
3507         if (!srq)
3508                 return -EINVAL;
3509
3510         attr.max_wr    = cmd.max_wr;
3511         attr.srq_limit = cmd.srq_limit;
3512
3513         ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
3514
3515         put_srq_read(srq);
3516
3517         return ret ? ret : in_len;
3518 }
3519
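/*
 * Query an SRQ and return its current max_wr, max_sge and srq_limit
 * values to user space.
 */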
3520 ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
3521                             struct ib_device *ib_dev,
3522                             const char __user *buf,
3523                             int in_len, int out_len)
3524 {
3525         struct ib_uverbs_query_srq      cmd;
3526         struct ib_uverbs_query_srq_resp resp;
3527         struct ib_srq_attr              attr;
3528         struct ib_srq                   *srq;
3529         int                             ret;
3530
3531         if (out_len < sizeof resp)
3532                 return -ENOSPC;
3533
3534         if (copy_from_user(&cmd, buf, sizeof cmd))
3535                 return -EFAULT;
3536
3537         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
3538         if (!srq)
3539                 return -EINVAL;
3540
3541         ret = ib_query_srq(srq, &attr);
3542
3543         put_srq_read(srq);
3544
3545         if (ret)
3546                 return ret;
3547
3548         memset(&resp, 0, sizeof resp);
3549
3550         resp.max_wr    = attr.max_wr;
3551         resp.max_sge   = attr.max_sge;
3552         resp.srq_limit = attr.srq_limit;
3553
3554         if (copy_to_user((void __user *) (unsigned long) cmd.response,
3555                          &resp, sizeof resp))
3556                 return -EFAULT;
3557
3558         return in_len;
3559 }
3560
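/*
 * Destroy an SRQ and report back the number of asynchronous events
 * that were delivered for it, so user space can account for them;
 * XRC SRQs also drop their reference on the parent XRCD.
 */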
3561 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
3562                               struct ib_device *ib_dev,
3563                               const char __user *buf, int in_len,
3564                               int out_len)
3565 {
3566         struct ib_uverbs_destroy_srq      cmd;
3567         struct ib_uverbs_destroy_srq_resp resp;
3568         struct ib_uobject                *uobj;
3569         struct ib_srq                    *srq;
3570         struct ib_uevent_object          *obj;
3571         int                               ret = -EINVAL;
3572         struct ib_usrq_object            *us;
3573         enum ib_srq_type                  srq_type;
3574
3575         if (copy_from_user(&cmd, buf, sizeof cmd))
3576                 return -EFAULT;
3577
3578         uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
3579         if (!uobj)
3580                 return -EINVAL;
3581         srq = uobj->object;
3582         obj = container_of(uobj, struct ib_uevent_object, uobject);
3583         srq_type = srq->srq_type;
3584
3585         ret = ib_destroy_srq(srq);
3586         if (!ret)
3587                 uobj->live = 0;
3588
3589         put_uobj_write(uobj);
3590
3591         if (ret)
3592                 return ret;
3593
3594         if (srq_type == IB_SRQT_XRC) {
3595                 us = container_of(obj, struct ib_usrq_object, uevent);
3596                 atomic_dec(&us->uxrcd->refcnt);
3597         }
3598
3599         idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
3600
3601         mutex_lock(&file->mutex);
3602         list_del(&uobj->list);
3603         mutex_unlock(&file->mutex);
3604
3605         ib_uverbs_release_uevent(file, obj);
3606
3607         memset(&resp, 0, sizeof resp);
3608         resp.events_reported = obj->events_reported;
3609
3610         put_uobj(uobj);
3611
3612         if (copy_to_user((void __user *) (unsigned long) cmd.response,
3613                          &resp, sizeof resp))
3614                 ret = -EFAULT;
3615
3616         return ret ? ret : in_len;
3617 }
3618
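/*
 * Extended device query.  The response is extensible: optional fields
 * (ODP capabilities, timestamp mask, HCA core clock) are appended only
 * when the user-supplied output buffer has room for them, and
 * response_length tells user space how much was actually written.
 */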
3619 int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
3620                               struct ib_device *ib_dev,
3621                               struct ib_udata *ucore,
3622                               struct ib_udata *uhw)
3623 {
3624         struct ib_uverbs_ex_query_device_resp resp;
3625         struct ib_uverbs_ex_query_device  cmd;
3626         struct ib_device_attr attr;
3627         int err;
3628
3629         if (ucore->inlen < sizeof(cmd))
3630                 return -EINVAL;
3631
3632         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3633         if (err)
3634                 return err;
3635
3636         if (cmd.comp_mask)
3637                 return -EINVAL;
3638
3639         if (cmd.reserved)
3640                 return -EINVAL;
3641
3642         resp.response_length = offsetof(typeof(resp), odp_caps);
3643
3644         if (ucore->outlen < resp.response_length)
3645                 return -ENOSPC;
3646
3647         memset(&attr, 0, sizeof(attr));
3648
3649         err = ib_dev->query_device(ib_dev, &attr, uhw);
3650         if (err)
3651                 return err;
3652
3653         copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
3654         resp.comp_mask = 0;
3655
3656         if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
3657                 goto end;
3658
3659 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
3660         resp.odp_caps.general_caps = attr.odp_caps.general_caps;
3661         resp.odp_caps.per_transport_caps.rc_odp_caps =
3662                 attr.odp_caps.per_transport_caps.rc_odp_caps;
3663         resp.odp_caps.per_transport_caps.uc_odp_caps =
3664                 attr.odp_caps.per_transport_caps.uc_odp_caps;
3665         resp.odp_caps.per_transport_caps.ud_odp_caps =
3666                 attr.odp_caps.per_transport_caps.ud_odp_caps;
3667         resp.odp_caps.reserved = 0;
3668 #else
3669         memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
3670 #endif
3671         resp.response_length += sizeof(resp.odp_caps);
3672
3673         if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
3674                 goto end;
3675
3676         resp.timestamp_mask = attr.timestamp_mask;
3677         resp.response_length += sizeof(resp.timestamp_mask);
3678
3679         if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
3680                 goto end;
3681
3682         resp.hca_core_clock = attr.hca_core_clock;
3683         resp.response_length += sizeof(resp.hca_core_clock);
3684
3685 end:
3686         err = ib_copy_to_udata(ucore, &resp, resp.response_length);
3687         if (err)
3688                 return err;
3689
3690         return 0;
3691 }