GNU Linux-libre 4.19.286-gnu1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
1 /* This file is part of the Emulex RoCE Device Driver for
2  * RoCE (RDMA over Converged Ethernet) adapters.
3  * Copyright (C) 2012-2015 Emulex. All rights reserved.
4  * EMULEX and SLI are trademarks of Emulex.
5  * www.emulex.com
6  *
7  * This software is available to you under a choice of one of two licenses.
8  * You may choose to be licensed under the terms of the GNU General Public
9  * License (GPL) Version 2, available from the file COPYING in the main
10  * directory of this source tree, or the BSD license below:
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  *
16  * - Redistributions of source code must retain the above copyright notice,
17  *   this list of conditions and the following disclaimer.
18  *
19  * - Redistributions in binary form must reproduce the above copyright
20  *   notice, this list of conditions and the following disclaimer in
21  *   the documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * Contact Information:
36  * linux-drivers@emulex.com
37  *
38  * Emulex
39  * 3333 Susan Street
40  * Costa Mesa, CA 92626
41  */
42
43 #include <linux/dma-mapping.h>
44 #include <rdma/ib_verbs.h>
45 #include <rdma/ib_user_verbs.h>
46 #include <rdma/iw_cm.h>
47 #include <rdma/ib_umem.h>
48 #include <rdma/ib_addr.h>
49 #include <rdma/ib_cache.h>
50
51 #include "ocrdma.h"
52 #include "ocrdma_hw.h"
53 #include "ocrdma_verbs.h"
54 #include <rdma/ocrdma-abi.h>
55
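/* RoCE exposes a single partition: only pkey index 0 is valid, and it
 * always reports the default full-membership P_Key (0xffff).
 */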
56 int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
57 {
58         if (index > 0)
59                 return -EINVAL;
60
61         *pkey = 0xffff;
62         return 0;
63 }
64
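/* Report device capabilities from the attributes cached in dev->attr when
 * the adapter was initialized; no vendor-specific udata is accepted.
 */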
65 int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
66                         struct ib_udata *uhw)
67 {
68         struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
69
70         if (uhw->inlen || uhw->outlen)
71                 return -EINVAL;
72
73         memset(attr, 0, sizeof *attr);
74         memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
75                min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
76         ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
77         attr->max_mr_size = dev->attr.max_mr_size;
78         attr->page_size_cap = 0xffff000;
79         attr->vendor_id = dev->nic_info.pdev->vendor;
80         attr->vendor_part_id = dev->nic_info.pdev->device;
81         attr->hw_ver = dev->asic_id;
82         attr->max_qp = dev->attr.max_qp;
83         attr->max_ah = OCRDMA_MAX_AH;
84         attr->max_qp_wr = dev->attr.max_wqe;
85
86         attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
87                                         IB_DEVICE_RC_RNR_NAK_GEN |
88                                         IB_DEVICE_SHUTDOWN_PORT |
89                                         IB_DEVICE_SYS_IMAGE_GUID |
90                                         IB_DEVICE_LOCAL_DMA_LKEY |
91                                         IB_DEVICE_MEM_MGT_EXTENSIONS;
92         attr->max_send_sge = dev->attr.max_send_sge;
93         attr->max_recv_sge = dev->attr.max_recv_sge;
94         attr->max_sge_rd = dev->attr.max_rdma_sge;
95         attr->max_cq = dev->attr.max_cq;
96         attr->max_cqe = dev->attr.max_cqe;
97         attr->max_mr = dev->attr.max_mr;
98         attr->max_mw = dev->attr.max_mw;
99         attr->max_pd = dev->attr.max_pd;
100         attr->atomic_cap = 0;
101         attr->max_fmr = 0;
102         attr->max_map_per_fmr = 0;
103         attr->max_qp_rd_atom =
104             min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
105         attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
106         attr->max_srq = dev->attr.max_srq;
107         attr->max_srq_sge = dev->attr.max_srq_sge;
108         attr->max_srq_wr = dev->attr.max_rqe;
109         attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
110         attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
111         attr->max_pkeys = 1;
112         return 0;
113 }
114
115 struct net_device *ocrdma_get_netdev(struct ib_device *ibdev, u8 port_num)
116 {
117         struct ocrdma_dev *dev;
118         struct net_device *ndev = NULL;
119
120         rcu_read_lock();
121
122         dev = get_ocrdma_dev(ibdev);
123         if (dev)
124                 ndev = dev->nic_info.netdev;
125         if (ndev)
126                 dev_hold(ndev);
127
128         rcu_read_unlock();
129
130         return ndev;
131 }
132
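/* Translate the PHY link speed reported by firmware into the closest
 * IB speed/width pair; unknown or zero speeds default to SDR x1.
 */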
133 static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
134                                             u8 *ib_speed, u8 *ib_width)
135 {
136         int status;
137         u8 speed;
138
139         status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
140         if (status)
141                 speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
142
143         switch (speed) {
144         case OCRDMA_PHYS_LINK_SPEED_1GBPS:
145                 *ib_speed = IB_SPEED_SDR;
146                 *ib_width = IB_WIDTH_1X;
147                 break;
148
149         case OCRDMA_PHYS_LINK_SPEED_10GBPS:
150                 *ib_speed = IB_SPEED_QDR;
151                 *ib_width = IB_WIDTH_1X;
152                 break;
153
154         case OCRDMA_PHYS_LINK_SPEED_20GBPS:
155                 *ib_speed = IB_SPEED_DDR;
156                 *ib_width = IB_WIDTH_4X;
157                 break;
158
159         case OCRDMA_PHYS_LINK_SPEED_40GBPS:
160                 *ib_speed = IB_SPEED_QDR;
161                 *ib_width = IB_WIDTH_4X;
162                 break;
163
164         default:
165                 /* Unsupported */
166                 *ib_speed = IB_SPEED_SDR;
167                 *ib_width = IB_WIDTH_1X;
168         }
169 }
170
171 int ocrdma_query_port(struct ib_device *ibdev,
172                       u8 port, struct ib_port_attr *props)
173 {
174         enum ib_port_state port_state;
175         struct ocrdma_dev *dev;
176         struct net_device *netdev;
177
178         /* props is zeroed by the caller; avoid zeroing it again here */
179         dev = get_ocrdma_dev(ibdev);
180         if (port > 1) {
181                 pr_err("%s(%d) invalid_port=0x%x\n", __func__,
182                        dev->id, port);
183                 return -EINVAL;
184         }
185         netdev = dev->nic_info.netdev;
186         if (netif_running(netdev) && netif_oper_up(netdev)) {
187                 port_state = IB_PORT_ACTIVE;
188                 props->phys_state = 5;
189         } else {
190                 port_state = IB_PORT_DOWN;
191                 props->phys_state = 3;
192         }
193         props->max_mtu = IB_MTU_4096;
194         props->active_mtu = iboe_get_mtu(netdev->mtu);
195         props->lid = 0;
196         props->lmc = 0;
197         props->sm_lid = 0;
198         props->sm_sl = 0;
199         props->state = port_state;
200         props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
201                                 IB_PORT_DEVICE_MGMT_SUP |
202                                 IB_PORT_VENDOR_CLASS_SUP;
203         props->ip_gids = true;
204         props->gid_tbl_len = OCRDMA_MAX_SGID;
205         props->pkey_tbl_len = 1;
206         props->bad_pkey_cntr = 0;
207         props->qkey_viol_cntr = 0;
208         get_link_speed_and_width(dev, &props->active_speed,
209                                  &props->active_width);
210         props->max_msg_sz = 0x80000000;
211         props->max_vl_num = 4;
212         return 0;
213 }
214
215 int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
216                        struct ib_port_modify *props)
217 {
218         struct ocrdma_dev *dev;
219
220         dev = get_ocrdma_dev(ibdev);
221         if (port > 1) {
222                 pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
223                 return -EINVAL;
224         }
225         return 0;
226 }
227
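/* Each user context keeps a list of physical ranges (doorbell pages, DPP
 * pages, queue memory) that it is allowed to mmap(); ocrdma_mmap() rejects
 * any offset that was never registered through this list.
 */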
228 static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
229                            unsigned long len)
230 {
231         struct ocrdma_mm *mm;
232
233         mm = kzalloc(sizeof(*mm), GFP_KERNEL);
234         if (mm == NULL)
235                 return -ENOMEM;
236         mm->key.phy_addr = phy_addr;
237         mm->key.len = len;
238         INIT_LIST_HEAD(&mm->entry);
239
240         mutex_lock(&uctx->mm_list_lock);
241         list_add_tail(&mm->entry, &uctx->mm_head);
242         mutex_unlock(&uctx->mm_list_lock);
243         return 0;
244 }
245
246 static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
247                             unsigned long len)
248 {
249         struct ocrdma_mm *mm, *tmp;
250
251         mutex_lock(&uctx->mm_list_lock);
252         list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
253                 if (len != mm->key.len && phy_addr != mm->key.phy_addr)
254                         continue;
255
256                 list_del(&mm->entry);
257                 kfree(mm);
258                 break;
259         }
260         mutex_unlock(&uctx->mm_list_lock);
261 }
262
263 static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
264                               unsigned long len)
265 {
266         bool found = false;
267         struct ocrdma_mm *mm;
268
269         mutex_lock(&uctx->mm_list_lock);
270         list_for_each_entry(mm, &uctx->mm_head, entry) {
271                 if (len != mm->key.len && phy_addr != mm->key.phy_addr)
272                         continue;
273
274                 found = true;
275                 break;
276         }
277         mutex_unlock(&uctx->mm_list_lock);
278         return found;
279 }
280
281
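/* When the firmware preallocates PDs (pd_prealloc_valid), PD numbers are
 * handed out from two bitmaps: one pool for DPP-capable PDs and one for
 * normal PDs; the *_thrsh fields track the high-water mark of PDs in use.
 */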
282 static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
283 {
284         u16 pd_bitmap_idx = 0;
285         const unsigned long *pd_bitmap;
286
287         if (dpp_pool) {
288                 pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
289                 pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
290                                                     dev->pd_mgr->max_dpp_pd);
291                 __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap);
292                 dev->pd_mgr->pd_dpp_count++;
293                 if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
294                         dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
295         } else {
296                 pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
297                 pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
298                                                     dev->pd_mgr->max_normal_pd);
299                 __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap);
300                 dev->pd_mgr->pd_norm_count++;
301                 if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
302                         dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
303         }
304         return pd_bitmap_idx;
305 }
306
307 static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
308                                         bool dpp_pool)
309 {
310         u16 pd_count;
311         u16 pd_bit_index;
312
313         pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count :
314                               dev->pd_mgr->pd_norm_count;
315         if (pd_count == 0)
316                 return -EINVAL;
317
318         if (dpp_pool) {
319                 pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start;
320                 if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) {
321                         return -EINVAL;
322                 } else {
323                         __clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap);
324                         dev->pd_mgr->pd_dpp_count--;
325                 }
326         } else {
327                 pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start;
328                 if (pd_bit_index >= dev->pd_mgr->max_normal_pd) {
329                         return -EINVAL;
330                 } else {
331                         __clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap);
332                         dev->pd_mgr->pd_norm_count--;
333                 }
334         }
335
336         return 0;
337 }
338
339 static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
340                                    bool dpp_pool)
341 {
342         int status;
343
344         mutex_lock(&dev->dev_lock);
345         status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool);
346         mutex_unlock(&dev->dev_lock);
347         return status;
348 }
349
350 static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
351 {
352         u16 pd_idx = 0;
353         int status = 0;
354
355         mutex_lock(&dev->dev_lock);
356         if (pd->dpp_enabled) {
357                 /* try allocating a DPP PD; fall back to a normal PD if unavailable */
358                 if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) {
359                         pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true);
360                         pd->id = dev->pd_mgr->pd_dpp_start + pd_idx;
361                         pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx;
362                 } else if (dev->pd_mgr->pd_norm_count <
363                            dev->pd_mgr->max_normal_pd) {
364                         pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
365                         pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
366                         pd->dpp_enabled = false;
367                 } else {
368                         status = -EINVAL;
369                 }
370         } else {
371                 if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) {
372                         pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
373                         pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
374                 } else {
375                         status = -EINVAL;
376                 }
377         }
378         mutex_unlock(&dev->dev_lock);
379         return status;
380 }
381
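/* Allocate a protection domain. For user contexts on SKH-R ASICs the PD is
 * created DPP-enabled, with num_dpp_qp sized from the doorbell page size;
 * if the firmware cannot allocate a DPP PD, the allocation is retried with
 * DPP disabled so a normal PD is returned instead.
 */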
382 static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
383                                           struct ocrdma_ucontext *uctx,
384                                           struct ib_udata *udata)
385 {
386         struct ocrdma_pd *pd = NULL;
387         int status;
388
389         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
390         if (!pd)
391                 return ERR_PTR(-ENOMEM);
392
393         if (udata && uctx && dev->attr.max_dpp_pds) {
394                 pd->dpp_enabled =
395                         ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
396                 pd->num_dpp_qp =
397                         pd->dpp_enabled ? (dev->nic_info.db_page_size /
398                                            dev->attr.wqe_size) : 0;
399         }
400
401         if (dev->pd_mgr->pd_prealloc_valid) {
402                 status = ocrdma_get_pd_num(dev, pd);
403                 if (status == 0) {
404                         return pd;
405                 } else {
406                         kfree(pd);
407                         return ERR_PTR(status);
408                 }
409         }
410
411 retry:
412         status = ocrdma_mbx_alloc_pd(dev, pd);
413         if (status) {
414                 if (pd->dpp_enabled) {
415                         pd->dpp_enabled = false;
416                         pd->num_dpp_qp = 0;
417                         goto retry;
418                 } else {
419                         kfree(pd);
420                         return ERR_PTR(status);
421                 }
422         }
423
424         return pd;
425 }
426
427 static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
428                                  struct ocrdma_pd *pd)
429 {
430         return (uctx->cntxt_pd == pd);
431 }
432
433 static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
434                               struct ocrdma_pd *pd)
435 {
436         int status;
437
438         if (dev->pd_mgr->pd_prealloc_valid)
439                 status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
440         else
441                 status = ocrdma_mbx_dealloc_pd(dev, pd);
442
443         kfree(pd);
444         return status;
445 }
446
447 static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
448                                     struct ocrdma_ucontext *uctx,
449                                     struct ib_udata *udata)
450 {
451         int status = 0;
452
453         uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
454         if (IS_ERR(uctx->cntxt_pd)) {
455                 status = PTR_ERR(uctx->cntxt_pd);
456                 uctx->cntxt_pd = NULL;
457                 goto err;
458         }
459
460         uctx->cntxt_pd->uctx = uctx;
461         uctx->cntxt_pd->ibpd.device = &dev->ibdev;
462 err:
463         return status;
464 }
465
466 static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
467 {
468         struct ocrdma_pd *pd = uctx->cntxt_pd;
469         struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
470
471         if (uctx->pd_in_use) {
472                 pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
473                        __func__, dev->id, pd->id);
474         }
475         uctx->cntxt_pd = NULL;
476         (void)_ocrdma_dealloc_pd(dev, pd);
477         return 0;
478 }
479
480 static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
481 {
482         struct ocrdma_pd *pd = NULL;
483
484         mutex_lock(&uctx->mm_list_lock);
485         if (!uctx->pd_in_use) {
486                 uctx->pd_in_use = true;
487                 pd = uctx->cntxt_pd;
488         }
489         mutex_unlock(&uctx->mm_list_lock);
490
491         return pd;
492 }
493
494 static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
495 {
496         mutex_lock(&uctx->mm_list_lock);
497         uctx->pd_in_use = false;
498         mutex_unlock(&uctx->mm_list_lock);
499 }
500
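/* Create a user context: allocate a DMA-coherent address-handle table,
 * register its physical range as a valid mmap target, set up the
 * context-wide PD, and return the layout to userspace via
 * struct ocrdma_alloc_ucontext_resp.
 */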
501 struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
502                                           struct ib_udata *udata)
503 {
504         int status;
505         struct ocrdma_ucontext *ctx;
506         struct ocrdma_alloc_ucontext_resp resp;
507         struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
508         struct pci_dev *pdev = dev->nic_info.pdev;
509         u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
510
511         if (!udata)
512                 return ERR_PTR(-EFAULT);
513         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
514         if (!ctx)
515                 return ERR_PTR(-ENOMEM);
516         INIT_LIST_HEAD(&ctx->mm_head);
517         mutex_init(&ctx->mm_list_lock);
518
519         ctx->ah_tbl.va = dma_zalloc_coherent(&pdev->dev, map_len,
520                                              &ctx->ah_tbl.pa, GFP_KERNEL);
521         if (!ctx->ah_tbl.va) {
522                 kfree(ctx);
523                 return ERR_PTR(-ENOMEM);
524         }
525         ctx->ah_tbl.len = map_len;
526
527         memset(&resp, 0, sizeof(resp));
528         resp.ah_tbl_len = ctx->ah_tbl.len;
529         resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
530
531         status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
532         if (status)
533                 goto map_err;
534
535         status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
536         if (status)
537                 goto pd_err;
538
539         resp.dev_id = dev->id;
540         resp.max_inline_data = dev->attr.max_inline_data;
541         resp.wqe_size = dev->attr.wqe_size;
542         resp.rqe_size = dev->attr.rqe_size;
543         resp.dpp_wqe_size = dev->attr.wqe_size;
544
545         memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
546         status = ib_copy_to_udata(udata, &resp, sizeof(resp));
547         if (status)
548                 goto cpy_err;
549         return &ctx->ibucontext;
550
551 cpy_err:
552 pd_err:
553         ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
554 map_err:
555         dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
556                           ctx->ah_tbl.pa);
557         kfree(ctx);
558         return ERR_PTR(status);
559 }
560
561 int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
562 {
563         int status;
564         struct ocrdma_mm *mm, *tmp;
565         struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
566         struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
567         struct pci_dev *pdev = dev->nic_info.pdev;
568
569         status = ocrdma_dealloc_ucontext_pd(uctx);
570
571         ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
572         dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
573                           uctx->ah_tbl.pa);
574
575         list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
576                 list_del(&mm->entry);
577                 kfree(mm);
578         }
579         kfree(uctx);
580         return status;
581 }
582
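/* mmap() handler: the offset selects either a doorbell page (mapped
 * non-cached), a DPP region (mapped write-combined) or ordinary queue
 * memory. Only ranges previously registered with ocrdma_add_mmap() are
 * allowed, and the doorbell/DPP windows must not be mapped readable.
 */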
583 int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
584 {
585         struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
586         struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
587         unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
588         u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
589         unsigned long len = (vma->vm_end - vma->vm_start);
590         int status;
591         bool found;
592
593         if (vma->vm_start & (PAGE_SIZE - 1))
594                 return -EINVAL;
595         found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
596         if (!found)
597                 return -EINVAL;
598
599         if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
600                 dev->nic_info.db_total_size)) &&
601                 (len <= dev->nic_info.db_page_size)) {
602                 if (vma->vm_flags & VM_READ)
603                         return -EPERM;
604
605                 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
606                 status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
607                                             len, vma->vm_page_prot);
608         } else if (dev->nic_info.dpp_unmapped_len &&
609                 (vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
610                 (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
611                         dev->nic_info.dpp_unmapped_len)) &&
612                 (len <= dev->nic_info.dpp_unmapped_len)) {
613                 if (vma->vm_flags & VM_READ)
614                         return -EPERM;
615
616                 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
617                 status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
618                                             len, vma->vm_page_prot);
619         } else {
620                 status = remap_pfn_range(vma, vma->vm_start,
621                                          vma->vm_pgoff, len, vma->vm_page_prot);
622         }
623         return status;
624 }
625
626 static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
627                                 struct ib_ucontext *ib_ctx,
628                                 struct ib_udata *udata)
629 {
630         int status;
631         u64 db_page_addr;
632         u64 dpp_page_addr = 0;
633         u32 db_page_size;
634         struct ocrdma_alloc_pd_uresp rsp;
635         struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
636
637         memset(&rsp, 0, sizeof(rsp));
638         rsp.id = pd->id;
639         rsp.dpp_enabled = pd->dpp_enabled;
640         db_page_addr = ocrdma_get_db_addr(dev, pd->id);
641         db_page_size = dev->nic_info.db_page_size;
642
643         status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
644         if (status)
645                 return status;
646
647         if (pd->dpp_enabled) {
648                 dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
649                                 (pd->id * PAGE_SIZE);
650                 status = ocrdma_add_mmap(uctx, dpp_page_addr,
651                                  PAGE_SIZE);
652                 if (status)
653                         goto dpp_map_err;
654                 rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
655                 rsp.dpp_page_addr_lo = dpp_page_addr;
656         }
657
658         status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
659         if (status)
660                 goto ucopy_err;
661
662         pd->uctx = uctx;
663         return 0;
664
665 ucopy_err:
666         if (pd->dpp_enabled)
667                 ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
668 dpp_map_err:
669         ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
670         return status;
671 }
672
673 struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
674                               struct ib_ucontext *context,
675                               struct ib_udata *udata)
676 {
677         struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
678         struct ocrdma_pd *pd;
679         struct ocrdma_ucontext *uctx = NULL;
680         int status;
681         u8 is_uctx_pd = false;
682
683         if (udata && context) {
684                 uctx = get_ocrdma_ucontext(context);
685                 pd = ocrdma_get_ucontext_pd(uctx);
686                 if (pd) {
687                         is_uctx_pd = true;
688                         goto pd_mapping;
689                 }
690         }
691
692         pd = _ocrdma_alloc_pd(dev, uctx, udata);
693         if (IS_ERR(pd)) {
694                 status = PTR_ERR(pd);
695                 goto exit;
696         }
697
698 pd_mapping:
699         if (udata && context) {
700                 status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
701                 if (status)
702                         goto err;
703         }
704         return &pd->ibpd;
705
706 err:
707         if (is_uctx_pd) {
708                 ocrdma_release_ucontext_pd(uctx);
709         } else {
710                 if (_ocrdma_dealloc_pd(dev, pd))
711                         pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__);
712         }
713 exit:
714         return ERR_PTR(status);
715 }
716
717 int ocrdma_dealloc_pd(struct ib_pd *ibpd)
718 {
719         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
720         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
721         struct ocrdma_ucontext *uctx = NULL;
722         int status = 0;
723         u64 usr_db;
724
725         uctx = pd->uctx;
726         if (uctx) {
727                 u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
728                         (pd->id * PAGE_SIZE);
729                 if (pd->dpp_enabled)
730                         ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
731                 usr_db = ocrdma_get_db_addr(dev, pd->id);
732                 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
733
734                 if (is_ucontext_pd(uctx, pd)) {
735                         ocrdma_release_ucontext_pd(uctx);
736                         return status;
737                 }
738         }
739         status = _ocrdma_dealloc_pd(dev, pd);
740         return status;
741 }
742
743 static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
744                             u32 pdid, int acc, u32 num_pbls, u32 addr_check)
745 {
746         int status;
747
748         mr->hwmr.fr_mr = 0;
749         mr->hwmr.local_rd = 1;
750         mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
751         mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
752         mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
753         mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
754         mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
755         mr->hwmr.num_pbls = num_pbls;
756
757         status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
758         if (status)
759                 return status;
760
761         mr->ibmr.lkey = mr->hwmr.lkey;
762         if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
763                 mr->ibmr.rkey = mr->hwmr.lkey;
764         return 0;
765 }
766
767 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
768 {
769         int status;
770         struct ocrdma_mr *mr;
771         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
772         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
773
774         if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
775                 pr_err("%s err, invalid access rights\n", __func__);
776                 return ERR_PTR(-EINVAL);
777         }
778
779         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
780         if (!mr)
781                 return ERR_PTR(-ENOMEM);
782
783         status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
784                                    OCRDMA_ADDR_CHECK_DISABLE);
785         if (status) {
786                 kfree(mr);
787                 return ERR_PTR(status);
788         }
789
790         return &mr->ibmr;
791 }
792
793 static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
794                                    struct ocrdma_hw_mr *mr)
795 {
796         struct pci_dev *pdev = dev->nic_info.pdev;
797         int i = 0;
798
799         if (mr->pbl_table) {
800                 for (i = 0; i < mr->num_pbls; i++) {
801                         if (!mr->pbl_table[i].va)
802                                 continue;
803                         dma_free_coherent(&pdev->dev, mr->pbl_size,
804                                           mr->pbl_table[i].va,
805                                           mr->pbl_table[i].pa);
806                 }
807                 kfree(mr->pbl_table);
808                 mr->pbl_table = NULL;
809         }
810 }
811
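/* Pick the smallest PBL size (a power-of-two multiple of
 * OCRDMA_MIN_HPAGE_SIZE) for which num_pbes page-buffer entries fit within
 * the device limit on PBLs per MR, and record the geometry in mr->hwmr.
 */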
812 static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
813                               u32 num_pbes)
814 {
815         u32 num_pbls = 0;
816         u32 idx = 0;
817         int status = 0;
818         u32 pbl_size;
819
820         do {
821                 pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
822                 if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
823                         status = -EFAULT;
824                         break;
825                 }
826                 num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
827                 num_pbls = num_pbls / (pbl_size / sizeof(u64));
828                 idx++;
829         } while (num_pbls >= dev->attr.max_num_mr_pbl);
830
831         mr->hwmr.num_pbes = num_pbes;
832         mr->hwmr.num_pbls = num_pbls;
833         mr->hwmr.pbl_size = pbl_size;
834         return status;
835 }
836
837 static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
838 {
839         int status = 0;
840         int i;
841         u32 dma_len = mr->pbl_size;
842         struct pci_dev *pdev = dev->nic_info.pdev;
843         void *va;
844         dma_addr_t pa;
845
846         mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
847                                 GFP_KERNEL);
848
849         if (!mr->pbl_table)
850                 return -ENOMEM;
851
852         for (i = 0; i < mr->num_pbls; i++) {
853                 va = dma_zalloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
854                 if (!va) {
855                         ocrdma_free_mr_pbl_tbl(dev, mr);
856                         status = -ENOMEM;
857                         break;
858                 }
859                 mr->pbl_table[i].va = va;
860                 mr->pbl_table[i].pa = pa;
861         }
862         return status;
863 }
864
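/* Walk the pinned umem scatterlist and write one little-endian page address
 * (split into pa_lo/pa_hi) per device page into the PBL table, moving to
 * the next PBL whenever the current one is full.
 */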
865 static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
866                             u32 num_pbes)
867 {
868         struct ocrdma_pbe *pbe;
869         struct scatterlist *sg;
870         struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
871         struct ib_umem *umem = mr->umem;
872         int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
873
874         if (!mr->hwmr.num_pbes)
875                 return;
876
877         pbe = (struct ocrdma_pbe *)pbl_tbl->va;
878         pbe_cnt = 0;
879
880         shift = umem->page_shift;
881
882         for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
883                 pages = sg_dma_len(sg) >> shift;
884                 for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
885                         /* store the page address in pbe */
886                         pbe->pa_lo =
887                             cpu_to_le32(sg_dma_address(sg) +
888                                         (pg_cnt << shift));
889                         pbe->pa_hi =
890                             cpu_to_le32(upper_32_bits(sg_dma_address(sg) +
891                                          (pg_cnt << shift)));
892                         pbe_cnt += 1;
893                         total_num_pbes += 1;
894                         pbe++;
895
896                         /* done building pbes; return so the caller can issue the mbx cmd. */
897                         if (total_num_pbes == num_pbes)
898                                 return;
899
900                         /* if the current pbl is full, move on to the
901                          * next pbl.
902                          */
903                         if (pbe_cnt ==
904                                 (mr->hwmr.pbl_size / sizeof(u64))) {
905                                 pbl_tbl++;
906                                 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
907                                 pbe_cnt = 0;
908                         }
909
910                 }
911         }
912 }
913
914 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
915                                  u64 usr_addr, int acc, struct ib_udata *udata)
916 {
917         int status = -ENOMEM;
918         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
919         struct ocrdma_mr *mr;
920         struct ocrdma_pd *pd;
921         u32 num_pbes;
922
923         pd = get_ocrdma_pd(ibpd);
924
925         if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
926                 return ERR_PTR(-EINVAL);
927
928         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
929         if (!mr)
930                 return ERR_PTR(status);
931         mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
932         if (IS_ERR(mr->umem)) {
933                 status = -EFAULT;
934                 goto umem_err;
935         }
936         num_pbes = ib_umem_page_count(mr->umem);
937         status = ocrdma_get_pbl_info(dev, mr, num_pbes);
938         if (status)
939                 goto umem_err;
940
941         mr->hwmr.pbe_size = BIT(mr->umem->page_shift);
942         mr->hwmr.fbo = ib_umem_offset(mr->umem);
943         mr->hwmr.va = usr_addr;
944         mr->hwmr.len = len;
945         mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
946         mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
947         mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
948         mr->hwmr.local_rd = 1;
949         mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
950         status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
951         if (status)
952                 goto umem_err;
953         build_user_pbes(dev, mr, num_pbes);
954         status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
955         if (status)
956                 goto mbx_err;
957         mr->ibmr.lkey = mr->hwmr.lkey;
958         if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
959                 mr->ibmr.rkey = mr->hwmr.lkey;
960
961         return &mr->ibmr;
962
963 mbx_err:
964         ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
965 umem_err:
966         kfree(mr);
967         return ERR_PTR(status);
968 }
969
970 int ocrdma_dereg_mr(struct ib_mr *ib_mr)
971 {
972         struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
973         struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
974
975         (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
976
977         kfree(mr->pages);
978         ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
979
980         /* it could be user registered memory. */
981         if (mr->umem)
982                 ib_umem_release(mr->umem);
983         kfree(mr);
984
985         /* Don't stop cleanup, in case FW is unresponsive */
986         if (dev->mqe_ctx.fw_error_state) {
987                 pr_err("%s(%d) fw not responding.\n",
988                        __func__, dev->id);
989         }
990         return 0;
991 }
992
993 static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
994                                 struct ib_udata *udata,
995                                 struct ib_ucontext *ib_ctx)
996 {
997         int status;
998         struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
999         struct ocrdma_create_cq_uresp uresp;
1000
1001         memset(&uresp, 0, sizeof(uresp));
1002         uresp.cq_id = cq->id;
1003         uresp.page_size = PAGE_ALIGN(cq->len);
1004         uresp.num_pages = 1;
1005         uresp.max_hw_cqe = cq->max_hw_cqe;
1006         uresp.page_addr[0] = virt_to_phys(cq->va);
1007         uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
1008         uresp.db_page_size = dev->nic_info.db_page_size;
1009         uresp.phase_change = cq->phase_change ? 1 : 0;
1010         status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1011         if (status) {
1012                 pr_err("%s(%d) copy error cqid=0x%x.\n",
1013                        __func__, dev->id, cq->id);
1014                 goto err;
1015         }
1016         status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
1017         if (status)
1018                 goto err;
1019         status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
1020         if (status) {
1021                 ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
1022                 goto err;
1023         }
1024         cq->ucontext = uctx;
1025 err:
1026         return status;
1027 }
1028
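/* Create a completion queue via a mailbox command; for user CQs the queue
 * pages and the matching doorbell page are exported to userspace through
 * ocrdma_copy_cq_uresp() so they can be mmap()ed.
 */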
1029 struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
1030                                const struct ib_cq_init_attr *attr,
1031                                struct ib_ucontext *ib_ctx,
1032                                struct ib_udata *udata)
1033 {
1034         int entries = attr->cqe;
1035         struct ocrdma_cq *cq;
1036         struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
1037         struct ocrdma_ucontext *uctx = NULL;
1038         u16 pd_id = 0;
1039         int status;
1040         struct ocrdma_create_cq_ureq ureq;
1041
1042         if (attr->flags)
1043                 return ERR_PTR(-EINVAL);
1044
1045         if (udata) {
1046                 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1047                         return ERR_PTR(-EFAULT);
1048         } else
1049                 ureq.dpp_cq = 0;
1050         cq = kzalloc(sizeof(*cq), GFP_KERNEL);
1051         if (!cq)
1052                 return ERR_PTR(-ENOMEM);
1053
1054         spin_lock_init(&cq->cq_lock);
1055         spin_lock_init(&cq->comp_handler_lock);
1056         INIT_LIST_HEAD(&cq->sq_head);
1057         INIT_LIST_HEAD(&cq->rq_head);
1058
1059         if (ib_ctx) {
1060                 uctx = get_ocrdma_ucontext(ib_ctx);
1061                 pd_id = uctx->cntxt_pd->id;
1062         }
1063
1064         status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
1065         if (status) {
1066                 kfree(cq);
1067                 return ERR_PTR(status);
1068         }
1069         if (ib_ctx) {
1070                 status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
1071                 if (status)
1072                         goto ctx_err;
1073         }
1074         cq->phase = OCRDMA_CQE_VALID;
1075         dev->cq_tbl[cq->id] = cq;
1076         return &cq->ibcq;
1077
1078 ctx_err:
1079         ocrdma_mbx_destroy_cq(dev, cq);
1080         kfree(cq);
1081         return ERR_PTR(status);
1082 }
1083
1084 int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
1085                      struct ib_udata *udata)
1086 {
1087         int status = 0;
1088         struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1089
1090         if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
1091                 status = -EINVAL;
1092                 return status;
1093         }
1094         ibcq->cqe = new_cnt;
1095         return status;
1096 }
1097
1098 static void ocrdma_flush_cq(struct ocrdma_cq *cq)
1099 {
1100         int cqe_cnt;
1101         int valid_count = 0;
1102         unsigned long flags;
1103
1104         struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
1105         struct ocrdma_cqe *cqe = NULL;
1106
1107         cqe = cq->va;
1108         cqe_cnt = cq->cqe_cnt;
1109
1110         /* The last irq might have scheduled a polling thread;
1111          * sync up with it before hard flushing.
1112          */
1113         spin_lock_irqsave(&cq->cq_lock, flags);
1114         while (cqe_cnt) {
1115                 if (is_cqe_valid(cq, cqe))
1116                         valid_count++;
1117                 cqe++;
1118                 cqe_cnt--;
1119         }
1120         ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
1121         spin_unlock_irqrestore(&cq->cq_lock, flags);
1122 }
1123
1124 int ocrdma_destroy_cq(struct ib_cq *ibcq)
1125 {
1126         struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1127         struct ocrdma_eq *eq = NULL;
1128         struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
1129         int pdid = 0;
1130         u32 irq, indx;
1131
1132         dev->cq_tbl[cq->id] = NULL;
1133         indx = ocrdma_get_eq_table_index(dev, cq->eqn);
1134         BUG_ON(indx == -EINVAL);
1135
1136         eq = &dev->eq_tbl[indx];
1137         irq = ocrdma_get_irq(dev, eq);
1138         synchronize_irq(irq);
1139         ocrdma_flush_cq(cq);
1140
1141         (void)ocrdma_mbx_destroy_cq(dev, cq);
1142         if (cq->ucontext) {
1143                 pdid = cq->ucontext->cntxt_pd->id;
1144                 ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
1145                                 PAGE_ALIGN(cq->len));
1146                 ocrdma_del_mmap(cq->ucontext,
1147                                 ocrdma_get_db_addr(dev, pdid),
1148                                 dev->nic_info.db_page_size);
1149         }
1150
1151         kfree(cq);
1152         return 0;
1153 }
1154
1155 static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1156 {
1157         int status = -EINVAL;
1158
1159         if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
1160                 dev->qp_tbl[qp->id] = qp;
1161                 status = 0;
1162         }
1163         return status;
1164 }
1165
1166 static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1167 {
1168         dev->qp_tbl[qp->id] = NULL;
1169 }
1170
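/* Validate ib_qp_init_attr against device limits: supported QP types,
 * WQE/RQE/SGE and inline-data maxima, and the GSI (QP1) restrictions.
 */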
1171 static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
1172                                   struct ib_qp_init_attr *attrs)
1173 {
1174         if ((attrs->qp_type != IB_QPT_GSI) &&
1175             (attrs->qp_type != IB_QPT_RC) &&
1176             (attrs->qp_type != IB_QPT_UC) &&
1177             (attrs->qp_type != IB_QPT_UD)) {
1178                 pr_err("%s(%d) unsupported qp type=0x%x requested\n",
1179                        __func__, dev->id, attrs->qp_type);
1180                 return -EINVAL;
1181         }
1182         /* Skip the check for QP1 to support CM size of 128 */
1183         if ((attrs->qp_type != IB_QPT_GSI) &&
1184             (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
1185                 pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
1186                        __func__, dev->id, attrs->cap.max_send_wr);
1187                 pr_err("%s(%d) supported send_wr=0x%x\n",
1188                        __func__, dev->id, dev->attr.max_wqe);
1189                 return -EINVAL;
1190         }
1191         if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
1192                 pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
1193                        __func__, dev->id, attrs->cap.max_recv_wr);
1194                 pr_err("%s(%d) supported recv_wr=0x%x\n",
1195                        __func__, dev->id, dev->attr.max_rqe);
1196                 return -EINVAL;
1197         }
1198         if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
1199                 pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
1200                        __func__, dev->id, attrs->cap.max_inline_data);
1201                 pr_err("%s(%d) supported inline data size=0x%x\n",
1202                        __func__, dev->id, dev->attr.max_inline_data);
1203                 return -EINVAL;
1204         }
1205         if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
1206                 pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
1207                        __func__, dev->id, attrs->cap.max_send_sge);
1208                 pr_err("%s(%d) supported send_sge=0x%x\n",
1209                        __func__, dev->id, dev->attr.max_send_sge);
1210                 return -EINVAL;
1211         }
1212         if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
1213                 pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
1214                        __func__, dev->id, attrs->cap.max_recv_sge);
1215                 pr_err("%s(%d) supported recv_sge=0x%x\n",
1216                        __func__, dev->id, dev->attr.max_recv_sge);
1217                 return -EINVAL;
1218         }
1219         /* unprivileged user space cannot create special QP */
1220         if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1221                 pr_err
1222                     ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
1223                      __func__, dev->id, attrs->qp_type);
1224                 return -EINVAL;
1225         }
1226         /* allow creating only one GSI type of QP */
1227         if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
1228                 pr_err("%s(%d) GSI special QPs already created.\n",
1229                        __func__, dev->id);
1230                 return -EINVAL;
1231         }
1232         /* verify consumer QPs are not trying to use GSI QP's CQ */
1233         if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
1234                 if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
1235                         (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
1236                         pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
1237                                 __func__, dev->id);
1238                         return -EINVAL;
1239                 }
1240         }
1241         return 0;
1242 }
1243
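/* Tell userspace where the new QP lives: queue ids, page addresses and
 * sizes, doorbell page, offsets and any DPP credit, and register the SQ/RQ
 * pages as mmap-able ranges for this context.
 */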
1244 static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
1245                                 struct ib_udata *udata, int dpp_offset,
1246                                 int dpp_credit_lmt, int srq)
1247 {
1248         int status;
1249         u64 usr_db;
1250         struct ocrdma_create_qp_uresp uresp;
1251         struct ocrdma_pd *pd = qp->pd;
1252         struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
1253
1254         memset(&uresp, 0, sizeof(uresp));
1255         usr_db = dev->nic_info.unmapped_db +
1256                         (pd->id * dev->nic_info.db_page_size);
1257         uresp.qp_id = qp->id;
1258         uresp.sq_dbid = qp->sq.dbid;
1259         uresp.num_sq_pages = 1;
1260         uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
1261         uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
1262         uresp.num_wqe_allocated = qp->sq.max_cnt;
1263         if (!srq) {
1264                 uresp.rq_dbid = qp->rq.dbid;
1265                 uresp.num_rq_pages = 1;
1266                 uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
1267                 uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
1268                 uresp.num_rqe_allocated = qp->rq.max_cnt;
1269         }
1270         uresp.db_page_addr = usr_db;
1271         uresp.db_page_size = dev->nic_info.db_page_size;
1272         uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
1273         uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1274         uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
1275
1276         if (qp->dpp_enabled) {
1277                 uresp.dpp_credit = dpp_credit_lmt;
1278                 uresp.dpp_offset = dpp_offset;
1279         }
1280         status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1281         if (status) {
1282                 pr_err("%s(%d) user copy error.\n", __func__, dev->id);
1283                 goto err;
1284         }
1285         status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
1286                                  uresp.sq_page_size);
1287         if (status)
1288                 goto err;
1289
1290         if (!srq) {
1291                 status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
1292                                          uresp.rq_page_size);
1293                 if (status)
1294                         goto rq_map_err;
1295         }
1296         return status;
1297 rq_map_err:
1298         ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
1299 err:
1300         return status;
1301 }
1302
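/* Compute the SQ/RQ doorbell addresses for this QP: the per-PD doorbell
 * page plus an ASIC-generation specific offset (SKH-R uses the GEN2
 * offsets, earlier ASICs the original ones).
 */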
1303 static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
1304                              struct ocrdma_pd *pd)
1305 {
1306         if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1307                 qp->sq_db = dev->nic_info.db +
1308                         (pd->id * dev->nic_info.db_page_size) +
1309                         OCRDMA_DB_GEN2_SQ_OFFSET;
1310                 qp->rq_db = dev->nic_info.db +
1311                         (pd->id * dev->nic_info.db_page_size) +
1312                         OCRDMA_DB_GEN2_RQ_OFFSET;
1313         } else {
1314                 qp->sq_db = dev->nic_info.db +
1315                         (pd->id * dev->nic_info.db_page_size) +
1316                         OCRDMA_DB_SQ_OFFSET;
1317                 qp->rq_db = dev->nic_info.db +
1318                         (pd->id * dev->nic_info.db_page_size) +
1319                         OCRDMA_DB_RQ_OFFSET;
1320         }
1321 }
1322
1323 static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
1324 {
1325         qp->wqe_wr_id_tbl =
1326             kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
1327                     GFP_KERNEL);
1328         if (qp->wqe_wr_id_tbl == NULL)
1329                 return -ENOMEM;
1330         qp->rqe_wr_id_tbl =
1331             kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
1332         if (qp->rqe_wr_id_tbl == NULL)
1333                 return -ENOMEM;
1334
1335         return 0;
1336 }
1337
1338 static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1339                                       struct ocrdma_pd *pd,
1340                                       struct ib_qp_init_attr *attrs)
1341 {
1342         qp->pd = pd;
1343         spin_lock_init(&qp->q_lock);
1344         INIT_LIST_HEAD(&qp->sq_entry);
1345         INIT_LIST_HEAD(&qp->rq_entry);
1346
1347         qp->qp_type = attrs->qp_type;
1348         qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1349         qp->max_inline_data = attrs->cap.max_inline_data;
1350         qp->sq.max_sges = attrs->cap.max_send_sge;
1351         qp->rq.max_sges = attrs->cap.max_recv_sge;
1352         qp->state = OCRDMA_QPS_RST;
1353         qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1354 }
1355
1356 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1357                                    struct ib_qp_init_attr *attrs)
1358 {
1359         if (attrs->qp_type == IB_QPT_GSI) {
1360                 dev->gsi_qp_created = 1;
1361                 dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1362                 dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1363         }
1364 }
1365
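/* Create a queue pair: validate the request, create the hardware QP via a
 * mailbox command, allocate wr_id tracking tables (kernel QPs only), insert
 * the QP into the qpn map, set up its doorbells and, for user QPs, copy the
 * queue and DPP layout back through udata.
 */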
1366 struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
1367                                struct ib_qp_init_attr *attrs,
1368                                struct ib_udata *udata)
1369 {
1370         int status;
1371         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1372         struct ocrdma_qp *qp;
1373         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
1374         struct ocrdma_create_qp_ureq ureq;
1375         u16 dpp_credit_lmt, dpp_offset;
1376
1377         status = ocrdma_check_qp_params(ibpd, dev, attrs);
1378         if (status)
1379                 goto gen_err;
1380
1381         memset(&ureq, 0, sizeof(ureq));
1382         if (udata) {
1383                 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1384                         return ERR_PTR(-EFAULT);
1385         }
1386         qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1387         if (!qp) {
1388                 status = -ENOMEM;
1389                 goto gen_err;
1390         }
1391         ocrdma_set_qp_init_params(qp, pd, attrs);
1392         if (udata == NULL)
1393                 qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
1394                                         OCRDMA_QP_FAST_REG);
1395
1396         mutex_lock(&dev->dev_lock);
1397         status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
1398                                         ureq.dpp_cq_id,
1399                                         &dpp_offset, &dpp_credit_lmt);
1400         if (status)
1401                 goto mbx_err;
1402
1403         /* user space QPs' wr_id tables are managed in the library */
1404         if (udata == NULL) {
1405                 status = ocrdma_alloc_wr_id_tbl(qp);
1406                 if (status)
1407                         goto map_err;
1408         }
1409
1410         status = ocrdma_add_qpn_map(dev, qp);
1411         if (status)
1412                 goto map_err;
1413         ocrdma_set_qp_db(dev, qp, pd);
1414         if (udata) {
1415                 status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
1416                                               dpp_credit_lmt,
1417                                               (attrs->srq != NULL));
1418                 if (status)
1419                         goto cpy_err;
1420         }
1421         ocrdma_store_gsi_qp_cq(dev, attrs);
1422         qp->ibqp.qp_num = qp->id;
1423         mutex_unlock(&dev->dev_lock);
1424         return &qp->ibqp;
1425
1426 cpy_err:
1427         ocrdma_del_qpn_map(dev, qp);
1428 map_err:
1429         ocrdma_mbx_destroy_qp(dev, qp);
1430 mbx_err:
1431         mutex_unlock(&dev->dev_lock);
1432         kfree(qp->wqe_wr_id_tbl);
1433         kfree(qp->rqe_wr_id_tbl);
1434         kfree(qp);
1435         pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1436 gen_err:
1437         return ERR_PTR(status);
1438 }
1439
1440 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1441                       int attr_mask)
1442 {
1443         int status = 0;
1444         struct ocrdma_qp *qp;
1445         struct ocrdma_dev *dev;
1446         enum ib_qp_state old_qps;
1447
1448         qp = get_ocrdma_qp(ibqp);
1449         dev = get_ocrdma_dev(ibqp->device);
1450         if (attr_mask & IB_QP_STATE)
1451                 status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
1452         /* if the new and previous states are the same, the hw doesn't
1453          * need to know about it.
1454          */
1455         if (status < 0)
1456                 return status;
1457         return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
1458 }
1459
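/* Verbs entry point for modify_qp: validate the requested state transition
 * with ib_modify_qp_is_ok() under dev_lock before handing the change to
 * _ocrdma_modify_qp(), which programs the adapter via a mailbox command.
 */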
1460 int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1461                      int attr_mask, struct ib_udata *udata)
1462 {
1463         unsigned long flags;
1464         int status = -EINVAL;
1465         struct ocrdma_qp *qp;
1466         struct ocrdma_dev *dev;
1467         enum ib_qp_state old_qps, new_qps;
1468
1469         qp = get_ocrdma_qp(ibqp);
1470         dev = get_ocrdma_dev(ibqp->device);
1471
1472         /* synchronize with multiple contexts trying to change or retrieve the QP state */
1473         mutex_lock(&dev->dev_lock);
1474         /* synchronize with WQE/RQE posting and CQE processing contexts */
1475         spin_lock_irqsave(&qp->q_lock, flags);
1476         old_qps = get_ibqp_state(qp->state);
1477         if (attr_mask & IB_QP_STATE)
1478                 new_qps = attr->qp_state;
1479         else
1480                 new_qps = old_qps;
1481         spin_unlock_irqrestore(&qp->q_lock, flags);
1482
1483         if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
1484                                 IB_LINK_LAYER_ETHERNET)) {
1485                 pr_err("%s(%d) invalid attribute mask=0x%x specified for "
1486                        "qpn=0x%x of type=0x%x, old_qps=0x%x, new_qps=0x%x\n",
1487                        __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1488                        old_qps, new_qps);
1489                 goto param_err;
1490         }
1491
1492         status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
1493         if (status > 0)
1494                 status = 0;
1495 param_err:
1496         mutex_unlock(&dev->dev_lock);
1497         return status;
1498 }
1499
1500 static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1501 {
1502         switch (mtu) {
1503         case 256:
1504                 return IB_MTU_256;
1505         case 512:
1506                 return IB_MTU_512;
1507         case 1024:
1508                 return IB_MTU_1024;
1509         case 2048:
1510                 return IB_MTU_2048;
1511         case 4096:
1512                 return IB_MTU_4096;
1513         default:
1514                 return IB_MTU_1024;
1515         }
1516 }
1517
1518 static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1519 {
1520         int ib_qp_acc_flags = 0;
1521
1522         if (qp_cap_flags & OCRDMA_QP_INB_WR)
1523                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1524         if (qp_cap_flags & OCRDMA_QP_INB_RD)
1525                 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1526         return ib_qp_acc_flags;
1527 }
1528
1529 int ocrdma_query_qp(struct ib_qp *ibqp,
1530                     struct ib_qp_attr *qp_attr,
1531                     int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1532 {
1533         int status;
1534         u32 qp_state;
1535         struct ocrdma_qp_params params;
1536         struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1537         struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1538
1539         memset(&params, 0, sizeof(params));
1540         mutex_lock(&dev->dev_lock);
1541         status = ocrdma_mbx_query_qp(dev, qp, &params);
1542         mutex_unlock(&dev->dev_lock);
1543         if (status)
1544                 goto mbx_err;
1545         if (qp->qp_type == IB_QPT_UD)
1546                 qp_attr->qkey = params.qkey;
1547         qp_attr->path_mtu =
1548                 ocrdma_mtu_int_to_enum((params.path_mtu_pkey_indx &
1549                                         OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
1550                                         OCRDMA_QP_PARAMS_PATH_MTU_SHIFT);
1551         qp_attr->path_mig_state = IB_MIG_MIGRATED;
1552         qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
1553         qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
1554         qp_attr->dest_qp_num =
1555             params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
1556
1557         qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
1558         qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
1559         qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
1560         qp_attr->cap.max_send_sge = qp->sq.max_sges;
1561         qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1562         qp_attr->cap.max_inline_data = qp->max_inline_data;
1563         qp_init_attr->cap = qp_attr->cap;
1564         qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
1565
1566         rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
1567                         params.rnt_rc_sl_fl &
1568                           OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
1569                         qp->sgid_idx,
1570                         (params.hop_lmt_rq_psn &
1571                          OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
1572                          OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
1573                         (params.tclass_sq_psn &
1574                          OCRDMA_QP_PARAMS_TCLASS_MASK) >>
1575                          OCRDMA_QP_PARAMS_TCLASS_SHIFT);
1576         rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
1577
1578         rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
1579         rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
1580                                            OCRDMA_QP_PARAMS_SL_MASK) >>
1581                                            OCRDMA_QP_PARAMS_SL_SHIFT);
1582         qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
1583                             OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
1584                                 OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
1585         qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
1586                               OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
1587                                 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
1588         qp_attr->retry_cnt =
1589             (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
1590                 OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
1591         qp_attr->min_rnr_timer = 0;
1592         qp_attr->pkey_index = 0;
1593         qp_attr->port_num = 1;
1594         rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
1595         rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
1596         qp_attr->alt_pkey_index = 0;
1597         qp_attr->alt_port_num = 0;
1598         qp_attr->alt_timeout = 0;
1599         memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1600         qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
1601                     OCRDMA_QP_PARAMS_STATE_SHIFT;
1602         qp_attr->qp_state = get_ibqp_state(qp_state);
1603         qp_attr->cur_qp_state = qp_attr->qp_state;
1604         qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
1605         qp_attr->max_dest_rd_atomic =
1606             params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
1607         qp_attr->max_rd_atomic =
1608             params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
1609         qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
1610                                 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
1611         /* Sync driver QP state with FW */
1612         ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL);
1613 mbx_err:
1614         return status;
1615 }
1616
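/*
 * SRQ shadow-table index bitmap: each bit in srq->idx_bit_fields tracks one
 * slot of srq->rqe_wr_id_tbl.  A set bit appears to mean "slot free" (the
 * bitmap is initialised to all ones in ocrdma_create_srq()); toggling a bit
 * claims the slot when an RQE is posted and releases it again when the
 * matching CQE is processed or discarded.
 */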
1617 static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx)
1618 {
1619         unsigned int i = idx / 32;
1620         u32 mask = (1U << (idx % 32));
1621
1622         srq->idx_bit_fields[i] ^= mask;
1623 }
1624
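/*
 * Number of additional WQEs/RQEs that can still be posted to a hardware
 * work-queue ring.  head is the producer index, tail the consumer index and
 * max_wqe_idx the ring mask used with '&' in ocrdma_hwq_inc_head() and
 * ocrdma_hwq_inc_tail() below; the expression is ordinary ring-buffer
 * arithmetic kept modulo max_cnt.
 */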
1625 static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1626 {
1627         return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
1628 }
1629
1630 static int is_hw_sq_empty(struct ocrdma_qp *qp)
1631 {
1632         return (qp->sq.tail == qp->sq.head);
1633 }
1634
1635 static int is_hw_rq_empty(struct ocrdma_qp *qp)
1636 {
1637         return (qp->rq.tail == qp->rq.head);
1638 }
1639
1640 static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1641 {
1642         return q->va + (q->head * q->entry_size);
1643 }
1644
1645 static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1646                                       u32 idx)
1647 {
1648         return q->va + (idx * q->entry_size);
1649 }
1650
1651 static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1652 {
1653         q->head = (q->head + 1) & q->max_wqe_idx;
1654 }
1655
1656 static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1657 {
1658         q->tail = (q->tail + 1) & q->max_wqe_idx;
1659 }
1660
1661 /* discard the CQEs for a given QP */
1662 static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
1663 {
1664         unsigned long cq_flags;
1665         unsigned long flags;
1666         int discard_cnt = 0;
1667         u32 cur_getp, stop_getp;
1668         struct ocrdma_cqe *cqe;
1669         u32 qpn = 0, wqe_idx = 0;
1670
1671         spin_lock_irqsave(&cq->cq_lock, cq_flags);
1672
1673         /* traverse the CQEs in the hw CQ,
1674          * find the CQEs that match the given QP and
1675          * mark them discarded by clearing the qpn.
1676          * The doorbell is rung in poll_cq() since
1677          * we do not complete CQEs out of order.
1678          */
1679
1680         cur_getp = cq->getp;
1681         /* find up to where we reap the CQ */
1682         stop_getp = cur_getp;
1683         do {
1684                 if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
1685                         break;
1686
1687                 cqe = cq->va + cur_getp;
1688                 /* exit when
1689                  *    (a) the whole hw CQ has been reaped, or
1690                  *    (b) the QP's SQ/RQ becomes empty.
1691                  */
1692                 qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
1693                 /* if previously discarded cqe found, skip that too. */
1694                 /* check for matching qp */
1695                 if (qpn == 0 || qpn != qp->id)
1696                         goto skip_cqe;
1697
1698                 if (is_cqe_for_sq(cqe)) {
1699                         ocrdma_hwq_inc_tail(&qp->sq);
1700                 } else {
1701                         if (qp->srq) {
1702                                 wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
1703                                         OCRDMA_CQE_BUFTAG_SHIFT) &
1704                                         qp->srq->rq.max_wqe_idx;
1705                                 BUG_ON(wqe_idx < 1);
1706                                 spin_lock_irqsave(&qp->srq->q_lock, flags);
1707                                 ocrdma_hwq_inc_tail(&qp->srq->rq);
1708                                 ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
1709                                 spin_unlock_irqrestore(&qp->srq->q_lock, flags);
1710
1711                         } else {
1712                                 ocrdma_hwq_inc_tail(&qp->rq);
1713                         }
1714                 }
1715                 /* mark cqe discarded so that it is not picked up later
1716                  * in the poll_cq().
1717                  */
1718                 discard_cnt += 1;
1719                 cqe->cmn.qpn = 0;
1720 skip_cqe:
1721                 cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
1722         } while (cur_getp != stop_getp);
1723         spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
1724 }
1725
1726 void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1727 {
1728         int found = false;
1729         unsigned long flags;
1730         struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
1731         /* sync with any active CQ poll */
1732
1733         spin_lock_irqsave(&dev->flush_q_lock, flags);
1734         found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1735         if (found)
1736                 list_del(&qp->sq_entry);
1737         if (!qp->srq) {
1738                 found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1739                 if (found)
1740                         list_del(&qp->rq_entry);
1741         }
1742         spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1743 }
1744
1745 int ocrdma_destroy_qp(struct ib_qp *ibqp)
1746 {
1747         struct ocrdma_pd *pd;
1748         struct ocrdma_qp *qp;
1749         struct ocrdma_dev *dev;
1750         struct ib_qp_attr attrs;
1751         int attr_mask;
1752         unsigned long flags;
1753
1754         qp = get_ocrdma_qp(ibqp);
1755         dev = get_ocrdma_dev(ibqp->device);
1756
1757         pd = qp->pd;
1758
1759         /* change the QP state to ERROR */
1760         if (qp->state != OCRDMA_QPS_RST) {
1761                 attrs.qp_state = IB_QPS_ERR;
1762                 attr_mask = IB_QP_STATE;
1763                 _ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1764         }
1765         /* Ensure that CQEs for a newly created QP (whose id may be the
1766          * same as that of the QP just being destroyed) don't get
1767          * discarded until the old QP's CQEs are discarded.
1768          */
1769         mutex_lock(&dev->dev_lock);
1770         (void) ocrdma_mbx_destroy_qp(dev, qp);
1771
1772         /*
1773          * acquire CQ lock while destroy is in progress, in order to
1774          * protect against processing in-flight CQEs for this QP.
1775          */
1776         spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1777         if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
1778                 spin_lock(&qp->rq_cq->cq_lock);
1779                 ocrdma_del_qpn_map(dev, qp);
1780                 spin_unlock(&qp->rq_cq->cq_lock);
1781         } else {
1782                 ocrdma_del_qpn_map(dev, qp);
1783         }
1784         spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1785
1786         if (!pd->uctx) {
1787                 ocrdma_discard_cqes(qp, qp->sq_cq);
1788                 ocrdma_discard_cqes(qp, qp->rq_cq);
1789         }
1790         mutex_unlock(&dev->dev_lock);
1791
1792         if (pd->uctx) {
1793                 ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
1794                                 PAGE_ALIGN(qp->sq.len));
1795                 if (!qp->srq)
1796                         ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
1797                                         PAGE_ALIGN(qp->rq.len));
1798         }
1799
1800         ocrdma_del_flush_qp(qp);
1801
1802         kfree(qp->wqe_wr_id_tbl);
1803         kfree(qp->rqe_wr_id_tbl);
1804         kfree(qp);
1805         return 0;
1806 }
1807
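/*
 * Copy the SRQ creation response to user space: the RQ doorbell id, the
 * physical address and size of the RQ buffer (registered via
 * ocrdma_add_mmap() so the user library can mmap it), and the doorbell page
 * layout.  SKH-R (GEN2) adapters use a different RQ doorbell offset and
 * shift than the older chips.
 */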
1808 static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
1809                                 struct ib_udata *udata)
1810 {
1811         int status;
1812         struct ocrdma_create_srq_uresp uresp;
1813
1814         memset(&uresp, 0, sizeof(uresp));
1815         uresp.rq_dbid = srq->rq.dbid;
1816         uresp.num_rq_pages = 1;
1817         uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
1818         uresp.rq_page_size = srq->rq.len;
1819         uresp.db_page_addr = dev->nic_info.unmapped_db +
1820             (srq->pd->id * dev->nic_info.db_page_size);
1821         uresp.db_page_size = dev->nic_info.db_page_size;
1822         uresp.num_rqe_allocated = srq->rq.max_cnt;
1823         if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1824                 uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1825                 uresp.db_shift = 24;
1826         } else {
1827                 uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1828                 uresp.db_shift = 16;
1829         }
1830
1831         status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1832         if (status)
1833                 return status;
1834         return ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1835                                uresp.rq_page_size);
1839 }
1840
1841 struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1842                                  struct ib_srq_init_attr *init_attr,
1843                                  struct ib_udata *udata)
1844 {
1845         int status = -ENOMEM;
1846         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1847         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
1848         struct ocrdma_srq *srq;
1849
1850         if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1851                 return ERR_PTR(-EINVAL);
1852         if (init_attr->attr.max_wr > dev->attr.max_rqe)
1853                 return ERR_PTR(-EINVAL);
1854
1855         srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1856         if (!srq)
1857                 return ERR_PTR(status);
1858
1859         spin_lock_init(&srq->q_lock);
1860         srq->pd = pd;
1861         srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1862         status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
1863         if (status)
1864                 goto err;
1865
1866         if (udata == NULL) {
1867                 status = -ENOMEM;
1868                 srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64),
1869                                              GFP_KERNEL);
1870                 if (srq->rqe_wr_id_tbl == NULL)
1871                         goto arm_err;
1872
1873                 srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1874                     (srq->rq.max_cnt % 32 ? 1 : 0);
1875                 srq->idx_bit_fields =
1876                     kmalloc_array(srq->bit_fields_len, sizeof(u32),
1877                                   GFP_KERNEL);
1878                 if (srq->idx_bit_fields == NULL)
1879                         goto arm_err;
1880                 memset(srq->idx_bit_fields, 0xff,
1881                        srq->bit_fields_len * sizeof(u32));
1882         }
1883
1884         if (init_attr->attr.srq_limit) {
1885                 status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1886                 if (status)
1887                         goto arm_err;
1888         }
1889
1890         if (udata) {
1891                 status = ocrdma_copy_srq_uresp(dev, srq, udata);
1892                 if (status)
1893                         goto arm_err;
1894         }
1895
1896         return &srq->ibsrq;
1897
1898 arm_err:
1899         ocrdma_mbx_destroy_srq(dev, srq);
1900 err:
1901         kfree(srq->rqe_wr_id_tbl);
1902         kfree(srq->idx_bit_fields);
1903         kfree(srq);
1904         return ERR_PTR(status);
1905 }
1906
1907 int ocrdma_modify_srq(struct ib_srq *ibsrq,
1908                       struct ib_srq_attr *srq_attr,
1909                       enum ib_srq_attr_mask srq_attr_mask,
1910                       struct ib_udata *udata)
1911 {
1912         int status;
1913         struct ocrdma_srq *srq;
1914
1915         srq = get_ocrdma_srq(ibsrq);
1916         if (srq_attr_mask & IB_SRQ_MAX_WR)
1917                 status = -EINVAL;
1918         else
1919                 status = ocrdma_mbx_modify_srq(srq, srq_attr);
1920         return status;
1921 }
1922
1923 int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1924 {
1925         int status;
1926         struct ocrdma_srq *srq;
1927
1928         srq = get_ocrdma_srq(ibsrq);
1929         status = ocrdma_mbx_query_srq(srq, srq_attr);
1930         return status;
1931 }
1932
1933 int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1934 {
1935         int status;
1936         struct ocrdma_srq *srq;
1937         struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1938
1939         srq = get_ocrdma_srq(ibsrq);
1940
1941         status = ocrdma_mbx_destroy_srq(dev, srq);
1942
1943         if (srq->pd->uctx)
1944                 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
1945                                 PAGE_ALIGN(srq->rq.len));
1946
1947         kfree(srq->idx_bit_fields);
1948         kfree(srq->rqe_wr_id_tbl);
1949         kfree(srq);
1950         return status;
1951 }
1952
1953 /* unprivileged verbs and their support functions. */
1954 static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1955                                 struct ocrdma_hdr_wqe *hdr,
1956                                 const struct ib_send_wr *wr)
1957 {
1958         struct ocrdma_ewqe_ud_hdr *ud_hdr =
1959                 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1960         struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
1961
1962         ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
1963         if (qp->qp_type == IB_QPT_GSI)
1964                 ud_hdr->qkey = qp->qkey;
1965         else
1966                 ud_hdr->qkey = ud_wr(wr)->remote_qkey;
1967         ud_hdr->rsvd_ahid = ah->id;
1968         ud_hdr->hdr_type = ah->hdr_type;
1969         if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
1970                 hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
1971 }
1972
1973 static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1974                               struct ocrdma_sge *sge, int num_sge,
1975                               struct ib_sge *sg_list)
1976 {
1977         int i;
1978
1979         for (i = 0; i < num_sge; i++) {
1980                 sge[i].lrkey = sg_list[i].lkey;
1981                 sge[i].addr_lo = sg_list[i].addr;
1982                 sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1983                 sge[i].len = sg_list[i].length;
1984                 hdr->total_len += sg_list[i].length;
1985         }
1986         if (num_sge == 0)
1987                 memset(sge, 0, sizeof(*sge));
1988 }
1989
1990 static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
1991 {
1992         uint32_t total_len = 0, i;
1993
1994         for (i = 0; i < num_sge; i++)
1995                 total_len += sg_list[i].length;
1996         return total_len;
1997 }
1998
1999
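/*
 * Fill the data portion of a send WQE.  For IB_SEND_INLINE on non-UD QPs the
 * payload bytes are copied directly into the WQE (up to qp->max_inline_data)
 * and the WQE is typed OCRDMA_TYPE_INLINE; otherwise a normal SGE list is
 * built and the WQE is typed OCRDMA_TYPE_LKEY.  In both cases the final WQE
 * size, in OCRDMA_WQE_STRIDE units, is encoded into hdr->cw.
 */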
2000 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
2001                                     struct ocrdma_hdr_wqe *hdr,
2002                                     struct ocrdma_sge *sge,
2003                                     const struct ib_send_wr *wr, u32 wqe_size)
2004 {
2005         int i;
2006         char *dpp_addr;
2007
2008         if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
2009                 hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
2010                 if (unlikely(hdr->total_len > qp->max_inline_data)) {
2011                         pr_err("%s() supported_len=0x%x, unsupported len req=0x%x\n",
2012                                __func__, qp->max_inline_data,
2013                                hdr->total_len);
2014                         return -EINVAL;
2015                 }
2016                 dpp_addr = (char *)sge;
2017                 for (i = 0; i < wr->num_sge; i++) {
2018                         memcpy(dpp_addr,
2019                                (void *)(unsigned long)wr->sg_list[i].addr,
2020                                wr->sg_list[i].length);
2021                         dpp_addr += wr->sg_list[i].length;
2022                 }
2023
2024                 wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
2025                 if (hdr->total_len == 0)
2026                         wqe_size += sizeof(struct ocrdma_sge);
2027                 hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
2028         } else {
2029                 ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
2030                 if (wr->num_sge)
2031                         wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
2032                 else
2033                         wqe_size += sizeof(struct ocrdma_sge);
2034                 hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2035         }
2036         hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2037         return 0;
2038 }
2039
2040 static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2041                              const struct ib_send_wr *wr)
2042 {
2043         int status;
2044         struct ocrdma_sge *sge;
2045         u32 wqe_size = sizeof(*hdr);
2046
2047         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2048                 ocrdma_build_ud_hdr(qp, hdr, wr);
2049                 sge = (struct ocrdma_sge *)(hdr + 2);
2050                 wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
2051         } else {
2052                 sge = (struct ocrdma_sge *)(hdr + 1);
2053         }
2054
2055         status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
2056         return status;
2057 }
2058
2059 static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2060                               const struct ib_send_wr *wr)
2061 {
2062         int status;
2063         struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
2064         struct ocrdma_sge *sge = ext_rw + 1;
2065         u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
2066
2067         status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
2068         if (status)
2069                 return status;
2070         ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2071         ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2072         ext_rw->lrkey = rdma_wr(wr)->rkey;
2073         ext_rw->len = hdr->total_len;
2074         return 0;
2075 }
2076
2077 static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2078                               const struct ib_send_wr *wr)
2079 {
2080         struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
2081         struct ocrdma_sge *sge = ext_rw + 1;
2082         u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
2083             sizeof(struct ocrdma_hdr_wqe);
2084
2085         ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
2086         hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2087         hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
2088         hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2089
2090         ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2091         ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2092         ext_rw->lrkey = rdma_wr(wr)->rkey;
2093         ext_rw->len = hdr->total_len;
2094 }
2095
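/*
 * Encode a page size as a power-of-two exponent relative to 4K for the
 * fast-register WQE: 4096 -> 0, 8192 -> 1, ... up to 256M (4096 << 16) -> 16.
 */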
2096 static int get_encoded_page_size(int pg_sz)
2097 {
2098         /* Max size is 256M (4096 << 16) */
2099         int i = 0;
2100         for (; i < 17; i++)
2101                 if (pg_sz == (4096 << i))
2102                         break;
2103         return i;
2104 }
2105
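/*
 * Build a fast-register (IB_WR_REG_MR) WQE: translate the ib_reg_wr access
 * flags, program the MR's iova, first-byte offset (fbo), length and page
 * size into the extended header, and copy the page addresses from mr->pages
 * (filled in when the MR is mapped) into the hardware PBL(s).
 */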
2106 static int ocrdma_build_reg(struct ocrdma_qp *qp,
2107                             struct ocrdma_hdr_wqe *hdr,
2108                             const struct ib_reg_wr *wr)
2109 {
2110         u64 fbo;
2111         struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
2112         struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
2113         struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
2114         struct ocrdma_pbe *pbe;
2115         u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
2116         int num_pbes = 0, i;
2117
2118         wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
2119
2120         hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
2121         hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2122
2123         if (wr->access & IB_ACCESS_LOCAL_WRITE)
2124                 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
2125         if (wr->access & IB_ACCESS_REMOTE_WRITE)
2126                 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
2127         if (wr->access & IB_ACCESS_REMOTE_READ)
2128                 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
2129         hdr->lkey = wr->key;
2130         hdr->total_len = mr->ibmr.length;
2131
2132         fbo = mr->ibmr.iova - mr->pages[0];
2133
2134         fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
2135         fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
2136         fast_reg->fbo_hi = upper_32_bits(fbo);
2137         fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
2138         fast_reg->num_sges = mr->npages;
2139         fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
2140
2141         pbe = pbl_tbl->va;
2142         for (i = 0; i < mr->npages; i++) {
2143                 u64 buf_addr = mr->pages[i];
2144
2145                 pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2146                 pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2147                 num_pbes += 1;
2148                 pbe++;
2149
2150                 /* if the current pbl is full of pbes,
2151                  * move to the next pbl.
2152                  */
2153                 if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
2154                         pbl_tbl++;
2155                         pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2156                 }
2157         }
2158
2159         return 0;
2160 }
2161
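/*
 * Ring the SQ doorbell: the low bits carry the SQ's doorbell id and the
 * field at OCRDMA_DB_SQ_SHIFT presumably carries the number of WQEs being
 * posted (always 1 here, since ocrdma_post_send() rings the doorbell once
 * per WQE).
 */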
2162 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
2163 {
2164         u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
2165
2166         iowrite32(val, qp->sq_db);
2167 }
2168
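/*
 * Post a chain of send WRs.  Under qp->q_lock: reject if the QP is not in
 * RTS/SQD, then for each WR check ring space and SGE limits, build the WQE
 * for the opcode, record the wr_id and signaled flag in the shadow table,
 * convert the WQE to little-endian, and ring the SQ doorbell after a write
 * barrier so the adapter only sees a fully written WQE.
 */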
2169 int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2170                      const struct ib_send_wr **bad_wr)
2171 {
2172         int status = 0;
2173         struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2174         struct ocrdma_hdr_wqe *hdr;
2175         unsigned long flags;
2176
2177         spin_lock_irqsave(&qp->q_lock, flags);
2178         if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
2179                 spin_unlock_irqrestore(&qp->q_lock, flags);
2180                 *bad_wr = wr;
2181                 return -EINVAL;
2182         }
2183
2184         while (wr) {
2185                 if (qp->qp_type == IB_QPT_UD &&
2186                     (wr->opcode != IB_WR_SEND &&
2187                      wr->opcode != IB_WR_SEND_WITH_IMM)) {
2188                         *bad_wr = wr;
2189                         status = -EINVAL;
2190                         break;
2191                 }
2192                 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
2193                     wr->num_sge > qp->sq.max_sges) {
2194                         *bad_wr = wr;
2195                         status = -ENOMEM;
2196                         break;
2197                 }
2198                 hdr = ocrdma_hwq_head(&qp->sq);
2199                 hdr->cw = 0;
2200                 if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2201                         hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2202                 if (wr->send_flags & IB_SEND_FENCE)
2203                         hdr->cw |=
2204                             (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
2205                 if (wr->send_flags & IB_SEND_SOLICITED)
2206                         hdr->cw |=
2207                             (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
2208                 hdr->total_len = 0;
2209                 switch (wr->opcode) {
2210                 case IB_WR_SEND_WITH_IMM:
2211                         hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2212                         hdr->immdt = ntohl(wr->ex.imm_data);
2213                         /* fall through */
2214                 case IB_WR_SEND:
2215                         hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2216                         status = ocrdma_build_send(qp, hdr, wr);
2217                         break;
2218                 case IB_WR_SEND_WITH_INV:
2219                         hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
2220                         hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2221                         hdr->lkey = wr->ex.invalidate_rkey;
2222                         status = ocrdma_build_send(qp, hdr, wr);
2223                         break;
2224                 case IB_WR_RDMA_WRITE_WITH_IMM:
2225                         hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2226                         hdr->immdt = ntohl(wr->ex.imm_data);
2227                         /* fall through */
2228                 case IB_WR_RDMA_WRITE:
2229                         hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
2230                         status = ocrdma_build_write(qp, hdr, wr);
2231                         break;
2232                 case IB_WR_RDMA_READ:
2233                         ocrdma_build_read(qp, hdr, wr);
2234                         break;
2235                 case IB_WR_LOCAL_INV:
2236                         hdr->cw |=
2237                             (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
2238                         hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
2239                                         sizeof(struct ocrdma_sge)) /
2240                                 OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
2241                         hdr->lkey = wr->ex.invalidate_rkey;
2242                         break;
2243                 case IB_WR_REG_MR:
2244                         status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
2245                         break;
2246                 default:
2247                         status = -EINVAL;
2248                         break;
2249                 }
2250                 if (status) {
2251                         *bad_wr = wr;
2252                         break;
2253                 }
2254                 if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2255                         qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
2256                 else
2257                         qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
2258                 qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
2259                 ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
2260                                    OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
2261                 /* make sure wqe is written before adapter can access it */
2262                 wmb();
2263                 /* inform hw to start processing it */
2264                 ocrdma_ring_sq_db(qp);
2265
2266                 /* update pointer, counter for next wr */
2267                 ocrdma_hwq_inc_head(&qp->sq);
2268                 wr = wr->next;
2269         }
2270         spin_unlock_irqrestore(&qp->q_lock, flags);
2271         return status;
2272 }
2273
2274 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
2275 {
2276         u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
2277
2278         iowrite32(val, qp->rq_db);
2279 }
2280
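/*
 * Build a receive-queue entry from an ib_recv_wr.  'tag' is the SRQ shadow
 * table index returned by ocrdma_srq_get_idx() (or 0 for an ordinary RQ);
 * the hardware appears to echo it back in the CQE's buftag field so the
 * wr_id can be looked up even when SRQ completions arrive out of order.
 */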
2281 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
2282                              const struct ib_recv_wr *wr, u16 tag)
2283 {
2284         u32 wqe_size = 0;
2285         struct ocrdma_sge *sge;
2286         if (wr->num_sge)
2287                 wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
2288         else
2289                 wqe_size = sizeof(*sge) + sizeof(*rqe);
2290
2291         rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
2292                                 OCRDMA_WQE_SIZE_SHIFT);
2293         rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2294         rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2295         rqe->total_len = 0;
2296         rqe->rsvd_tag = tag;
2297         sge = (struct ocrdma_sge *)(rqe + 1);
2298         ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
2299         ocrdma_cpu_to_le32(rqe, wqe_size);
2300 }
2301
2302 int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2303                      const struct ib_recv_wr **bad_wr)
2304 {
2305         int status = 0;
2306         unsigned long flags;
2307         struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2308         struct ocrdma_hdr_wqe *rqe;
2309
2310         spin_lock_irqsave(&qp->q_lock, flags);
2311         if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
2312                 spin_unlock_irqrestore(&qp->q_lock, flags);
2313                 *bad_wr = wr;
2314                 return -EINVAL;
2315         }
2316         while (wr) {
2317                 if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
2318                     wr->num_sge > qp->rq.max_sges) {
2319                         *bad_wr = wr;
2320                         status = -ENOMEM;
2321                         break;
2322                 }
2323                 rqe = ocrdma_hwq_head(&qp->rq);
2324                 ocrdma_build_rqe(rqe, wr, 0);
2325
2326                 qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
2327                 /* make sure rqe is written before adapter can access it */
2328                 wmb();
2329
2330                 /* inform hw to start processing it */
2331                 ocrdma_ring_rq_db(qp);
2332
2333                 /* update pointer, counter for next wr */
2334                 ocrdma_hwq_inc_head(&qp->rq);
2335                 wr = wr->next;
2336         }
2337         spin_unlock_irqrestore(&qp->q_lock, flags);
2338         return status;
2339 }
2340
2341 /* CQEs for an SRQ's RQEs can potentially arrive out of order.
2342  * The index returned here gives the entry in the shadow table
2343  * where the wr_id is stored; the tag/index is returned in the
2344  * CQE so that the corresponding RQE can be referenced back later.
2345  */
2346 static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
2347 {
2348         int row = 0;
2349         int indx = 0;
2350
2351         for (row = 0; row < srq->bit_fields_len; row++) {
2352                 if (srq->idx_bit_fields[row]) {
2353                         indx = ffs(srq->idx_bit_fields[row]);
2354                         indx = (row * 32) + (indx - 1);
2355                         BUG_ON(indx >= srq->rq.max_cnt);
2356                         ocrdma_srq_toggle_bit(srq, indx);
2357                         break;
2358                 }
2359         }
2360
2361         BUG_ON(row == srq->bit_fields_len);
2362         return indx + 1; /* Use from index 1 */
2363 }
2364
2365 static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
2366 {
2367         u32 val = srq->rq.dbid | (1 << 16);
2368
2369         iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
2370 }
2371
2372 int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
2373                          const struct ib_recv_wr **bad_wr)
2374 {
2375         int status = 0;
2376         unsigned long flags;
2377         struct ocrdma_srq *srq;
2378         struct ocrdma_hdr_wqe *rqe;
2379         u16 tag;
2380
2381         srq = get_ocrdma_srq(ibsrq);
2382
2383         spin_lock_irqsave(&srq->q_lock, flags);
2384         while (wr) {
2385                 if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
2386                     wr->num_sge > srq->rq.max_sges) {
2387                         status = -ENOMEM;
2388                         *bad_wr = wr;
2389                         break;
2390                 }
2391                 tag = ocrdma_srq_get_idx(srq);
2392                 rqe = ocrdma_hwq_head(&srq->rq);
2393                 ocrdma_build_rqe(rqe, wr, tag);
2394
2395                 srq->rqe_wr_id_tbl[tag] = wr->wr_id;
2396                 /* make sure rqe is written before adapter can perform DMA */
2397                 wmb();
2398                 /* inform hw to start processing it */
2399                 ocrdma_ring_srq_db(srq);
2400                 /* update pointer, counter for next wr */
2401                 ocrdma_hwq_inc_head(&srq->rq);
2402                 wr = wr->next;
2403         }
2404         spin_unlock_irqrestore(&srq->q_lock, flags);
2405         return status;
2406 }
2407
2408 static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
2409 {
2410         enum ib_wc_status ibwc_status;
2411
2412         switch (status) {
2413         case OCRDMA_CQE_GENERAL_ERR:
2414                 ibwc_status = IB_WC_GENERAL_ERR;
2415                 break;
2416         case OCRDMA_CQE_LOC_LEN_ERR:
2417                 ibwc_status = IB_WC_LOC_LEN_ERR;
2418                 break;
2419         case OCRDMA_CQE_LOC_QP_OP_ERR:
2420                 ibwc_status = IB_WC_LOC_QP_OP_ERR;
2421                 break;
2422         case OCRDMA_CQE_LOC_EEC_OP_ERR:
2423                 ibwc_status = IB_WC_LOC_EEC_OP_ERR;
2424                 break;
2425         case OCRDMA_CQE_LOC_PROT_ERR:
2426                 ibwc_status = IB_WC_LOC_PROT_ERR;
2427                 break;
2428         case OCRDMA_CQE_WR_FLUSH_ERR:
2429                 ibwc_status = IB_WC_WR_FLUSH_ERR;
2430                 break;
2431         case OCRDMA_CQE_MW_BIND_ERR:
2432                 ibwc_status = IB_WC_MW_BIND_ERR;
2433                 break;
2434         case OCRDMA_CQE_BAD_RESP_ERR:
2435                 ibwc_status = IB_WC_BAD_RESP_ERR;
2436                 break;
2437         case OCRDMA_CQE_LOC_ACCESS_ERR:
2438                 ibwc_status = IB_WC_LOC_ACCESS_ERR;
2439                 break;
2440         case OCRDMA_CQE_REM_INV_REQ_ERR:
2441                 ibwc_status = IB_WC_REM_INV_REQ_ERR;
2442                 break;
2443         case OCRDMA_CQE_REM_ACCESS_ERR:
2444                 ibwc_status = IB_WC_REM_ACCESS_ERR;
2445                 break;
2446         case OCRDMA_CQE_REM_OP_ERR:
2447                 ibwc_status = IB_WC_REM_OP_ERR;
2448                 break;
2449         case OCRDMA_CQE_RETRY_EXC_ERR:
2450                 ibwc_status = IB_WC_RETRY_EXC_ERR;
2451                 break;
2452         case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2453                 ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2454                 break;
2455         case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2456                 ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2457                 break;
2458         case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2459                 ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2460                 break;
2461         case OCRDMA_CQE_REM_ABORT_ERR:
2462                 ibwc_status = IB_WC_REM_ABORT_ERR;
2463                 break;
2464         case OCRDMA_CQE_INV_EECN_ERR:
2465                 ibwc_status = IB_WC_INV_EECN_ERR;
2466                 break;
2467         case OCRDMA_CQE_INV_EEC_STATE_ERR:
2468                 ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2469                 break;
2470         case OCRDMA_CQE_FATAL_ERR:
2471                 ibwc_status = IB_WC_FATAL_ERR;
2472                 break;
2473         case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2474                 ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2475                 break;
2476         default:
2477                 ibwc_status = IB_WC_GENERAL_ERR;
2478                 break;
2479         }
2480         return ibwc_status;
2481 }
2482
2483 static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2484                       u32 wqe_idx)
2485 {
2486         struct ocrdma_hdr_wqe *hdr;
2487         struct ocrdma_sge *rw;
2488         int opcode;
2489
2490         hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2491
2492         ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2493         /* Undo the hdr->cw swap */
2494         opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2495         switch (opcode) {
2496         case OCRDMA_WRITE:
2497                 ibwc->opcode = IB_WC_RDMA_WRITE;
2498                 break;
2499         case OCRDMA_READ:
2500                 rw = (struct ocrdma_sge *)(hdr + 1);
2501                 ibwc->opcode = IB_WC_RDMA_READ;
2502                 ibwc->byte_len = rw->len;
2503                 break;
2504         case OCRDMA_SEND:
2505                 ibwc->opcode = IB_WC_SEND;
2506                 break;
2507         case OCRDMA_FR_MR:
2508                 ibwc->opcode = IB_WC_REG_MR;
2509                 break;
2510         case OCRDMA_LKEY_INV:
2511                 ibwc->opcode = IB_WC_LOCAL_INV;
2512                 break;
2513         default:
2514                 ibwc->status = IB_WC_GENERAL_ERR;
2515                 pr_err("%s() invalid opcode received = 0x%x\n",
2516                        __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
2517                 break;
2518         }
2519 }
2520
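/*
 * Rewrite a CQE's status field in place to OCRDMA_CQE_WR_FLUSH_ERR so that
 * the remaining outstanding WQEs/RQEs of an errored QP are reported as
 * flushed when the CQE is re-used ("expanded") by the poll routines.  UD/GSI
 * receive CQEs keep their status in a separate field, hence the second case.
 */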
2521 static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2522                                                 struct ocrdma_cqe *cqe)
2523 {
2524         if (is_cqe_for_sq(cqe)) {
2525                 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2526                                 cqe->flags_status_srcqpn) &
2527                                         ~OCRDMA_CQE_STATUS_MASK);
2528                 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2529                                 cqe->flags_status_srcqpn) |
2530                                 (OCRDMA_CQE_WR_FLUSH_ERR <<
2531                                         OCRDMA_CQE_STATUS_SHIFT));
2532         } else {
2533                 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2534                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2535                                         cqe->flags_status_srcqpn) &
2536                                                 ~OCRDMA_CQE_UD_STATUS_MASK);
2537                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2538                                         cqe->flags_status_srcqpn) |
2539                                         (OCRDMA_CQE_WR_FLUSH_ERR <<
2540                                                 OCRDMA_CQE_UD_STATUS_SHIFT));
2541                 } else {
2542                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2543                                         cqe->flags_status_srcqpn) &
2544                                                 ~OCRDMA_CQE_STATUS_MASK);
2545                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2546                                         cqe->flags_status_srcqpn) |
2547                                         (OCRDMA_CQE_WR_FLUSH_ERR <<
2548                                                 OCRDMA_CQE_STATUS_SHIFT));
2549                 }
2550         }
2551 }
2552
2553 static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2554                                   struct ocrdma_qp *qp, int status)
2555 {
2556         bool expand = false;
2557
2558         ibwc->byte_len = 0;
2559         ibwc->qp = &qp->ibqp;
2560         ibwc->status = ocrdma_to_ibwc_err(status);
2561
2562         ocrdma_flush_qp(qp);
2563         ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
2564
2565         /* if WQEs/RQEs are still pending for which CQEs need to be
2566          * returned, trigger expanding (flushing) them.
2567          */
2568         if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2569                 expand = true;
2570                 ocrdma_set_cqe_status_flushed(qp, cqe);
2571         }
2572         return expand;
2573 }
2574
2575 static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2576                                   struct ocrdma_qp *qp, int status)
2577 {
2578         ibwc->opcode = IB_WC_RECV;
2579         ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2580         ocrdma_hwq_inc_tail(&qp->rq);
2581
2582         return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2583 }
2584
2585 static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2586                                   struct ocrdma_qp *qp, int status)
2587 {
2588         ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2589         ocrdma_hwq_inc_tail(&qp->sq);
2590
2591         return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2592 }
2593
2594
2595 static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2596                                  struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2597                                  bool *polled, bool *stop)
2598 {
2599         bool expand;
2600         struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2601         int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2602                 OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2603         if (status < OCRDMA_MAX_CQE_ERR)
2604                 atomic_inc(&dev->cqe_err_stats[status]);
2605
2606         /* when the hw SQ is empty but the RQ is not, keep the CQE
2607          * so that the CQ event is raised again.
2608          */
2609         if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
2610                 /* when the CQ for the RQ and SQ is the same, it is safe
2611                  * to return flush CQEs for the RQEs.
2612                  */
2613                 if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2614                         *polled = true;
2615                         status = OCRDMA_CQE_WR_FLUSH_ERR;
2616                         expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2617                 } else {
2618                         /* stop processing further CQEs, as this CQE is
2619                          * used to trigger the CQ event on the buddy CQ
2620                          * of the RQ.  When the QP is destroyed, this CQE
2621                          * will be removed from the CQ's hardware queue.
2622                          */
2623                         *polled = false;
2624                         *stop = true;
2625                         expand = false;
2626                 }
2627         } else if (is_hw_sq_empty(qp)) {
2628                 /* Do nothing */
2629                 expand = false;
2630                 *polled = false;
2631                 *stop = false;
2632         } else {
2633                 *polled = true;
2634                 expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2635         }
2636         return expand;
2637 }
2638
2639 static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2640                                      struct ocrdma_cqe *cqe,
2641                                      struct ib_wc *ibwc, bool *polled)
2642 {
2643         bool expand = false;
2644         int tail = qp->sq.tail;
2645         u32 wqe_idx;
2646
2647         if (!qp->wqe_wr_id_tbl[tail].signaled) {
2648                 *polled = false;    /* WC cannot be consumed yet */
2649         } else {
2650                 ibwc->status = IB_WC_SUCCESS;
2651                 ibwc->wc_flags = 0;
2652                 ibwc->qp = &qp->ibqp;
2653                 ocrdma_update_wc(qp, ibwc, tail);
2654                 *polled = true;
2655         }
2656         wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
2657                         OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
2658         if (tail != wqe_idx)
2659                 expand = true; /* Coalesced CQE can't be consumed yet */
2660
2661         ocrdma_hwq_inc_tail(&qp->sq);
2662         return expand;
2663 }
2664
2665 static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2666                              struct ib_wc *ibwc, bool *polled, bool *stop)
2667 {
2668         int status;
2669         bool expand;
2670
2671         status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2672                 OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2673
2674         if (status == OCRDMA_CQE_SUCCESS)
2675                 expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2676         else
2677                 expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2678         return expand;
2679 }
2680
2681 static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
2682                                  struct ocrdma_cqe *cqe)
2683 {
2684         int status;
2685         u16 hdr_type = 0;
2686
2687         status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2688                 OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2689         ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2690                                                 OCRDMA_CQE_SRCQP_MASK;
2691         ibwc->pkey_index = 0;
2692         ibwc->wc_flags = IB_WC_GRH;
2693         ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2694                           OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
2695                           OCRDMA_CQE_UD_XFER_LEN_MASK;
2696
2697         if (ocrdma_is_udp_encap_supported(dev)) {
2698                 hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2699                             OCRDMA_CQE_UD_L3TYPE_SHIFT) &
2700                             OCRDMA_CQE_UD_L3TYPE_MASK;
2701                 ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
2702                 ibwc->network_hdr_type = hdr_type;
2703         }
2704
2705         return status;
2706 }
2707
2708 static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2709                                        struct ocrdma_cqe *cqe,
2710                                        struct ocrdma_qp *qp)
2711 {
2712         unsigned long flags;
2713         struct ocrdma_srq *srq;
2714         u32 wqe_idx;
2715
2716         srq = get_ocrdma_srq(qp->ibqp.srq);
2717         wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
2718                 OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
2719         BUG_ON(wqe_idx < 1);
2720
2721         ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2722         spin_lock_irqsave(&srq->q_lock, flags);
2723         ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
2724         spin_unlock_irqrestore(&srq->q_lock, flags);
2725         ocrdma_hwq_inc_tail(&srq->rq);
2726 }
2727
2728 static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2729                                 struct ib_wc *ibwc, bool *polled, bool *stop,
2730                                 int status)
2731 {
2732         bool expand;
2733         struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2734
2735         if (status < OCRDMA_MAX_CQE_ERR)
2736                 atomic_inc(&dev->cqe_err_stats[status]);
2737
2738         /* when the hw RQ is empty but the SQ is not, keep the CQE
2739          * so that the CQ event is raised again.
2740          */
2741         if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2742                 if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2743                         *polled = true;
2744                         status = OCRDMA_CQE_WR_FLUSH_ERR;
2745                         expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2746                 } else {
2747                         *polled = false;
2748                         *stop = true;
2749                         expand = false;
2750                 }
2751         } else if (is_hw_rq_empty(qp)) {
2752                 /* Do nothing */
2753                 expand = false;
2754                 *polled = false;
2755                 *stop = false;
2756         } else {
2757                 *polled = true;
2758                 expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2759         }
2760         return expand;
2761 }
2762
2763 static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2764                                      struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2765 {
2766         struct ocrdma_dev *dev;
2767
2768         dev = get_ocrdma_dev(qp->ibqp.device);
2769         ibwc->opcode = IB_WC_RECV;
2770         ibwc->qp = &qp->ibqp;
2771         ibwc->status = IB_WC_SUCCESS;
2772
2773         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2774                 ocrdma_update_ud_rcqe(dev, ibwc, cqe);
2775         else
2776                 ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2777
2778         if (is_cqe_imm(cqe)) {
2779                 ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2780                 ibwc->wc_flags |= IB_WC_WITH_IMM;
2781         } else if (is_cqe_wr_imm(cqe)) {
2782                 ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2783                 ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2784                 ibwc->wc_flags |= IB_WC_WITH_IMM;
2785         } else if (is_cqe_invalidated(cqe)) {
2786                 ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2787                 ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2788         }
2789         if (qp->ibqp.srq) {
2790                 ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
2791         } else {
2792                 ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2793                 ocrdma_hwq_inc_tail(&qp->rq);
2794         }
2795 }
2796
2797 static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2798                              struct ib_wc *ibwc, bool *polled, bool *stop)
2799 {
2800         int status;
2801         bool expand = false;
2802
2803         ibwc->wc_flags = 0;
2804         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2805                 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2806                                         OCRDMA_CQE_UD_STATUS_MASK) >>
2807                                         OCRDMA_CQE_UD_STATUS_SHIFT;
2808         } else {
2809                 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2810                              OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2811         }
2812
2813         if (status == OCRDMA_CQE_SUCCESS) {
2814                 *polled = true;
2815                 ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2816         } else {
2817                 expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2818                                               status);
2819         }
2820         return expand;
2821 }
2822
2823 static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2824                                    u16 cur_getp)
2825 {
2826         if (cq->phase_change) {
2827                 if (cur_getp == 0)
2828                         cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2829         } else {
2830                 /* clear valid bit */
2831                 cqe->flags_status_srcqpn = 0;
2832         }
2833 }
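
/*
 * Illustrative sketch only, not part of the driver: a restatement of the
 * phase/valid-bit convention that ocrdma_change_cq_phase() maintains. The
 * driver's real test is is_cqe_valid() (defined earlier in this file); the
 * helper name below is hypothetical. A CQE belongs to the current pass over
 * the ring when its valid bit matches cq->phase, which is toggled each time
 * the get pointer wraps on phase-change CQs.
 */
static inline bool example_cqe_matches_phase(struct ocrdma_cq *cq,
                                             struct ocrdma_cqe *cqe)
{
        u32 flags = le32_to_cpu(cqe->flags_status_srcqpn);

        return (flags & OCRDMA_CQE_VALID) == cq->phase;
}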
2834
2835 static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2836                             struct ib_wc *ibwc)
2837 {
2838         u16 qpn = 0;
2839         int i = 0;
2840         bool expand = false;
2841         int polled_hw_cqes = 0;
2842         struct ocrdma_qp *qp = NULL;
2843         struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
2844         struct ocrdma_cqe *cqe;
2845         u16 cur_getp; bool polled = false; bool stop = false;
2846
2847         cur_getp = cq->getp;
2848         while (num_entries) {
2849                 cqe = cq->va + cur_getp;
2850                 /* check whether the current cqe is valid */
2851                 if (!is_cqe_valid(cq, cqe))
2852                         break;
2853                 qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2854                 /* ignore discarded cqe */
2855                 if (qpn == 0)
2856                         goto skip_cqe;
2857                 qp = dev->qp_tbl[qpn];
2858                 BUG_ON(qp == NULL);
2859
2860                 if (is_cqe_for_sq(cqe)) {
2861                         expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2862                                                   &stop);
2863                 } else {
2864                         expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2865                                                   &stop);
2866                 }
2867                 if (expand)
2868                         goto expand_cqe;
2869                 if (stop)
2870                         goto stop_cqe;
2871                 /* clear qpn to avoid duplicate processing by discard_cqe() */
2872                 cqe->cmn.qpn = 0;
2873 skip_cqe:
2874                 polled_hw_cqes += 1;
2875                 cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2876                 ocrdma_change_cq_phase(cq, cqe, cur_getp);
2877 expand_cqe:
2878                 if (polled) {
2879                         num_entries -= 1;
2880                         i += 1;
2881                         ibwc = ibwc + 1;
2882                         polled = false;
2883                 }
2884         }
2885 stop_cqe:
2886         cq->getp = cur_getp;
2887
2888         if (polled_hw_cqes)
2889                 ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
2890
2891         return i;
2892 }
2893
2894 /* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
2895 static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2896                               struct ocrdma_qp *qp, struct ib_wc *ibwc)
2897 {
2898         int err_cqes = 0;
2899
2900         while (num_entries) {
2901                 if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2902                         break;
2903                 if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2904                         ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2905                         ocrdma_hwq_inc_tail(&qp->sq);
2906                 } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2907                         ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2908                         ocrdma_hwq_inc_tail(&qp->rq);
2909                 } else {
2910                         return err_cqes;
2911                 }
2912                 ibwc->byte_len = 0;
2913                 ibwc->status = IB_WC_WR_FLUSH_ERR;
2914                 ibwc = ibwc + 1;
2915                 err_cqes += 1;
2916                 num_entries -= 1;
2917         }
2918         return err_cqes;
2919 }
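
/*
 * Illustrative only: how a ULP typically treats the flush completions that
 * ocrdma_add_err_cqe() synthesizes once a QP has entered the error state.
 * The helper name is hypothetical; the point is that IB_WC_WR_FLUSH_ERR
 * means the WR never executed and only its resources need to be reclaimed.
 */
static inline void example_consume_flushed_wc(struct ib_wc *wc)
{
        if (wc->status == IB_WC_WR_FLUSH_ERR) {
                /* wr_id is still valid; release the buffers tied to it */
                return;
        }
        /* otherwise: normal success/error completion processing */
}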
2920
2921 int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2922 {
2923         int cqes_to_poll = num_entries;
2924         struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2925         struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2926         int num_os_cqe = 0, err_cqes = 0;
2927         struct ocrdma_qp *qp;
2928         unsigned long flags;
2929
2930         /* poll cqes from adapter CQ */
2931         spin_lock_irqsave(&cq->cq_lock, flags);
2932         num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2933         spin_unlock_irqrestore(&cq->cq_lock, flags);
2934         cqes_to_poll -= num_os_cqe;
2935
2936         if (cqes_to_poll) {
2937                 wc = wc + num_os_cqe;
2938                 /* The adapter returns a single error CQE when a QP moves
2939                  * to the error state, so insert flush-error CQEs
2940                  * (IB_WC_WR_FLUSH_ERR) for the pending WQEs and RQEs of
2941                  * every QP whose SQ or RQ uses this CQ.
2942                  */
2943                 spin_lock_irqsave(&dev->flush_q_lock, flags);
2944                 list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2945                         if (cqes_to_poll == 0)
2946                                 break;
2947                         err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2948                         cqes_to_poll -= err_cqes;
2949                         num_os_cqe += err_cqes;
2950                         wc = wc + err_cqes;
2951                 }
2952                 spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2953         }
2954         return num_os_cqe;
2955 }
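
/*
 * Consumer-side sketch, illustrative only: ib_poll_cq() is the core entry
 * point that reaches ocrdma_poll_cq() above through the device's verbs. A
 * ULP usually drains completions in batches until the CQ is empty. The
 * helper name and the batch size of 16 are example choices, not driver API.
 */
static inline void example_drain_cq(struct ib_cq *ibcq)
{
        struct ib_wc wc[16];
        int polled, i;

        do {
                polled = ib_poll_cq(ibcq, ARRAY_SIZE(wc), wc);
                for (i = 0; i < polled; i++) {
                        if (wc[i].status != IB_WC_SUCCESS)
                                pr_debug("wr_id 0x%llx completed with status %d\n",
                                         wc[i].wr_id, wc[i].status);
                        /* hand wc[i] to the ULP's completion handler */
                }
        } while (polled > 0);
}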
2956
2957 int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2958 {
2959         struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2960         struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2961         u16 cq_id;
2962         unsigned long flags;
2963         bool arm_needed = false, sol_needed = false;
2964
2965         cq_id = cq->id;
2966
2967         spin_lock_irqsave(&cq->cq_lock, flags);
2968         if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2969                 arm_needed = true;
2970         if (cq_flags & IB_CQ_SOLICITED)
2971                 sol_needed = true;
2972
2973         ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
2974         spin_unlock_irqrestore(&cq->cq_lock, flags);
2975
2976         return 0;
2977 }
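
/*
 * Illustrative arming pattern, not driver code: ocrdma_arm_cq() above always
 * returns 0 and does not report missed events, so a completion that slips in
 * between the caller's last poll and the arm may not raise an event of its
 * own. Consumers conventionally re-poll once after ib_req_notify_cq() to
 * close that race. The helper name is hypothetical.
 */
static inline void example_rearm_and_repoll(struct ib_cq *ibcq)
{
        struct ib_wc wc;

        /* request an event for the next completion */
        ib_req_notify_cq(ibcq, IB_CQ_NEXT_COMP);

        /* catch anything that completed before the CQ was armed */
        while (ib_poll_cq(ibcq, 1, &wc) > 0) {
                /* process wc exactly as in the normal poll path */
        }
}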
2978
2979 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
2980                               enum ib_mr_type mr_type,
2981                               u32 max_num_sg)
2982 {
2983         int status;
2984         struct ocrdma_mr *mr;
2985         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
2986         struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
2987
2988         if (mr_type != IB_MR_TYPE_MEM_REG)
2989                 return ERR_PTR(-EINVAL);
2990
2991         if (max_num_sg > dev->attr.max_pages_per_frmr)
2992                 return ERR_PTR(-EINVAL);
2993
2994         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2995         if (!mr)
2996                 return ERR_PTR(-ENOMEM);
2997
2998         mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
2999         if (!mr->pages) {
3000                 status = -ENOMEM;
3001                 goto pl_err;
3002         }
3003
3004         status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
3005         if (status)
3006                 goto pbl_err;
3007         mr->hwmr.fr_mr = 1;
3008         mr->hwmr.remote_rd = 0;
3009         mr->hwmr.remote_wr = 0;
3010         mr->hwmr.local_rd = 0;
3011         mr->hwmr.local_wr = 0;
3012         mr->hwmr.mw_bind = 0;
3013         status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
3014         if (status)
3015                 goto pbl_err;
3016         status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
3017         if (status)
3018                 goto mbx_err;
3019         mr->ibmr.rkey = mr->hwmr.lkey;
3020         mr->ibmr.lkey = mr->hwmr.lkey;
3021         dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
3022                 (unsigned long) mr;
3023         return &mr->ibmr;
3024 mbx_err:
3025         ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
3026 pbl_err:
3027         kfree(mr->pages);
3028 pl_err:
3029         kfree(mr);
3030         return ERR_PTR(status);
3031 }
3032
3033 static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
3034 {
3035         struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
3036
3037         if (unlikely(mr->npages == mr->hwmr.num_pbes))
3038                 return -ENOMEM;
3039
3040         mr->pages[mr->npages++] = addr;
3041
3042         return 0;
3043 }
3044
3045 int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
3046                      unsigned int *sg_offset)
3047 {
3048         struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
3049
3050         mr->npages = 0;
3051
3052         return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
3053 }
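
/*
 * Consumer-side sketch, illustrative only: the usual fast-registration flow
 * that exercises ocrdma_alloc_mr() and ocrdma_map_mr_sg() above through the
 * core verbs (ib_alloc_mr(), ib_map_mr_sg(), IB_WR_REG_MR). The helper name,
 * wr_id and access flags are example choices, and error handling is trimmed.
 */
static inline int example_fast_reg_mr(struct ib_pd *pd, struct ib_qp *qp,
                                      struct scatterlist *sg, int sg_nents)
{
        const struct ib_send_wr *bad_wr;
        struct ib_reg_wr rwr = { };
        struct ib_mr *mr;
        int n;

        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
        if (IS_ERR(mr))
                return PTR_ERR(mr);

        /* load the scatterlist into the MR's page list (ocrdma_set_page()) */
        n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
        if (n != sg_nents) {
                ib_dereg_mr(mr);
                return n < 0 ? n : -EINVAL;
        }

        /* post a registration WR so the HW view of the MR becomes valid */
        rwr.wr.opcode = IB_WR_REG_MR;
        rwr.wr.wr_id = 1;                       /* example id */
        rwr.wr.send_flags = IB_SEND_SIGNALED;
        rwr.mr = mr;
        rwr.key = mr->rkey;
        rwr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

        return ib_post_send(qp, &rwr.wr, &bad_wr);
}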