GNU Linux-libre 4.19.264-gnu1
[releases.git] drivers/net/ethernet/mellanox/mlx4/main.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/etherdevice.h>
#include <net/devlink.h>

#include <uapi/rdma/mlx4-abi.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");
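/*
 * Illustrative usage (hypothetical values): loading the driver with
 *
 *	modprobe mlx4_core num_vfs=4 probe_vf=1
 *
 * would enable four VFs and have the PF driver itself probe one of them;
 * per-port counts can be given as num_vfs=port1,port2,port1+2.
 */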

static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the num"
					 " of qp per mcg, for example:"
					 " 10 gives 248. Range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

static bool enable_4k_uar;
module_param(enable_4k_uar, bool, 0444);
MODULE_PARM_DESC(enable_4k_uar,
		 "Enable using 4K UAR. Should not be enabled if there are VFs which do not support 4K UARs (default: false)");

#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)

#define RESET_PERSIST_MASK_FLAGS	(MLX4_FLAG_SRIOV)

static char mlx4_version[] =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION "\n";

static const struct mlx4_profile default_profile = {
	.num_qp		= 1 << 18,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 19,
	.num_mtt	= 1 << 20, /* It is really num mtt segments */
};

static const struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(1);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
		 "(0-7) (default: 0)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
				"1 for IB, 2 for Ethernet");
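/*
 * Illustrative usage (hypothetical values): port_type_array=1,2 would
 * request IB on the first port and Ethernet on the second, subject to
 * the validation done in mlx4_check_port_params() below.
 */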

struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

static atomic_t pf_loading = ATOMIC_INIT(0);

static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id,
				       struct devlink_param_gset_ctx *ctx)
{
	ctx->val.vbool = !!mlx4_internal_err_reset;
	return 0;
}

static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
				       struct devlink_param_gset_ctx *ctx)
{
	mlx4_internal_err_reset = ctx->val.vbool;
	return 0;
}

static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
					    struct devlink_param_gset_ctx *ctx)
{
	struct mlx4_priv *priv = devlink_priv(devlink);
	struct mlx4_dev *dev = &priv->dev;

	ctx->val.vbool = dev->persist->crdump.snapshot_enable;
	return 0;
}

static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
					    struct devlink_param_gset_ctx *ctx)
{
	struct mlx4_priv *priv = devlink_priv(devlink);
	struct mlx4_dev *dev = &priv->dev;

	dev->persist->crdump.snapshot_enable = ctx->val.vbool;
	return 0;
}

static int
mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
			       union devlink_param_value val,
			       struct netlink_ext_ack *extack)
{
	u32 value = val.vu32;

	if (value < 1 || value > 128)
		return -ERANGE;

	if (!is_power_of_2(value)) {
		NL_SET_ERR_MSG_MOD(extack, "max_macs supported must be power of 2");
		return -EINVAL;
	}

	return 0;
}

enum mlx4_devlink_param_id {
	MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
	MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
	MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
};

static const struct devlink_param mlx4_devlink_params[] = {
	DEVLINK_PARAM_GENERIC(INT_ERR_RESET,
			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      mlx4_devlink_ierr_reset_get,
			      mlx4_devlink_ierr_reset_set, NULL),
	DEVLINK_PARAM_GENERIC(MAX_MACS,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      NULL, NULL, mlx4_devlink_max_macs_validate),
	DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      mlx4_devlink_crdump_snapshot_get,
			      mlx4_devlink_crdump_snapshot_set, NULL),
	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
			     "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			     NULL, NULL, NULL),
	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
			     "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			     NULL, NULL, NULL),
};

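/*
 * Publish the module-parameter defaults as devlink "driverinit" values,
 * so userspace sees the same initial configuration that it can later
 * override through devlink.
 */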
static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
{
	union devlink_param_value value;

	value.vbool = !!mlx4_internal_err_reset;
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
					   value);

	value.vu32 = 1UL << log_num_mac;
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
					   value);

	value.vbool = enable_64b_cqe_eqe;
	devlink_param_driverinit_value_set(devlink,
					   MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
					   value);

	value.vbool = enable_4k_uar;
	devlink_param_driverinit_value_set(devlink,
					   MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
					   value);

	value.vbool = false;
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
					   value);
}

static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
					      struct mlx4_dev_cap *dev_cap)
{
	/* reserved_uars is calculated in units of the system page size.
	 * An adjustment is therefore applied when the UAR page size is
	 * smaller than the system page size.
	 */
	dev->caps.reserved_uars =
		max_t(int,
		      mlx4_get_num_reserved_uar(dev),
		      dev_cap->reserved_uars /
			(1 << (PAGE_SHIFT - dev->uar_page_shift)));
}

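/*
 * Validate a requested per-port type configuration: without DPDP support
 * all ports must share one type, and each requested type must be listed
 * in the port's supported_type mask.
 */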
int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 0; i < dev->caps.num_ports - 1; i++) {
			if (port_type[i] != port_type[i + 1]) {
				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
				return -EOPNOTSUPP;
			}
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
				 i + 1);
			return -EOPNOTSUPP;
		}
	}
	return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

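/*
 * When the firmware exposes per-function system EQs, refresh the EQ and
 * UAR reservations from QUERY_FUNC; the MLX4_QUERY_FUNC_NUM_SYS_EQS flag
 * in the return value tells the caller that this happened.
 */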
static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err = 0;
	struct mlx4_func func;

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
		err = mlx4_QUERY_FUNC(dev, &func, 0);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
			return err;
		}
		dev_cap->max_eqs = func.max_eq;
		dev_cap->reserved_eqs = func.rsvd_eqs;
		dev_cap->reserved_uars = func.rsvd_uars;
		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
	}
	return err;
}

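/*
 * Decide whether the 64B CQE/EQE stride format can be used: both stride
 * flags and 64B CQE/EQE support must be reported by the firmware, and
 * the CPU cache line must be 128 or 256 bytes for the stride to be used.
 */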
static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
	struct mlx4_caps *dev_cap = &dev->caps;

	/* FW not supporting it, or cancelled by the user */
	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
		return;

	/* Must have 64B CQE/EQE enabled by FW to use the bigger stride.
	 * When FW has NCSI it may decide not to report 64B CQE/EQEs.
	 */
	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		return;
	}

	if (cache_line_size() == 128 || cache_line_size() == 256) {
		mlx4_dbg(dev, "Enabling CQE stride, cacheLine supported\n");
		/* Changing the real data inside CQE size to 32B */
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

		if (mlx4_is_master(dev))
			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
	} else {
		if (cache_line_size() != 32  && cache_line_size() != 64)
			mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
	}
}

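/* Copy one port's QUERY_PORT capabilities into the device capability struct. */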
static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
			  struct mlx4_port_cap *port_cap)
{
	dev->caps.vl_cap[port]	       = port_cap->max_vl;
	dev->caps.ib_mtu_cap[port]     = port_cap->ib_mtu;
	dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
	/* set gid and pkey table operating lengths by default
	 * to non-sriov values
	 */
	dev->caps.gid_table_len[port]  = port_cap->max_gids;
	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
	dev->caps.port_width_cap[port] = port_cap->max_port_width;
	dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
	dev->caps.max_tc_eth	       = port_cap->max_tc_eth;
	dev->caps.def_mac[port]	       = port_cap->def_mac;
	dev->caps.supported_type[port] = port_cap->supported_port_types;
	dev->caps.suggested_type[port] = port_cap->suggested_type;
	dev->caps.default_sense[port]  = port_cap->default_sense;
	dev->caps.trans_type[port]     = port_cap->trans_type;
	dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
	dev->caps.wavelength[port]     = port_cap->wavelength;
	dev->caps.trans_code[port]     = port_cap->trans_code;

	return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int err = 0;

	err = mlx4_QUERY_PORT(dev, port, port_cap);

	if (err)
		mlx4_err(dev, "QUERY_PORT command failed.\n");

	return err;
}

static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
{
	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
		return;

	if (mlx4_is_mfunc(dev)) {
		mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
		mlx4_dbg(dev,
			 "Keep FCS is not supported - Disabling Ignore FCS\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}
}

#define MLX4_A0_STEERING_TABLE_SIZE	256
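/*
 * Query and cache the full device capability set: global limits, per-port
 * capabilities, UAR geometry, port types (possibly via port sensing) and
 * the reserved-QP regions derived from them.
 */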
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}
	mlx4_dev_cap_dump(dev, dev_cap);

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev_cap->uar_size,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	dev->caps.num_sys_eqs	     = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs  = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
	}

	dev->caps.map_clock_to_user  = dev_cap->map_clock_to_user;
	dev->caps.uar_page_size	     = PAGE_SIZE;
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts	     = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;

	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds	     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz	     = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz	     = dev_cap->max_msg_sz;
	dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.flags2	     = dev_cap->flags2;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
	dev->caps.wol_port[1]	     = dev_cap->wol_port[1];
	dev->caps.wol_port[2]	     = dev_cap->wol_port[2];
	dev->caps.health_buffer_addrs = dev_cap->health_buffer_addrs;

	/* Save uar page shift */
	if (!mlx4_is_slave(dev)) {
		/* Virtual PCI function needs to determine UAR page size from
		 * firmware. Only master PCI function can set the uar page size
		 */
		if (enable_4k_uar || !dev->persist->num_vfs)
			dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
		else
			dev->uar_page_shift = PAGE_SHIFT;

		mlx4_set_num_reserved_uars(dev, dev_cap);
	}

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
		struct mlx4_init_hca_param hca_param;

		memset(&hca_param, 0, sizeof(hca_param));
		err = mlx4_QUERY_HCA(dev, &hca_param);
		/* Turn off PHV_EN flag in case phv_check_en is set.
		 * phv_check_en is a HW check that parses the packet and verifies
		 * that the phv bit was reported correctly in the wqe. To allow
		 * QinQ, the PHV_EN flag should be set and phv_check_en must be
		 * cleared, otherwise QinQ packets will be dropped by the HW.
		 */
		if (err || hca_param.phv_check_en)
			dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
	}

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs  = log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				else
					dev->caps.port_type[i] = port_type_array[i - 1];
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port.
		 * 3. FW declared that it supports link sensing.
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/*
		 * If the "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform a sense_port FW command to try and set the correct
		 * port type from the beginning.
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;

			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
	    (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
	    (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
		mlx4_warn(dev,
			  "Granular QoS per VF not supported with IB/Eth configuration\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
	}

	dev->caps.max_counters = dev_cap->max_counters;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

	dev->caps.rl_caps = dev_cap->rl_caps;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
		dev->caps.dmfs_high_rate_qpn_range;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}

		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
	}

	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		mlx4_enable_cqe_eqe_stride(dev);
		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;

		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
			mlx4_warn(dev, "Old device ETS support detected\n");
			mlx4_warn(dev, "Consider upgrading device FW.\n");
			dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
		}

	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}

	mlx4_enable_ignore_fcs(dev);

	return 0;
}

/* Count how many VFs are still live and return that number */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state;
	int i;
	int ret = 0;

	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
		s_state = &priv->mfunc.master.slave_state[i];
		if (s_state->active && s_state->last_cmd !=
		    MLX4_COMM_CMD_RESET) {
			mlx4_warn(dev, "%s: slave: %d is still active\n",
				  __func__, i);
			ret++;
		}
	}
	return ret;
}

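/*
 * Map a proxy/tunnel special QP number to its well-known paravirtualized
 * qkey; QPNs outside the proxy/tunnel range are rejected.
 */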
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return 0;

	return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

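/*
 * If the destination MAC of an Ethernet header rule is multicast or
 * broadcast and it is the last rule in the flow specification, force the
 * rule's priority to MLX4_DOMAIN_NIC.
 */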
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
				       struct _rule_hw *eth_header)
{
	if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
	    is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
		struct mlx4_net_trans_rule_hw_eth *eth =
			(struct mlx4_net_trans_rule_hw_eth *)eth_header;
		struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
		bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
			next_rule->rsvd == 0;

		if (last_rule)
			ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
	}
}
EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);

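/*
 * On a slave function the steering mode is dictated by the master; adopt
 * it from the INIT_HCA parameters and size num_qp_per_mgm accordingly.
 */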
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else {
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
	}

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
{
	kfree(dev->caps.spec_qps);
	dev->caps.spec_qps = NULL;
}

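/*
 * Query the special (proxy/tunnel) QP capabilities for every port of a
 * slave function, along with its pkey/gid table lengths; on any failure
 * the partially built spec_qps array is freed again.
 */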
static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
{
	struct mlx4_func_cap *func_cap = NULL;
	struct mlx4_caps *caps = &dev->caps;
	int i, err = 0;

	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
	caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);

	if (!func_cap || !caps->spec_qps) {
		mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
		err = -ENOMEM;
		goto err_mem;
	}

	for (i = 1; i <= caps->num_ports; ++i) {
		err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
		caps->spec_qps[i - 1] = func_cap->spec_qps;
		caps->port_mask[i] = caps->port_type[i];
		caps->phys_port_id[i] = func_cap->phys_port_id;
		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						      &caps->gid_table_len[i],
						      &caps->pkey_table_len[i]);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
	}

err_mem:
	if (err)
		mlx4_slave_destroy_special_qp_cap(dev);
	kfree(func_cap);
	return err;
}

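/*
 * Build the capability set of a slave (VF) function: query the HCA
 * configuration chosen by the master, apply the function's quotas from
 * QUERY_FUNC_CAP, and derive the CQE/EQE sizes actually enabled for it.
 */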
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int			   err;
	u32			   page_size;
	struct mlx4_dev_cap	   *dev_cap = NULL;
	struct mlx4_func_cap	   *func_cap = NULL;
	struct mlx4_init_hca_param *hca_param = NULL;

	hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
	dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
	if (!hca_param || !func_cap || !dev_cap) {
		mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
		err = -ENOMEM;
		goto free_mem;
	}

	err = mlx4_QUERY_HCA(dev, hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
		goto free_mem;
	}

	/* Fail if the HCA has an unknown global capability;
	 * at this time global_caps should always be zero.
	 */
	if (hca_param->global_caps) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		err = -EINVAL;
		goto free_mem;
	}

	dev->caps.hca_core_clock = hca_param->hca_core_clock;

	dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
	err = mlx4_dev_cap(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		goto free_mem;
	}

	err = mlx4_QUERY_FW(dev);
	if (err)
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
	if (page_size > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 page_size, PAGE_SIZE);
		err = -ENODEV;
		goto free_mem;
	}

	/* Set uar_page_shift for VF */
	dev->uar_page_shift = hca_param->uar_page_sz + 12;

	/* Make sure the master uar page size is valid */
	if (dev->uar_page_shift > PAGE_SHIFT) {
		mlx4_err(dev,
			 "Invalid configuration: uar page size is larger than system page size\n");
		err = -ENODEV;
		goto free_mem;
	}

	/* Set reserved_uars based on the uar_page_shift */
	mlx4_set_num_reserved_uars(dev, dev_cap);

	/* Although the uar page size in FW differs from the system page size,
	 * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
	 * still work with the assumption that uar page size == system page size
	 */
	dev->caps.uar_page_size = PAGE_SIZE;

	err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
	if (err) {
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
			 err);
		goto free_mem;
	}

	if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
			 func_cap->pf_context_behaviour,
			 PF_CONTEXT_BEHAVIOUR_MASK);
		err = -EINVAL;
		goto free_mem;
	}

	dev->caps.num_ports		= func_cap->num_ports;
	dev->quotas.qp			= func_cap->qp_quota;
	dev->quotas.srq			= func_cap->srq_quota;
	dev->quotas.cq			= func_cap->cq_quota;
	dev->quotas.mpt			= func_cap->mpt_quota;
	dev->quotas.mtt			= func_cap->mtt_quota;
	dev->caps.num_qps		= 1 << hca_param->log_num_qps;
	dev->caps.num_srqs		= 1 << hca_param->log_num_srqs;
	dev->caps.num_cqs		= 1 << hca_param->log_num_cqs;
	dev->caps.num_mpts		= 1 << hca_param->log_mpt_sz;
	dev->caps.num_eqs		= func_cap->max_eq;
	dev->caps.reserved_eqs		= func_cap->reserved_eq;
	dev->caps.reserved_lkey		= func_cap->reserved_lkey;
	dev->caps.num_pds		= MLX4_NUM_PDS;
	dev->caps.num_mgms		= 0;
	dev->caps.num_amgms		= 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev->caps.num_ports, MLX4_MAX_PORTS);
		err = -ENODEV;
		goto free_mem;
	}

	mlx4_replace_zero_macs(dev);

	err = mlx4_slave_special_qp_cap(dev);
	if (err) {
		mlx4_err(dev, "Set special QP caps failed, aborting\n");
		goto free_mem;
	}

	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
				       pci_resource_len(dev->persist->pdev,
							2)) {
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev->caps.uar_page_size * dev->caps.num_uars,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		err = -ENOMEM;
		goto err_mem;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size   = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size   = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size   = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	} else {
		dev->caps.cqe_size   = 32;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
		dev->caps.eqe_size = hca_param->eqe_size;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
		dev->caps.cqe_size = hca_param->cqe_size;
		/* User still needs to know when CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
	mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");

	slave_adjust_steering_mode(dev, dev_cap, hca_param);
	mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
		 hca_param->rss_ip_frags ? "on" : "off");

	if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

	if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

err_mem:
	if (err)
		mlx4_slave_destroy_special_qp_cap(dev);
free_mem:
	kfree(hca_param);
	kfree(func_cap);
	kfree(dev_cap);
	return err;
}

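/*
 * Request the protocol drivers matching the configured port types; an
 * Ethernet port pulls in mlx4_en and an IB (or RoCE-capable) port pulls
 * in mlx4_ib.
 */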
static void mlx4_request_modules(struct mlx4_dev *dev)
{
	int port;
	int has_ib_port = false;
	int has_eth_port = false;
#define EN_DRV_NAME	"mlx4_en"
#define IB_DRV_NAME	"mlx4_ib"

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
			has_ib_port = true;
		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			has_eth_port = true;
	}

	if (has_eth_port)
		request_module_nowait(EN_DRV_NAME);
	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
		request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, aborting\n",
					 port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
	}

out:
	return err;
}

static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

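/*
 * Apply one port's requested type: validate it against the supported
 * types, re-run port sensing where allowed, and commit the result via
 * mlx4_change_port_types() under the port mutex.
 */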
static int __set_port_type(struct mlx4_port_info *info,
			   enum mlx4_port_type port_type)
{
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	int i;
	int err = 0;

	if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
		mlx4_err(mdev,
			 "Requested port type for port %d is not supported on this HCA\n",
			 info->port);
		return -EOPNOTSUPP;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	info->tmp_type = port_type;

	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EOPNOTSUPP;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);

	return err;
}

static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	enum mlx4_port_type port_type;
	static DEFINE_MUTEX(set_port_type_mutex);
	int err;

	mutex_lock(&set_port_type_mutex);

	if (!strcmp(buf, "ib\n")) {
		port_type = MLX4_PORT_TYPE_IB;
	} else if (!strcmp(buf, "eth\n")) {
		port_type = MLX4_PORT_TYPE_ETH;
	} else if (!strcmp(buf, "auto\n")) {
		port_type = MLX4_PORT_TYPE_AUTO;
	} else {
		mlx4_err(mdev, "%s is not a supported port type\n", buf);
		err = -EINVAL;
		goto err_out;
	}

	err = __set_port_type(info, port_type);

err_out:
	mutex_unlock(&set_port_type_mutex);

	return err ? err : count;
}

enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

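/*
 * Convert between byte MTU values and the IBTA MTU enumeration above;
 * both helpers return -1 for values outside the table.
 */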
1285 static inline int int_to_ibta_mtu(int mtu)
1286 {
1287         switch (mtu) {
1288         case 256:  return IB_MTU_256;
1289         case 512:  return IB_MTU_512;
1290         case 1024: return IB_MTU_1024;
1291         case 2048: return IB_MTU_2048;
1292         case 4096: return IB_MTU_4096;
1293         default: return -1;
1294         }
1295 }
1296
1297 static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
1298 {
1299         switch (mtu) {
1300         case IB_MTU_256:  return  256;
1301         case IB_MTU_512:  return  512;
1302         case IB_MTU_1024: return 1024;
1303         case IB_MTU_2048: return 2048;
1304         case IB_MTU_4096: return 4096;
1305         default: return -1;
1306         }
1307 }
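/*
 * Note: the IBTA encoding above is simply mtu = 128 << enum_value
 * (256 = 128 << 1, ..., 4096 = 128 << 5), so both helpers could be
 * computed instead of using a switch; a sketch:
 *
 *   static inline int int_to_ibta_mtu_calc(int mtu)
 *   {
 *           if (mtu < 256 || mtu > 4096 || !is_power_of_2(mtu))
 *                   return -1;
 *           return ilog2(mtu) - 7;
 *   }
 *
 * The explicit switch statements are kept for readability.
 */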
1308
1309 static ssize_t show_port_ib_mtu(struct device *dev,
1310                              struct device_attribute *attr,
1311                              char *buf)
1312 {
1313         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1314                                                    port_mtu_attr);
1315         struct mlx4_dev *mdev = info->dev;
1316
1317         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
1318                 mlx4_warn(mdev, "port-level MTU is only used for IB ports\n");
1319
1320         sprintf(buf, "%d\n",
1321                         ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
1322         return strlen(buf);
1323 }
1324
1325 static ssize_t set_port_ib_mtu(struct device *dev,
1326                              struct device_attribute *attr,
1327                              const char *buf, size_t count)
1328 {
1329         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1330                                                    port_mtu_attr);
1331         struct mlx4_dev *mdev = info->dev;
1332         struct mlx4_priv *priv = mlx4_priv(mdev);
1333         int err, port, mtu, ibta_mtu = -1;
1334
1335         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
1336                 mlx4_warn(mdev, "port-level MTU is only used for IB ports\n");
1337                 return -EINVAL;
1338         }
1339
1340         err = kstrtoint(buf, 0, &mtu);
1341         if (!err)
1342                 ibta_mtu = int_to_ibta_mtu(mtu);
1343
1344         if (err || ibta_mtu < 0) {
1345                 mlx4_err(mdev, "%s is not a valid IBTA MTU\n", buf);
1346                 return -EINVAL;
1347         }
1348
1349         mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
1350
1351         mlx4_stop_sense(mdev);
1352         mutex_lock(&priv->port_mutex);
1353         mlx4_unregister_device(mdev);
1354         for (port = 1; port <= mdev->caps.num_ports; port++) {
1355                 mlx4_CLOSE_PORT(mdev, port);
1356                 err = mlx4_SET_PORT(mdev, port, -1);
1357                 if (err) {
1358                         mlx4_err(mdev, "Failed to set port %d, aborting\n",
1359                                  port);
1360                         goto err_set_port;
1361                 }
1362         }
1363         err = mlx4_register_device(mdev);
1364 err_set_port:
1365         mutex_unlock(&priv->port_mutex);
1366         mlx4_start_sense(mdev);
1367         return err ? err : count;
1368 }
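/*
 * Illustrative usage (hypothetical path; the attribute is created from
 * port_mtu_attr): assuming it appears as "mlx4_port<N>_mtu" under the
 * PCI device, setting port 1 to a 2048-byte IB MTU might look like
 *
 *   echo 2048 > /sys/bus/pci/devices/<domain:bus:dev.fn>/mlx4_port1_mtu
 *
 * Note that the store handler re-registers the whole device, so the
 * write briefly disrupts traffic on all ports.
 */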
1369
1370 /* bond for multi-function device */
1371 #define MAX_MF_BOND_ALLOWED_SLAVES 63
1372 static int mlx4_mf_bond(struct mlx4_dev *dev)
1373 {
1374         int err = 0;
1375         int nvfs;
1376         struct mlx4_slaves_pport slaves_port1;
1377         struct mlx4_slaves_pport slaves_port2;
1378         DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
1379
1380         slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
1381         slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
1382         bitmap_and(slaves_port_1_2,
1383                    slaves_port1.slaves, slaves_port2.slaves,
1384                    dev->persist->num_vfs + 1);
1385
1386         /* only single port vfs are allowed */
1387         if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
1388                 mlx4_warn(dev, "HA mode unsupported for dual-ported VFs\n");
1389                 return -EINVAL;
1390         }
1391
1392         /* The number of virtual functions is the total number of
1393          * functions minus one physical function for each port.
1394          */
1395         nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
1396                 bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
1397
1398         /* limit on maximum allowed VFs */
1399         if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
1400                 mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
1401                           nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
1402                 return -EINVAL;
1403         }
1404
1405         if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
1406                 mlx4_warn(dev, "HA mode unsupported for non-DMFS steering\n");
1407                 return -EINVAL;
1408         }
1409
1410         err = mlx4_bond_mac_table(dev);
1411         if (err)
1412                 return err;
1413         err = mlx4_bond_vlan_table(dev);
1414         if (err)
1415                 goto err1;
1416         err = mlx4_bond_fs_rules(dev);
1417         if (err)
1418                 goto err2;
1419
1420         return 0;
1421 err2:
1422         (void)mlx4_unbond_vlan_table(dev);
1423 err1:
1424         (void)mlx4_unbond_mac_table(dev);
1425         return err;
1426 }
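/*
 * The dual-port check above relies on the bitmap API: a function that
 * is active on both physical ports has its bit set in both per-port
 * slave maps, so the intersection exposes it. In the abstract:
 *
 *   bitmap_and(both, port1_map, port2_map, nbits);
 *   if (bitmap_weight(both, nbits) > 1)
 *           reject();
 *
 * The "> 1" (rather than "> 0") allows for the PF itself, which is
 * expected to appear on both ports.
 */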
1427
1428 static int mlx4_mf_unbond(struct mlx4_dev *dev)
1429 {
1430         int ret, ret1;
1431
1432         ret = mlx4_unbond_fs_rules(dev);
1433         if (ret)
1434                 mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
1435         ret1 = mlx4_unbond_mac_table(dev);
1436         if (ret1) {
1437                 mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
1438                 ret = ret1;
1439         }
1440         ret1 = mlx4_unbond_vlan_table(dev);
1441         if (ret1) {
1442                 mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
1443                 ret = ret1;
1444         }
1445         return ret;
1446 }
1447
1448 int mlx4_bond(struct mlx4_dev *dev)
1449 {
1450         int ret = 0;
1451         struct mlx4_priv *priv = mlx4_priv(dev);
1452
1453         mutex_lock(&priv->bond_mutex);
1454
1455         if (!mlx4_is_bonded(dev)) {
1456                 ret = mlx4_do_bond(dev, true);
1457                 if (ret)
1458                         mlx4_err(dev, "Failed to bond device: %d\n", ret);
1459                 if (!ret && mlx4_is_master(dev)) {
1460                         ret = mlx4_mf_bond(dev);
1461                         if (ret) {
1462                                 mlx4_err(dev, "bond for multifunction failed\n");
1463                                 mlx4_do_bond(dev, false);
1464                         }
1465                 }
1466         }
1467
1468         mutex_unlock(&priv->bond_mutex);
1469         if (!ret)
1470                 mlx4_dbg(dev, "Device is bonded\n");
1471
1472         return ret;
1473 }
1474 EXPORT_SYMBOL_GPL(mlx4_bond);
1475
1476 int mlx4_unbond(struct mlx4_dev *dev)
1477 {
1478         int ret = 0;
1479         struct mlx4_priv *priv = mlx4_priv(dev);
1480
1481         mutex_lock(&priv->bond_mutex);
1482
1483         if (mlx4_is_bonded(dev)) {
1484                 int ret2 = 0;
1485
1486                 ret = mlx4_do_bond(dev, false);
1487                 if (ret)
1488                         mlx4_err(dev, "Failed to unbond device: %d\n", ret);
1489                 if (mlx4_is_master(dev))
1490                         ret2 = mlx4_mf_unbond(dev);
1491                 if (ret2) {
1492                         mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
1493                         ret = ret2;
1494                 }
1495         }
1496
1497         mutex_unlock(&priv->bond_mutex);
1498         if (!ret)
1499                 mlx4_dbg(dev, "Device is unbonded\n");
1500
1501         return ret;
1502 }
1503 EXPORT_SYMBOL_GPL(mlx4_unbond);
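/*
 * mlx4_bond() and mlx4_unbond() are exported for upper drivers (e.g.
 * the mlx4 Ethernet driver's bonding support) and are effectively
 * idempotent: both check mlx4_is_bonded() under bond_mutex before
 * acting, so calling them when the device is already in the requested
 * state is a no-op that returns 0.
 */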
1504
1505
1506 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1507 {
1508         u8 port1 = v2p->port1;
1509         u8 port2 = v2p->port2;
1510         struct mlx4_priv *priv = mlx4_priv(dev);
1511         int err;
1512
1513         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1514                 return -EOPNOTSUPP;
1515
1516         mutex_lock(&priv->bond_mutex);
1517
1518         /* zero means keep current mapping for this port */
1519         if (port1 == 0)
1520                 port1 = priv->v2p.port1;
1521         if (port2 == 0)
1522                 port2 = priv->v2p.port2;
1523
1524         if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1525             (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1526             (port1 == 2 && port2 == 1)) {
1527                 /* Beyond the boundary checks, cross mapping makes
1528                  * no sense and is therefore not allowed. */
1529                 err = -EINVAL;
1530         } else if ((port1 == priv->v2p.port1) &&
1531                  (port2 == priv->v2p.port2)) {
1532                 err = 0;
1533         } else {
1534                 err = mlx4_virt2phy_port_map(dev, port1, port2);
1535                 if (!err) {
1536                         mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1537                                  port1, port2);
1538                         priv->v2p.port1 = port1;
1539                         priv->v2p.port2 = port2;
1540                 } else {
1541                         mlx4_err(dev, "Failed to change port map: %d\n", err);
1542                 }
1543         }
1544
1545         mutex_unlock(&priv->bond_mutex);
1546         return err;
1547 }
1548 EXPORT_SYMBOL_GPL(mlx4_port_map_set);
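/*
 * Illustrative caller (a sketch): with PORT_REMAP supported, mapping
 * both virtual ports onto physical port 1 would look like
 *
 *   struct mlx4_port_map v2p = { .port1 = 1, .port2 = 1 };
 *   int err = mlx4_port_map_set(dev, &v2p);
 *
 * Passing 0 in either field keeps that port's current mapping, and
 * { .port1 = 2, .port2 = 1 } is rejected as a cross mapping.
 */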
1549
1550 static int mlx4_load_fw(struct mlx4_dev *dev)
1551 {
1552         struct mlx4_priv *priv = mlx4_priv(dev);
1553         int err;
1554
1555         priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1556                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
1557         if (!priv->fw.fw_icm) {
1558                 mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1559                 return -ENOMEM;
1560         }
1561
1562         err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1563         if (err) {
1564                 mlx4_err(dev, "MAP_FA command failed, aborting\n");
1565                 goto err_free;
1566         }
1567
1568         err = mlx4_RUN_FW(dev);
1569         if (err) {
1570                 mlx4_err(dev, "RUN_FW command failed, aborting\n");
1571                 goto err_unmap_fa;
1572         }
1573
1574         return 0;
1575
1576 err_unmap_fa:
1577         mlx4_UNMAP_FA(dev);
1578
1579 err_free:
1580         mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1581         return err;
1582 }
1583
1584 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1585                                 int cmpt_entry_sz)
1586 {
1587         struct mlx4_priv *priv = mlx4_priv(dev);
1588         int err;
1589         int num_eqs;
1590
1591         err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1592                                   cmpt_base +
1593                                   ((u64) (MLX4_CMPT_TYPE_QP *
1594                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1595                                   cmpt_entry_sz, dev->caps.num_qps,
1596                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1597                                   0, 0);
1598         if (err)
1599                 goto err;
1600
1601         err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1602                                   cmpt_base +
1603                                   ((u64) (MLX4_CMPT_TYPE_SRQ *
1604                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1605                                   cmpt_entry_sz, dev->caps.num_srqs,
1606                                   dev->caps.reserved_srqs, 0, 0);
1607         if (err)
1608                 goto err_qp;
1609
1610         err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1611                                   cmpt_base +
1612                                   ((u64) (MLX4_CMPT_TYPE_CQ *
1613                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1614                                   cmpt_entry_sz, dev->caps.num_cqs,
1615                                   dev->caps.reserved_cqs, 0, 0);
1616         if (err)
1617                 goto err_srq;
1618
1619         num_eqs = dev->phys_caps.num_phys_eqs;
1620         err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1621                                   cmpt_base +
1622                                   ((u64) (MLX4_CMPT_TYPE_EQ *
1623                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1624                                   cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1625         if (err)
1626                 goto err_cq;
1627
1628         return 0;
1629
1630 err_cq:
1631         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1632
1633 err_srq:
1634         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1635
1636 err_qp:
1637         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1638
1639 err:
1640         return err;
1641 }
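/*
 * Layout note: each cMPT region above starts at
 *
 *   cmpt_base + ((u64)(type * cmpt_entry_sz) << MLX4_CMPT_SHIFT)
 *
 * so the QP/SRQ/CQ/EQ regions (types 0..3) are spaced
 * cmpt_entry_sz << MLX4_CMPT_SHIFT bytes apart in the ICM virtual
 * space. As an illustration only: a 64-byte entry with a shift of 24
 * would put consecutive regions 1 GB apart (64 << 24 bytes); the real
 * entry size comes from QUERY_DEV_CAP.
 */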
1642
1643 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1644                          struct mlx4_init_hca_param *init_hca, u64 icm_size)
1645 {
1646         struct mlx4_priv *priv = mlx4_priv(dev);
1647         u64 aux_pages;
1648         int num_eqs;
1649         int err;
1650
1651         err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1652         if (err) {
1653                 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1654                 return err;
1655         }
1656
1657         mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1658                  (unsigned long long) icm_size >> 10,
1659                  (unsigned long long) aux_pages << 2);
1660
1661         priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1662                                           GFP_HIGHUSER | __GFP_NOWARN, 0);
1663         if (!priv->fw.aux_icm) {
1664                 mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1665                 return -ENOMEM;
1666         }
1667
1668         err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1669         if (err) {
1670                 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1671                 goto err_free_aux;
1672         }
1673
1674         err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1675         if (err) {
1676                 mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1677                 goto err_unmap_aux;
1678         }
1679
1680
1681         num_eqs = dev->phys_caps.num_phys_eqs;
1682         err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1683                                   init_hca->eqc_base, dev_cap->eqc_entry_sz,
1684                                   num_eqs, num_eqs, 0, 0);
1685         if (err) {
1686                 mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1687                 goto err_unmap_cmpt;
1688         }
1689
1690         /*
1691          * Reserved MTT entries must be aligned up to a cacheline
1692          * boundary, since the FW will write to them, while the driver
1693          * writes to all other MTT entries. (The variable
1694          * dev->caps.mtt_entry_sz below is really the MTT segment
1695          * size, not the raw entry size)
1696          */
1697         dev->caps.reserved_mtts =
1698                 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1699                       dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
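        /* Worked example with illustrative numbers: a 64-byte MTT
         * segment and a 128-byte cacheline turn 3 reserved segments
         * into ALIGN(3 * 64, 128) / 64 = 256 / 64 = 4.
         */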
1700
1701         err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1702                                   init_hca->mtt_base,
1703                                   dev->caps.mtt_entry_sz,
1704                                   dev->caps.num_mtts,
1705                                   dev->caps.reserved_mtts, 1, 0);
1706         if (err) {
1707                 mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1708                 goto err_unmap_eq;
1709         }
1710
1711         err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1712                                   init_hca->dmpt_base,
1713                                   dev_cap->dmpt_entry_sz,
1714                                   dev->caps.num_mpts,
1715                                   dev->caps.reserved_mrws, 1, 1);
1716         if (err) {
1717                 mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1718                 goto err_unmap_mtt;
1719         }
1720
1721         err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1722                                   init_hca->qpc_base,
1723                                   dev_cap->qpc_entry_sz,
1724                                   dev->caps.num_qps,
1725                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1726                                   0, 0);
1727         if (err) {
1728                 mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1729                 goto err_unmap_dmpt;
1730         }
1731
1732         err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1733                                   init_hca->auxc_base,
1734                                   dev_cap->aux_entry_sz,
1735                                   dev->caps.num_qps,
1736                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1737                                   0, 0);
1738         if (err) {
1739                 mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1740                 goto err_unmap_qp;
1741         }
1742
1743         err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1744                                   init_hca->altc_base,
1745                                   dev_cap->altc_entry_sz,
1746                                   dev->caps.num_qps,
1747                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1748                                   0, 0);
1749         if (err) {
1750                 mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1751                 goto err_unmap_auxc;
1752         }
1753
1754         err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1755                                   init_hca->rdmarc_base,
1756                                   dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1757                                   dev->caps.num_qps,
1758                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1759                                   0, 0);
1760         if (err) {
1761                 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1762                 goto err_unmap_altc;
1763         }
1764
1765         err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1766                                   init_hca->cqc_base,
1767                                   dev_cap->cqc_entry_sz,
1768                                   dev->caps.num_cqs,
1769                                   dev->caps.reserved_cqs, 0, 0);
1770         if (err) {
1771                 mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1772                 goto err_unmap_rdmarc;
1773         }
1774
1775         err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1776                                   init_hca->srqc_base,
1777                                   dev_cap->srq_entry_sz,
1778                                   dev->caps.num_srqs,
1779                                   dev->caps.reserved_srqs, 0, 0);
1780         if (err) {
1781                 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1782                 goto err_unmap_cq;
1783         }
1784
1785         /*
1786          * For flow steering device managed mode it is required to use
1787          * mlx4_init_icm_table. For B0 steering mode it's not strictly
1788          * required, but for simplicity just map the whole multicast
1789          * group table now.  The table isn't very big and it's a lot
1790          * easier than trying to track ref counts.
1791          */
1792         err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1793                                   init_hca->mc_base,
1794                                   mlx4_get_mgm_entry_size(dev),
1795                                   dev->caps.num_mgms + dev->caps.num_amgms,
1796                                   dev->caps.num_mgms + dev->caps.num_amgms,
1797                                   0, 0);
1798         if (err) {
1799                 mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1800                 goto err_unmap_srq;
1801         }
1802
1803         return 0;
1804
1805 err_unmap_srq:
1806         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1807
1808 err_unmap_cq:
1809         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1810
1811 err_unmap_rdmarc:
1812         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1813
1814 err_unmap_altc:
1815         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1816
1817 err_unmap_auxc:
1818         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1819
1820 err_unmap_qp:
1821         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1822
1823 err_unmap_dmpt:
1824         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1825
1826 err_unmap_mtt:
1827         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1828
1829 err_unmap_eq:
1830         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1831
1832 err_unmap_cmpt:
1833         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1834         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1835         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1836         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1837
1838 err_unmap_aux:
1839         mlx4_UNMAP_ICM_AUX(dev);
1840
1841 err_free_aux:
1842         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1843
1844         return err;
1845 }
1846
1847 static void mlx4_free_icms(struct mlx4_dev *dev)
1848 {
1849         struct mlx4_priv *priv = mlx4_priv(dev);
1850
1851         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1852         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1853         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1854         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1855         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1856         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1857         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1858         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1859         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1860         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1861         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1862         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1863         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1864         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1865
1866         mlx4_UNMAP_ICM_AUX(dev);
1867         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1868 }
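/*
 * Note that mlx4_free_icms() above tears the tables down in exactly
 * the reverse order of mlx4_init_icm(), mirroring its error-unwind
 * ladder; any new ICM table must be added to all three places.
 */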
1869
1870 static void mlx4_slave_exit(struct mlx4_dev *dev)
1871 {
1872         struct mlx4_priv *priv = mlx4_priv(dev);
1873
1874         mutex_lock(&priv->cmd.slave_cmd_mutex);
1875         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1876                           MLX4_COMM_TIME))
1877                 mlx4_warn(dev, "Failed to close slave function\n");
1878         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1879 }
1880
1881 static int map_bf_area(struct mlx4_dev *dev)
1882 {
1883         struct mlx4_priv *priv = mlx4_priv(dev);
1884         resource_size_t bf_start;
1885         resource_size_t bf_len;
1886         int err = 0;
1887
1888         if (!dev->caps.bf_reg_size)
1889                 return -ENXIO;
1890
1891         bf_start = pci_resource_start(dev->persist->pdev, 2) +
1892                         (dev->caps.num_uars << PAGE_SHIFT);
1893         bf_len = pci_resource_len(dev->persist->pdev, 2) -
1894                         (dev->caps.num_uars << PAGE_SHIFT);
1895         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1896         if (!priv->bf_mapping)
1897                 err = -ENOMEM;
1898
1899         return err;
1900 }
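/*
 * BAR 2 layout assumed by map_bf_area(): the first num_uars pages are
 * the UAR doorbell pages, and everything after them is the BlueFlame
 * region, which is mapped write-combining so that small work requests
 * can be written straight to the device.
 */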
1901
1902 static void unmap_bf_area(struct mlx4_dev *dev)
1903 {
1904         if (mlx4_priv(dev)->bf_mapping)
1905                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1906 }
1907
1908 u64 mlx4_read_clock(struct mlx4_dev *dev)
1909 {
1910         u32 clockhi, clocklo, clockhi1;
1911         u64 cycles;
1912         int i;
1913         struct mlx4_priv *priv = mlx4_priv(dev);
1914
1915         for (i = 0; i < 10; i++) {
1916                 clockhi = swab32(readl(priv->clock_mapping));
1917                 clocklo = swab32(readl(priv->clock_mapping + 4));
1918                 clockhi1 = swab32(readl(priv->clock_mapping));
1919                 if (clockhi == clockhi1)
1920                         break;
1921         }
1922
1923         cycles = (u64) clockhi << 32 | (u64) clocklo;
1924
1925         return cycles;
1926 }
1927 EXPORT_SYMBOL_GPL(mlx4_read_clock);
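/*
 * The hi/lo/hi read sequence above guards against the 32-bit low word
 * wrapping between the two MMIO reads: if the high word changed, the
 * sample is retried (at most 10 times). The same idiom in the
 * abstract:
 *
 *   do {
 *           hi  = read_hi();
 *           lo  = read_lo();
 *           hi2 = read_hi();
 *   } while (hi != hi2);
 *   cycles = ((u64)hi << 32) | lo;
 *
 * (Sketch only; the driver bounds the loop instead of spinning.)
 */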
1928
1929
1930 static int map_internal_clock(struct mlx4_dev *dev)
1931 {
1932         struct mlx4_priv *priv = mlx4_priv(dev);
1933
1934         priv->clock_mapping =
1935                 ioremap(pci_resource_start(dev->persist->pdev,
1936                                            priv->fw.clock_bar) +
1937                         priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1938
1939         if (!priv->clock_mapping)
1940                 return -ENOMEM;
1941
1942         return 0;
1943 }
1944
1945 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1946                                    struct mlx4_clock_params *params)
1947 {
1948         struct mlx4_priv *priv = mlx4_priv(dev);
1949
1950         if (mlx4_is_slave(dev))
1951                 return -EOPNOTSUPP;
1952
1953         if (!dev->caps.map_clock_to_user) {
1954                 mlx4_dbg(dev, "Mapping the clock to userspace is not supported.\n");
1955                 return -EOPNOTSUPP;
1956         }
1957
1958         if (!params)
1959                 return -EINVAL;
1960
1961         params->bar = priv->fw.clock_bar;
1962         params->offset = priv->fw.clock_offset;
1963         params->size = MLX4_CLOCK_SIZE;
1964
1965         return 0;
1966 }
1967 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1968
1969 static void unmap_internal_clock(struct mlx4_dev *dev)
1970 {
1971         struct mlx4_priv *priv = mlx4_priv(dev);
1972
1973         if (priv->clock_mapping)
1974                 iounmap(priv->clock_mapping);
1975 }
1976
1977 static void mlx4_close_hca(struct mlx4_dev *dev)
1978 {
1979         unmap_internal_clock(dev);
1980         unmap_bf_area(dev);
1981         if (mlx4_is_slave(dev))
1982                 mlx4_slave_exit(dev);
1983         else {
1984                 mlx4_CLOSE_HCA(dev, 0);
1985                 mlx4_free_icms(dev);
1986         }
1987 }
1988
1989 static void mlx4_close_fw(struct mlx4_dev *dev)
1990 {
1991         if (!mlx4_is_slave(dev)) {
1992                 mlx4_UNMAP_FA(dev);
1993                 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1994         }
1995 }
1996
1997 static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1998 {
1999 #define COMM_CHAN_OFFLINE_OFFSET 0x09
2000
2001         u32 comm_flags;
2002         u32 offline_bit;
2003         unsigned long end;
2004         struct mlx4_priv *priv = mlx4_priv(dev);
2005
2006         end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
2007         while (time_before(jiffies, end)) {
2008                 comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
2009                                           MLX4_COMM_CHAN_FLAGS));
2010                 offline_bit = (comm_flags &
2011                                (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
2012                 if (!offline_bit)
2013                         return 0;
2014
2015                 /* If device removal has been requested,
2016                  * do not continue retrying.
2017                  */
2018                 if (dev->persist->interface_state &
2019                     MLX4_INTERFACE_STATE_NOWAIT)
2020                         break;
2021
2022                 /* There are cases in the AER/reset flow where the PF needs
2023                  * around 100 msec to load. We therefore sleep for 100 msec
2024                  * to allow other tasks to make use of the CPU during this
2025                  * time interval.
2026                  */
2027                 msleep(100);
2028         }
2029         mlx4_err(dev, "Communication channel is offline.\n");
2030         return -EIO;
2031 }
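/*
 * The wait loop above is the standard wraparound-safe jiffies timeout
 * idiom (names below are illustrative):
 *
 *   unsigned long end = jiffies + msecs_to_jiffies(timeout_ms);
 *
 *   while (time_before(jiffies, end)) {
 *           if (condition_met())
 *                   return 0;
 *           msleep(poll_interval);
 *   }
 *   return -EIO;
 *
 * time_before() compares with wraparound in mind, unlike a plain
 * "jiffies < end" test.
 */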
2032
2033 static void mlx4_reset_vf_support(struct mlx4_dev *dev)
2034 {
2035 #define COMM_CHAN_RST_OFFSET 0x1e
2036
2037         struct mlx4_priv *priv = mlx4_priv(dev);
2038         u32 comm_rst;
2039         u32 comm_caps;
2040
2041         comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
2042                                  MLX4_COMM_CHAN_CAPS));
2043         comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
2044
2045         if (comm_rst)
2046                 dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
2047 }
2048
2049 static int mlx4_init_slave(struct mlx4_dev *dev)
2050 {
2051         struct mlx4_priv *priv = mlx4_priv(dev);
2052         u64 dma = (u64) priv->mfunc.vhcr_dma;
2053         int ret_from_reset = 0;
2054         u32 slave_read;
2055         u32 cmd_channel_ver;
2056
2057         if (atomic_read(&pf_loading)) {
2058                 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
2059                 return -EPROBE_DEFER;
2060         }
2061
2062         mutex_lock(&priv->cmd.slave_cmd_mutex);
2063         priv->cmd.max_cmds = 1;
2064         if (mlx4_comm_check_offline(dev)) {
2065                 mlx4_err(dev, "PF is not responsive, skipping initialization\n");
2066                 goto err_offline;
2067         }
2068
2069         mlx4_reset_vf_support(dev);
2070         mlx4_warn(dev, "Sending reset\n");
2071         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
2072                                        MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
2073         /* If we are in the middle of FLR, the slave will try
2074          * NUM_OF_RESET_RETRIES times before giving up. */
2075         if (ret_from_reset) {
2076                 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
2077                         mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
2078                         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2079                         return -EPROBE_DEFER;
2080                 } else
2081                         goto err;
2082         }
2083
2084         /* check the driver version - the slave I/F revision
2085          * must match the master's */
2086         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
2087         cmd_channel_ver = mlx4_comm_get_version();
2088
2089         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
2090                 MLX4_COMM_GET_IF_REV(slave_read)) {
2091                 mlx4_err(dev, "slave driver version is not supported by the master\n");
2092                 goto err;
2093         }
2094
2095         mlx4_warn(dev, "Sending vhcr0\n");
2096         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
2097                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2098                 goto err;
2099         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
2100                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2101                 goto err;
2102         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
2103                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2104                 goto err;
2105         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
2106                           MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2107                 goto err;
2108
2109         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2110         return 0;
2111
2112 err:
2113         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
2114 err_offline:
2115         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2116         return -EIO;
2117 }
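/*
 * The VHCR handshake above hands the 64-bit DMA address of the slave
 * command page to the PF 16 bits at a time: VHCR0 carries bits 63:48,
 * VHCR1 bits 47:32, VHCR2 bits 31:16, and VHCR_EN the low 16 bits
 * while also enabling the channel.
 */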
2118
2119 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
2120 {
2121         int i;
2122
2123         for (i = 1; i <= dev->caps.num_ports; i++) {
2124                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
2125                         dev->caps.gid_table_len[i] =
2126                                 mlx4_get_slave_num_gids(dev, 0, i);
2127                 else
2128                         dev->caps.gid_table_len[i] = 1;
2129                 dev->caps.pkey_table_len[i] =
2130                         dev->phys_caps.pkey_phys_table_len[i] - 1;
2131         }
2132 }
2133
2134 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
2135 {
2136         int i;
2137
2138         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
2139               i++) {
2140                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
2141                         break;
2142         }
2143
2144         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
2145 }
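/*
 * Reading the bound above: a 2^i-byte MGM entry admits up to
 * 4 * ((1 << i) / 16 - 2) QPs, which can be read as (entry_size / 16
 * - 2) 16-byte member slots after a two-slot header, with up to 4 QPs
 * per slot. For example, i = 9 (512-byte entries) gives
 * 4 * 30 = 120 QPs per entry.
 */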
2146
2147 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
2148 {
2149         switch (dmfs_high_steer_mode) {
2150         case MLX4_STEERING_DMFS_A0_DEFAULT:
2151                 return "default performance";
2152
2153         case MLX4_STEERING_DMFS_A0_DYNAMIC:
2154                 return "dynamic hybrid mode";
2155
2156         case MLX4_STEERING_DMFS_A0_STATIC:
2157                 return "performance optimized for limited rule configuration (static)";
2158
2159         case MLX4_STEERING_DMFS_A0_DISABLE:
2160                 return "disabled performance optimized steering";
2161
2162         case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
2163                 return "performance optimized steering not supported";
2164
2165         default:
2166                 return "Unrecognized mode";
2167         }
2168 }
2169
2170 #define MLX4_DMFS_A0_STEERING                   (1UL << 2)
2171
2172 static void choose_steering_mode(struct mlx4_dev *dev,
2173                                  struct mlx4_dev_cap *dev_cap)
2174 {
2175         if (mlx4_log_num_mgm_entry_size <= 0) {
2176                 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
2177                         if (dev->caps.dmfs_high_steer_mode ==
2178                             MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2179                                 mlx4_err(dev, "DMFS high rate mode not supported\n");
2180                         else
2181                                 dev->caps.dmfs_high_steer_mode =
2182                                         MLX4_STEERING_DMFS_A0_STATIC;
2183                 }
2184         }
2185
2186         if (mlx4_log_num_mgm_entry_size <= 0 &&
2187             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
2188             (!mlx4_is_mfunc(dev) ||
2189              (dev_cap->fs_max_num_qp_per_entry >=
2190              (dev->persist->num_vfs + 1))) &&
2191             choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
2192                 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
2193                 dev->oper_log_mgm_entry_size =
2194                         choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
2195                 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
2196                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
2197                 dev->caps.fs_log_max_ucast_qp_range_size =
2198                         dev_cap->fs_log_max_ucast_qp_range_size;
2199         } else {
2200                 if (dev->caps.dmfs_high_steer_mode !=
2201                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2202                         dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
2203                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
2204                     dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2205                         dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
2206                 else {
2207                         dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
2208
2209                         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
2210                             dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2211                                 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
2212                 }
2213                 dev->oper_log_mgm_entry_size =
2214                         mlx4_log_num_mgm_entry_size > 0 ?
2215                         mlx4_log_num_mgm_entry_size :
2216                         MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
2217                 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
2218         }
2219         mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
2220                  mlx4_steering_mode_str(dev->caps.steering_mode),
2221                  dev->oper_log_mgm_entry_size,
2222                  mlx4_log_num_mgm_entry_size);
2223 }
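/*
 * Module parameter encoding used above: a positive
 * log_num_mgm_entry_size is taken literally as the B0-mode entry size,
 * while zero or a negative value requests device-managed steering and
 * the absolute value acts as a flag mask (MLX4_DMFS_A0_STEERING, bit
 * 2, asks for the static A0 optimization).
 */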
2224
2225 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
2226                                        struct mlx4_dev_cap *dev_cap)
2227 {
2228         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2229             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
2230                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
2231         else
2232                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
2233
2234         mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
2235                  == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
2236 }
2237
2238 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
2239 {
2240         int i;
2241         struct mlx4_port_cap port_cap;
2242
2243         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2244                 return -EINVAL;
2245
2246         for (i = 1; i <= dev->caps.num_ports; i++) {
2247                 if (mlx4_dev_port(dev, i, &port_cap)) {
2248                         mlx4_err(dev,
2249                                  "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n");
2250                 } else if ((dev->caps.dmfs_high_steer_mode !=
2251                             MLX4_STEERING_DMFS_A0_DEFAULT) &&
2252                            (port_cap.dmfs_optimized_state ==
2253                             !!(dev->caps.dmfs_high_steer_mode ==
2254                             MLX4_STEERING_DMFS_A0_DISABLE))) {
2255                         mlx4_err(dev,
2256                                  "DMFS high rate steer modes differ: driver requested %s but it is %s in FW.\n",
2257                                  dmfs_high_rate_steering_mode_str(
2258                                         dev->caps.dmfs_high_steer_mode),
2259                                  (port_cap.dmfs_optimized_state ?
2260                                         "enabled" : "disabled"));
2261                 }
2262         }
2263
2264         return 0;
2265 }
2266
2267 static int mlx4_init_fw(struct mlx4_dev *dev)
2268 {
2269         struct mlx4_mod_stat_cfg   mlx4_cfg;
2270         int err = 0;
2271
2272         if (!mlx4_is_slave(dev)) {
2273                 err = mlx4_QUERY_FW(dev);
2274                 if (err) {
2275                         if (err == -EACCES)
2276                                 mlx4_info(dev, "non-primary physical function, skipping\n");
2277                         else
2278                                 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2279                         return err;
2280                 }
2281
2282                 err = mlx4_load_fw(dev);
2283                 if (err) {
2284                         mlx4_err(dev, "Failed to start FW, aborting\n");
2285                         return err;
2286                 }
2287
2288                 mlx4_cfg.log_pg_sz_m = 1;
2289                 mlx4_cfg.log_pg_sz = 0;
2290                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2291                 if (err)
2292                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2293         }
2294
2295         return err;
2296 }
2297
2298 static int mlx4_init_hca(struct mlx4_dev *dev)
2299 {
2300         struct mlx4_priv          *priv = mlx4_priv(dev);
2301         struct mlx4_adapter        adapter;
2302         struct mlx4_dev_cap        dev_cap;
2303         struct mlx4_profile        profile;
2304         struct mlx4_init_hca_param init_hca;
2305         u64 icm_size;
2306         struct mlx4_config_dev_params params;
2307         int err;
2308
2309         if (!mlx4_is_slave(dev)) {
2310                 err = mlx4_dev_cap(dev, &dev_cap);
2311                 if (err) {
2312                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2313                         return err;
2314                 }
2315
2316                 choose_steering_mode(dev, &dev_cap);
2317                 choose_tunnel_offload_mode(dev, &dev_cap);
2318
2319                 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2320                     mlx4_is_master(dev))
2321                         dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2322
2323                 err = mlx4_get_phys_port_id(dev);
2324                 if (err)
2325                         mlx4_err(dev, "Failed to get physical port id\n");
2326
2327                 if (mlx4_is_master(dev))
2328                         mlx4_parav_master_pf_caps(dev);
2329
2330                 if (mlx4_low_memory_profile()) {
2331                         mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2332                         profile = low_mem_profile;
2333                 } else {
2334                         profile = default_profile;
2335                 }
2336                 if (dev->caps.steering_mode ==
2337                     MLX4_STEERING_MODE_DEVICE_MANAGED)
2338                         profile.num_mcg = MLX4_FS_NUM_MCG;
2339
2340                 icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
2341                                              &init_hca);
2342                 if ((long long) icm_size < 0) {
2343                         err = icm_size;
2344                         return err;
2345                 }
2346
2347                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2348
2349                 if (enable_4k_uar || !dev->persist->num_vfs) {
2350                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
2351                                                     PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
2352                         init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
2353                 } else {
2354                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2355                         init_hca.uar_page_sz = PAGE_SHIFT - 12;
2356                 }
2357
2358                 init_hca.mw_enabled = 0;
2359                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2360                     dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2361                         init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2362
2363                 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
2364                 if (err)
2365                         return err;
2366
2367                 err = mlx4_INIT_HCA(dev, &init_hca);
2368                 if (err) {
2369                         mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2370                         goto err_free_icm;
2371                 }
2372
2373                 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2374                         err = mlx4_query_func(dev, &dev_cap);
2375                         if (err < 0) {
2376                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2377                                 goto err_close;
2378                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2379                                 dev->caps.num_eqs = dev_cap.max_eqs;
2380                                 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
2381                                 dev->caps.reserved_uars = dev_cap.reserved_uars;
2382                         }
2383                 }
2384
2385                 /*
2386                  * If TS is supported by FW
2387                  * read HCA frequency by QUERY_HCA command
2388                  */
2389                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2390                         memset(&init_hca, 0, sizeof(init_hca));
2391                         err = mlx4_QUERY_HCA(dev, &init_hca);
2392                         if (err) {
2393                                 mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
2394                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2395                         } else {
2396                                 dev->caps.hca_core_clock =
2397                                         init_hca.hca_core_clock;
2398                         }
2399
2400                         /* In case we got HCA frequency 0 - disable timestamping
2401                          * to avoid dividing by zero
2402                          */
2403                         if (!dev->caps.hca_core_clock) {
2404                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2405                                 mlx4_err(dev,
2406                                          "HCA frequency is 0 - timestamping is not supported\n");
2407                         } else if (map_internal_clock(dev)) {
2408                                 /*
2409                                  * Map internal clock,
2410                                  * in case of failure disable timestamping
2411                                  */
2412                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2413                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2414                         }
2415                 }
2416
2417                 if (dev->caps.dmfs_high_steer_mode !=
2418                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2419                         if (mlx4_validate_optimized_steering(dev))
2420                                 mlx4_warn(dev, "Optimized steering validation failed\n");
2421
2422                         if (dev->caps.dmfs_high_steer_mode ==
2423                             MLX4_STEERING_DMFS_A0_DISABLE) {
2424                                 dev->caps.dmfs_high_rate_qpn_base =
2425                                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2426                                 dev->caps.dmfs_high_rate_qpn_range =
2427                                         MLX4_A0_STEERING_TABLE_SIZE;
2428                         }
2429
2430                         mlx4_info(dev, "DMFS high rate steer mode is: %s\n",
2431                                   dmfs_high_rate_steering_mode_str(
2432                                         dev->caps.dmfs_high_steer_mode));
2433                 }
2434         } else {
2435                 err = mlx4_init_slave(dev);
2436                 if (err) {
2437                         if (err != -EPROBE_DEFER)
2438                                 mlx4_err(dev, "Failed to initialize slave\n");
2439                         return err;
2440                 }
2441
2442                 err = mlx4_slave_cap(dev);
2443                 if (err) {
2444                         mlx4_err(dev, "Failed to obtain slave caps\n");
2445                         goto err_close;
2446                 }
2447         }
2448
2449         if (map_bf_area(dev))
2450                 mlx4_dbg(dev, "Failed to map blue flame area\n");
2451
2452         /* Only the master sets the ports; all the rest get them from it. */
2453         if (!mlx4_is_slave(dev))
2454                 mlx4_set_port_mask(dev);
2455
2456         err = mlx4_QUERY_ADAPTER(dev, &adapter);
2457         if (err) {
2458                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2459                 goto unmap_bf;
2460         }
2461
2462         /* Query CONFIG_DEV parameters */
2463         err = mlx4_config_dev_retrieval(dev, &params);
2464         if (err && err != -EOPNOTSUPP) {
2465                 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2466         } else if (!err) {
2467                 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2468                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2469         }
2470         priv->eq_table.inta_pin = adapter.inta_pin;
2471         memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
2472
2473         return 0;
2474
2475 unmap_bf:
2476         unmap_internal_clock(dev);
2477         unmap_bf_area(dev);
2478
2479         if (mlx4_is_slave(dev))
2480                 mlx4_slave_destroy_special_qp_cap(dev);
2481
2482 err_close:
2483         if (mlx4_is_slave(dev))
2484                 mlx4_slave_exit(dev);
2485         else
2486                 mlx4_CLOSE_HCA(dev, 0);
2487
2488 err_free_icm:
2489         if (!mlx4_is_slave(dev))
2490                 mlx4_free_icms(dev);
2491
2492         return err;
2493 }
2494
2495 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2496 {
2497         struct mlx4_priv *priv = mlx4_priv(dev);
2498         int nent_pow2;
2499
2500         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2501                 return -ENOENT;
2502
2503         if (!dev->caps.max_counters)
2504                 return -ENOSPC;
2505
2506         nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2507         /* reserve last counter index for sink counter */
2508         return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2509                                 nent_pow2 - 1, 0,
2510                                 nent_pow2 - dev->caps.max_counters + 1);
2511 }
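/*
 * Worked example with illustrative numbers: max_counters = 100 gives
 * nent_pow2 = 128 and reserved_top = 128 - 100 + 1 = 29, so indexes
 * 0..98 are allocatable and index 99 (the last real counter) is kept
 * out of the bitmap as the sink counter.
 */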
2512
2513 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2514 {
2515         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2516                 return;
2517
2518         if (!dev->caps.max_counters)
2519                 return;
2520
2521         mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2522 }
2523
2524 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2525 {
2526         struct mlx4_priv *priv = mlx4_priv(dev);
2527         int port;
2528
2529         for (port = 0; port < dev->caps.num_ports; port++)
2530                 if (priv->def_counter[port] != -1)
2531                         mlx4_counter_free(dev,  priv->def_counter[port]);
2532 }
2533
2534 static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2535 {
2536         struct mlx4_priv *priv = mlx4_priv(dev);
2537         int port, err = 0;
2538         u32 idx;
2539
2540         for (port = 0; port < dev->caps.num_ports; port++)
2541                 priv->def_counter[port] = -1;
2542
2543         for (port = 0; port < dev->caps.num_ports; port++) {
2544                 err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
2545
2546                 if (!err || err == -ENOSPC) {
2547                         priv->def_counter[port] = idx;
2548                         err = 0;
2549                 } else if (err == -ENOENT) {
2550                         err = 0;
2551                         continue;
2552                 } else if (mlx4_is_slave(dev) && err == -EINVAL) {
2553                         priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2554                         mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2555                                   MLX4_SINK_COUNTER_INDEX(dev));
2556                         err = 0;
2557                 } else {
2558                         mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2559                                  __func__, port + 1, err);
2560                         mlx4_cleanup_default_counters(dev);
2561                         return err;
2562                 }
2563
2564                 mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2565                          __func__, priv->def_counter[port], port + 1);
2566         }
2567
2568         return err;
2569 }
2570
2571 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2572 {
2573         struct mlx4_priv *priv = mlx4_priv(dev);
2574
2575         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2576                 return -ENOENT;
2577
2578         *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2579         if (*idx == -1) {
2580                 *idx = MLX4_SINK_COUNTER_INDEX(dev);
2581                 return -ENOSPC;
2582         }
2583
2584         return 0;
2585 }
2586
2587 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
2588 {
2589         u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30);
2590         u64 out_param;
2591         int err;
2592
2593         if (mlx4_is_mfunc(dev)) {
2594                 err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
2595                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2596                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2597                 if (!err)
2598                         *idx = get_param_l(&out_param);
2599                 if (WARN_ON(err == -ENOSPC))
2600                         err = -EINVAL;
2601                 return err;
2602         }
2603         return __mlx4_counter_alloc(dev, idx);
2604 }
2605 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
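/*
 * Illustrative usage (a sketch mirroring
 * mlx4_allocate_default_counters() above): on the native path an
 * -ENOSPC result still leaves *idx pointing at the shared sink
 * counter, so callers can often proceed with it:
 *
 *   u32 idx;
 *   int err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
 *
 *   if (err && err != -ENOSPC)
 *           return err;
 *   ...
 *   mlx4_counter_free(dev, idx);
 *
 * Freeing the sink index is a no-op, see __mlx4_counter_free().
 */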
2606
2607 static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2608                                 u8 counter_index)
2609 {
2610         struct mlx4_cmd_mailbox *if_stat_mailbox;
2611         int err;
2612         u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2613
2614         if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2615         if (IS_ERR(if_stat_mailbox))
2616                 return PTR_ERR(if_stat_mailbox);
2617
2618         err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2619                            MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2620                            MLX4_CMD_NATIVE);
2621
2622         mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2623         return err;
2624 }
2625
2626 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2627 {
2628         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2629                 return;
2630
2631         if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2632                 return;
2633
2634         __mlx4_clear_if_stat(dev, idx);
2635
2636         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2638 }
2639
2640 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2641 {
2642         u64 in_param = 0;
2643
2644         if (mlx4_is_mfunc(dev)) {
2645                 set_param_l(&in_param, idx);
2646                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2647                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2648                          MLX4_CMD_WRAPPED);
2649                 return;
2650         }
2651         __mlx4_counter_free(dev, idx);
2652 }
2653 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2654
2655 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2656 {
2657         struct mlx4_priv *priv = mlx4_priv(dev);
2658
2659         return priv->def_counter[port - 1];
2660 }
2661 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2662
2663 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2664 {
2665         struct mlx4_priv *priv = mlx4_priv(dev);
2666
2667         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2668 }
2669 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2670
2671 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2672 {
2673         struct mlx4_priv *priv = mlx4_priv(dev);
2674
2675         return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2676 }
2677 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2678
2679 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2680 {
2681         struct mlx4_priv *priv = mlx4_priv(dev);
2682         __be64 guid;
2683
2684         /* entry 0 holds the hardware-assigned GUID; leave it untouched */
2685         if (entry == 0)
2686                 return;
2687
2688         get_random_bytes((char *)&guid, sizeof(guid));
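             /* In EUI-64 terms: clear the group (multicast) bit and set the
              * locally-administered bit, so the random value is a valid
              * unicast, locally assigned GUID.
              */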
2689         guid &= ~(cpu_to_be64(1ULL << 56));
2690         guid |= cpu_to_be64(1ULL << 57);
2691         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2692 }
2693
2694 static int mlx4_setup_hca(struct mlx4_dev *dev)
2695 {
2696         struct mlx4_priv *priv = mlx4_priv(dev);
2697         int err;
2698         int port;
2699         __be32 ib_port_default_caps;
2700
2701         err = mlx4_init_uar_table(dev);
2702         if (err) {
2703                 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2704                 return err;
2705         }
2706
2707         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2708         if (err) {
2709                 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2710                 goto err_uar_table_free;
2711         }
2712
2713         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2714         if (!priv->kar) {
2715                 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2716                 err = -ENOMEM;
2717                 goto err_uar_free;
2718         }
2719
2720         err = mlx4_init_pd_table(dev);
2721         if (err) {
2722                 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2723                 goto err_kar_unmap;
2724         }
2725
2726         err = mlx4_init_xrcd_table(dev);
2727         if (err) {
2728                 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2729                 goto err_pd_table_free;
2730         }
2731
2732         err = mlx4_init_mr_table(dev);
2733         if (err) {
2734                 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2735                 goto err_xrcd_table_free;
2736         }
2737
2738         if (!mlx4_is_slave(dev)) {
2739                 err = mlx4_init_mcg_table(dev);
2740                 if (err) {
2741                         mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2742                         goto err_mr_table_free;
2743                 }
2744                 err = mlx4_config_mad_demux(dev);
2745                 if (err) {
2746                         mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2747                         goto err_mcg_table_free;
2748                 }
2749         }
2750
2751         err = mlx4_init_eq_table(dev);
2752         if (err) {
2753                 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2754                 goto err_mcg_table_free;
2755         }
2756
2757         err = mlx4_cmd_use_events(dev);
2758         if (err) {
2759                 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2760                 goto err_eq_table_free;
2761         }
2762
2763         err = mlx4_NOP(dev);
2764         if (err) {
2765                 if (dev->flags & MLX4_FLAG_MSI_X) {
2766                         mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
2767                                   priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2768                         mlx4_warn(dev, "Trying again without MSI-X\n");
2769                 } else {
2770                         mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2771                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2772                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2773                 }
2774
2775                 goto err_cmd_poll;
2776         }
2777
2778         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2779
2780         err = mlx4_init_cq_table(dev);
2781         if (err) {
2782                 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2783                 goto err_cmd_poll;
2784         }
2785
2786         err = mlx4_init_srq_table(dev);
2787         if (err) {
2788                 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2789                 goto err_cq_table_free;
2790         }
2791
2792         err = mlx4_init_qp_table(dev);
2793         if (err) {
2794                 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2795                 goto err_srq_table_free;
2796         }
2797
2798         if (!mlx4_is_slave(dev)) {
2799                 err = mlx4_init_counters_table(dev);
2800                 if (err && err != -ENOENT) {
2801                         mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2802                         goto err_qp_table_free;
2803                 }
2804         }
2805
2806         err = mlx4_allocate_default_counters(dev);
2807         if (err) {
2808                 mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2809                 goto err_counters_table_free;
2810         }
2811
2812         if (!mlx4_is_slave(dev)) {
2813                 for (port = 1; port <= dev->caps.num_ports; port++) {
2814                         ib_port_default_caps = 0;
2815                         err = mlx4_get_port_ib_caps(dev, port,
2816                                                     &ib_port_default_caps);
2817                         if (err)
2818                                 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2819                                           port, err);
2820                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2821
2822                         /* initialize per-slave default ib port capabilities */
2823                         if (mlx4_is_master(dev)) {
2824                                 int i;
2825                                 for (i = 0; i < dev->num_slaves; i++) {
2826                                         if (i == mlx4_master_func_num(dev))
2827                                                 continue;
2828                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2829                                                 ib_port_default_caps;
2830                                 }
2831                         }
2832
2833                         if (mlx4_is_mfunc(dev))
2834                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2835                         else
2836                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2837
2838                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2839                                             dev->caps.pkey_table_len[port] : -1);
2840                         if (err) {
2841                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
2842                                          port);
2843                                 goto err_default_counters_free;
2844                         }
2845                 }
2846         }
2847
2848         return 0;
2849
2850 err_default_counters_free:
2851         mlx4_cleanup_default_counters(dev);
2852
2853 err_counters_table_free:
2854         if (!mlx4_is_slave(dev))
2855                 mlx4_cleanup_counters_table(dev);
2856
2857 err_qp_table_free:
2858         mlx4_cleanup_qp_table(dev);
2859
2860 err_srq_table_free:
2861         mlx4_cleanup_srq_table(dev);
2862
2863 err_cq_table_free:
2864         mlx4_cleanup_cq_table(dev);
2865
2866 err_cmd_poll:
2867         mlx4_cmd_use_polling(dev);
2868
2869 err_eq_table_free:
2870         mlx4_cleanup_eq_table(dev);
2871
2872 err_mcg_table_free:
2873         if (!mlx4_is_slave(dev))
2874                 mlx4_cleanup_mcg_table(dev);
2875
2876 err_mr_table_free:
2877         mlx4_cleanup_mr_table(dev);
2878
2879 err_xrcd_table_free:
2880         mlx4_cleanup_xrcd_table(dev);
2881
2882 err_pd_table_free:
2883         mlx4_cleanup_pd_table(dev);
2884
2885 err_kar_unmap:
2886         iounmap(priv->kar);
2887
2888 err_uar_free:
2889         mlx4_uar_free(dev, &priv->driver_uar);
2890
2891 err_uar_table_free:
2892         mlx4_cleanup_uar_table(dev);
2893         return err;
2894 }
2895
2896 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2897 {
2898         int requested_cpu = 0;
2899         struct mlx4_priv *priv = mlx4_priv(dev);
2900         struct mlx4_eq *eq;
2901         int off = 0;
2902         int i;
2903
2904         if (eqn > dev->caps.num_comp_vectors)
2905                 return -EINVAL;
2906
2907         for (i = 1; i < port; i++)
2908                 off += mlx4_get_eqs_per_port(dev, i);
2909
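             /* Translate the EQ number into a zero-based CPU index for this
              * port, skipping one extra slot when the async EQ lies below
              * eqn in the vector table.
              */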
2910         requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2911
2912         /* A negative value means the EQs are shared and this call comes from the second port */
2913         if (requested_cpu < 0)
2914                 return 0;
2915
2916         eq = &priv->eq_table.eq[eqn];
2917
2918         if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2919                 return -ENOMEM;
2920
2921         cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2922
2923         return 0;
2924 }
2925
2926 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2927 {
2928         struct mlx4_priv *priv = mlx4_priv(dev);
2929         struct msix_entry *entries;
2930         int i;
2931         int port = 0;
2932
2933         if (msi_x) {
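                     /* Ask for one vector per online CPU per port plus one
                      * for the async EQ, capped by the EQs the HCA has free
                      * and by MAX_MSIX.
                      */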
2934                 int nreq = min3(dev->caps.num_ports *
2935                                 (int)num_online_cpus() + 1,
2936                                 dev->caps.num_eqs - dev->caps.reserved_eqs,
2937                                 MAX_MSIX);
2938
2939                 if (msi_x > 1)
2940                         nreq = min_t(int, nreq, msi_x);
2941
2942                 entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2943                 if (!entries)
2944                         goto no_msi;
2945
2946                 for (i = 0; i < nreq; ++i)
2947                         entries[i].entry = i;
2948
2949                 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2950                                              nreq);
2951
2952                 if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2953                         kfree(entries);
2954                         goto no_msi;
2955                 }
2956                 /* one vector is reserved for events (asynchronous EQ) */
2957                 dev->caps.num_comp_vectors = nreq - 1;
2958
2959                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2960                 bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2961                             dev->caps.num_ports);
2962
2963                 for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2964                         if (i == MLX4_EQ_ASYNC)
2965                                 continue;
2966
2967                         priv->eq_table.eq[i].irq =
2968                                 entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2969
2970                         if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2971                                 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2972                                             dev->caps.num_ports);
2973                                 /* We don't set affinity hint when there
2974                                  * aren't enough EQs
2975                                  */
2976                         } else {
2977                                 set_bit(port,
2978                                         priv->eq_table.eq[i].actv_ports.ports);
2979                                 if (mlx4_init_affinity_hint(dev, port + 1, i))
2980                                         mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2981                                                   i);
2982                         }
2983                         /* Divide the EQs evenly between the ports.
2984                          * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2985                          * is the number of EQs per port (eqs_per_port).
2986                          * Ideally we would advance the port whenever
2987                          * (i + 1) % eqs_per_port == 0; however, since one
2988                          * slot is taken by the asynchronous EQ, we compare
2989                          * the remainder against !!((i + 1) > MLX4_EQ_ASYNC)
2990                          * to skip over it.
2991                          */
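                             /* For example, with 2 ports and 8 completion
                              * vectors, eqs_per_port is 4: EQs 1-4 serve
                              * port 1 and EQs 5-8 serve port 2 (EQ 0 being
                              * the async EQ).
                              */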
2992                         if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2993                             ((i + 1) %
2994                              (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
2995                             !!((i + 1) > MLX4_EQ_ASYNC))
2996                                 /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
2997                                  * everything is shared anyway.
2998                                  */
2999                                 port++;
3000                 }
3001
3002                 dev->flags |= MLX4_FLAG_MSI_X;
3003
3004                 kfree(entries);
3005                 return;
3006         }
3007
3008 no_msi:
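             /* MSI-X is unavailable: fall back to the single legacy INTx
              * IRQ, shared by the async EQ and one completion EQ.
              */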
3009         dev->caps.num_comp_vectors = 1;
3010
3011         BUG_ON(MLX4_EQ_ASYNC >= 2);
3012         for (i = 0; i < 2; ++i) {
3013                 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
3014                 if (i != MLX4_EQ_ASYNC) {
3015                         bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
3016                                     dev->caps.num_ports);
3017                 }
3018         }
3019 }
3020
3021 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
3022 {
3023         struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
3024         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
3025         int err;
3026
3027         err = devlink_port_register(devlink, &info->devlink_port, port);
3028         if (err)
3029                 return err;
3030
3031         info->dev = dev;
3032         info->port = port;
3033         if (!mlx4_is_slave(dev)) {
3034                 mlx4_init_mac_table(dev, &info->mac_table);
3035                 mlx4_init_vlan_table(dev, &info->vlan_table);
3036                 mlx4_init_roce_gid_table(dev, &info->gid_table);
3037                 info->base_qpn = mlx4_get_base_qpn(dev, port);
3038         }
3039
3040         sprintf(info->dev_name, "mlx4_port%d", port);
3041         info->port_attr.attr.name = info->dev_name;
3042         if (mlx4_is_mfunc(dev)) {
3043                 info->port_attr.attr.mode = 0444;
3044         } else {
3045                 info->port_attr.attr.mode = 0644;
3046                 info->port_attr.store     = set_port_type;
3047         }
3048         info->port_attr.show      = show_port_type;
3049         sysfs_attr_init(&info->port_attr.attr);
3050
3051         err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
3052         if (err) {
3053                 mlx4_err(dev, "Failed to create file for port %d\n", port);
3054                 devlink_port_unregister(&info->devlink_port);
3055                 info->port = -1;
3056                 return err;
3057         }
3058
3059         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
3060         info->port_mtu_attr.attr.name = info->dev_mtu_name;
3061         if (mlx4_is_mfunc(dev)) {
3062                 info->port_mtu_attr.attr.mode = 0444;
3063         } else {
3064                 info->port_mtu_attr.attr.mode = 0644;
3065                 info->port_mtu_attr.store     = set_port_ib_mtu;
3066         }
3067         info->port_mtu_attr.show      = show_port_ib_mtu;
3068         sysfs_attr_init(&info->port_mtu_attr.attr);
3069
3070         err = device_create_file(&dev->persist->pdev->dev,
3071                                  &info->port_mtu_attr);
3072         if (err) {
3073                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
3074                 device_remove_file(&info->dev->persist->pdev->dev,
3075                                    &info->port_attr);
3076                 devlink_port_unregister(&info->devlink_port);
3077                 info->port = -1;
3078                 return err;
3079         }
3080
3081         return 0;
3082 }
3083
3084 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
3085 {
3086         if (info->port < 0)
3087                 return;
3088
3089         device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
3090         device_remove_file(&info->dev->persist->pdev->dev,
3091                            &info->port_mtu_attr);
3092         devlink_port_unregister(&info->devlink_port);
3093
3094 #ifdef CONFIG_RFS_ACCEL
3095         free_irq_cpu_rmap(info->rmap);
3096         info->rmap = NULL;
3097 #endif
3098 }
3099
3100 static int mlx4_init_steering(struct mlx4_dev *dev)
3101 {
3102         struct mlx4_priv *priv = mlx4_priv(dev);
3103         int num_entries = dev->caps.num_ports;
3104         int i, j;
3105
3106         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer),
3107                               GFP_KERNEL);
3108         if (!priv->steer)
3109                 return -ENOMEM;
3110
3111         for (i = 0; i < num_entries; i++)
3112                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3113                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3114                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3115                 }
3116         return 0;
3117 }
3118
3119 static void mlx4_clear_steering(struct mlx4_dev *dev)
3120 {
3121         struct mlx4_priv *priv = mlx4_priv(dev);
3122         struct mlx4_steer_index *entry, *tmp_entry;
3123         struct mlx4_promisc_qp *pqp, *tmp_pqp;
3124         int num_entries = dev->caps.num_ports;
3125         int i, j;
3126
3127         for (i = 0; i < num_entries; i++) {
3128                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3129                         list_for_each_entry_safe(pqp, tmp_pqp,
3130                                                  &priv->steer[i].promisc_qps[j],
3131                                                  list) {
3132                                 list_del(&pqp->list);
3133                                 kfree(pqp);
3134                         }
3135                         list_for_each_entry_safe(entry, tmp_entry,
3136                                                  &priv->steer[i].steer_entries[j],
3137                                                  list) {
3138                                 list_del(&entry->list);
3139                                 list_for_each_entry_safe(pqp, tmp_pqp,
3140                                                          &entry->duplicates,
3141                                                          list) {
3142                                         list_del(&pqp->list);
3143                                         kfree(pqp);
3144                                 }
3145                                 kfree(entry);
3146                         }
3147                 }
3148         }
3149         kfree(priv->steer);
3150 }
3151
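     /* Flatten PCI (slot, function) into a single ordinal, eight functions
      * per slot, so a VF's position among the PF's VFs can be compared
      * against the per-port VF counts.
      */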
3152 static int extended_func_num(struct pci_dev *pdev)
3153 {
3154         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3155 }
3156
3157 #define MLX4_OWNER_BASE 0x8069c
3158 #define MLX4_OWNER_SIZE 4
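     /* The ownership semaphore is a 32-bit word in BAR 0: a read that
      * returns 0 claims the device for this PF, a nonzero read means
      * another PF already owns it, and writing 0 releases it.
      */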
3159
3160 static int mlx4_get_ownership(struct mlx4_dev *dev)
3161 {
3162         void __iomem *owner;
3163         u32 ret;
3164
3165         if (pci_channel_offline(dev->persist->pdev))
3166                 return -EIO;
3167
3168         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3169                         MLX4_OWNER_BASE,
3170                         MLX4_OWNER_SIZE);
3171         if (!owner) {
3172                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3173                 return -ENOMEM;
3174         }
3175
3176         ret = readl(owner);
3177         iounmap(owner);
3178         return (int) !!ret;
3179 }
3180
3181 static void mlx4_free_ownership(struct mlx4_dev *dev)
3182 {
3183         void __iomem *owner;
3184
3185         if (pci_channel_offline(dev->persist->pdev))
3186                 return;
3187
3188         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3189                         MLX4_OWNER_BASE,
3190                         MLX4_OWNER_SIZE);
3191         if (!owner) {
3192                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3193                 return;
3194         }
3195         writel(0, owner);
3196         msleep(1000);
3197         iounmap(owner);
3198 }
3199
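     /* A consistent SR-IOV state has the SRIOV and MASTER flags either
      * both set or both clear.
      */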
3200 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
3201                                   !!((flags) & MLX4_FLAG_MASTER))
3202
3203 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
3204                              u8 total_vfs, int existing_vfs, int reset_flow)
3205 {
3206         u64 dev_flags = dev->flags;
3207         int err = 0;
3208         int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
3209                                         MLX4_MAX_NUM_VF);
3210
3211         if (reset_flow) {
3212                 dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
3213                                        GFP_KERNEL);
3214                 if (!dev->dev_vfs)
3215                         goto free_mem;
3216                 return dev_flags;
3217         }
3218
3219         atomic_inc(&pf_loading);
3220         if (dev->flags &  MLX4_FLAG_SRIOV) {
3221                 if (existing_vfs != total_vfs) {
3222                         mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
3223                                  existing_vfs, total_vfs);
3224                         total_vfs = existing_vfs;
3225                 }
3226         }
3227
3228         dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
3229         if (!dev->dev_vfs) {
3230                 mlx4_err(dev, "Failed to allocate memory for VFs\n");
3231                 goto disable_sriov;
3232         }
3233
3234         if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
3235                 if (total_vfs > fw_enabled_sriov_vfs) {
3236                         mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR-IOV\n",
3237                                  total_vfs, fw_enabled_sriov_vfs);
3238                         err = -ENOMEM;
3239                         goto disable_sriov;
3240                 }
3241                 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
3242                 err = pci_enable_sriov(pdev, total_vfs);
3243         }
3244         if (err) {
3245                 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
3246                          err);
3247                 goto disable_sriov;
3248         } else {
3249                 mlx4_warn(dev, "Running in master mode\n");
3250                 dev_flags |= MLX4_FLAG_SRIOV |
3251                         MLX4_FLAG_MASTER;
3252                 dev_flags &= ~MLX4_FLAG_SLAVE;
3253                 dev->persist->num_vfs = total_vfs;
3254         }
3255         return dev_flags;
3256
3257 disable_sriov:
3258         atomic_dec(&pf_loading);
3259 free_mem:
3260         dev->persist->num_vfs = 0;
3261         kfree(dev->dev_vfs);
3262         dev->dev_vfs = NULL;
3263         return dev_flags & ~MLX4_FLAG_MASTER;
3264 }
3265
3266 enum {
3267         MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
3268 };
3269
3270 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
3271                               int *nvfs)
3272 {
3273         int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
3274         /* Checking for 64 VFs as a limitation of CX2 */
3275         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
3276             requested_vfs >= 64) {
3277                 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
3278                          requested_vfs);
3279                 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
3280         }
3281         return 0;
3282 }
3283
3284 static int mlx4_pci_enable_device(struct mlx4_dev *dev)
3285 {
3286         struct pci_dev *pdev = dev->persist->pdev;
3287         int err = 0;
3288
3289         mutex_lock(&dev->persist->pci_status_mutex);
3290         if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) {
3291                 err = pci_enable_device(pdev);
3292                 if (!err)
3293                         dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED;
3294         }
3295         mutex_unlock(&dev->persist->pci_status_mutex);
3296
3297         return err;
3298 }
3299
3300 static void mlx4_pci_disable_device(struct mlx4_dev *dev)
3301 {
3302         struct pci_dev *pdev = dev->persist->pdev;
3303
3304         mutex_lock(&dev->persist->pci_status_mutex);
3305         if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) {
3306                 pci_disable_device(pdev);
3307                 dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED;
3308         }
3309         mutex_unlock(&dev->persist->pci_status_mutex);
3310 }
3311
3312 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
3313                          int total_vfs, int *nvfs, struct mlx4_priv *priv,
3314                          int reset_flow)
3315 {
3316         struct mlx4_dev *dev;
3317         unsigned sum = 0;
3318         int err;
3319         int port;
3320         int i;
3321         struct mlx4_dev_cap *dev_cap = NULL;
3322         int existing_vfs = 0;
3323
3324         dev = &priv->dev;
3325
3326         INIT_LIST_HEAD(&priv->ctx_list);
3327         spin_lock_init(&priv->ctx_lock);
3328
3329         mutex_init(&priv->port_mutex);
3330         mutex_init(&priv->bond_mutex);
3331
3332         INIT_LIST_HEAD(&priv->pgdir_list);
3333         mutex_init(&priv->pgdir_mutex);
3334         spin_lock_init(&priv->cmd.context_lock);
3335
3336         INIT_LIST_HEAD(&priv->bf_list);
3337         mutex_init(&priv->bf_mutex);
3338
3339         dev->rev_id = pdev->revision;
3340         dev->numa_node = dev_to_node(&pdev->dev);
3341
3342         /* Detect if this device is a virtual function */
3343         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3344                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3345                 dev->flags |= MLX4_FLAG_SLAVE;
3346         } else {
3347                 /* We reset the device and enable SRIOV only for physical
3348                  * devices.  Try to claim ownership on the device;
3349                  * if already taken, skip -- do not allow multiple PFs */
3350                 err = mlx4_get_ownership(dev);
3351                 if (err) {
3352                         if (err < 0)
3353                                 return err;
3354                         else {
3355                                 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
3356                                 return -EINVAL;
3357                         }
3358                 }
3359
3360                 atomic_set(&priv->opreq_count, 0);
3361                 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3362
3363                 /*
3364                  * Now reset the HCA before we touch the PCI capabilities or
3365                  * attempt a firmware command, since a boot ROM may have left
3366                  * the HCA in an undefined state.
3367                  */
3368                 err = mlx4_reset(dev);
3369                 if (err) {
3370                         mlx4_err(dev, "Failed to reset HCA, aborting\n");
3371                         goto err_sriov;
3372                 }
3373
3374                 if (total_vfs) {
3375                         dev->flags = MLX4_FLAG_MASTER;
3376                         existing_vfs = pci_num_vf(pdev);
3377                         if (existing_vfs)
3378                                 dev->flags |= MLX4_FLAG_SRIOV;
3379                         dev->persist->num_vfs = total_vfs;
3380                 }
3381         }
3382
3383         /* on load remove any previous indication of internal error,
3384          * device is up.
3385          */
3386         dev->persist->state = MLX4_DEVICE_STATE_UP;
3387
3388 slave_start:
3389         err = mlx4_cmd_init(dev);
3390         if (err) {
3391                 mlx4_err(dev, "Failed to init command interface, aborting\n");
3392                 goto err_sriov;
3393         }
3394
3395         /* In slave functions, the communication channel must be initialized
3396          * before posting commands. Also, init num_slaves before calling
3397          * mlx4_init_hca */
3398         if (mlx4_is_mfunc(dev)) {
3399                 if (mlx4_is_master(dev)) {
3400                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3401
3402                 } else {
3403                         dev->num_slaves = 0;
3404                         err = mlx4_multi_func_init(dev);
3405                         if (err) {
3406                                 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
3407                                 goto err_cmd;
3408                         }
3409                 }
3410         }
3411
3412         err = mlx4_init_fw(dev);
3413         if (err) {
3414                 mlx4_err(dev, "Failed to init fw, aborting\n");
3415                 goto err_mfunc;
3416         }
3417
3418         if (mlx4_is_master(dev)) {
3419                 /* if we got here via the goto slave_start below, dev_cap is already allocated */
3420                 if (!dev_cap) {
3421                         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
3422
3423                         if (!dev_cap) {
3424                                 err = -ENOMEM;
3425                                 goto err_fw;
3426                         }
3427
3428                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3429                         if (err) {
3430                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
3431                                 goto err_fw;
3432                         }
3433
3434                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3435                                 goto err_fw;
3436
3437                         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3438                                 u64 dev_flags = mlx4_enable_sriov(dev, pdev,
3439                                                                   total_vfs,
3440                                                                   existing_vfs,
3441                                                                   reset_flow);
3442
3443                                 mlx4_close_fw(dev);
3444                                 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3445                                 dev->flags = dev_flags;
3446                                 if (!SRIOV_VALID_STATE(dev->flags)) {
3447                                         mlx4_err(dev, "Invalid SRIOV state\n");
3448                                         goto err_sriov;
3449                                 }
3450                                 err = mlx4_reset(dev);
3451                                 if (err) {
3452                                         mlx4_err(dev, "Failed to reset HCA, aborting\n");
3453                                         goto err_sriov;
3454                                 }
3455                                 goto slave_start;
3456                         }
3457                 } else {
3458                         /* Legacy mode FW requires SRIOV to be enabled before
3459                          * doing QUERY_DEV_CAP, since max_eq's value is different if
3460                          * SRIOV is enabled.
3461                          */
3462                         memset(dev_cap, 0, sizeof(*dev_cap));
3463                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3464                         if (err) {
3465                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3466                                 goto err_fw;
3467                         }
3468
3469                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3470                                 goto err_fw;
3471                 }
3472         }
3473
3474         err = mlx4_init_hca(dev);
3475         if (err) {
3476                 if (err == -EACCES) {
3477                         /* Not the primary physical function;
3478                          * running in slave mode */
3479                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3480                         /* We're not a PF */
3481                         if (dev->flags & MLX4_FLAG_SRIOV) {
3482                                 if (!existing_vfs)
3483                                         pci_disable_sriov(pdev);
3484                                 if (mlx4_is_master(dev) && !reset_flow)
3485                                         atomic_dec(&pf_loading);
3486                                 dev->flags &= ~MLX4_FLAG_SRIOV;
3487                         }
3488                         if (!mlx4_is_slave(dev))
3489                                 mlx4_free_ownership(dev);
3490                         dev->flags |= MLX4_FLAG_SLAVE;
3491                         dev->flags &= ~MLX4_FLAG_MASTER;
3492                         goto slave_start;
3493                 } else
3494                         goto err_fw;
3495         }
3496
3497         if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3498                 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
3499                                                   existing_vfs, reset_flow);
3500
3501                 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
3502                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
3503                         dev->flags = dev_flags;
3504                         err = mlx4_cmd_init(dev);
3505                         if (err) {
3506                                 /* Only VHCR is cleaned up, so could still
3507                                  * send FW commands
3508                                  */
3509                                 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
3510                                 goto err_close;
3511                         }
3512                 } else {
3513                         dev->flags = dev_flags;
3514                 }
3515
3516                 if (!SRIOV_VALID_STATE(dev->flags)) {
3517                         mlx4_err(dev, "Invalid SRIOV state\n");
3518                         err = -EINVAL;
3519                         goto err_close;
3520                 }
3521         }
3522
3523         /* Check whether the device is functioning at its maximum possible
3524          * speed. No return code for this call; it just warns the user if
3525          * the PCIe device's capabilities are under-satisfied by the bus.
3526          */
3527         if (!mlx4_is_slave(dev))
3528                 pcie_print_link_status(dev->persist->pdev);
3529
3530         /* In master functions, the communication channel must be initialized
3531          * after obtaining its address from fw */
3532         if (mlx4_is_master(dev)) {
3533                 if (dev->caps.num_ports < 2 &&
3534                     num_vfs_argc > 1) {
3535                         err = -EINVAL;
3536                         mlx4_err(dev,
3537                                  "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3538                                  dev->caps.num_ports);
3539                         goto err_close;
3540                 }
3541                 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3542
3543                 for (i = 0; i < ARRAY_SIZE(dev->persist->nvfs); i++) {
3546                         unsigned j;
3547
3548                         for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3549                                 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3550                                 dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3551                                         dev->caps.num_ports;
3552                         }
3553                 }
3554
3558                 err = mlx4_multi_func_init(dev);
3559                 if (err) {
3560                         mlx4_err(dev, "Failed to init master mfunc interface, aborting\n");
3561                         goto err_close;
3562                 }
3563         }
3564
3565         err = mlx4_alloc_eq_table(dev);
3566         if (err)
3567                 goto err_master_mfunc;
3568
3569         bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
3570         mutex_init(&priv->msix_ctl.pool_lock);
3571
3572         mlx4_enable_msi_x(dev);
3573         if ((mlx4_is_mfunc(dev)) &&
3574             !(dev->flags & MLX4_FLAG_MSI_X)) {
3575                 err = -EOPNOTSUPP;
3576                 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3577                 goto err_free_eq;
3578         }
3579
3580         if (!mlx4_is_slave(dev)) {
3581                 err = mlx4_init_steering(dev);
3582                 if (err)
3583                         goto err_disable_msix;
3584         }
3585
3586         mlx4_init_quotas(dev);
3587
3588         err = mlx4_setup_hca(dev);
3589         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3590             !mlx4_is_mfunc(dev)) {
3591                 dev->flags &= ~MLX4_FLAG_MSI_X;
3592                 dev->caps.num_comp_vectors = 1;
3593                 pci_disable_msix(pdev);
3594                 err = mlx4_setup_hca(dev);
3595         }
3596
3597         if (err)
3598                 goto err_steer;
3599
3600         /* Once the PF's resources are ready, arm its comm channel so it
3601          * can start receiving commands.
3602          */
3603         if (mlx4_is_master(dev)) {
3604                 err = mlx4_ARM_COMM_CHANNEL(dev);
3605                 if (err) {
3606                         mlx4_err(dev, "Failed to arm comm channel eq: %x\n",
3607                                  err);
3608                         goto err_steer;
3609                 }
3610         }
3611
3612         for (port = 1; port <= dev->caps.num_ports; port++) {
3613                 err = mlx4_init_port_info(dev, port);
3614                 if (err)
3615                         goto err_port;
3616         }
3617
3618         priv->v2p.port1 = 1;
3619         priv->v2p.port2 = 2;
3620
3621         err = mlx4_register_device(dev);
3622         if (err)
3623                 goto err_port;
3624
3625         mlx4_request_modules(dev);
3626
3627         mlx4_sense_init(dev);
3628         mlx4_start_sense(dev);
3629
3630         priv->removed = 0;
3631
3632         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3633                 atomic_dec(&pf_loading);
3634
3635         kfree(dev_cap);
3636         return 0;
3637
3638 err_port:
3639         for (--port; port >= 1; --port)
3640                 mlx4_cleanup_port_info(&priv->port[port]);
3641
3642         mlx4_cleanup_default_counters(dev);
3643         if (!mlx4_is_slave(dev))
3644                 mlx4_cleanup_counters_table(dev);
3645         mlx4_cleanup_qp_table(dev);
3646         mlx4_cleanup_srq_table(dev);
3647         mlx4_cleanup_cq_table(dev);
3648         mlx4_cmd_use_polling(dev);
3649         mlx4_cleanup_eq_table(dev);
3650         mlx4_cleanup_mcg_table(dev);
3651         mlx4_cleanup_mr_table(dev);
3652         mlx4_cleanup_xrcd_table(dev);
3653         mlx4_cleanup_pd_table(dev);
3654         mlx4_cleanup_uar_table(dev);
3655
3656 err_steer:
3657         if (!mlx4_is_slave(dev))
3658                 mlx4_clear_steering(dev);
3659
3660 err_disable_msix:
3661         if (dev->flags & MLX4_FLAG_MSI_X)
3662                 pci_disable_msix(pdev);
3663
3664 err_free_eq:
3665         mlx4_free_eq_table(dev);
3666
3667 err_master_mfunc:
3668         if (mlx4_is_master(dev)) {
3669                 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3670                 mlx4_multi_func_cleanup(dev);
3671         }
3672
3673         if (mlx4_is_slave(dev))
3674                 mlx4_slave_destroy_special_qp_cap(dev);
3675
3676 err_close:
3677         mlx4_close_hca(dev);
3678
3679 err_fw:
3680         mlx4_close_fw(dev);
3681
3682 err_mfunc:
3683         if (mlx4_is_slave(dev))
3684                 mlx4_multi_func_cleanup(dev);
3685
3686 err_cmd:
3687         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3688
3689 err_sriov:
3690         if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3691                 pci_disable_sriov(pdev);
3692                 dev->flags &= ~MLX4_FLAG_SRIOV;
3693         }
3694
3695         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3696                 atomic_dec(&pf_loading);
3697
3698         kfree(priv->dev.dev_vfs);
3699
3700         if (!mlx4_is_slave(dev))
3701                 mlx4_free_ownership(dev);
3702
3703         kfree(dev_cap);
3704         return err;
3705 }
3706
3707 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3708                            struct mlx4_priv *priv)
3709 {
3710         int err;
3711         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3712         int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3713         const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3714                 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
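             /* param_map translates a module-parameter position to the
              * nvfs[]/prb_vf[] slot it configures: a single value applies
              * to both ports (slot 2); two or three values map to port 1,
              * port 2, and both ports respectively.
              */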
3715         unsigned total_vfs = 0;
3716         unsigned int i;
3717
3718         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3719
3720         err = mlx4_pci_enable_device(&priv->dev);
3721         if (err) {
3722                 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3723                 return err;
3724         }
3725
3726         /* All VFs and the PF are *guaranteed* 2 MACs per port, so we
3727          * must limit the number of VFs to 63 (since there are
3728          * 128 MACs).
3729          */
3730         for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc;
3731              total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3732                 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3733                 if (nvfs[i] < 0) {
3734                         dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3735                         err = -EINVAL;
3736                         goto err_disable_pdev;
3737                 }
3738         }
3739         for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc;
3740              i++) {
3741                 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3742                 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3743                         dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3744                         err = -EINVAL;
3745                         goto err_disable_pdev;
3746                 }
3747         }
3748         if (total_vfs > MLX4_MAX_NUM_VF) {
3749                 dev_err(&pdev->dev,
3750                         "Requested more VFs (%d) than allowed by hw (%d)\n",
3751                         total_vfs, MLX4_MAX_NUM_VF);
3752                 err = -EINVAL;
3753                 goto err_disable_pdev;
3754         }
3755
3756         for (i = 0; i < MLX4_MAX_PORTS; i++) {
3757                 if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
3758                         dev_err(&pdev->dev,
3759                                 "Requested more VFs (%d) for port (%d) than allowed by driver (%d)\n",
3760                                 nvfs[i] + nvfs[2], i + 1,
3761                                 MLX4_MAX_NUM_VF_P_PORT);
3762                         err = -EINVAL;
3763                         goto err_disable_pdev;
3764                 }
3765         }
3766
3767         /* Check for BARs. */
3768         if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3769             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3770                 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3771                         pci_dev_data, pci_resource_flags(pdev, 0));
3772                 err = -ENODEV;
3773                 goto err_disable_pdev;
3774         }
3775         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3776                 dev_err(&pdev->dev, "Missing UAR, aborting\n");
3777                 err = -ENODEV;
3778                 goto err_disable_pdev;
3779         }
3780
3781         err = pci_request_regions(pdev, DRV_NAME);
3782         if (err) {
3783                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3784                 goto err_disable_pdev;
3785         }
3786
3787         pci_set_master(pdev);
3788
3789         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3790         if (err) {
3791                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3792                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3793                 if (err) {
3794                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3795                         goto err_release_regions;
3796                 }
3797         }
3798         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3799         if (err) {
3800                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3801                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3802                 if (err) {
3803                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3804                         goto err_release_regions;
3805                 }
3806         }
3807
3808         /* Allow large DMA segments, up to the firmware limit of 1 GB */
3809         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3810         /* Detect if this device is a virtual function */
3811         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3812                 /* When acting as PF, we normally skip VFs unless
3813                  * explicitly requested to probe them.
3814                  */
3815                 if (total_vfs) {
3816                         unsigned vfs_offset = 0;
3817
3818                         for (i = 0; i < ARRAY_SIZE(nvfs) &&
3819                              vfs_offset + nvfs[i] < extended_func_num(pdev);
3820                              vfs_offset += nvfs[i], i++)
3821                                 ;
3822                         if (i == ARRAY_SIZE(nvfs)) {
3823                                 err = -ENODEV;
3824                                 goto err_release_regions;
3825                         }
3826                         if ((extended_func_num(pdev) - vfs_offset)
3827                             > prb_vf[i]) {
3828                                 dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3829                                          extended_func_num(pdev));
3830                                 err = -ENODEV;
3831                                 goto err_release_regions;
3832                         }
3833                 }
3834         }
3835
3836         err = mlx4_crdump_init(&priv->dev);
3837         if (err)
3838                 goto err_release_regions;
3839
3840         err = mlx4_catas_init(&priv->dev);
3841         if (err)
3842                 goto err_crdump;
3843
3844         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3845         if (err)
3846                 goto err_catas;
3847
3848         return 0;
3849
3850 err_catas:
3851         mlx4_catas_end(&priv->dev);
3852
3853 err_crdump:
3854         mlx4_crdump_end(&priv->dev);
3855
3856 err_release_regions:
3857         pci_release_regions(pdev);
3858
3859 err_disable_pdev:
3860         mlx4_pci_disable_device(&priv->dev);
3861         return err;
3862 }
3863
3864 static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
3865                                       enum devlink_port_type port_type)
3866 {
3867         struct mlx4_port_info *info = container_of(devlink_port,
3868                                                    struct mlx4_port_info,
3869                                                    devlink_port);
3870         enum mlx4_port_type mlx4_port_type;
3871
3872         switch (port_type) {
3873         case DEVLINK_PORT_TYPE_AUTO:
3874                 mlx4_port_type = MLX4_PORT_TYPE_AUTO;
3875                 break;
3876         case DEVLINK_PORT_TYPE_ETH:
3877                 mlx4_port_type = MLX4_PORT_TYPE_ETH;
3878                 break;
3879         case DEVLINK_PORT_TYPE_IB:
3880                 mlx4_port_type = MLX4_PORT_TYPE_IB;
3881                 break;
3882         default:
3883                 return -EOPNOTSUPP;
3884         }
3885
3886         return __set_port_type(info, mlx4_port_type);
3887 }
3888
3889 static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
3890 {
3891         struct mlx4_priv *priv = devlink_priv(devlink);
3892         struct mlx4_dev *dev = &priv->dev;
3893         struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
3894         union devlink_param_value saved_value;
3895         int err;
3896
3897         err = devlink_param_driverinit_value_get(devlink,
3898                                                  DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
3899                                                  &saved_value);
3900         if (!err && mlx4_internal_err_reset != saved_value.vbool) {
3901                 mlx4_internal_err_reset = saved_value.vbool;
3902                 /* Notify that the value changed in runtime configuration mode */
3903                 devlink_param_value_changed(devlink,
3904                                             DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET);
3905         }
3906         err = devlink_param_driverinit_value_get(devlink,
3907                                                  DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
3908                                                  &saved_value);
3909         if (!err)
3910                 log_num_mac = order_base_2(saved_value.vu32);
3911         err = devlink_param_driverinit_value_get(devlink,
3912                                                  MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
3913                                                  &saved_value);
3914         if (!err)
3915                 enable_64b_cqe_eqe = saved_value.vbool;
3916         err = devlink_param_driverinit_value_get(devlink,
3917                                                  MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
3918                                                  &saved_value);
3919         if (!err)
3920                 enable_4k_uar = saved_value.vbool;
3921         err = devlink_param_driverinit_value_get(devlink,
3922                                                  DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
3923                                                  &saved_value);
3924         if (!err && crdump->snapshot_enable != saved_value.vbool) {
3925                 crdump->snapshot_enable = saved_value.vbool;
3926                 devlink_param_value_changed(devlink,
3927                                             DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
3928         }
3929 }
3930
3931 static int mlx4_devlink_reload(struct devlink *devlink,
3932                                struct netlink_ext_ack *extack)
3933 {
3934         struct mlx4_priv *priv = devlink_priv(devlink);
3935         struct mlx4_dev *dev = &priv->dev;
3936         struct mlx4_dev_persistent *persist = dev->persist;
3937         int err;
3938
3939         if (persist->num_vfs)
3940                 mlx4_warn(persist->dev, "Reload performed on PF, will cause reset of operating Virtual Functions\n");
3941         err = mlx4_restart_one(persist->pdev, true, devlink);
3942         if (err)
3943                 mlx4_err(persist->dev, "mlx4_restart_one failed, ret=%d\n", err);
3944
3945         return err;
3946 }
3947
3948 static const struct devlink_ops mlx4_devlink_ops = {
3949         .port_type_set  = mlx4_devlink_port_type_set,
3950         .reload         = mlx4_devlink_reload,
3951 };
3952
3953 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3954 {
3955         struct devlink *devlink;
3956         struct mlx4_priv *priv;
3957         struct mlx4_dev *dev;
3958         int ret;
3959
3960         printk_once(KERN_INFO "%s", mlx4_version);
3961
3962         devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
3963         if (!devlink)
3964                 return -ENOMEM;
3965         priv = devlink_priv(devlink);
3966
3967         dev       = &priv->dev;
3968         dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3969         if (!dev->persist) {
3970                 ret = -ENOMEM;
3971                 goto err_devlink_free;
3972         }
3973         dev->persist->pdev = pdev;
3974         dev->persist->dev = dev;
3975         pci_set_drvdata(pdev, dev->persist);
3976         priv->pci_dev_data = id->driver_data;
3977         mutex_init(&dev->persist->device_state_mutex);
3978         mutex_init(&dev->persist->interface_state_mutex);
3979         mutex_init(&dev->persist->pci_status_mutex);
3980
3981         ret = devlink_register(devlink, &pdev->dev);
3982         if (ret)
3983                 goto err_persist_free;
3984         ret = devlink_params_register(devlink, mlx4_devlink_params,
3985                                       ARRAY_SIZE(mlx4_devlink_params));
3986         if (ret)
3987                 goto err_devlink_unregister;
3988         mlx4_devlink_set_params_init_values(devlink);
3989         ret = __mlx4_init_one(pdev, id->driver_data, priv);
3990         if (ret)
3991                 goto err_params_unregister;
3992
3993         pci_save_state(pdev);
3994         return 0;
3995
3996 err_params_unregister:
3997         devlink_params_unregister(devlink, mlx4_devlink_params,
3998                                   ARRAY_SIZE(mlx4_devlink_params));
3999 err_devlink_unregister:
4000         devlink_unregister(devlink);
4001 err_persist_free:
4002         kfree(dev->persist);
4003 err_devlink_free:
4004         devlink_free(devlink);
4005         return ret;
4006 }
4007
4008 static void mlx4_clean_dev(struct mlx4_dev *dev)
4009 {
4010         struct mlx4_dev_persistent *persist = dev->persist;
4011         struct mlx4_priv *priv = mlx4_priv(dev);
4012         unsigned long   flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
4013
4014         memset(priv, 0, sizeof(*priv));
4015         priv->dev.persist = persist;
4016         priv->dev.flags = flags;
4017 }
4018
4019 static void mlx4_unload_one(struct pci_dev *pdev)
4020 {
4021         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4022         struct mlx4_dev  *dev  = persist->dev;
4023         struct mlx4_priv *priv = mlx4_priv(dev);
4024         int               pci_dev_data;
4025         int p, i;
4026
4027         if (priv->removed)
4028                 return;
4029
4030         /* save the current port types for later use */
4031         for (i = 0; i < dev->caps.num_ports; i++) {
4032                 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
4033                 dev->persist->curr_port_poss_type[i] =
4034                         dev->caps.possible_type[i + 1];
4035         }
4036
4037         pci_dev_data = priv->pci_dev_data;
4038
4039         mlx4_stop_sense(dev);
4040         mlx4_unregister_device(dev);
4041
4042         for (p = 1; p <= dev->caps.num_ports; p++) {
4043                 mlx4_cleanup_port_info(&priv->port[p]);
4044                 mlx4_CLOSE_PORT(dev, p);
4045         }
4046
4047         if (mlx4_is_master(dev))
4048                 mlx4_free_resource_tracker(dev,
4049                                            RES_TR_FREE_SLAVES_ONLY);
4050
4051         mlx4_cleanup_default_counters(dev);
4052         if (!mlx4_is_slave(dev))
4053                 mlx4_cleanup_counters_table(dev);
4054         mlx4_cleanup_qp_table(dev);
4055         mlx4_cleanup_srq_table(dev);
4056         mlx4_cleanup_cq_table(dev);
4057         mlx4_cmd_use_polling(dev);
4058         mlx4_cleanup_eq_table(dev);
4059         mlx4_cleanup_mcg_table(dev);
4060         mlx4_cleanup_mr_table(dev);
4061         mlx4_cleanup_xrcd_table(dev);
4062         mlx4_cleanup_pd_table(dev);
4063
4064         if (mlx4_is_master(dev))
4065                 mlx4_free_resource_tracker(dev,
4066                                            RES_TR_FREE_STRUCTS_ONLY);
4067
4068         iounmap(priv->kar);
4069         mlx4_uar_free(dev, &priv->driver_uar);
4070         mlx4_cleanup_uar_table(dev);
4071         if (!mlx4_is_slave(dev))
4072                 mlx4_clear_steering(dev);
4073         mlx4_free_eq_table(dev);
4074         if (mlx4_is_master(dev))
4075                 mlx4_multi_func_cleanup(dev);
4076         mlx4_close_hca(dev);
4077         mlx4_close_fw(dev);
4078         if (mlx4_is_slave(dev))
4079                 mlx4_multi_func_cleanup(dev);
4080         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
4081
4082         if (dev->flags & MLX4_FLAG_MSI_X)
4083                 pci_disable_msix(pdev);
4084
4085         if (!mlx4_is_slave(dev))
4086                 mlx4_free_ownership(dev);
4087
4088         mlx4_slave_destroy_special_qp_cap(dev);
4089         kfree(dev->dev_vfs);
4090
4091         mlx4_clean_dev(dev);
4092         priv->pci_dev_data = pci_dev_data;
4093         priv->removed = 1;
4094 }
4095
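/*
 * PCI remove callback.  Marks the interface as being deleted (so other
 * tasks bail out), unloads the device if it is up, and only then
 * disables SR-IOV -- unless VFs are still in use, in which case SR-IOV
 * is intentionally left enabled.
 */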
static void mlx4_remove_one(struct pci_dev *pdev)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev  *dev  = persist->dev;
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct devlink *devlink = priv_to_devlink(priv);
        int active_vfs = 0;

        if (mlx4_is_slave(dev))
                persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;

        mutex_lock(&persist->interface_state_mutex);
        persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
        mutex_unlock(&persist->interface_state_mutex);

        /* Disabling SR-IOV is not allowed while there are active VFs */
        if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
                active_vfs = mlx4_how_many_lives_vf(dev);
                if (active_vfs) {
                        pr_warn("Removing PF while there are active VFs!\n");
                        pr_warn("Will not disable SR-IOV.\n");
                }
        }

        /* The device is now marked for deletion, so we can proceed without
         * the lock, letting other tasks terminate.
         */
        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
                mlx4_unload_one(pdev);
        else
                mlx4_info(dev, "%s: interface is down\n", __func__);
        mlx4_catas_end(dev);
        mlx4_crdump_end(dev);
        if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
                mlx4_warn(dev, "Disabling SR-IOV\n");
                pci_disable_sriov(pdev);
        }

        pci_release_regions(pdev);
        mlx4_pci_disable_device(dev);
        devlink_params_unregister(devlink, mlx4_devlink_params,
                                  ARRAY_SIZE(mlx4_devlink_params));
        devlink_unregister(devlink);
        kfree(dev->persist);
        devlink_free(devlink);
}

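/*
 * Re-apply the port types saved by mlx4_unload_one(), so that after a
 * restart or resume the ports come back in the same IB/ETH
 * configuration they had before the device was torn down.
 */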
static int restore_current_port_types(struct mlx4_dev *dev,
                                      enum mlx4_port_type *types,
                                      enum mlx4_port_type *poss_types)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err, i;

        mlx4_stop_sense(dev);

        mutex_lock(&priv->port_mutex);
        for (i = 0; i < dev->caps.num_ports; i++)
                dev->caps.possible_type[i + 1] = poss_types[i];
        err = mlx4_change_port_types(dev, types);
        mlx4_start_sense(dev);
        mutex_unlock(&priv->port_mutex);

        return err;
}

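/*
 * Full unload/reload cycle on a live device, preserving the VF and
 * port-type configuration.  With @reload set (the devlink reload
 * path), driverinit parameter values are applied before re-loading.
 */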
int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev  *dev  = persist->dev;
        struct mlx4_priv *priv = mlx4_priv(dev);
        int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
        int pci_dev_data, err, total_vfs;

        pci_dev_data = priv->pci_dev_data;
        total_vfs = dev->persist->num_vfs;
        memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));

        mlx4_unload_one(pdev);
        if (reload)
                mlx4_devlink_param_load_driverinit_values(devlink);
        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
        if (err) {
                mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
                         __func__, pci_name(pdev), err);
                return err;
        }

        err = restore_current_port_types(dev, dev->persist->curr_port_type,
                                         dev->persist->curr_port_poss_type);
        if (err)
                mlx4_err(dev, "could not restore original port types (%d)\n",
                         err);

        return err;
}

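/*
 * Helpers for the PCI ID table below: MLX_SP marks devices that need
 * forced port sensing, MLX_VF marks virtual functions, and MLX_GN is a
 * generic entry with no special driver_data flags.
 */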
#define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT }
#define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF }
#define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }

static const struct pci_device_id mlx4_pci_table[] = {
#ifdef CONFIG_MLX4_CORE_GEN2
        /* MT25408 "Hermon" */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR),      /* SDR */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR),      /* DDR */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR),      /* QDR */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2), /* DDR Gen2 */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2), /* QDR Gen2 */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN),       /* EN 10GigE */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2),  /* EN 10GigE Gen2 */
        /* MT25458 ConnectX EN 10GBASE-T */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN),
        MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2),      /* Gen2 */
        /* MT26468 ConnectX EN 10GigE PCIe Gen2 */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2),
        /* MT26438 ConnectX EN 40GigE PCIe Gen2 5GT/s */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2),
        /* MT26478 ConnectX2 40GigE PCIe Gen2 */
        MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2),
        /* MT25400 Family [ConnectX-2] */
        MLX_VF(0x1002),                                 /* Virtual Function */
#endif /* CONFIG_MLX4_CORE_GEN2 */
        /* MT27500 Family [ConnectX-3] */
        MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3),
        MLX_VF(0x1004),                                 /* Virtual Function */
        MLX_GN(0x1005),                                 /* MT27510 Family */
        MLX_GN(0x1006),                                 /* MT27511 Family */
        MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO),   /* MT27520 Family */
        MLX_GN(0x1008),                                 /* MT27521 Family */
        MLX_GN(0x1009),                                 /* MT27530 Family */
        MLX_GN(0x100a),                                 /* MT27531 Family */
        MLX_GN(0x100b),                                 /* MT27540 Family */
        MLX_GN(0x100c),                                 /* MT27541 Family */
        MLX_GN(0x100d),                                 /* MT27550 Family */
        MLX_GN(0x100e),                                 /* MT27551 Family */
        MLX_GN(0x100f),                                 /* MT27560 Family */
        MLX_GN(0x1010),                                 /* MT27561 Family */

        /*
         * See the mellanox_check_broken_intx_masking() quirk when
         * adding devices
         */

        { 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

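/*
 * AER error_detected callback: move the device into its error state,
 * unload it if it was up, and either give up (permanent failure) or
 * ask the PCI core for a slot reset.
 */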
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);

        mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
        mlx4_enter_error_state(persist);

        mutex_lock(&persist->interface_state_mutex);
        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
                mlx4_unload_one(pdev);

        mutex_unlock(&persist->interface_state_mutex);
        if (state == pci_channel_io_perm_failure)
                return PCI_ERS_RESULT_DISCONNECT;

        mlx4_pci_disable_device(persist->dev);
        return PCI_ERS_RESULT_NEED_RESET;
}

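/*
 * AER slot_reset callback: re-enable the function and restore its PCI
 * config space.  The driver itself is reloaded later, from
 * mlx4_pci_resume().
 */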
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev  *dev  = persist->dev;
        int err;

        mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
        err = mlx4_pci_enable_device(dev);
        if (err) {
                mlx4_err(dev, "Cannot re-enable device, err=%d\n", err);
                return PCI_ERS_RESULT_DISCONNECT;
        }

        pci_set_master(pdev);
        pci_restore_state(pdev);
        pci_save_state(pdev);
        return PCI_ERS_RESULT_RECOVERED;
}

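/*
 * AER resume callback: reload the device with the VF configuration it
 * had before the error, then restore the saved port types.
 */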
static void mlx4_pci_resume(struct pci_dev *pdev)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev  *dev  = persist->dev;
        struct mlx4_priv *priv = mlx4_priv(dev);
        int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
        int total_vfs;
        int err;

        mlx4_err(dev, "%s was called\n", __func__);
        total_vfs = dev->persist->num_vfs;
        memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));

        mutex_lock(&persist->interface_state_mutex);
        if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
                err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
                                    priv, 1);
                if (err) {
                        mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n",
                                 __func__, err);
                        goto end;
                }

                err = restore_current_port_types(dev,
                                                 dev->persist->curr_port_type,
                                                 dev->persist->curr_port_poss_type);
                if (err)
                        mlx4_err(dev, "could not restore original port types (%d)\n", err);
        }
end:
        mutex_unlock(&persist->interface_state_mutex);
}

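/*
 * Shutdown callback: unload the device if it is up and disable it, so
 * the machine can reboot or power off cleanly.
 */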
static void mlx4_shutdown(struct pci_dev *pdev)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev *dev = persist->dev;

        mlx4_info(persist->dev, "mlx4_shutdown was called\n");
        mutex_lock(&persist->interface_state_mutex);
        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
                mlx4_unload_one(pdev);
        mutex_unlock(&persist->interface_state_mutex);
        mlx4_pci_disable_device(dev);
}

static const struct pci_error_handlers mlx4_err_handler = {
        .error_detected = mlx4_pci_err_detected,
        .slot_reset     = mlx4_pci_slot_reset,
        .resume         = mlx4_pci_resume,
};

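/*
 * Legacy PM suspend: simply unload the device if it is up;
 * mlx4_resume() performs a full reload.
 */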
static int mlx4_suspend(struct pci_dev *pdev, pm_message_t state)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev *dev = persist->dev;

        mlx4_err(dev, "suspend was called\n");
        mutex_lock(&persist->interface_state_mutex);
        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
                mlx4_unload_one(pdev);
        mutex_unlock(&persist->interface_state_mutex);

        return 0;
}

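/*
 * Legacy PM resume: reload the device with its pre-suspend VF count
 * and restore the saved port types.
 */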
static int mlx4_resume(struct pci_dev *pdev)
{
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev *dev = persist->dev;
        struct mlx4_priv *priv = mlx4_priv(dev);
        int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
        int total_vfs;
        int ret = 0;

        mlx4_err(dev, "resume was called\n");
        total_vfs = dev->persist->num_vfs;
        memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));

        mutex_lock(&persist->interface_state_mutex);
        if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
                ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs,
                                    nvfs, priv, 1);
                if (!ret) {
                        ret = restore_current_port_types(dev,
                                        dev->persist->curr_port_type,
                                        dev->persist->curr_port_poss_type);
                        if (ret)
                                mlx4_err(dev, "resume: could not restore original port types (%d)\n", ret);
                }
        }
        mutex_unlock(&persist->interface_state_mutex);

        return ret;
}

static struct pci_driver mlx4_driver = {
        .name           = DRV_NAME,
        .id_table       = mlx4_pci_table,
        .probe          = mlx4_init_one,
        .shutdown       = mlx4_shutdown,
        .remove         = mlx4_remove_one,
        .suspend        = mlx4_suspend,
        .resume         = mlx4_resume,
        .err_handler    = &mlx4_err_handler,
};

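/*
 * Sanity-check the module parameters at init time.  Returns -1 (and
 * logs a warning) on any out-of-range value, so module load fails
 * early with -EINVAL rather than misconfiguring the HW.
 */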
static int __init mlx4_verify_params(void)
{
        if (msi_x < 0) {
                pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
                return -1;
        }

        if ((log_num_mac < 0) || (log_num_mac > 7)) {
                pr_warn("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
                return -1;
        }

        if (log_num_vlan != 0)
                pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
                        MLX4_LOG_NUM_VLANS);

        if (use_prio != 0)
                pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");

        if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
                pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
                        log_mtts_per_seg);
                return -1;
        }

        /* Check that the port type module parameter is a legal combination */
        if (port_type_array[0] == false && port_type_array[1] == true) {
                pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
                port_type_array[0] = true;
        }

        if (mlx4_log_num_mgm_entry_size < -7 ||
            (mlx4_log_num_mgm_entry_size > 0 &&
             (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
              mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
                pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
                        mlx4_log_num_mgm_entry_size,
                        MLX4_MIN_MGM_LOG_ENTRY_SIZE,
                        MLX4_MAX_MGM_LOG_ENTRY_SIZE);
                return -1;
        }

        return 0;
}

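/*
 * Module init: validate the parameters, create the single-threaded
 * workqueue used for the driver's deferred work, and register the PCI
 * driver.
 */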
static int __init mlx4_init(void)
{
        int ret;

        if (mlx4_verify_params())
                return -EINVAL;

        mlx4_wq = create_singlethread_workqueue("mlx4");
        if (!mlx4_wq)
                return -ENOMEM;

        ret = pci_register_driver(&mlx4_driver);
        if (ret < 0)
                destroy_workqueue(mlx4_wq);
        return ret < 0 ? ret : 0;
}

static void __exit mlx4_cleanup(void)
{
        pci_unregister_driver(&mlx4_driver);
        destroy_workqueue(mlx4_wq);
}

module_init(mlx4_init);
module_exit(mlx4_cleanup);