GNU Linux-libre 4.14.290-gnu1
drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
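/* Enable/disable host interrupt reception by toggling the HOSTINTR bit in the
 * MEMBAR interrupt-control register via PCI config space.
 */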
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On lancer interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
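/* Notify HW of newly posted RX buffers by ringing the RQ doorbell */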
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
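/* Notify HW of newly posted TX WRBs by ringing the TX doorbell */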
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
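/* Ring the EQ doorbell: ack num_popped events and optionally re-arm the EQ,
 * clear the interrupt and set the interrupt-delay multiplier encoding.
 */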
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
322          * address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333          * privilege or if PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK, only if the PF programmed
336          * the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF didn't pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
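/* Accumulate a 16-bit HW counter (which wraps at 65535) into a 32-bit
 * driver counter.
 */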
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         ACCESS_ONCE(*acc) = newacc;
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* the erx HW counter below can wrap around after 65535;
618                  * the driver accumulates it into a 32-bit value
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
650 static void be_get_stats64(struct net_device *netdev,
651                            struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715 }
716
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719         struct net_device *netdev = adapter->netdev;
720
721         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722                 netif_carrier_off(netdev);
723                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724         }
725
726         if (link_status)
727                 netif_carrier_on(netdev);
728         else
729                 netif_carrier_off(netdev);
730
731         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733
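/* Length of the protocol headers that get replicated in each segment of a
 * TSO pkt (inner headers are used for encapsulated pkts).
 */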
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736         if (skb->encapsulation)
737                 return skb_inner_transport_offset(skb) +
738                        inner_tcp_hdrlen(skb);
739         return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744         struct be_tx_stats *stats = tx_stats(txo);
745         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746         /* Account for headers which get duplicated in TSO pkt */
747         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748
749         u64_stats_update_begin(&stats->sync);
750         stats->tx_reqs++;
751         stats->tx_bytes += skb->len + dup_hdr_len;
752         stats->tx_pkts += tx_pkts;
753         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754                 stats->tx_vxlan_offload_pkts += tx_pkts;
755         u64_stats_update_end(&stats->sync);
756 }
757
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761         /* +1 for the header wrb */
762         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770         wrb->rsvd0 = 0;
771 }
772
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778         wrb->frag_pa_hi = 0;
779         wrb->frag_pa_lo = 0;
780         wrb->frag_len = 0;
781         wrb->rsvd0 = 0;
782 }
783
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785                                      struct sk_buff *skb)
786 {
787         u8 vlan_prio;
788         u16 vlan_tag;
789
790         vlan_tag = skb_vlan_tag_get(skb);
791         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792         /* If vlan priority provided by OS is NOT in available bmap */
793         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795                                 adapter->recommended_prio_bits;
796
797         return vlan_tag;
798 }
799
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803         return (inner_ip_hdr(skb)->version == 4) ?
804                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809         return (ip_hdr(skb)->version == 4) ?
810                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820         return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829                                        struct sk_buff *skb,
830                                        struct be_wrb_params *wrb_params)
831 {
832         u16 proto;
833
834         if (skb_is_gso(skb)) {
835                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
836                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840                 if (skb->encapsulation) {
841                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842                         proto = skb_inner_ip_proto(skb);
843                 } else {
844                         proto = skb_ip_proto(skb);
845                 }
846                 if (proto == IPPROTO_TCP)
847                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848                 else if (proto == IPPROTO_UDP)
849                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850         }
851
852         if (skb_vlan_tag_present(skb)) {
853                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855         }
856
857         BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861                          struct be_eth_hdr_wrb *hdr,
862                          struct be_wrb_params *wrb_params,
863                          struct sk_buff *skb)
864 {
865         memset(hdr, 0, sizeof(*hdr));
866
867         SET_TX_WRB_HDR_BITS(crc, hdr,
868                             BE_WRB_F_GET(wrb_params->features, CRC));
869         SET_TX_WRB_HDR_BITS(ipcs, hdr,
870                             BE_WRB_F_GET(wrb_params->features, IPCS));
871         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, TCPCS));
873         SET_TX_WRB_HDR_BITS(udpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, UDPCS));
875
876         SET_TX_WRB_HDR_BITS(lso, hdr,
877                             BE_WRB_F_GET(wrb_params->features, LSO));
878         SET_TX_WRB_HDR_BITS(lso6, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO6));
880         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881
882         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883          * hack is not needed, the evt bit is set while ringing DB.
884          */
885         SET_TX_WRB_HDR_BITS(event, hdr,
886                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887         SET_TX_WRB_HDR_BITS(vlan, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN));
889         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890
891         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893         SET_TX_WRB_HDR_BITS(mgmt, hdr,
894                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898                           bool unmap_single)
899 {
900         dma_addr_t dma;
901         u32 frag_len = le32_to_cpu(wrb->frag_len);
902
903
904         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905                 (u64)le32_to_cpu(wrb->frag_pa_lo);
906         if (frag_len) {
907                 if (unmap_single)
908                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909                 else
910                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911         }
912 }
913
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917         u32 head = txo->q.head;
918
919         queue_head_inc(&txo->q);
920         return head;
921 }
922
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925                                 struct be_tx_obj *txo,
926                                 struct be_wrb_params *wrb_params,
927                                 struct sk_buff *skb, u16 head)
928 {
929         u32 num_frags = skb_wrb_cnt(skb);
930         struct be_queue_info *txq = &txo->q;
931         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932
933         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934         be_dws_cpu_to_le(hdr, sizeof(*hdr));
935
936         BUG_ON(txo->sent_skb_list[head]);
937         txo->sent_skb_list[head] = skb;
938         txo->last_req_hdr = head;
939         atomic_add(num_frags, &txq->used);
940         txo->last_req_wrb_cnt = num_frags;
941         txo->pend_wrb_cnt += num_frags;
942 }
943
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946                                  int len)
947 {
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         wrb = queue_head_node(txq);
952         wrb_fill(wrb, busaddr, len);
953         queue_head_inc(txq);
954 }
955
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961                             struct be_tx_obj *txo, u32 head, bool map_single,
962                             u32 copied)
963 {
964         struct device *dev;
965         struct be_eth_wrb *wrb;
966         struct be_queue_info *txq = &txo->q;
967
968         dev = &adapter->pdev->dev;
969         txq->head = head;
970
971         /* skip the first wrb (hdr); it's not mapped */
972         queue_head_inc(txq);
973         while (copied) {
974                 wrb = queue_head_node(txq);
975                 unmap_tx_frag(dev, wrb, map_single);
976                 map_single = false;
977                 copied -= le32_to_cpu(wrb->frag_len);
978                 queue_head_inc(txq);
979         }
980
981         txq->head = head;
982 }
983
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989                            struct sk_buff *skb,
990                            struct be_wrb_params *wrb_params)
991 {
992         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993         struct device *dev = &adapter->pdev->dev;
994         struct be_queue_info *txq = &txo->q;
995         bool map_single = false;
996         u32 head = txq->head;
997         dma_addr_t busaddr;
998         int len;
999
1000         head = be_tx_get_wrb_hdr(txo);
1001
1002         if (skb->len > skb->data_len) {
1003                 len = skb_headlen(skb);
1004
1005                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006                 if (dma_mapping_error(dev, busaddr))
1007                         goto dma_err;
1008                 map_single = true;
1009                 be_tx_setup_wrb_frag(txo, busaddr, len);
1010                 copied += len;
1011         }
1012
1013         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015                 len = skb_frag_size(frag);
1016
1017                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018                 if (dma_mapping_error(dev, busaddr))
1019                         goto dma_err;
1020                 be_tx_setup_wrb_frag(txo, busaddr, len);
1021                 copied += len;
1022         }
1023
1024         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025
1026         be_tx_stats_update(txo, skb);
1027         return wrb_cnt;
1028
1029 dma_err:
1030         adapter->drv_stats.dma_map_errors++;
1031         be_xmit_restore(adapter, txo, head, map_single, copied);
1032         return 0;
1033 }
1034
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041                                              struct sk_buff *skb,
1042                                              struct be_wrb_params
1043                                              *wrb_params)
1044 {
1045         u16 vlan_tag = 0;
1046
1047         skb = skb_share_check(skb, GFP_ATOMIC);
1048         if (unlikely(!skb))
1049                 return skb;
1050
1051         if (skb_vlan_tag_present(skb))
1052                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053
1054         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055                 if (!vlan_tag)
1056                         vlan_tag = adapter->pvid;
1057                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1058                  * skip VLAN insertion
1059                  */
1060                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061         }
1062
1063         if (vlan_tag) {
1064                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065                                                 vlan_tag);
1066                 if (unlikely(!skb))
1067                         return skb;
1068                 skb->vlan_tci = 0;
1069         }
1070
1071         /* Insert the outer VLAN, if any */
1072         if (adapter->qnq_vid) {
1073                 vlan_tag = adapter->qnq_vid;
1074                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075                                                 vlan_tag);
1076                 if (unlikely(!skb))
1077                         return skb;
1078                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079         }
1080
1081         return skb;
1082 }
1083
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086         struct ethhdr *eh = (struct ethhdr *)skb->data;
1087         u16 offset = ETH_HLEN;
1088
1089         if (eh->h_proto == htons(ETH_P_IPV6)) {
1090                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091
1092                 offset += sizeof(struct ipv6hdr);
1093                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1094                     ip6h->nexthdr != NEXTHDR_UDP) {
1095                         struct ipv6_opt_hdr *ehdr =
1096                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1097
1098                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099                         if (ehdr->hdrlen == 0xff)
1100                                 return true;
1101                 }
1102         }
1103         return false;
1104 }
1105
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117                                                   struct sk_buff *skb,
1118                                                   struct be_wrb_params
1119                                                   *wrb_params)
1120 {
1121         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122         unsigned int eth_hdr_len;
1123         struct iphdr *ip;
1124
1125         /* For padded packets, BE HW modifies tot_len field in IP header
1126          * incorrectly when VLAN tag is inserted by HW.
1127          * For padded packets, Lancer computes incorrect checksum.
1128          */
1129         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130                                                 VLAN_ETH_HLEN : ETH_HLEN;
1131         if (skb->len <= 60 &&
1132             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133             is_ipv4_pkt(skb)) {
1134                 ip = (struct iphdr *)ip_hdr(skb);
1135                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136         }
1137
1138         /* If vlan tag is already inlined in the packet, skip HW VLAN
1139          * tagging in pvid-tagging mode
1140          */
1141         if (be_pvid_tagging_enabled(adapter) &&
1142             veh->h_vlan_proto == htons(ETH_P_8021Q))
1143                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144
1145         /* HW has a bug wherein it will calculate CSUM for VLAN
1146          * pkts even though csum offload is disabled.
1147          * Manually insert the VLAN tag in such pkts.
1148          */
1149         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150             skb_vlan_tag_present(skb)) {
1151                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152                 if (unlikely(!skb))
1153                         goto err;
1154         }
1155
1156         /* HW may lockup when VLAN HW tagging is requested on
1157          * certain ipv6 packets. Drop such pkts if the HW workaround to
1158          * skip HW tagging is not enabled by FW.
1159          */
1160         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161                      (adapter->pvid || adapter->qnq_vid) &&
1162                      !qnq_async_evt_rcvd(adapter)))
1163                 goto tx_drop;
1164
1165         /* Manual VLAN tag insertion to prevent:
1166          * ASIC lockup when the ASIC inserts VLAN tag into
1167          * certain ipv6 packets. Insert VLAN tags in driver,
1168          * and set event, completion, vlan bits accordingly
1169          * in the Tx WRB.
1170          */
1171         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172             be_vlan_tag_tx_chk(adapter, skb)) {
1173                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174                 if (unlikely(!skb))
1175                         goto err;
1176         }
1177
1178         return skb;
1179 tx_drop:
1180         dev_kfree_skb_any(skb);
1181 err:
1182         return NULL;
1183 }
1184
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186                                            struct sk_buff *skb,
1187                                            struct be_wrb_params *wrb_params)
1188 {
1189         int err;
1190
1191         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192          * packets that are 32b or less may cause a transmit stall
1193          * on that port. The workaround is to pad such packets
1194          * (len <= 32 bytes) to a minimum length of 36b.
1195          */
1196         if (skb->len <= 32) {
1197                 if (skb_put_padto(skb, 36))
1198                         return NULL;
1199         }
1200
1201         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203                 if (!skb)
1204                         return NULL;
1205         }
1206
1207         /* The stack can send us skbs with length greater than
1208          * what the HW can handle. Trim the extra bytes.
1209          */
1210         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212         WARN_ON(err);
1213
1214         return skb;
1215 }
1216
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219         struct be_queue_info *txq = &txo->q;
1220         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221
1222         /* Mark the last request eventable if it hasn't been marked already */
1223         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225
1226         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1227         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228                 wrb_fill_dummy(queue_head_node(txq));
1229                 queue_head_inc(txq);
1230                 atomic_inc(&txq->used);
1231                 txo->pend_wrb_cnt++;
1232                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233                                            TX_HDR_WRB_NUM_SHIFT);
1234                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235                                           TX_HDR_WRB_NUM_SHIFT);
1236         }
1237         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238         txo->pend_wrb_cnt = 0;
1239 }
1240
1241 /* OS2BMC related */
1242
1243 #define DHCP_CLIENT_PORT        68
1244 #define DHCP_SERVER_PORT        67
1245 #define NET_BIOS_PORT1          137
1246 #define NET_BIOS_PORT2          138
1247 #define DHCPV6_RAS_PORT         547
1248
1249 #define is_mc_allowed_on_bmc(adapter, eh)       \
1250         (!is_multicast_filt_enabled(adapter) && \
1251          is_multicast_ether_addr(eh->h_dest) && \
1252          !is_broadcast_ether_addr(eh->h_dest))
1253
1254 #define is_bc_allowed_on_bmc(adapter, eh)       \
1255         (!is_broadcast_filt_enabled(adapter) && \
1256          is_broadcast_ether_addr(eh->h_dest))
1257
1258 #define is_arp_allowed_on_bmc(adapter, skb)     \
1259         (is_arp(skb) && is_arp_filt_enabled(adapter))
1260
1261 #define is_broadcast_packet(eh, adapter)        \
1262                 (is_multicast_ether_addr(eh->h_dest) && \
1263                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264
1265 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1266
1267 #define is_arp_filt_enabled(adapter)    \
1268                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269
1270 #define is_dhcp_client_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272
1273 #define is_dhcp_srvr_filt_enabled(adapter)      \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275
1276 #define is_nbios_filt_enabled(adapter)  \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278
1279 #define is_ipv6_na_filt_enabled(adapter)        \
1280                 (adapter->bmc_filt_mask &       \
1281                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282
1283 #define is_ipv6_ra_filt_enabled(adapter)        \
1284                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285
1286 #define is_ipv6_ras_filt_enabled(adapter)       \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288
1289 #define is_broadcast_filt_enabled(adapter)      \
1290                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291
1292 #define is_multicast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294
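/* Decide whether a copy of the pkt should also be sent to the BMC, based on
 * the pkt type and the BMC filtering configuration (bmc_filt_mask).
 */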
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296                                struct sk_buff **skb)
1297 {
1298         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299         bool os2bmc = false;
1300
1301         if (!be_is_os2bmc_enabled(adapter))
1302                 goto done;
1303
1304         if (!is_multicast_ether_addr(eh->h_dest))
1305                 goto done;
1306
1307         if (is_mc_allowed_on_bmc(adapter, eh) ||
1308             is_bc_allowed_on_bmc(adapter, eh) ||
1309             is_arp_allowed_on_bmc(adapter, (*skb))) {
1310                 os2bmc = true;
1311                 goto done;
1312         }
1313
1314         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316                 u8 nexthdr = hdr->nexthdr;
1317
1318                 if (nexthdr == IPPROTO_ICMPV6) {
1319                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320
1321                         switch (icmp6->icmp6_type) {
1322                         case NDISC_ROUTER_ADVERTISEMENT:
1323                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324                                 goto done;
1325                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1327                                 goto done;
1328                         default:
1329                                 break;
1330                         }
1331                 }
1332         }
1333
1334         if (is_udp_pkt((*skb))) {
1335                 struct udphdr *udp = udp_hdr((*skb));
1336
1337                 switch (ntohs(udp->dest)) {
1338                 case DHCP_CLIENT_PORT:
1339                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1340                         goto done;
1341                 case DHCP_SERVER_PORT:
1342                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343                         goto done;
1344                 case NET_BIOS_PORT1:
1345                 case NET_BIOS_PORT2:
1346                         os2bmc = is_nbios_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCPV6_RAS_PORT:
1349                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350                         goto done;
1351                 default:
1352                         break;
1353                 }
1354         }
1355 done:
1356         /* For vlan packets destined to the BMC, the asic expects
1357          * the vlan tag to be inline in the packet.
1358          */
1359         if (os2bmc)
1360                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361
1362         return os2bmc;
1363 }
1364
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367         struct be_adapter *adapter = netdev_priv(netdev);
1368         u16 q_idx = skb_get_queue_mapping(skb);
1369         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370         struct be_wrb_params wrb_params = { 0 };
1371         bool flush = !skb->xmit_more;
1372         u16 wrb_cnt;
1373
1374         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375         if (unlikely(!skb))
1376                 goto drop;
1377
1378         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379
1380         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381         if (unlikely(!wrb_cnt)) {
1382                 dev_kfree_skb_any(skb);
1383                 goto drop;
1384         }
1385
1386         /* If OS2BMC is enabled and the packet is destined to the BMC,
1387          * enqueue the packet a second time with the mgmt bit set.
1388          */
1389         if (be_send_pkt_to_bmc(adapter, &skb)) {
1390                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392                 if (unlikely(!wrb_cnt))
1393                         goto drop;
1394                 else
1395                         skb_get(skb);
1396         }
1397
1398         if (be_is_txq_full(txo)) {
1399                 netif_stop_subqueue(netdev, q_idx);
1400                 tx_stats(txo)->tx_stops++;
1401         }
1402
1403         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404                 be_xmit_flush(adapter, txo);
1405
1406         return NETDEV_TX_OK;
1407 drop:
1408         tx_stats(txo)->tx_drv_drops++;
1409         /* Flush the already enqueued tx requests */
1410         if (flush && txo->pend_wrb_cnt)
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 }
1415
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424         struct device *dev = &adapter->pdev->dev;
1425         int status;
1426
1427         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428                 return 0;
1429
1430         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431         if (!status) {
1432                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434         } else {
1435                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436         }
1437         return status;
1438 }
1439
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446         if (!status) {
1447                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1448                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449         }
1450         return status;
1451 }
1452
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459         struct device *dev = &adapter->pdev->dev;
1460         u16 vids[BE_NUM_VLANS_SUPPORTED];
1461         u16 num = 0, i = 0;
1462         int status = 0;
1463
1464         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1465         if (adapter->netdev->flags & IFF_PROMISC)
1466                 return 0;
1467
1468         if (adapter->vlans_added > be_max_vlans(adapter))
1469                 return be_set_vlan_promisc(adapter);
1470
1471         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472                 status = be_clear_vlan_promisc(adapter);
1473                 if (status)
1474                         return status;
1475         }
1476         /* Construct VLAN Table to give to HW */
1477         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478                 vids[num++] = cpu_to_le16(i);
1479
1480         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481         if (status) {
1482                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1483                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1484                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485                     addl_status(status) ==
1486                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487                         return be_set_vlan_promisc(adapter);
1488         }
1489         return status;
1490 }
1491
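/* VLAN add/kill handlers (summary of the code below): both take
 * rx_filter_lock, update the adapter->vids bitmap and the vlans_added count,
 * and then reprogram the HW VLAN table via be_vid_config(). VID 0 is skipped
 * on Lancer since such packets are always received by default.
 */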
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494         struct be_adapter *adapter = netdev_priv(netdev);
1495         int status = 0;
1496
1497         mutex_lock(&adapter->rx_filter_lock);
1498
1499         /* Packets with VID 0 are always received by Lancer by default */
1500         if (lancer_chip(adapter) && vid == 0)
1501                 goto done;
1502
1503         if (test_bit(vid, adapter->vids))
1504                 goto done;
1505
1506         set_bit(vid, adapter->vids);
1507         adapter->vlans_added++;
1508
1509         status = be_vid_config(adapter);
1510 done:
1511         mutex_unlock(&adapter->rx_filter_lock);
1512         return status;
1513 }
1514
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517         struct be_adapter *adapter = netdev_priv(netdev);
1518         int status = 0;
1519
1520         mutex_lock(&adapter->rx_filter_lock);
1521
1522         /* Packets with VID 0 are always received by Lancer by default */
1523         if (lancer_chip(adapter) && vid == 0)
1524                 goto done;
1525
1526         if (!test_bit(vid, adapter->vids))
1527                 goto done;
1528
1529         clear_bit(vid, adapter->vids);
1530         adapter->vlans_added--;
1531
1532         status = be_vid_config(adapter);
1533 done:
1534         mutex_unlock(&adapter->rx_filter_lock);
1535         return status;
1536 }
1537
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546         int status;
1547
1548         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549                 return;
1550
1551         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552         if (!status)
1553                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558         int status;
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561                 return;
1562
1563         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564         if (!status)
1565                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570         int status;
1571
1572         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573                 return;
1574
1575         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576         if (!status)
1577                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579
1580 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1581  * We use a single callback function for both sync and unsync. We don't actually
1582  * add/remove addresses through this callback; we only use it to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_uc_list = true;
1591         return 0;
1592 }
1593
1594 static int be_mc_list_update(struct net_device *netdev,
1595                              const unsigned char *addr)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598
1599         adapter->update_mc_list = true;
1600         return 0;
1601 }
1602
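/* Summary of the function below: be_set_mc_list() chooses between
 * multicast-promiscuous mode and programming the exact mc-list. The list is
 * cached into adapter->mc_list under netif_addr_lock_bh(); the RX filter
 * command itself is issued only after the address lock is dropped.
 */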
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605         struct net_device *netdev = adapter->netdev;
1606         struct netdev_hw_addr *ha;
1607         bool mc_promisc = false;
1608         int status;
1609
1610         netif_addr_lock_bh(netdev);
1611         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612
1613         if (netdev->flags & IFF_PROMISC) {
1614                 adapter->update_mc_list = false;
1615         } else if (netdev->flags & IFF_ALLMULTI ||
1616                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617                 /* Enable multicast promisc if the number of configured
1618                  * addresses exceeds what we support
1619                  */
1620                 mc_promisc = true;
1621                 adapter->update_mc_list = false;
1622         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623                 /* Update mc-list unconditionally if the iface was previously
1624                  * in mc-promisc mode and now is out of that mode.
1625                  */
1626                 adapter->update_mc_list = true;
1627         }
1628
1629         if (adapter->update_mc_list) {
1630                 int i = 0;
1631
1632                 /* cache the mc-list in adapter */
1633                 netdev_for_each_mc_addr(ha, netdev) {
1634                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635                         i++;
1636                 }
1637                 adapter->mc_count = netdev_mc_count(netdev);
1638         }
1639         netif_addr_unlock_bh(netdev);
1640
1641         if (mc_promisc) {
1642                 be_set_mc_promisc(adapter);
1643         } else if (adapter->update_mc_list) {
1644                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645                 if (!status)
1646                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647                 else
1648                         be_set_mc_promisc(adapter);
1649
1650                 adapter->update_mc_list = false;
1651         }
1652 }
1653
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656         struct net_device *netdev = adapter->netdev;
1657
1658         __dev_mc_unsync(netdev, NULL);
1659         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660         adapter->mc_count = 0;
1661 }
1662
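/* Note on the helpers below: when a uc-list entry matches the interface's own
 * MAC (dev_mac), the already-programmed primary pmac_id[0] is reused instead
 * of adding a duplicate MAC; be_uc_mac_del() likewise never deletes that
 * shared pmac-id.
 */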
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667                 return 0;
1668         }
1669
1670         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671                                adapter->if_handle,
1672                                &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677         if (pmac_id == adapter->pmac_id[0])
1678                 return;
1679
1680         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682
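/* Summary of the function below: be_set_uc_list() mirrors be_set_mc_list()
 * for unicast addresses. One uc slot is reserved for the primary MAC, hence
 * the (be_max_uc() - 1) limit before falling back to uc-promiscuous mode. On
 * an update, the old pmac entries are deleted and the cached list is
 * reprogrammed.
 */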
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool uc_promisc = false;
1688         int curr_uc_macs = 0, i;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_uc_list = false;
1695         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696                 uc_promisc = true;
1697                 adapter->update_uc_list = false;
1698         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699                 /* Update uc-list unconditionally if the iface was previously
1700                  * in uc-promisc mode and now is out of that mode.
1701                  */
1702                 adapter->update_uc_list = true;
1703         }
1704
1705         if (adapter->update_uc_list) {
1706                 /* cache the uc-list in adapter array */
1707                 i = 0;
1708                 netdev_for_each_uc_addr(ha, netdev) {
1709                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710                         i++;
1711                 }
1712                 curr_uc_macs = netdev_uc_count(netdev);
1713         }
1714         netif_addr_unlock_bh(netdev);
1715
1716         if (uc_promisc) {
1717                 be_set_uc_promisc(adapter);
1718         } else if (adapter->update_uc_list) {
1719                 be_clear_uc_promisc(adapter);
1720
1721                 for (i = 0; i < adapter->uc_macs; i++)
1722                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723
1724                 for (i = 0; i < curr_uc_macs; i++)
1725                         be_uc_mac_add(adapter, i);
1726                 adapter->uc_macs = curr_uc_macs;
1727                 adapter->update_uc_list = false;
1728         }
1729 }
1730
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733         struct net_device *netdev = adapter->netdev;
1734         int i;
1735
1736         __dev_uc_unsync(netdev, NULL);
1737         for (i = 0; i < adapter->uc_macs; i++)
1738                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739
1740         adapter->uc_macs = 0;
1741 }
1742
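/* Summary of the function below: __be_set_rx_mode() is the single place where
 * the RX filters are (re)programmed: all-promisc when IFF_PROMISC is set;
 * otherwise the VLAN table is restored (when leaving promisc mode) and the
 * uc/mc lists are re-synced, all under rx_filter_lock.
 */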
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745         struct net_device *netdev = adapter->netdev;
1746
1747         mutex_lock(&adapter->rx_filter_lock);
1748
1749         if (netdev->flags & IFF_PROMISC) {
1750                 if (!be_in_all_promisc(adapter))
1751                         be_set_all_promisc(adapter);
1752         } else if (be_in_all_promisc(adapter)) {
1753                 /* We need to re-program the vlan-list or clear
1754                  * vlan-promisc mode (if needed) when the interface
1755                  * comes out of promisc mode.
1756                  */
1757                 be_vid_config(adapter);
1758         }
1759
1760         be_set_uc_list(adapter);
1761         be_set_mc_list(adapter);
1762
1763         mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768         struct be_cmd_work *cmd_work =
1769                                 container_of(work, struct be_cmd_work, work);
1770
1771         __be_set_rx_mode(cmd_work->adapter);
1772         kfree(cmd_work);
1773 }
1774
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777         struct be_adapter *adapter = netdev_priv(netdev);
1778         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779         int status;
1780
1781         if (!sriov_enabled(adapter))
1782                 return -EPERM;
1783
1784         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785                 return -EINVAL;
1786
1787         /* Proceed further only if the user-provided MAC is different
1788          * from the active MAC
1789          */
1790         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791                 return 0;
1792
1793         if (BEx_chip(adapter)) {
1794                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795                                 vf + 1);
1796
1797                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798                                          &vf_cfg->pmac_id, vf + 1);
1799         } else {
1800                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801                                         vf + 1);
1802         }
1803
1804         if (status) {
1805                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x",
1806                         mac, vf, status);
1807                 return be_cmd_status(status);
1808         }
1809
1810         ether_addr_copy(vf_cfg->mac_addr, mac);
1811
1812         return 0;
1813 }
1814
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816                             struct ifla_vf_info *vi)
1817 {
1818         struct be_adapter *adapter = netdev_priv(netdev);
1819         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820
1821         if (!sriov_enabled(adapter))
1822                 return -EPERM;
1823
1824         if (vf >= adapter->num_vfs)
1825                 return -EINVAL;
1826
1827         vi->vf = vf;
1828         vi->max_tx_rate = vf_cfg->tx_rate;
1829         vi->min_tx_rate = 0;
1830         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835
1836         return 0;
1837 }
1838
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842         u16 vids[BE_NUM_VLANS_SUPPORTED];
1843         int vf_if_id = vf_cfg->if_handle;
1844         int status;
1845
1846         /* Enable Transparent VLAN Tagging */
1847         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848         if (status)
1849                 return status;
1850
1851         /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1852         vids[0] = 0;
1853         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854         if (!status)
1855                 dev_info(&adapter->pdev->dev,
1856                          "Cleared guest VLANs on VF%d", vf);
1857
1858         /* After TVT is enabled, disallow the VF from programming VLAN filters */
1859         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1862                 if (!status)
1863                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864         }
1865         return 0;
1866 }
1867
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871         struct device *dev = &adapter->pdev->dev;
1872         int status;
1873
1874         /* Reset Transparent VLAN Tagging. */
1875         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876                                        vf_cfg->if_handle, 0, 0);
1877         if (status)
1878                 return status;
1879
1880         /* Allow the VF to program VLAN filters */
1881         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883                                                   BE_PRIV_FILTMGMT, vf + 1);
1884                 if (!status) {
1885                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1887                 }
1888         }
1889
1890         dev_info(dev,
1891                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1892         return 0;
1893 }
1894
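/* Summary of the function below: the ndo_set_vf_vlan handler programs
 * Transparent VLAN Tagging (TVT) for the VF when a vlan/qos pair is given
 * (the qos bits are folded into the tag via VLAN_PRIO_SHIFT) and clears TVT
 * when both are zero. Only the 802.1Q protocol is supported.
 */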
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896                           __be16 vlan_proto)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900         int status;
1901
1902         if (!sriov_enabled(adapter))
1903                 return -EPERM;
1904
1905         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906                 return -EINVAL;
1907
1908         if (vlan_proto != htons(ETH_P_8021Q))
1909                 return -EPROTONOSUPPORT;
1910
1911         if (vlan || qos) {
1912                 vlan |= qos << VLAN_PRIO_SHIFT;
1913                 status = be_set_vf_tvt(adapter, vf, vlan);
1914         } else {
1915                 status = be_clear_vf_tvt(adapter, vf);
1916         }
1917
1918         if (status) {
1919                 dev_err(&adapter->pdev->dev,
1920                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1921                         status);
1922                 return be_cmd_status(status);
1923         }
1924
1925         vf_cfg->vlan_tag = vlan;
1926         return 0;
1927 }
1928
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930                              int min_tx_rate, int max_tx_rate)
1931 {
1932         struct be_adapter *adapter = netdev_priv(netdev);
1933         struct device *dev = &adapter->pdev->dev;
1934         int percent_rate, status = 0;
1935         u16 link_speed = 0;
1936         u8 link_status;
1937
1938         if (!sriov_enabled(adapter))
1939                 return -EPERM;
1940
1941         if (vf >= adapter->num_vfs)
1942                 return -EINVAL;
1943
1944         if (min_tx_rate)
1945                 return -EINVAL;
1946
1947         if (!max_tx_rate)
1948                 goto config_qos;
1949
1950         status = be_cmd_link_status_query(adapter, &link_speed,
1951                                           &link_status, 0);
1952         if (status)
1953                 goto err;
1954
1955         if (!link_status) {
1956                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957                 status = -ENETDOWN;
1958                 goto err;
1959         }
1960
1961         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963                         link_speed);
1964                 status = -EINVAL;
1965                 goto err;
1966         }
1967
1968         /* On Skyhawk the QoS setting must be a multiple of 1% of the link speed */
1969         percent_rate = link_speed / 100;
1970         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972                         percent_rate);
1973                 status = -EINVAL;
1974                 goto err;
1975         }
1976
1977 config_qos:
1978         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979         if (status)
1980                 goto err;
1981
1982         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983         return 0;
1984
1985 err:
1986         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987                 max_tx_rate, vf);
1988         return be_cmd_status(status);
1989 }
1990
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992                                 int link_state)
1993 {
1994         struct be_adapter *adapter = netdev_priv(netdev);
1995         int status;
1996
1997         if (!sriov_enabled(adapter))
1998                 return -EPERM;
1999
2000         if (vf >= adapter->num_vfs)
2001                 return -EINVAL;
2002
2003         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2004         if (status) {
2005                 dev_err(&adapter->pdev->dev,
2006                         "Link state change on VF %d failed: %#x\n", vf, status);
2007                 return be_cmd_status(status);
2008         }
2009
2010         adapter->vf_cfg[vf].plink_tracking = link_state;
2011
2012         return 0;
2013 }
2014
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017         struct be_adapter *adapter = netdev_priv(netdev);
2018         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019         u8 spoofchk;
2020         int status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (BEx_chip(adapter))
2029                 return -EOPNOTSUPP;
2030
2031         if (enable == vf_cfg->spoofchk)
2032                 return 0;
2033
2034         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035
2036         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037                                        0, spoofchk);
2038         if (status) {
2039                 dev_err(&adapter->pdev->dev,
2040                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2041                 return be_cmd_status(status);
2042         }
2043
2044         vf_cfg->spoofchk = enable;
2045         return 0;
2046 }
2047
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049                           ulong now)
2050 {
2051         aic->rx_pkts_prev = rx_pkts;
2052         aic->tx_reqs_prev = tx_pkts;
2053         aic->jiffies = now;
2054 }
2055
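/* Adaptive interrupt coalescing: be_get_new_eqd() derives a new EQ delay from
 * the packet rate seen on this EQ since the last sample. A rough worked
 * example (illustrative only): with ~60000 combined rx+tx pkts/s,
 * eqd = (60000 / 15000) << 2 = 16; values below 8 are forced to 0 and the
 * result is clamped to [min_eqd, max_eqd]. The value is whatever unit
 * be_cmd_modify_eqd() expects (nominally usecs, converted to a delay
 * multiplier there).
 */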
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058         struct be_adapter *adapter = eqo->adapter;
2059         int eqd, start;
2060         struct be_aic_obj *aic;
2061         struct be_rx_obj *rxo;
2062         struct be_tx_obj *txo;
2063         u64 rx_pkts = 0, tx_pkts = 0;
2064         ulong now;
2065         u32 pps, delta;
2066         int i;
2067
2068         aic = &adapter->aic_obj[eqo->idx];
2069         if (!aic->enable) {
2070                 if (aic->jiffies)
2071                         aic->jiffies = 0;
2072                 eqd = aic->et_eqd;
2073                 return eqd;
2074         }
2075
2076         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077                 do {
2078                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079                         rx_pkts += rxo->stats.rx_pkts;
2080                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081         }
2082
2083         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084                 do {
2085                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086                         tx_pkts += txo->stats.tx_reqs;
2087                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088         }
2089
2090         /* Skip if this is the first calculation or the counters have wrapped around */
2091         now = jiffies;
2092         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093             rx_pkts < aic->rx_pkts_prev ||
2094             tx_pkts < aic->tx_reqs_prev) {
2095                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2096                 return aic->prev_eqd;
2097         }
2098
2099         delta = jiffies_to_msecs(now - aic->jiffies);
2100         if (delta == 0)
2101                 return aic->prev_eqd;
2102
2103         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105         eqd = (pps / 15000) << 2;
2106
2107         if (eqd < 8)
2108                 eqd = 0;
2109         eqd = min_t(u32, eqd, aic->max_eqd);
2110         eqd = max_t(u32, eqd, aic->min_eqd);
2111
2112         be_aic_update(aic, rx_pkts, tx_pkts, now);
2113
2114         return eqd;
2115 }
2116
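/* Note on the helper below: on Skyhawk-R the computed eqd is mapped to one of
 * four R2I delay-multiplier encodings (>100 -> ENC_1, >60 -> ENC_2,
 * >20 -> ENC_3, else ENC_0) rather than being programmed directly.
 */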
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120         struct be_adapter *adapter = eqo->adapter;
2121         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122         ulong now = jiffies;
2123         int eqd;
2124         u32 mult_enc;
2125
2126         if (!aic->enable)
2127                 return 0;
2128
2129         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130                 eqd = aic->prev_eqd;
2131         else
2132                 eqd = be_get_new_eqd(eqo);
2133
2134         if (eqd > 100)
2135                 mult_enc = R2I_DLY_ENC_1;
2136         else if (eqd > 60)
2137                 mult_enc = R2I_DLY_ENC_2;
2138         else if (eqd > 20)
2139                 mult_enc = R2I_DLY_ENC_3;
2140         else
2141                 mult_enc = R2I_DLY_ENC_0;
2142
2143         aic->prev_eqd = eqd;
2144
2145         return mult_enc;
2146 }
2147
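/* Summary of the function below: be_eqd_update() recomputes the delay for
 * every EQ and pushes only the changed values to the FW in a single
 * be_cmd_modify_eqd() call; the delay_multiplier sent is eqd * 65 / 100.
 */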
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150         struct be_set_eqd set_eqd[MAX_EVT_QS];
2151         struct be_aic_obj *aic;
2152         struct be_eq_obj *eqo;
2153         int i, num = 0, eqd;
2154
2155         for_all_evt_queues(adapter, eqo, i) {
2156                 aic = &adapter->aic_obj[eqo->idx];
2157                 eqd = be_get_new_eqd(eqo);
2158                 if (force_update || eqd != aic->prev_eqd) {
2159                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160                         set_eqd[num].eq_id = eqo->q.id;
2161                         aic->prev_eqd = eqd;
2162                         num++;
2163                 }
2164         }
2165
2166         if (num)
2167                 be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171                                struct be_rx_compl_info *rxcp)
2172 {
2173         struct be_rx_stats *stats = rx_stats(rxo);
2174
2175         u64_stats_update_begin(&stats->sync);
2176         stats->rx_compl++;
2177         stats->rx_bytes += rxcp->pkt_size;
2178         stats->rx_pkts++;
2179         if (rxcp->tunneled)
2180                 stats->rx_vxlan_offload_pkts++;
2181         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182                 stats->rx_mcast_pkts++;
2183         if (rxcp->err)
2184                 stats->rx_compl_err++;
2185         u64_stats_update_end(&stats->sync);
2186 }
2187
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
2190         /* L4 checksum is not reliable for non-TCP/UDP packets.
2191          * Also ignore ipcksm for IPv6 packets
2192          */
2193         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196
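/* Note on the helper below: get_rx_page_info() pops the page_info at the RXQ
 * tail. The whole "big page" is DMA-unmapped only when this fragment is the
 * last one carved from it; otherwise just the fragment is synced for CPU
 * access.
 */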
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199         struct be_adapter *adapter = rxo->adapter;
2200         struct be_rx_page_info *rx_page_info;
2201         struct be_queue_info *rxq = &rxo->q;
2202         u32 frag_idx = rxq->tail;
2203
2204         rx_page_info = &rxo->page_info_tbl[frag_idx];
2205         BUG_ON(!rx_page_info->page);
2206
2207         if (rx_page_info->last_frag) {
2208                 dma_unmap_page(&adapter->pdev->dev,
2209                                dma_unmap_addr(rx_page_info, bus),
2210                                adapter->big_page_size, DMA_FROM_DEVICE);
2211                 rx_page_info->last_frag = false;
2212         } else {
2213                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2214                                         dma_unmap_addr(rx_page_info, bus),
2215                                         rx_frag_size, DMA_FROM_DEVICE);
2216         }
2217
2218         queue_tail_inc(rxq);
2219         atomic_dec(&rxq->used);
2220         return rx_page_info;
2221 }
2222
2223 /* Throw away the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225                                 struct be_rx_compl_info *rxcp)
2226 {
2227         struct be_rx_page_info *page_info;
2228         u16 i, num_rcvd = rxcp->num_rcvd;
2229
2230         for (i = 0; i < num_rcvd; i++) {
2231                 page_info = get_rx_page_info(rxo);
2232                 put_page(page_info->page);
2233                 memset(page_info, 0, sizeof(*page_info));
2234         }
2235 }
2236
2237 /*
2238  * skb_fill_rx_data forms a complete skb for an ether frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242                              struct be_rx_compl_info *rxcp)
2243 {
2244         struct be_rx_page_info *page_info;
2245         u16 i, j;
2246         u16 hdr_len, curr_frag_len, remaining;
2247         u8 *start;
2248
2249         page_info = get_rx_page_info(rxo);
2250         start = page_address(page_info->page) + page_info->page_offset;
2251         prefetch(start);
2252
2253         /* Copy data in the first descriptor of this completion */
2254         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255
2256         skb->len = curr_frag_len;
2257         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258                 memcpy(skb->data, start, curr_frag_len);
2259                 /* Complete packet has now been moved to data */
2260                 put_page(page_info->page);
2261                 skb->data_len = 0;
2262                 skb->tail += curr_frag_len;
2263         } else {
2264                 hdr_len = ETH_HLEN;
2265                 memcpy(skb->data, start, hdr_len);
2266                 skb_shinfo(skb)->nr_frags = 1;
2267                 skb_frag_set_page(skb, 0, page_info->page);
2268                 skb_shinfo(skb)->frags[0].page_offset =
2269                                         page_info->page_offset + hdr_len;
2270                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271                                   curr_frag_len - hdr_len);
2272                 skb->data_len = curr_frag_len - hdr_len;
2273                 skb->truesize += rx_frag_size;
2274                 skb->tail += hdr_len;
2275         }
2276         page_info->page = NULL;
2277
2278         if (rxcp->pkt_size <= rx_frag_size) {
2279                 BUG_ON(rxcp->num_rcvd != 1);
2280                 return;
2281         }
2282
2283         /* More frags present for this completion */
2284         remaining = rxcp->pkt_size - curr_frag_len;
2285         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286                 page_info = get_rx_page_info(rxo);
2287                 curr_frag_len = min(remaining, rx_frag_size);
2288
2289                 /* Coalesce all frags from the same physical page in one slot */
2290                 if (page_info->page_offset == 0) {
2291                         /* Fresh page */
2292                         j++;
2293                         skb_frag_set_page(skb, j, page_info->page);
2294                         skb_shinfo(skb)->frags[j].page_offset =
2295                                                         page_info->page_offset;
2296                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297                         skb_shinfo(skb)->nr_frags++;
2298                 } else {
2299                         put_page(page_info->page);
2300                 }
2301
2302                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303                 skb->len += curr_frag_len;
2304                 skb->data_len += curr_frag_len;
2305                 skb->truesize += rx_frag_size;
2306                 remaining -= curr_frag_len;
2307                 page_info->page = NULL;
2308         }
2309         BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314                                 struct be_rx_compl_info *rxcp)
2315 {
2316         struct be_adapter *adapter = rxo->adapter;
2317         struct net_device *netdev = adapter->netdev;
2318         struct sk_buff *skb;
2319
2320         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321         if (unlikely(!skb)) {
2322                 rx_stats(rxo)->rx_drops_no_skbs++;
2323                 be_rx_compl_discard(rxo, rxcp);
2324                 return;
2325         }
2326
2327         skb_fill_rx_data(rxo, skb, rxcp);
2328
2329         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2331         else
2332                 skb_checksum_none_assert(skb);
2333
2334         skb->protocol = eth_type_trans(skb, netdev);
2335         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336         if (netdev->features & NETIF_F_RXHASH)
2337                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338
2339         skb->csum_level = rxcp->tunneled;
2340         skb_mark_napi_id(skb, napi);
2341
2342         if (rxcp->vlanf)
2343                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344
2345         netif_receive_skb(skb);
2346 }
2347
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350                                     struct napi_struct *napi,
2351                                     struct be_rx_compl_info *rxcp)
2352 {
2353         struct be_adapter *adapter = rxo->adapter;
2354         struct be_rx_page_info *page_info;
2355         struct sk_buff *skb = NULL;
2356         u16 remaining, curr_frag_len;
2357         u16 i, j;
2358
2359         skb = napi_get_frags(napi);
2360         if (!skb) {
2361                 be_rx_compl_discard(rxo, rxcp);
2362                 return;
2363         }
2364
2365         remaining = rxcp->pkt_size;
2366         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367                 page_info = get_rx_page_info(rxo);
2368
2369                 curr_frag_len = min(remaining, rx_frag_size);
2370
2371                 /* Coalesce all frags from the same physical page in one slot */
2372                 if (i == 0 || page_info->page_offset == 0) {
2373                         /* First frag or Fresh page */
2374                         j++;
2375                         skb_frag_set_page(skb, j, page_info->page);
2376                         skb_shinfo(skb)->frags[j].page_offset =
2377                                                         page_info->page_offset;
2378                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379                 } else {
2380                         put_page(page_info->page);
2381                 }
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->truesize += rx_frag_size;
2384                 remaining -= curr_frag_len;
2385                 memset(page_info, 0, sizeof(*page_info));
2386         }
2387         BUG_ON(j > MAX_SKB_FRAGS);
2388
2389         skb_shinfo(skb)->nr_frags = j + 1;
2390         skb->len = rxcp->pkt_size;
2391         skb->data_len = rxcp->pkt_size;
2392         skb->ip_summed = CHECKSUM_UNNECESSARY;
2393         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394         if (adapter->netdev->features & NETIF_F_RXHASH)
2395                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396
2397         skb->csum_level = rxcp->tunneled;
2398
2399         if (rxcp->vlanf)
2400                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401
2402         napi_gro_frags(napi);
2403 }
2404
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406                                  struct be_rx_compl_info *rxcp)
2407 {
2408         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419         if (rxcp->vlanf) {
2420                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422         }
2423         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424         rxcp->tunneled =
2425                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429                                  struct be_rx_compl_info *rxcp)
2430 {
2431         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442         if (rxcp->vlanf) {
2443                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445         }
2446         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454         struct be_adapter *adapter = rxo->adapter;
2455
2456         /* For checking the valid bit it is OK to use either definition, as the
2457          * valid bit is at the same position in both v0 and v1 Rx compls */
2458         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459                 return NULL;
2460
2461         rmb();
2462         be_dws_le_to_cpu(compl, sizeof(*compl));
2463
2464         if (adapter->be3_native)
2465                 be_parse_rx_compl_v1(compl, rxcp);
2466         else
2467                 be_parse_rx_compl_v0(compl, rxcp);
2468
2469         if (rxcp->ip_frag)
2470                 rxcp->l4_csum = 0;
2471
2472         if (rxcp->vlanf) {
2473                 /* In QNQ modes, if qnq bit is not set, then the packet was
2474                  * tagged only with the transparent outer vlan-tag and must
2475                  * not be treated as a vlan packet by host
2476                  */
2477                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478                         rxcp->vlanf = 0;
2479
2480                 if (!lancer_chip(adapter))
2481                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482
2483                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484                     !test_bit(rxcp->vlan_tag, adapter->vids))
2485                         rxcp->vlanf = 0;
2486         }
2487
2488         /* As the compl has been parsed, reset it; we won't touch it again */
2489         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490
2491         queue_tail_inc(&rxo->cq);
2492         return rxcp;
2493 }
2494
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497         u32 order = get_order(size);
2498
2499         if (order > 0)
2500                 gfp |= __GFP_COMP;
2501         return  alloc_pages(gfp, order);
2502 }
2503
2504 /*
2505  * Allocate a page, split it into fragments of size rx_frag_size and post
2506  * them as receive buffers to BE
2507  */
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510         struct be_adapter *adapter = rxo->adapter;
2511         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512         struct be_queue_info *rxq = &rxo->q;
2513         struct page *pagep = NULL;
2514         struct device *dev = &adapter->pdev->dev;
2515         struct be_eth_rx_d *rxd;
2516         u64 page_dmaaddr = 0, frag_dmaaddr;
2517         u32 posted, page_offset = 0, notify = 0;
2518
2519         page_info = &rxo->page_info_tbl[rxq->head];
2520         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521                 if (!pagep) {
2522                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523                         if (unlikely(!pagep)) {
2524                                 rx_stats(rxo)->rx_post_fail++;
2525                                 break;
2526                         }
2527                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2528                                                     adapter->big_page_size,
2529                                                     DMA_FROM_DEVICE);
2530                         if (dma_mapping_error(dev, page_dmaaddr)) {
2531                                 put_page(pagep);
2532                                 pagep = NULL;
2533                                 adapter->drv_stats.dma_map_errors++;
2534                                 break;
2535                         }
2536                         page_offset = 0;
2537                 } else {
2538                         get_page(pagep);
2539                         page_offset += rx_frag_size;
2540                 }
2541                 page_info->page_offset = page_offset;
2542                 page_info->page = pagep;
2543
2544                 rxd = queue_head_node(rxq);
2545                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548
2549                 /* Any space left in the current big page for another frag? */
2550                 if ((page_offset + rx_frag_size + rx_frag_size) >
2551                                         adapter->big_page_size) {
2552                         pagep = NULL;
2553                         page_info->last_frag = true;
2554                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555                 } else {
2556                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557                 }
2558
2559                 prev_page_info = page_info;
2560                 queue_head_inc(rxq);
2561                 page_info = &rxo->page_info_tbl[rxq->head];
2562         }
2563
2564         /* Mark the last frag of a page when we break out of the above loop
2565          * with no more slots available in the RXQ
2566          */
2567         if (pagep) {
2568                 prev_page_info->last_frag = true;
2569                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570         }
2571
2572         if (posted) {
2573                 atomic_add(posted, &rxq->used);
2574                 if (rxo->rx_post_starved)
2575                         rxo->rx_post_starved = false;
2576                 do {
2577                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2578                         be_rxq_notify(adapter, rxq->id, notify);
2579                         posted -= notify;
2580                 } while (posted);
2581         } else if (atomic_read(&rxq->used) == 0) {
2582                 /* Let be_worker replenish when memory is available */
2583                 rxo->rx_post_starved = true;
2584         }
2585 }
2586
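/* Note on the helper below: be_tx_compl_get() peeks the TX CQ tail entry and
 * returns NULL if the valid bit is clear; otherwise, after an rmb() to order
 * the loads, it extracts the status and last wrb_index, clears the valid bit
 * and advances the CQ tail.
 */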
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589         struct be_queue_info *tx_cq = &txo->cq;
2590         struct be_tx_compl_info *txcp = &txo->txcp;
2591         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592
2593         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594                 return NULL;
2595
2596         /* Ensure load ordering of valid bit dword and other dwords below */
2597         rmb();
2598         be_dws_le_to_cpu(compl, sizeof(*compl));
2599
2600         txcp->status = GET_TX_COMPL_BITS(status, compl);
2601         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602
2603         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604         queue_tail_inc(tx_cq);
2605         return txcp;
2606 }
2607
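/* Summary of the function below: be_tx_compl_process() walks the TXQ from its
 * tail up to last_index, unmapping each frag WRB (plus the header WRB of each
 * new skb) and freeing the completed skbs; it returns the number of WRBs
 * reclaimed so the caller can credit txq->used.
 */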
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609                                struct be_tx_obj *txo, u16 last_index)
2610 {
2611         struct sk_buff **sent_skbs = txo->sent_skb_list;
2612         struct be_queue_info *txq = &txo->q;
2613         struct sk_buff *skb = NULL;
2614         bool unmap_skb_hdr = false;
2615         struct be_eth_wrb *wrb;
2616         u16 num_wrbs = 0;
2617         u32 frag_index;
2618
2619         do {
2620                 if (sent_skbs[txq->tail]) {
2621                         /* Free skb from prev req */
2622                         if (skb)
2623                                 dev_consume_skb_any(skb);
2624                         skb = sent_skbs[txq->tail];
2625                         sent_skbs[txq->tail] = NULL;
2626                         queue_tail_inc(txq);  /* skip hdr wrb */
2627                         num_wrbs++;
2628                         unmap_skb_hdr = true;
2629                 }
2630                 wrb = queue_tail_node(txq);
2631                 frag_index = txq->tail;
2632                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2633                               (unmap_skb_hdr && skb_headlen(skb)));
2634                 unmap_skb_hdr = false;
2635                 queue_tail_inc(txq);
2636                 num_wrbs++;
2637         } while (frag_index != last_index);
2638         dev_consume_skb_any(skb);
2639
2640         return num_wrbs;
2641 }
2642
2643 /* Return the number of events in the event queue */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646         struct be_eq_entry *eqe;
2647         int num = 0;
2648
2649         do {
2650                 eqe = queue_tail_node(&eqo->q);
2651                 if (eqe->evt == 0)
2652                         break;
2653
2654                 rmb();
2655                 eqe->evt = 0;
2656                 num++;
2657                 queue_tail_inc(&eqo->q);
2658         } while (true);
2659
2660         return num;
2661 }
2662
2663 /* Leaves the EQ in a disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666         int num = events_get(eqo);
2667
2668         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674         struct be_queue_info *rxq = &rxo->q;
2675         struct be_rx_page_info *page_info;
2676
2677         while (atomic_read(&rxq->used) > 0) {
2678                 page_info = get_rx_page_info(rxo);
2679                 put_page(page_info->page);
2680                 memset(page_info, 0, sizeof(*page_info));
2681         }
2682         BUG_ON(atomic_read(&rxq->used));
2683         rxq->tail = 0;
2684         rxq->head = 0;
2685 }
2686
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689         struct be_queue_info *rx_cq = &rxo->cq;
2690         struct be_rx_compl_info *rxcp;
2691         struct be_adapter *adapter = rxo->adapter;
2692         int flush_wait = 0;
2693
2694         /* Consume pending rx completions.
2695          * Wait for the flush completion (identified by zero num_rcvd)
2696          * to arrive. Notify CQ even when there are no more CQ entries
2697          * for HW to flush partially coalesced CQ entries.
2698          * In Lancer, there is no need to wait for flush compl.
2699          */
2700         for (;;) {
2701                 rxcp = be_rx_compl_get(rxo);
2702                 if (!rxcp) {
2703                         if (lancer_chip(adapter))
2704                                 break;
2705
2706                         if (flush_wait++ > 50 ||
2707                             be_check_error(adapter,
2708                                            BE_ERROR_HW)) {
2709                                 dev_warn(&adapter->pdev->dev,
2710                                          "did not receive flush compl\n");
2711                                 break;
2712                         }
2713                         be_cq_notify(adapter, rx_cq->id, true, 0);
2714                         mdelay(1);
2715                 } else {
2716                         be_rx_compl_discard(rxo, rxcp);
2717                         be_cq_notify(adapter, rx_cq->id, false, 1);
2718                         if (rxcp->num_rcvd == 0)
2719                                 break;
2720                 }
2721         }
2722
2723         /* After cleanup, leave the CQ in unarmed state */
2724         be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726
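/* Summary of the function below: be_tx_compl_clean() drains TX completions
 * until all TX queues are quiet, the HW has been silent for ~10ms, or a HW
 * error is detected; any WRBs that were enqueued but never notified to the HW
 * are then reclaimed and the queue indices are reset.
 */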
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729         struct device *dev = &adapter->pdev->dev;
2730         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731         struct be_tx_compl_info *txcp;
2732         struct be_queue_info *txq;
2733         u32 end_idx, notified_idx;
2734         struct be_tx_obj *txo;
2735         int i, pending_txqs;
2736
2737         /* Stop polling for compls when HW has been silent for 10ms */
2738         do {
2739                 pending_txqs = adapter->num_tx_qs;
2740
2741                 for_all_tx_queues(adapter, txo, i) {
2742                         cmpl = 0;
2743                         num_wrbs = 0;
2744                         txq = &txo->q;
2745                         while ((txcp = be_tx_compl_get(txo))) {
2746                                 num_wrbs +=
2747                                         be_tx_compl_process(adapter, txo,
2748                                                             txcp->end_index);
2749                                 cmpl++;
2750                         }
2751                         if (cmpl) {
2752                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753                                 atomic_sub(num_wrbs, &txq->used);
2754                                 timeo = 0;
2755                         }
2756                         if (!be_is_tx_compl_pending(txo))
2757                                 pending_txqs--;
2758                 }
2759
2760                 if (pending_txqs == 0 || ++timeo > 10 ||
2761                     be_check_error(adapter, BE_ERROR_HW))
2762                         break;
2763
2764                 mdelay(1);
2765         } while (true);
2766
2767         /* Free enqueued TX requests that were never notified to the HW */
2768         for_all_tx_queues(adapter, txo, i) {
2769                 txq = &txo->q;
2770
2771                 if (atomic_read(&txq->used)) {
2772                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773                                  i, atomic_read(&txq->used));
2774                         notified_idx = txq->tail;
2775                         end_idx = txq->tail;
2776                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777                                   txq->len);
2778                         /* Use the tx-compl process logic to handle requests
2779                          * that were not sent to the HW.
2780                          */
2781                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782                         atomic_sub(num_wrbs, &txq->used);
2783                         BUG_ON(atomic_read(&txq->used));
2784                         txo->pend_wrb_cnt = 0;
2785                         /* Since hw was never notified of these requests,
2786                          * reset TXQ indices
2787                          */
2788                         txq->head = notified_idx;
2789                         txq->tail = notified_idx;
2790                 }
2791         }
2792 }
2793
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796         struct be_eq_obj *eqo;
2797         int i;
2798
2799         for_all_evt_queues(adapter, eqo, i) {
2800                 if (eqo->q.created) {
2801                         be_eq_clean(eqo);
2802                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803                         netif_napi_del(&eqo->napi);
2804                         free_cpumask_var(eqo->affinity_mask);
2805                 }
2806                 be_queue_free(adapter, &eqo->q);
2807         }
2808 }
2809
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812         struct be_queue_info *eq;
2813         struct be_eq_obj *eqo;
2814         struct be_aic_obj *aic;
2815         int i, rc;
2816
2817         /* need enough EQs to service both RX and TX queues */
2818         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819                                     max(adapter->cfg_num_rx_irqs,
2820                                         adapter->cfg_num_tx_irqs));
2821
2822         for_all_evt_queues(adapter, eqo, i) {
2823                 int numa_node = dev_to_node(&adapter->pdev->dev);
2824
2825                 aic = &adapter->aic_obj[i];
2826                 eqo->adapter = adapter;
2827                 eqo->idx = i;
2828                 aic->max_eqd = BE_MAX_EQD;
2829                 aic->enable = true;
2830
2831                 eq = &eqo->q;
2832                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833                                     sizeof(struct be_eq_entry));
2834                 if (rc)
2835                         return rc;
2836
2837                 rc = be_cmd_eq_create(adapter, eqo);
2838                 if (rc)
2839                         return rc;
2840
2841                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842                         return -ENOMEM;
2843                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844                                 eqo->affinity_mask);
2845                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846                                BE_NAPI_WEIGHT);
2847         }
2848         return 0;
2849 }
2850
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853         struct be_queue_info *q;
2854
2855         q = &adapter->mcc_obj.q;
2856         if (q->created)
2857                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858         be_queue_free(adapter, q);
2859
2860         q = &adapter->mcc_obj.cq;
2861         if (q->created)
2862                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863         be_queue_free(adapter, q);
2864 }
2865
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869         struct be_queue_info *q, *cq;
2870
2871         cq = &adapter->mcc_obj.cq;
2872         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873                            sizeof(struct be_mcc_compl)))
2874                 goto err;
2875
2876         /* Use the default EQ for MCC completions */
2877         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878                 goto mcc_cq_free;
2879
2880         q = &adapter->mcc_obj.q;
2881         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882                 goto mcc_cq_destroy;
2883
2884         if (be_cmd_mccq_create(adapter, q, cq))
2885                 goto mcc_q_free;
2886
2887         return 0;
2888
2889 mcc_q_free:
2890         be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894         be_queue_free(adapter, cq);
2895 err:
2896         return -1;
2897 }
2898
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901         struct be_queue_info *q;
2902         struct be_tx_obj *txo;
2903         u8 i;
2904
2905         for_all_tx_queues(adapter, txo, i) {
2906                 q = &txo->q;
2907                 if (q->created)
2908                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909                 be_queue_free(adapter, q);
2910
2911                 q = &txo->cq;
2912                 if (q->created)
2913                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914                 be_queue_free(adapter, q);
2915         }
2916 }
2917
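/* Create a completion queue and a TX queue for each TX ring. TX CQs are
 * spread over the EQs round-robin (i % num_evt_qs), so when there are fewer
 * EQs than TX rings several rings share one EQ. The XPS mapping of each
 * netdev TX queue follows the affinity mask of the EQ it was bound to.
 */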
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920         struct be_queue_info *cq;
2921         struct be_tx_obj *txo;
2922         struct be_eq_obj *eqo;
2923         int status, i;
2924
2925         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926
2927         for_all_tx_queues(adapter, txo, i) {
2928                 cq = &txo->cq;
2929                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930                                         sizeof(struct be_eth_tx_compl));
2931                 if (status)
2932                         return status;
2933
2934                 u64_stats_init(&txo->stats.sync);
2935                 u64_stats_init(&txo->stats.sync_compl);
2936
2937                 /* If num_evt_qs is less than num_tx_qs, then more than
2938                  * one TXQ shares an EQ
2939                  */
2940                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942                 if (status)
2943                         return status;
2944
2945                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946                                         sizeof(struct be_eth_wrb));
2947                 if (status)
2948                         return status;
2949
2950                 status = be_cmd_txq_create(adapter, txo);
2951                 if (status)
2952                         return status;
2953
2954                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955                                     eqo->idx);
2956         }
2957
2958         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959                  adapter->num_tx_qs);
2960         return 0;
2961 }
2962
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965         struct be_queue_info *q;
2966         struct be_rx_obj *rxo;
2967         int i;
2968
2969         for_all_rx_queues(adapter, rxo, i) {
2970                 q = &rxo->cq;
2971                 if (q->created)
2972                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973                 be_queue_free(adapter, q);
2974         }
2975 }
2976
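/* Work out how many RX rings will be used and create their completion
 * queues. RSS is used only when at least two RSS-capable rings are
 * available; an extra default (non-RSS) RXQ is added when needed, and at
 * least one RXQ always exists. Each RX CQ is bound to an EQ round-robin.
 */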
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979         struct be_queue_info *eq, *cq;
2980         struct be_rx_obj *rxo;
2981         int rc, i;
2982
2983         adapter->num_rss_qs =
2984                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985
2986         /* We'll use RSS only if at least 2 RSS rings are supported. */
2987         if (adapter->num_rss_qs < 2)
2988                 adapter->num_rss_qs = 0;
2989
2990         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991
2992         /* When the interface is not capable of RSS rings (and there is no
2993          * need to create a default RXQ), we'll still need one RXQ
2994          */
2995         if (adapter->num_rx_qs == 0)
2996                 adapter->num_rx_qs = 1;
2997
2998         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999         for_all_rx_queues(adapter, rxo, i) {
3000                 rxo->adapter = adapter;
3001                 cq = &rxo->cq;
3002                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003                                     sizeof(struct be_eth_rx_compl));
3004                 if (rc)
3005                         return rc;
3006
3007                 u64_stats_init(&rxo->stats.sync);
3008                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010                 if (rc)
3011                         return rc;
3012         }
3013
3014         dev_info(&adapter->pdev->dev,
3015                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3016         return 0;
3017 }
3018
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021         struct be_eq_obj *eqo = dev;
3022         struct be_adapter *adapter = eqo->adapter;
3023         int num_evts = 0;
3024
3025         /* IRQ is not expected when NAPI is scheduled as the EQ
3026          * will not be armed.
3027          * But, this can happen on Lancer INTx where it takes
3028          * a while to de-assert INTx or in BE2 where occasionally
3029          * an interrupt may be raised even when EQ is unarmed.
3030          * If NAPI is already scheduled, then counting & notifying
3031          * events will orphan them.
3032          */
3033         if (napi_schedule_prep(&eqo->napi)) {
3034                 num_evts = events_get(eqo);
3035                 __napi_schedule(&eqo->napi);
3036                 if (num_evts)
3037                         eqo->spurious_intr = 0;
3038         }
3039         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040
3041         /* Return IRQ_HANDLED only for the first spurious intr
3042          * after a valid intr to stop the kernel from branding
3043          * this irq as a bad one!
3044          */
3045         if (num_evts || eqo->spurious_intr++ == 0)
3046                 return IRQ_HANDLED;
3047         else
3048                 return IRQ_NONE;
3049 }
3050
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053         struct be_eq_obj *eqo = dev;
3054
3055         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056         napi_schedule(&eqo->napi);
3057         return IRQ_HANDLED;
3058 }
3059
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3063 }
3064
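/* NAPI RX processing for one RX ring: consume up to 'budget' completions,
 * skipping flush completions, discarding partial-DMA completions (Lancer B0)
 * and, on BE, packets received on the wrong port due to imperfect
 * promiscuous filtering. Completed packets go through the GRO or the regular
 * receive path. If any work was done, the CQ is re-armed and RX fragments
 * are replenished when the queue has dropped below the refill watermark.
 */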
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066                          int budget)
3067 {
3068         struct be_adapter *adapter = rxo->adapter;
3069         struct be_queue_info *rx_cq = &rxo->cq;
3070         struct be_rx_compl_info *rxcp;
3071         u32 work_done;
3072         u32 frags_consumed = 0;
3073
3074         for (work_done = 0; work_done < budget; work_done++) {
3075                 rxcp = be_rx_compl_get(rxo);
3076                 if (!rxcp)
3077                         break;
3078
3079                 /* Is it a flush compl that has no data? */
3080                 if (unlikely(rxcp->num_rcvd == 0))
3081                         goto loop_continue;
3082
3083                 /* Discard compls with partial DMA (Lancer B0) */
3084                 if (unlikely(!rxcp->pkt_size)) {
3085                         be_rx_compl_discard(rxo, rxcp);
3086                         goto loop_continue;
3087                 }
3088
3089                 /* On BE drop pkts that arrive due to imperfect filtering in
3090                  * promiscuous mode on some SKUs
3091                  */
3092                 if (unlikely(rxcp->port != adapter->port_num &&
3093                              !lancer_chip(adapter))) {
3094                         be_rx_compl_discard(rxo, rxcp);
3095                         goto loop_continue;
3096                 }
3097
3098                 if (do_gro(rxcp))
3099                         be_rx_compl_process_gro(rxo, napi, rxcp);
3100                 else
3101                         be_rx_compl_process(rxo, napi, rxcp);
3102
3103 loop_continue:
3104                 frags_consumed += rxcp->num_rcvd;
3105                 be_rx_stats_update(rxo, rxcp);
3106         }
3107
3108         if (work_done) {
3109                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3110
3111                 /* When an rx-obj gets into post_starved state, just
3112                  * let be_worker do the posting.
3113                  */
3114                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115                     !rxo->rx_post_starved)
3116                         be_post_rx_frags(rxo, GFP_ATOMIC,
3117                                          max_t(u32, MAX_RX_POST,
3118                                                frags_consumed));
3119         }
3120
3121         return work_done;
3122 }
3123
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126         switch (status) {
3127         case BE_TX_COMP_HDR_PARSE_ERR:
3128                 tx_stats(txo)->tx_hdr_parse_err++;
3129                 break;
3130         case BE_TX_COMP_NDMA_ERR:
3131                 tx_stats(txo)->tx_dma_err++;
3132                 break;
3133         case BE_TX_COMP_ACL_ERR:
3134                 tx_stats(txo)->tx_spoof_check_err++;
3135                 break;
3136         }
3137 }
3138
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141         switch (status) {
3142         case LANCER_TX_COMP_LSO_ERR:
3143                 tx_stats(txo)->tx_tso_err++;
3144                 break;
3145         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147                 tx_stats(txo)->tx_spoof_check_err++;
3148                 break;
3149         case LANCER_TX_COMP_QINQ_ERR:
3150                 tx_stats(txo)->tx_qinq_err++;
3151                 break;
3152         case LANCER_TX_COMP_PARITY_ERR:
3153                 tx_stats(txo)->tx_internal_parity_err++;
3154                 break;
3155         case LANCER_TX_COMP_DMA_ERR:
3156                 tx_stats(txo)->tx_dma_err++;
3157                 break;
3158         }
3159 }
3160
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162                           int idx)
3163 {
3164         int num_wrbs = 0, work_done = 0;
3165         struct be_tx_compl_info *txcp;
3166
3167         while ((txcp = be_tx_compl_get(txo))) {
3168                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169                 work_done++;
3170
3171                 if (txcp->status) {
3172                         if (lancer_chip(adapter))
3173                                 lancer_update_tx_err(txo, txcp->status);
3174                         else
3175                                 be_update_tx_err(txo, txcp->status);
3176                 }
3177         }
3178
3179         if (work_done) {
3180                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3181                 atomic_sub(num_wrbs, &txo->q.used);
3182
3183                 /* As Tx wrbs have been freed up, wake up netdev queue
3184                  * if it was stopped due to lack of tx wrbs.  */
3185                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186                     be_can_txq_wake(txo)) {
3187                         netif_wake_subqueue(adapter->netdev, idx);
3188                 }
3189
3190                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191                 tx_stats(txo)->tx_compl += work_done;
3192                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193         }
3194 }
3195
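/* Per-EQ NAPI poll handler: drains TX completions for all TX rings on this
 * EQ, then RX completions for its RX rings, and processes MCC completions if
 * this is the MCC EQ. When the budget is not exhausted, NAPI is completed
 * and the EQ is re-armed (with an interrupt-delay multiplier on Skyhawk);
 * otherwise events are only counted and cleared so polling continues.
 */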
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199         struct be_adapter *adapter = eqo->adapter;
3200         int max_work = 0, work, i, num_evts;
3201         struct be_rx_obj *rxo;
3202         struct be_tx_obj *txo;
3203         u32 mult_enc = 0;
3204
3205         num_evts = events_get(eqo);
3206
3207         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208                 be_process_tx(adapter, txo, i);
3209
3210         /* This loop will iterate twice for EQ0 in which
3211          * completions of the last RXQ (the default one) are also processed.
3212          * For other EQs the loop iterates only once.
3213          */
3214         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215                 work = be_process_rx(rxo, napi, budget);
3216                 max_work = max(work, max_work);
3217         }
3218
3219         if (is_mcc_eqo(eqo))
3220                 be_process_mcc(adapter);
3221
3222         if (max_work < budget) {
3223                 napi_complete_done(napi, max_work);
3224
3225                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3226                  * delay via a delay multiplier encoding value
3227                  */
3228                 if (skyhawk_chip(adapter))
3229                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3230
3231                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232                              mult_enc);
3233         } else {
3234                 /* As we'll continue in polling mode, count and clear events */
3235                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236         }
3237         return max_work;
3238 }
3239
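/* Check the adapter for unrecoverable errors. On Lancer the SLIPORT status
 * register is consulted (a FW-reset signature is logged as informational
 * only); on other chips the UE status low/high registers are read and
 * masked, and on BE3 the POST stage is used to filter out spurious UE
 * indications before the error is latched and the failing bits are logged.
 */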
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244         struct device *dev = &adapter->pdev->dev;
3245         u16 val;
3246         u32 i;
3247
3248         if (be_check_error(adapter, BE_ERROR_HW))
3249                 return;
3250
3251         if (lancer_chip(adapter)) {
3252                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3253                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3254                         be_set_error(adapter, BE_ERROR_UE);
3255                         sliport_err1 = ioread32(adapter->db +
3256                                                 SLIPORT_ERROR1_OFFSET);
3257                         sliport_err2 = ioread32(adapter->db +
3258                                                 SLIPORT_ERROR2_OFFSET);
3259                         /* Do not log error messages if it's a FW reset */
3260                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3261                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3262                                 dev_info(dev, "Firmware update in progress\n");
3263                         } else {
3264                                 dev_err(dev, "Error detected in the card\n");
3265                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3266                                         sliport_status);
3267                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3268                                         sliport_err1);
3269                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3270                                         sliport_err2);
3271                         }
3272                 }
3273         } else {
3274                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3275                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3276                 ue_lo_mask = ioread32(adapter->pcicfg +
3277                                       PCICFG_UE_STATUS_LOW_MASK);
3278                 ue_hi_mask = ioread32(adapter->pcicfg +
3279                                       PCICFG_UE_STATUS_HI_MASK);
3280
3281                 ue_lo = (ue_lo & ~ue_lo_mask);
3282                 ue_hi = (ue_hi & ~ue_hi_mask);
3283
3284                 if (ue_lo || ue_hi) {
3285                         /* On certain platforms BE3 hardware can indicate
3286                          * spurious UEs. In case of a UE in the chip,
3287                          * the POST register correctly reports either a
3288                          * FAT_LOG_START state (FW is currently dumping
3289                          * FAT log data) or an ARMFW_UE state. Check for the
3290                          * above states to ascertain if the UE is valid or not.
3291                          */
3292                         if (BE3_chip(adapter)) {
3293                                 val = be_POST_stage_get(adapter);
3294                                 if ((val & POST_STAGE_FAT_LOG_START)
3295                                      != POST_STAGE_FAT_LOG_START &&
3296                                     (val & POST_STAGE_ARMFW_UE)
3297                                      != POST_STAGE_ARMFW_UE &&
3298                                     (val & POST_STAGE_RECOVERABLE_ERR)
3299                                      != POST_STAGE_RECOVERABLE_ERR)
3300                                         return;
3301                         }
3302
3303                         dev_err(dev, "Error detected in the adapter\n");
3304                         be_set_error(adapter, BE_ERROR_UE);
3305
3306                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3307                                 if (ue_lo & 1)
3308                                         dev_err(dev, "UE: %s bit set\n",
3309                                                 ue_status_low_desc[i]);
3310                         }
3311                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3312                                 if (ue_hi & 1)
3313                                         dev_err(dev, "UE: %s bit set\n",
3314                                                 ue_status_hi_desc[i]);
3315                         }
3316                 }
3317         }
3318 }
3319
3320 static void be_msix_disable(struct be_adapter *adapter)
3321 {
3322         if (msix_enabled(adapter)) {
3323                 pci_disable_msix(adapter->pdev);
3324                 adapter->num_msix_vec = 0;
3325                 adapter->num_msix_roce_vec = 0;
3326         }
3327 }
3328
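/* Enable MSI-X. When RoCE is supported the request covers both NIC and RoCE
 * vectors: be_max_any_irqs() NIC vectors plus up to
 * (be_max_func_eqs() - be_max_nic_eqs()) RoCE EQs, capped at the number of
 * online CPUs. Otherwise only max(cfg_num_rx_irqs, cfg_num_tx_irqs) vectors
 * are requested. If more than MIN_MSIX_VECTORS are granted on a RoCE-capable
 * function, half of the granted vectors are reserved for RoCE and the rest
 * are used for NIC traffic.
 */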
3329 static int be_msix_enable(struct be_adapter *adapter)
3330 {
3331         unsigned int i, max_roce_eqs;
3332         struct device *dev = &adapter->pdev->dev;
3333         int num_vec;
3334
3335         /* If RoCE is supported, program the max number of vectors that
3336          * could be used for NIC and RoCE; else, just program the number
3337          * we'll use initially.
3338          */
3339         if (be_roce_supported(adapter)) {
3340                 max_roce_eqs =
3341                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3342                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3343                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3344         } else {
3345                 num_vec = max(adapter->cfg_num_rx_irqs,
3346                               adapter->cfg_num_tx_irqs);
3347         }
3348
3349         for (i = 0; i < num_vec; i++)
3350                 adapter->msix_entries[i].entry = i;
3351
3352         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3353                                         MIN_MSIX_VECTORS, num_vec);
3354         if (num_vec < 0)
3355                 goto fail;
3356
3357         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3358                 adapter->num_msix_roce_vec = num_vec / 2;
3359                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3360                          adapter->num_msix_roce_vec);
3361         }
3362
3363         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3364
3365         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3366                  adapter->num_msix_vec);
3367         return 0;
3368
3369 fail:
3370         dev_warn(dev, "MSIx enable failed\n");
3371
3372         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3373         if (be_virtfn(adapter))
3374                 return num_vec;
3375         return 0;
3376 }
3377
3378 static inline int be_msix_vec_get(struct be_adapter *adapter,
3379                                   struct be_eq_obj *eqo)
3380 {
3381         return adapter->msix_entries[eqo->msix_idx].vector;
3382 }
3383
3384 static int be_msix_register(struct be_adapter *adapter)
3385 {
3386         struct net_device *netdev = adapter->netdev;
3387         struct be_eq_obj *eqo;
3388         int status, i, vec;
3389
3390         for_all_evt_queues(adapter, eqo, i) {
3391                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3392                 vec = be_msix_vec_get(adapter, eqo);
3393                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3394                 if (status)
3395                         goto err_msix;
3396
3397                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3398         }
3399
3400         return 0;
3401 err_msix:
3402         for (i--; i >= 0; i--) {
3403                 eqo = &adapter->eq_obj[i];
3404                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3405         }
3406         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3407                  status);
3408         be_msix_disable(adapter);
3409         return status;
3410 }
3411
3412 static int be_irq_register(struct be_adapter *adapter)
3413 {
3414         struct net_device *netdev = adapter->netdev;
3415         int status;
3416
3417         if (msix_enabled(adapter)) {
3418                 status = be_msix_register(adapter);
3419                 if (status == 0)
3420                         goto done;
3421                 /* INTx is not supported for VF */
3422                 if (be_virtfn(adapter))
3423                         return status;
3424         }
3425
3426         /* INTx: only the first EQ is used */
3427         netdev->irq = adapter->pdev->irq;
3428         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3429                              &adapter->eq_obj[0]);
3430         if (status) {
3431                 dev_err(&adapter->pdev->dev,
3432                         "INTx request IRQ failed - err %d\n", status);
3433                 return status;
3434         }
3435 done:
3436         adapter->isr_registered = true;
3437         return 0;
3438 }
3439
3440 static void be_irq_unregister(struct be_adapter *adapter)
3441 {
3442         struct net_device *netdev = adapter->netdev;
3443         struct be_eq_obj *eqo;
3444         int i, vec;
3445
3446         if (!adapter->isr_registered)
3447                 return;
3448
3449         /* INTx */
3450         if (!msix_enabled(adapter)) {
3451                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3452                 goto done;
3453         }
3454
3455         /* MSIx */
3456         for_all_evt_queues(adapter, eqo, i) {
3457                 vec = be_msix_vec_get(adapter, eqo);
3458                 irq_set_affinity_hint(vec, NULL);
3459                 free_irq(vec, eqo);
3460         }
3461
3462 done:
3463         adapter->isr_registered = false;
3464 }
3465
3466 static void be_rx_qs_destroy(struct be_adapter *adapter)
3467 {
3468         struct rss_info *rss = &adapter->rss_info;
3469         struct be_queue_info *q;
3470         struct be_rx_obj *rxo;
3471         int i;
3472
3473         for_all_rx_queues(adapter, rxo, i) {
3474                 q = &rxo->q;
3475                 if (q->created) {
3476                         /* If RXQs are destroyed while in an "out of buffer"
3477                          * state, there is a possibility of an HW stall on
3478                          * Lancer. So, post 64 buffers to each queue to relieve
3479                          * the "out of buffer" condition.
3480                          * Make sure there's space in the RXQ before posting.
3481                          */
3482                         if (lancer_chip(adapter)) {
3483                                 be_rx_cq_clean(rxo);
3484                                 if (atomic_read(&q->used) == 0)
3485                                         be_post_rx_frags(rxo, GFP_KERNEL,
3486                                                          MAX_RX_POST);
3487                         }
3488
3489                         be_cmd_rxq_destroy(adapter, q);
3490                         be_rx_cq_clean(rxo);
3491                         be_rxq_clean(rxo);
3492                 }
3493                 be_queue_free(adapter, q);
3494         }
3495
3496         if (rss->rss_flags) {
3497                 rss->rss_flags = RSS_ENABLE_NONE;
3498                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3499                                   128, rss->rss_hkey);
3500         }
3501 }
3502
3503 static void be_disable_if_filters(struct be_adapter *adapter)
3504 {
3505         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3506         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3507             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3508                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3509                 eth_zero_addr(adapter->dev_mac);
3510         }
3511
3512         be_clear_uc_list(adapter);
3513         be_clear_mc_list(adapter);
3514
3515         /* The IFACE flags are enabled in the open path and cleared
3516          * in the close path. When a VF gets detached from the host and
3517          * assigned to a VM the following happens:
3518          *      - VF's IFACE flags get cleared in the detach path
3519          *      - IFACE create is issued by the VF in the attach path
3520          * Due to a bug in the BE3/Skyhawk-R FW
3521          * (Lancer FW doesn't have the bug), the IFACE capability flags
3522          * specified along with the IFACE create cmd issued by a VF are not
3523          * honoured by FW.  As a consequence, if a *new* driver
3524          * (that enables/disables IFACE flags in open/close)
3525          * is loaded in the host and an *old* driver is used by a VM/VF,
3526          * the IFACE gets created *without* the needed flags.
3527          * To avoid this, disable RX-filter flags only for Lancer.
3528          */
3529         if (lancer_chip(adapter)) {
3530                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3531                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3532         }
3533 }
3534
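/* ndo_stop handler: flush pending adapter commands, disable interface
 * filters, NAPI and async MCC processing, drain outstanding TX completions,
 * destroy the RX queues, synchronize and clean the event queues and finally
 * unregister the IRQs.
 */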
3535 static int be_close(struct net_device *netdev)
3536 {
3537         struct be_adapter *adapter = netdev_priv(netdev);
3538         struct be_eq_obj *eqo;
3539         int i;
3540
3541         /* This protection is needed as be_close() may be called even when the
3542          * adapter is in a cleared state (after an EEH permanent failure)
3543          */
3544         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3545                 return 0;
3546
3547         /* Before attempting cleanup ensure all the pending cmds in the
3548          * config_wq have finished execution
3549          */
3550         flush_workqueue(be_wq);
3551
3552         be_disable_if_filters(adapter);
3553
3554         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3555                 for_all_evt_queues(adapter, eqo, i) {
3556                         napi_disable(&eqo->napi);
3557                 }
3558                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3559         }
3560
3561         be_async_mcc_disable(adapter);
3562
3563         /* Wait for all pending tx completions to arrive so that
3564          * all tx skbs are freed.
3565          */
3566         netif_tx_disable(netdev);
3567         be_tx_compl_clean(adapter);
3568
3569         be_rx_qs_destroy(adapter);
3570
3571         for_all_evt_queues(adapter, eqo, i) {
3572                 if (msix_enabled(adapter))
3573                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3574                 else
3575                         synchronize_irq(netdev->irq);
3576                 be_eq_clean(eqo);
3577         }
3578
3579         be_irq_unregister(adapter);
3580
3581         return 0;
3582 }
3583
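/* Create the RX rings: the default (non-RSS) RXQ if needed, then the RSS
 * rings. With multiple RX rings the RSS indirection table is filled so that
 * entry k points at RSS ring (k % num_rss_qs), a fresh hash key is
 * programmed, and TCP/IP (plus UDP on non-BEx) hashing is enabled. Finally
 * each ring is pre-filled with RX_Q_LEN - 1 buffers.
 */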
3584 static int be_rx_qs_create(struct be_adapter *adapter)
3585 {
3586         struct rss_info *rss = &adapter->rss_info;
3587         u8 rss_key[RSS_HASH_KEY_LEN];
3588         struct be_rx_obj *rxo;
3589         int rc, i, j;
3590
3591         for_all_rx_queues(adapter, rxo, i) {
3592                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3593                                     sizeof(struct be_eth_rx_d));
3594                 if (rc)
3595                         return rc;
3596         }
3597
3598         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3599                 rxo = default_rxo(adapter);
3600                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3601                                        rx_frag_size, adapter->if_handle,
3602                                        false, &rxo->rss_id);
3603                 if (rc)
3604                         return rc;
3605         }
3606
3607         for_all_rss_queues(adapter, rxo, i) {
3608                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3609                                        rx_frag_size, adapter->if_handle,
3610                                        true, &rxo->rss_id);
3611                 if (rc)
3612                         return rc;
3613         }
3614
3615         if (be_multi_rxq(adapter)) {
3616                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3617                         for_all_rss_queues(adapter, rxo, i) {
3618                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3619                                         break;
3620                                 rss->rsstable[j + i] = rxo->rss_id;
3621                                 rss->rss_queue[j + i] = i;
3622                         }
3623                 }
3624                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3625                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3626
3627                 if (!BEx_chip(adapter))
3628                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3629                                 RSS_ENABLE_UDP_IPV6;
3630
3631                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3632                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3633                                        RSS_INDIR_TABLE_LEN, rss_key);
3634                 if (rc) {
3635                         rss->rss_flags = RSS_ENABLE_NONE;
3636                         return rc;
3637                 }
3638
3639                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3640         } else {
3641                 /* Disable RSS, if only default RX Q is created */
3642                 /* Disable RSS if only the default RXQ is created */
3643         }
3644 
3646         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3647          * which is a queue empty condition
3648          */
3649         for_all_rx_queues(adapter, rxo, i)
3650                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3651
3652         return 0;
3653 }
3654
3655 static int be_enable_if_filters(struct be_adapter *adapter)
3656 {
3657         int status;
3658
3659         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3660         if (status)
3661                 return status;
3662
3663         /* This condition is usually true as ->dev_mac is zeroed.
3664          * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3665          * a subsequent be_dev_mac_add() can fail (after a fresh boot)
3666          */
3667         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3668                 int old_pmac_id = -1;
3669
3670                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3671                 if (!is_zero_ether_addr(adapter->dev_mac))
3672                         old_pmac_id = adapter->pmac_id[0];
3673
3674                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3675                 if (status)
3676                         return status;
3677
3678                 /* Delete the old programmed MAC as we successfully programmed
3679                  * a new MAC
3680                  */
3681                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3682                         be_dev_mac_del(adapter, old_pmac_id);
3683
3684                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3685         }
3686
3687         if (adapter->vlans_added)
3688                 be_vid_config(adapter);
3689
3690         __be_set_rx_mode(adapter);
3691
3692         return 0;
3693 }
3694
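/* ndo_open handler: create and fill the RX queues, enable interface
 * filters, register IRQs, arm all RX/TX completion queues and event queues,
 * enable NAPI and async MCC processing, report the current link state and
 * start the TX queues. On Skyhawk the stack is asked to replay known UDP
 * tunnel (VxLAN) ports. Any failure unwinds through be_close().
 */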
3695 static int be_open(struct net_device *netdev)
3696 {
3697         struct be_adapter *adapter = netdev_priv(netdev);
3698         struct be_eq_obj *eqo;
3699         struct be_rx_obj *rxo;
3700         struct be_tx_obj *txo;
3701         u8 link_status;
3702         int status, i;
3703
3704         status = be_rx_qs_create(adapter);
3705         if (status)
3706                 goto err;
3707
3708         status = be_enable_if_filters(adapter);
3709         if (status)
3710                 goto err;
3711
3712         status = be_irq_register(adapter);
3713         if (status)
3714                 goto err;
3715
3716         for_all_rx_queues(adapter, rxo, i)
3717                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3718
3719         for_all_tx_queues(adapter, txo, i)
3720                 be_cq_notify(adapter, txo->cq.id, true, 0);
3721
3722         be_async_mcc_enable(adapter);
3723
3724         for_all_evt_queues(adapter, eqo, i) {
3725                 napi_enable(&eqo->napi);
3726                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3727         }
3728         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3729
3730         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3731         if (!status)
3732                 be_link_status_update(adapter, link_status);
3733
3734         netif_tx_start_all_queues(netdev);
3735         if (skyhawk_chip(adapter))
3736                 udp_tunnel_get_rx_info(netdev);
3737
3738         return 0;
3739 err:
3740         be_close(adapter->netdev);
3741         return -EIO;
3742 }
3743
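/* Build the seed MAC for VF addresses: keep the OUI (first three bytes) of
 * the PF's current MAC and derive the low three bytes from a jhash of that
 * MAC. Illustrative example (hash value made up): if the PF MAC is
 * 00:90:fa:aa:bb:cc and the hash is 0x00123456, the seed becomes
 * 00:90:fa:12:34:56, which goes to VF0; subsequent VFs get 00:90:fa:12:34:57,
 * 00:90:fa:12:34:58, ... as the caller bumps the last byte per VF.
 */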
3744 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3745 {
3746         u32 addr;
3747
3748         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3749
3750         mac[5] = (u8)(addr & 0xFF);
3751         mac[4] = (u8)((addr >> 8) & 0xFF);
3752         mac[3] = (u8)((addr >> 16) & 0xFF);
3753         /* Use the OUI from the current MAC address */
3754         memcpy(mac, adapter->netdev->dev_addr, 3);
3755 }
3756
3757 /*
3758  * Generate a seed MAC address from the PF MAC Address using jhash.
3759  * MAC addresses for VFs are assigned incrementally starting from the seed.
3760  * These addresses are programmed in the ASIC by the PF and the VF driver
3761  * queries for the MAC address during its probe.
3762  */
3763 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3764 {
3765         u32 vf;
3766         int status = 0;
3767         u8 mac[ETH_ALEN];
3768         struct be_vf_cfg *vf_cfg;
3769
3770         be_vf_eth_addr_generate(adapter, mac);
3771
3772         for_all_vfs(adapter, vf_cfg, vf) {
3773                 if (BEx_chip(adapter))
3774                         status = be_cmd_pmac_add(adapter, mac,
3775                                                  vf_cfg->if_handle,
3776                                                  &vf_cfg->pmac_id, vf + 1);
3777                 else
3778                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3779                                                 vf + 1);
3780
3781                 if (status)
3782                         dev_err(&adapter->pdev->dev,
3783                                 "Mac address assignment failed for VF %d\n",
3784                                 vf);
3785                 else
3786                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3787
3788                 mac[5] += 1;
3789         }
3790         return status;
3791 }
3792
3793 static int be_vfs_mac_query(struct be_adapter *adapter)
3794 {
3795         int status, vf;
3796         u8 mac[ETH_ALEN];
3797         struct be_vf_cfg *vf_cfg;
3798
3799         for_all_vfs(adapter, vf_cfg, vf) {
3800                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3801                                                mac, vf_cfg->if_handle,
3802                                                false, vf+1);
3803                 if (status)
3804                         return status;
3805                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3806         }
3807         return 0;
3808 }
3809
3810 static void be_vf_clear(struct be_adapter *adapter)
3811 {
3812         struct be_vf_cfg *vf_cfg;
3813         u32 vf;
3814
3815         if (pci_vfs_assigned(adapter->pdev)) {
3816                 dev_warn(&adapter->pdev->dev,
3817                          "VFs are assigned to VMs: not disabling VFs\n");
3818                 goto done;
3819         }
3820
3821         pci_disable_sriov(adapter->pdev);
3822
3823         for_all_vfs(adapter, vf_cfg, vf) {
3824                 if (BEx_chip(adapter))
3825                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3826                                         vf_cfg->pmac_id, vf + 1);
3827                 else
3828                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3829                                        vf + 1);
3830
3831                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3832         }
3833
3834         if (BE3_chip(adapter))
3835                 be_cmd_set_hsw_config(adapter, 0, 0,
3836                                       adapter->if_handle,
3837                                       PORT_FWD_TYPE_PASSTHRU, 0);
3838 done:
3839         kfree(adapter->vf_cfg);
3840         adapter->num_vfs = 0;
3841         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3842 }
3843
3844 static void be_clear_queues(struct be_adapter *adapter)
3845 {
3846         be_mcc_queues_destroy(adapter);
3847         be_rx_cqs_destroy(adapter);
3848         be_tx_queues_destroy(adapter);
3849         be_evt_queues_destroy(adapter);
3850 }
3851
3852 static void be_cancel_worker(struct be_adapter *adapter)
3853 {
3854         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3855                 cancel_delayed_work_sync(&adapter->work);
3856                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3857         }
3858 }
3859
3860 static void be_cancel_err_detection(struct be_adapter *adapter)
3861 {
3862         struct be_error_recovery *err_rec = &adapter->error_recovery;
3863
3864         if (!be_err_recovery_workq)
3865                 return;
3866
3867         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3868                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3869                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3870         }
3871 }
3872
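/* Enable VxLAN offloads: convert the interface from normal to tunnel mode
 * and program the first port on the vxlan_port_list as the VxLAN UDP port,
 * then advertise checksum/TSO/UDP-tunnel GSO support for encapsulated
 * traffic via hw_enc_features.
 */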
3873 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3874 {
3875         struct net_device *netdev = adapter->netdev;
3876         struct device *dev = &adapter->pdev->dev;
3877         struct be_vxlan_port *vxlan_port;
3878         __be16 port;
3879         int status;
3880
3881         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3882                                       struct be_vxlan_port, list);
3883         port = vxlan_port->port;
3884
3885         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3886                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3887         if (status) {
3888                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3889                 return status;
3890         }
3891         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3892
3893         status = be_cmd_set_vxlan_port(adapter, port);
3894         if (status) {
3895                 dev_warn(dev, "Failed to add VxLAN port\n");
3896                 return status;
3897         }
3898         adapter->vxlan_port = port;
3899
3900         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3901                                    NETIF_F_TSO | NETIF_F_TSO6 |
3902                                    NETIF_F_GSO_UDP_TUNNEL;
3903
3904         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3905                  be16_to_cpu(port));
3906         return 0;
3907 }
3908
3909 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3910 {
3911         struct net_device *netdev = adapter->netdev;
3912
3913         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3914                 be_cmd_manage_iface(adapter, adapter->if_handle,
3915                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3916
3917         if (adapter->vxlan_port)
3918                 be_cmd_set_vxlan_port(adapter, 0);
3919
3920         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3921         adapter->vxlan_port = 0;
3922
3923         netdev->hw_enc_features = 0;
3924 }
3925
3926 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3927                                 struct be_resources *vft_res)
3928 {
3929         struct be_resources res = adapter->pool_res;
3930         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3931         struct be_resources res_mod = {0};
3932         u16 num_vf_qs = 1;
3933
3934         /* Distribute the queue resources among the PF and its VFs */
3935         if (num_vfs) {
3936                 /* Divide the rx queues evenly among the VFs and the PF, capped
3937                  * at VF-EQ-count. Any remainder queues belong to the PF.
3938                  */
3939                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3940                                 res.max_rss_qs / (num_vfs + 1));
3941
3942                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3943                  * RSS Tables per port. Provide RSS on VFs, only if number of
3944                  * VFs requested is less than its PF pool's RSS Tables limit.
3945                  */
3946                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3947                         num_vf_qs = 1;
3948         }
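        /* Illustrative example: with res.max_rss_qs = 32 in the pool and
         * num_vfs = 15, each function gets min(SH_VF_MAX_NIC_EQS, 32 / 16)
         * RX/RSS queues; and once the VF count reaches the PF pool's RSS
         * Tables limit, every VF falls back to a single queue pair.
         */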
3949
3950         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3951          * which are modifiable using SET_PROFILE_CONFIG cmd.
3952          */
3953         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3954                                   RESOURCE_MODIFIABLE, 0);
3955
3956         /* If RSS IFACE capability flags are modifiable for a VF, set the
3957          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3958          * more than 1 RSSQ is available for a VF.
3959          * Otherwise, provision only 1 queue pair for VF.
3960          */
3961         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3962                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3963                 if (num_vf_qs > 1) {
3964                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3965                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3966                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3967                 } else {
3968                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3969                                              BE_IF_FLAGS_DEFQ_RSS);
3970                 }
3971         } else {
3972                 num_vf_qs = 1;
3973         }
3974
3975         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3976                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3977                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3978         }
3979
3980         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3981         vft_res->max_rx_qs = num_vf_qs;
3982         vft_res->max_rss_qs = num_vf_qs;
3983         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3984         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3985
3986         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3987          * among the PF and its VFs, if the fields are changeable
3988          */
3989         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3990                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3991
3992         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3993                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3994
3995         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3996                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3997
3998         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3999                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4000 }
4001
4002 static void be_if_destroy(struct be_adapter *adapter)
4003 {
4004         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4005
4006         kfree(adapter->pmac_id);
4007         adapter->pmac_id = NULL;
4008
4009         kfree(adapter->mc_list);
4010         adapter->mc_list = NULL;
4011
4012         kfree(adapter->uc_list);
4013         adapter->uc_list = NULL;
4014 }
4015
4016 static int be_clear(struct be_adapter *adapter)
4017 {
4018         struct pci_dev *pdev = adapter->pdev;
4019         struct  be_resources vft_res = {0};
4020
4021         be_cancel_worker(adapter);
4022
4023         flush_workqueue(be_wq);
4024
4025         if (sriov_enabled(adapter))
4026                 be_vf_clear(adapter);
4027
4028         /* Re-configure FW to distribute resources evenly across max-supported
4029          * number of VFs, only when VFs are not already enabled.
4030          */
4031         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4032             !pci_vfs_assigned(pdev)) {
4033                 be_calculate_vf_res(adapter,
4034                                     pci_sriov_get_totalvfs(pdev),
4035                                     &vft_res);
4036                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4037                                         pci_sriov_get_totalvfs(pdev),
4038                                         &vft_res);
4039         }
4040
4041         be_disable_vxlan_offloads(adapter);
4042
4043         be_if_destroy(adapter);
4044
4045         be_clear_queues(adapter);
4046
4047         be_msix_disable(adapter);
4048         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4049         return 0;
4050 }
4051
4052 static int be_vfs_if_create(struct be_adapter *adapter)
4053 {
4054         struct be_resources res = {0};
4055         u32 cap_flags, en_flags, vf;
4056         struct be_vf_cfg *vf_cfg;
4057         int status;
4058
4059         /* If a FW profile exists, then cap_flags are updated */
4060         cap_flags = BE_VF_IF_EN_FLAGS;
4061
4062         for_all_vfs(adapter, vf_cfg, vf) {
4063                 if (!BE3_chip(adapter)) {
4064                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4065                                                            ACTIVE_PROFILE_TYPE,
4066                                                            RESOURCE_LIMITS,
4067                                                            vf + 1);
4068                         if (!status) {
4069                                 cap_flags = res.if_cap_flags;
4070                                 /* Prevent VFs from enabling VLAN promiscuous
4071                                  * mode
4072                                  */
4073                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4074                         }
4075                 }
4076
4077                 /* PF should enable IF flags during proxy if_create call */
4078                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4079                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4080                                           &vf_cfg->if_handle, vf + 1);
4081                 if (status)
4082                         return status;
4083         }
4084
4085         return 0;
4086 }
4087
4088 static int be_vf_setup_init(struct be_adapter *adapter)
4089 {
4090         struct be_vf_cfg *vf_cfg;
4091         int vf;
4092
4093         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4094                                   GFP_KERNEL);
4095         if (!adapter->vf_cfg)
4096                 return -ENOMEM;
4097
4098         for_all_vfs(adapter, vf_cfg, vf) {
4099                 vf_cfg->if_handle = -1;
4100                 vf_cfg->pmac_id = -1;
4101         }
4102         return 0;
4103 }
4104
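/* Bring up SR-IOV VFs: if VFs were already enabled (e.g. by a previous
 * driver), re-query their interface handles and MAC addresses; otherwise
 * create a proxy IFACE per VF and program the generated MAC addresses. Each
 * VF is then granted the FILTMGMT privilege where possible and has its
 * spoof-check state read; on a fresh setup VFs also get full bandwidth, are
 * enabled and have their link state set to auto, after which SR-IOV is
 * enabled in PCI. On BE3, VEB forwarding is turned on.
 */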
4105 static int be_vf_setup(struct be_adapter *adapter)
4106 {
4107         struct device *dev = &adapter->pdev->dev;
4108         struct be_vf_cfg *vf_cfg;
4109         int status, old_vfs, vf;
4110         bool spoofchk;
4111
4112         old_vfs = pci_num_vf(adapter->pdev);
4113
4114         status = be_vf_setup_init(adapter);
4115         if (status)
4116                 goto err;
4117
4118         if (old_vfs) {
4119                 for_all_vfs(adapter, vf_cfg, vf) {
4120                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4121                         if (status)
4122                                 goto err;
4123                 }
4124
4125                 status = be_vfs_mac_query(adapter);
4126                 if (status)
4127                         goto err;
4128         } else {
4129                 status = be_vfs_if_create(adapter);
4130                 if (status)
4131                         goto err;
4132
4133                 status = be_vf_eth_addr_config(adapter);
4134                 if (status)
4135                         goto err;
4136         }
4137
4138         for_all_vfs(adapter, vf_cfg, vf) {
4139                 /* Allow VFs to program MAC/VLAN filters */
4140                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4141                                                   vf + 1);
4142                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4143                         status = be_cmd_set_fn_privileges(adapter,
4144                                                           vf_cfg->privileges |
4145                                                           BE_PRIV_FILTMGMT,
4146                                                           vf + 1);
4147                         if (!status) {
4148                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4149                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4150                                          vf);
4151                         }
4152                 }
4153
4154                 /* Allow full available bandwidth */
4155                 if (!old_vfs)
4156                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4157
4158                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4159                                                vf_cfg->if_handle, NULL,
4160                                                &spoofchk);
4161                 if (!status)
4162                         vf_cfg->spoofchk = spoofchk;
4163
4164                 if (!old_vfs) {
4165                         be_cmd_enable_vf(adapter, vf + 1);
4166                         be_cmd_set_logical_link_config(adapter,
4167                                                        IFLA_VF_LINK_STATE_AUTO,
4168                                                        vf+1);
4169                 }
4170         }
4171
4172         if (!old_vfs) {
4173                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4174                 if (status) {
4175                         dev_err(dev, "SRIOV enable failed\n");
4176                         adapter->num_vfs = 0;
4177                         goto err;
4178                 }
4179         }
4180
4181         if (BE3_chip(adapter)) {
4182                 /* On BE3, enable VEB only when SRIOV is enabled */
4183                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4184                                                adapter->if_handle,
4185                                                PORT_FWD_TYPE_VEB, 0);
4186                 if (status)
4187                         goto err;
4188         }
4189
4190         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4191         return 0;
4192 err:
4193         dev_err(dev, "VF setup failed\n");
4194         be_vf_clear(adapter);
4195         return status;
4196 }
4197
4198 /* Convert function_mode bits on BE3 to SH mc_type enums */
4199
4200 static u8 be_convert_mc_type(u32 function_mode)
4201 {
4202         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4203                 return vNIC1;
4204         else if (function_mode & QNQ_MODE)
4205                 return FLEX10;
4206         else if (function_mode & VNIC_MODE)
4207                 return vNIC2;
4208         else if (function_mode & UMC_ENABLED)
4209                 return UMC;
4210         else
4211                 return MC_NONE;
4212 }
4213
4214 /* On BE2/BE3, FW does not report the supported limits */
4215 static void BEx_get_resources(struct be_adapter *adapter,
4216                               struct be_resources *res)
4217 {
4218         bool use_sriov = adapter->num_vfs ? 1 : 0;
4219
4220         if (be_physfn(adapter))
4221                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4222         else
4223                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4224
4225         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4226
4227         if (be_is_mc(adapter)) {
4228                 /* Assuming that there are 4 channels per port,
4229                  * when multi-channel is enabled
4230                  */
4231                 if (be_is_qnq_mode(adapter))
4232                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4233                 else
4234                         /* In a non-qnq multichannel mode, the pvid
4235                          * takes up one vlan entry
4236                          */
4237                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4238         } else {
4239                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4240         }
4241
4242         res->max_mcast_mac = BE_MAX_MC;
4243
4244         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4245          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4246          *    *only* if it is RSS-capable.
4247          */
4248         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4249             be_virtfn(adapter) ||
4250             (be_is_mc(adapter) &&
4251              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4252                 res->max_tx_qs = 1;
4253         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4254                 struct be_resources super_nic_res = {0};
4255
4256                 /* On a SuperNIC profile, the driver needs to use the
4257                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4258                  */
4259                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4260                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4261                                           0);
4262                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4263                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4264         } else {
4265                 res->max_tx_qs = BE3_MAX_TX_QS;
4266         }
4267
4268         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4269             !use_sriov && be_physfn(adapter))
4270                 res->max_rss_qs = (adapter->be3_native) ?
4271                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4272         res->max_rx_qs = res->max_rss_qs + 1;
4273
4274         if (be_physfn(adapter))
4275                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4276                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4277         else
4278                 res->max_evt_qs = 1;
4279
4280         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4281         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4282         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4283                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4284 }
4285
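/* Reset per-adapter software state to defaults before be_setup()
 * (re)configures the function. PFs start with MAX_PRIVILEGES and VFs with
 * MIN_PRIVILEGES; the actual privileges are queried later in be_setup().
 */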
4286 static void be_setup_init(struct be_adapter *adapter)
4287 {
4288         adapter->vlan_prio_bmap = 0xff;
4289         adapter->phy.link_speed = -1;
4290         adapter->if_handle = -1;
4291         adapter->be3_native = false;
4292         adapter->if_flags = 0;
4293         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4294         if (be_physfn(adapter))
4295                 adapter->cmd_privileges = MAX_PRIVILEGES;
4296         else
4297                 adapter->cmd_privileges = MIN_PRIVILEGES;
4298 }
4299
4300 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4301  * However, this HW limitation is not exposed to the host via any SLI cmd.
4302  * As a result, in the case of SRIOV and, in particular, multi-partition
4303  * configs, the driver needs to calculate a proportional share of RSS
4304  * Tables per PF-pool for distribution between the VFs. This self-imposed
4305  * limit determines the number of VFs for which RSS can be enabled.
4306  */
4307 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4308 {
4309         struct be_port_resources port_res = {0};
4310         u8 rss_tables_on_port;
4311         u16 max_vfs = be_max_vfs(adapter);
4312
4313         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4314                                   RESOURCE_LIMITS, 0);
4315
4316         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4317
4318         /* Each PF Pool's RSS Tables limit =
4319          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4320          */
4321         adapter->pool_res.max_rss_tables =
4322                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4323 }
4324
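/* Query the SR-IOV (PF-pool) resource limits from FW and cache them in
 * adapter->pool_res. Old BE3 FW may not report max_vfs, in which case the
 * TotalVFs value from PCI config space is used; likewise, if VFs were left
 * enabled by a previous driver load, their count is carried over.
 */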
4325 static int be_get_sriov_config(struct be_adapter *adapter)
4326 {
4327         struct be_resources res = {0};
4328         int max_vfs, old_vfs;
4329
4330         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4331                                   RESOURCE_LIMITS, 0);
4332
4333         /* Some old versions of BE3 FW don't report max_vfs value */
4334         if (BE3_chip(adapter) && !res.max_vfs) {
4335                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4336                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4337         }
4338
4339         adapter->pool_res = res;
4340
4341         /* If during previous unload of the driver, the VFs were not disabled,
4342          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4343          * Instead use the TotalVFs value stored in the pci-dev struct.
4344          */
4345         old_vfs = pci_num_vf(adapter->pdev);
4346         if (old_vfs) {
4347                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4348                          old_vfs);
4349
4350                 adapter->pool_res.max_vfs =
4351                         pci_sriov_get_totalvfs(adapter->pdev);
4352                 adapter->num_vfs = old_vfs;
4353         }
4354
4355         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4356                 be_calculate_pf_pool_rss_tables(adapter);
4357                 dev_info(&adapter->pdev->dev,
4358                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4359                          be_max_pf_pool_rss_tables(adapter));
4360         }
4361         return 0;
4362 }
4363
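/* Read the SR-IOV configuration, advertise the supported VF count to the
 * PCI layer and, on Skyhawk with no pre-existing VFs, ask FW to distribute
 * the PF-pool resources between the PF and its future VFs.
 */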
4364 static void be_alloc_sriov_res(struct be_adapter *adapter)
4365 {
4366         int old_vfs = pci_num_vf(adapter->pdev);
4367         struct be_resources vft_res = {0};
4368         int status;
4369
4370         be_get_sriov_config(adapter);
4371
4372         if (!old_vfs)
4373                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4374
4375         /* When the HW is in SRIOV capable configuration, the PF-pool
4376          * resources are given to PF during driver load, if there are no
4377          * old VFs. This facility is not available in BE3 FW.
4378          * Also, this is done by FW in Lancer chip.
4379          */
4380         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4381                 be_calculate_vf_res(adapter, 0, &vft_res);
4382                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4383                                                  &vft_res);
4384                 if (status)
4385                         dev_err(&adapter->pdev->dev,
4386                                 "Failed to optimize SRIOV resources\n");
4387         }
4388 }
4389
4390 static int be_get_resources(struct be_adapter *adapter)
4391 {
4392         struct device *dev = &adapter->pdev->dev;
4393         struct be_resources res = {0};
4394         int status;
4395
4396         /* For Lancer, SH etc. read per-function resource limits from FW.
4397          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4398          * GET_PROFILE_CONFIG returns PCI-E related and PF-pool limits.
4399          */
4400         if (BEx_chip(adapter)) {
4401                 BEx_get_resources(adapter, &res);
4402         } else {
4403                 status = be_cmd_get_func_config(adapter, &res);
4404                 if (status)
4405                         return status;
4406
4407                 /* If a default RXQ must be created, we'll use up one RSSQ */
4408                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4409                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4410                         res.max_rss_qs -= 1;
4411         }
4412
4413         /* If RoCE is supported, stash away half the EQs for RoCE */
4414         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4415                                 res.max_evt_qs / 2 : res.max_evt_qs;
4416         adapter->res = res;
4417
4418         /* If FW supports RSS default queue, then skip creating non-RSS
4419          * queue for non-IP traffic.
4420          */
4421         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4422                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4423
4424         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4425                  be_max_txqs(adapter), be_max_rxqs(adapter),
4426                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4427                  be_max_vfs(adapter));
4428         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4429                  be_max_uc(adapter), be_max_mc(adapter),
4430                  be_max_vlans(adapter));
4431
4432         /* Ensure RX and TX queues are created in pairs at init time */
4433         adapter->cfg_num_rx_irqs =
4434                                 min_t(u16, netif_get_num_default_rss_queues(),
4435                                       be_max_qp_irqs(adapter));
4436         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4437         return 0;
4438 }
4439
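/* Query controller attributes, FW config, FAT dump length, WoL capability,
 * port name and the active profile from the adapter.
 */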
4440 static int be_get_config(struct be_adapter *adapter)
4441 {
4442         int status, level;
4443         u16 profile_id;
4444
4445         status = be_cmd_get_cntl_attributes(adapter);
4446         if (status)
4447                 return status;
4448
4449         status = be_cmd_query_fw_cfg(adapter);
4450         if (status)
4451                 return status;
4452
4453         if (!lancer_chip(adapter) && be_physfn(adapter))
4454                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4455
4456         if (BEx_chip(adapter)) {
4457                 level = be_cmd_get_fw_log_level(adapter);
4458                 adapter->msg_enable =
4459                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4460         }
4461
4462         be_cmd_get_acpi_wol_cap(adapter);
4463         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4464         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4465
4466         be_cmd_query_port_name(adapter);
4467
4468         if (be_physfn(adapter)) {
4469                 status = be_cmd_get_active_profile(adapter, &profile_id);
4470                 if (!status)
4471                         dev_info(&adapter->pdev->dev,
4472                                  "Using profile 0x%x\n", profile_id);
4473         }
4474
4475         return 0;
4476 }
4477
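/* If no MAC address has been assigned yet, read the permanent MAC from FW
 * and use it as both the current and permanent netdev address.
 */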
4478 static int be_mac_setup(struct be_adapter *adapter)
4479 {
4480         u8 mac[ETH_ALEN];
4481         int status;
4482
4483         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4484                 status = be_cmd_get_perm_mac(adapter, mac);
4485                 if (status)
4486                         return status;
4487
4488                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4489                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4490
4491                 /* Initial MAC for BE3 VFs is already programmed by PF */
4492                 if (BEx_chip(adapter) && be_virtfn(adapter))
4493                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4494         }
4495
4496         return 0;
4497 }
4498
4499 static void be_schedule_worker(struct be_adapter *adapter)
4500 {
4501         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4502         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4503 }
4504
4505 static void be_destroy_err_recovery_workq(void)
4506 {
4507         if (!be_err_recovery_workq)
4508                 return;
4509
4510         flush_workqueue(be_err_recovery_workq);
4511         destroy_workqueue(be_err_recovery_workq);
4512         be_err_recovery_workq = NULL;
4513 }
4514
4515 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4516 {
4517         struct be_error_recovery *err_rec = &adapter->error_recovery;
4518
4519         if (!be_err_recovery_workq)
4520                 return;
4521
4522         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4523                            msecs_to_jiffies(delay));
4524         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4525 }
4526
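/* Create the event queues, TX queues, RX CQs and MCC queues, then publish
 * the real RX/TX queue counts to the network stack.
 */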
4527 static int be_setup_queues(struct be_adapter *adapter)
4528 {
4529         struct net_device *netdev = adapter->netdev;
4530         int status;
4531
4532         status = be_evt_queues_create(adapter);
4533         if (status)
4534                 goto err;
4535
4536         status = be_tx_qs_create(adapter);
4537         if (status)
4538                 goto err;
4539
4540         status = be_rx_cqs_create(adapter);
4541         if (status)
4542                 goto err;
4543
4544         status = be_mcc_queues_create(adapter);
4545         if (status)
4546                 goto err;
4547
4548         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4549         if (status)
4550                 goto err;
4551
4552         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4553         if (status)
4554                 goto err;
4555
4556         return 0;
4557 err:
4558         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4559         return status;
4560 }
4561
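/* Allocate the MAC/multicast filter tables and create the interface object
 * in FW with the capability flags supported by this function.
 */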
4562 static int be_if_create(struct be_adapter *adapter)
4563 {
4564         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4565         u32 cap_flags = be_if_cap_flags(adapter);
4566         int status;
4567
4568         /* alloc required memory for other filtering fields */
4569         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4570                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4571         if (!adapter->pmac_id)
4572                 return -ENOMEM;
4573
4574         adapter->mc_list = kcalloc(be_max_mc(adapter),
4575                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4576         if (!adapter->mc_list)
4577                 return -ENOMEM;
4578
4579         adapter->uc_list = kcalloc(be_max_uc(adapter),
4580                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4581         if (!adapter->uc_list)
4582                 return -ENOMEM;
4583
4584         if (adapter->cfg_num_rx_irqs == 1)
4585                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4586
4587         en_flags &= cap_flags;
4588         /* will enable all the needed filter flags in be_open() */
4589         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4590                                   &adapter->if_handle, 0);
4591
4592         if (status)
4593                 return status;
4594
4595         return 0;
4596 }
4597
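/* Destroy and re-create the interface and all queues, e.g. after a runtime
 * change in the queue configuration. The netdev is closed and re-opened
 * around the operation if it was running.
 */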
4598 int be_update_queues(struct be_adapter *adapter)
4599 {
4600         struct net_device *netdev = adapter->netdev;
4601         int status;
4602
4603         if (netif_running(netdev)) {
4604                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4605                 netif_carrier_off(netdev);
4606
4607                 be_close(netdev);
4608         }
4609
4610         be_cancel_worker(adapter);
4611
4612         /* If any vectors have been shared with RoCE, we cannot re-program
4613          * the MSIx table.
4614          */
4615         if (!adapter->num_msix_roce_vec)
4616                 be_msix_disable(adapter);
4617
4618         be_clear_queues(adapter);
4619         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4620         if (status)
4621                 return status;
4622
4623         if (!msix_enabled(adapter)) {
4624                 status = be_msix_enable(adapter);
4625                 if (status)
4626                         return status;
4627         }
4628
4629         status = be_if_create(adapter);
4630         if (status)
4631                 return status;
4632
4633         status = be_setup_queues(adapter);
4634         if (status)
4635                 return status;
4636
4637         be_schedule_worker(adapter);
4638
4639         /*
4640          * The IF was destroyed and re-created. We need to clear
4641          * all promiscuous flags valid for the destroyed IF.
4642          * Without this promisc mode is not restored during
4643          * be_open() because the driver thinks that it is
4644          * already enabled in HW.
4645          */
4646         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4647
4648         if (netif_running(netdev))
4649                 status = be_open(netdev);
4650
4651         return status;
4652 }
4653
4654 static inline int fw_major_num(const char *fw_ver)
4655 {
4656         int fw_major = 0, i;
4657
4658         i = sscanf(fw_ver, "%d.", &fw_major);
4659         if (i != 1)
4660                 return 0;
4661
4662         return fw_major;
4663 }
4664
4665 /* If it is error recovery, FLR the PF.
4666  * Otherwise, if any VFs are already enabled, don't FLR the PF.
4667  */
4668 static bool be_reset_required(struct be_adapter *adapter)
4669 {
4670         if (be_error_recovering(adapter))
4671                 return true;
4672         else
4673                 return pci_num_vf(adapter->pdev) == 0;
4674 }
4675
4676 /* Wait for the FW to be ready and perform the required initialization */
4677 static int be_func_init(struct be_adapter *adapter)
4678 {
4679         int status;
4680
4681         status = be_fw_wait_ready(adapter);
4682         if (status)
4683                 return status;
4684
4685         /* FW is now ready; clear errors to allow cmds/doorbell */
4686         be_clear_error(adapter, BE_CLEAR_ALL);
4687
4688         if (be_reset_required(adapter)) {
4689                 status = be_cmd_reset_function(adapter);
4690                 if (status)
4691                         return status;
4692
4693                 /* Wait for interrupts to quiesce after an FLR */
4694                 msleep(100);
4695         }
4696
4697         /* Tell FW we're ready to fire cmds */
4698         status = be_cmd_fw_init(adapter);
4699         if (status)
4700                 return status;
4701
4702         /* Allow interrupts for other ULPs running on NIC function */
4703         be_intr_set(adapter, true);
4704
4705         return 0;
4706 }
4707
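/* Main initialization path: wait for FW readiness, query resources and
 * configuration, create the interface and queues, program the MAC and flow
 * control settings, and set up VFs when requested.
 */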
4708 static int be_setup(struct be_adapter *adapter)
4709 {
4710         struct device *dev = &adapter->pdev->dev;
4711         int status;
4712
4713         status = be_func_init(adapter);
4714         if (status)
4715                 return status;
4716
4717         be_setup_init(adapter);
4718
4719         if (!lancer_chip(adapter))
4720                 be_cmd_req_native_mode(adapter);
4721
4722         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4723          * for issuing profile-related cmds
4724          */
4725         if (!BEx_chip(adapter)) {
4726                 status = be_cmd_get_func_config(adapter, NULL);
4727                 if (status)
4728                         return status;
4729         }
4730
4731         status = be_get_config(adapter);
4732         if (status)
4733                 goto err;
4734
4735         if (!BE2_chip(adapter) && be_physfn(adapter))
4736                 be_alloc_sriov_res(adapter);
4737
4738         status = be_get_resources(adapter);
4739         if (status)
4740                 goto err;
4741
4742         status = be_msix_enable(adapter);
4743         if (status)
4744                 goto err;
4745
4746         /* will enable all the needed filter flags in be_open() */
4747         status = be_if_create(adapter);
4748         if (status)
4749                 goto err;
4750
4751         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4752         rtnl_lock();
4753         status = be_setup_queues(adapter);
4754         rtnl_unlock();
4755         if (status)
4756                 goto err;
4757
4758         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4759
4760         status = be_mac_setup(adapter);
4761         if (status)
4762                 goto err;
4763
4764         be_cmd_get_fw_ver(adapter);
4765         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4766
4767         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4768                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4769                         adapter->fw_ver);
4770                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4771         }
4772
4773         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4774                                          adapter->rx_fc);
4775         if (status)
4776                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4777                                         &adapter->rx_fc);
4778
4779         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4780                  adapter->tx_fc, adapter->rx_fc);
4781
4782         if (be_physfn(adapter))
4783                 be_cmd_set_logical_link_config(adapter,
4784                                                IFLA_VF_LINK_STATE_AUTO, 0);
4785
4786         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4787          * confusing any Linux bridge or OVS it might be connected to.
4788          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4789          * when SRIOV is not enabled.
4790          */
4791         if (BE3_chip(adapter))
4792                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4793                                       PORT_FWD_TYPE_PASSTHRU, 0);
4794
4795         if (adapter->num_vfs)
4796                 be_vf_setup(adapter);
4797
4798         status = be_cmd_get_phy_info(adapter);
4799         if (!status && be_pause_supported(adapter))
4800                 adapter->phy.fc_autoneg = 1;
4801
4802         if (be_physfn(adapter) && !lancer_chip(adapter))
4803                 be_cmd_set_features(adapter);
4804
4805         be_schedule_worker(adapter);
4806         adapter->flags |= BE_FLAGS_SETUP_DONE;
4807         return 0;
4808 err:
4809         be_clear(adapter);
4810         return status;
4811 }
4812
4813 #ifdef CONFIG_NET_POLL_CONTROLLER
4814 static void be_netpoll(struct net_device *netdev)
4815 {
4816         struct be_adapter *adapter = netdev_priv(netdev);
4817         struct be_eq_obj *eqo;
4818         int i;
4819
4820         for_all_evt_queues(adapter, eqo, i) {
4821                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4822                 napi_schedule(&eqo->napi);
4823         }
4824 }
4825 #endif
4826
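/* Download a new firmware image to the adapter; refused when the interface
 * is down. Lancer uses a chip-specific download path.
 */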
4827 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4828 {
4829         const struct firmware *fw;
4830         int status;
4831
4832         if (!netif_running(adapter->netdev)) {
4833                 dev_err(&adapter->pdev->dev,
4834                         "Firmware load not allowed (interface is down)\n");
4835                 return -ENETDOWN;
4836         }
4837
4838         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4839         if (status)
4840                 goto fw_exit;
4841
4842         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4843
4844         if (lancer_chip(adapter))
4845                 status = lancer_fw_download(adapter, fw);
4846         else
4847                 status = be_fw_download(adapter, fw);
4848
4849         if (!status)
4850                 be_cmd_get_fw_ver(adapter);
4851
4852 fw_exit:
4853         release_firmware(fw);
4854         return status;
4855 }
4856
4857 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4858                                  u16 flags)
4859 {
4860         struct be_adapter *adapter = netdev_priv(dev);
4861         struct nlattr *attr, *br_spec;
4862         int rem;
4863         int status = 0;
4864         u16 mode = 0;
4865
4866         if (!sriov_enabled(adapter))
4867                 return -EOPNOTSUPP;
4868
4869         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4870         if (!br_spec)
4871                 return -EINVAL;
4872
4873         nla_for_each_nested(attr, br_spec, rem) {
4874                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4875                         continue;
4876
4877                 if (nla_len(attr) < sizeof(mode))
4878                         return -EINVAL;
4879
4880                 mode = nla_get_u16(attr);
4881                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4882                         return -EOPNOTSUPP;
4883
4884                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4885                         return -EINVAL;
4886
4887                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4888                                                adapter->if_handle,
4889                                                mode == BRIDGE_MODE_VEPA ?
4890                                                PORT_FWD_TYPE_VEPA :
4891                                                PORT_FWD_TYPE_VEB, 0);
4892                 if (status)
4893                         goto err;
4894
4895                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4896                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4897
4898                 return status;
4899         }
4900 err:
4901         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4902                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4903
4904         return status;
4905 }
4906
4907 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4908                                  struct net_device *dev, u32 filter_mask,
4909                                  int nlflags)
4910 {
4911         struct be_adapter *adapter = netdev_priv(dev);
4912         int status = 0;
4913         u8 hsw_mode;
4914
4915         /* BE and Lancer chips support VEB mode only */
4916         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4917                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4918                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4919                         return 0;
4920                 hsw_mode = PORT_FWD_TYPE_VEB;
4921         } else {
4922                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4923                                                adapter->if_handle, &hsw_mode,
4924                                                NULL);
4925                 if (status)
4926                         return 0;
4927
4928                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4929                         return 0;
4930         }
4931
4932         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4933                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4934                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4935                                        0, 0, nlflags, filter_mask, NULL);
4936 }
4937
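/* Allocate (GFP_ATOMIC) a be_cmd_work item and bind it to the given work
 * function; callers queue it for deferred cmd processing.
 */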
4938 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4939                                          void (*func)(struct work_struct *))
4940 {
4941         struct be_cmd_work *work;
4942
4943         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4944         if (!work) {
4945                 dev_err(&adapter->pdev->dev,
4946                         "be_work memory allocation failed\n");
4947                 return NULL;
4948         }
4949
4950         INIT_WORK(&work->work, func);
4951         work->adapter = adapter;
4952         return work;
4953 }
4954
4955 /* VxLAN offload Notes:
4956  *
4957  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4958  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4959  * is expected to work across all types of IP tunnels once exported. Skyhawk
4960  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4961  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4962  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4963  * those other tunnels are unexported on the fly through ndo_features_check().
4964  *
4965  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4966  * adds more than one port, disable offloads and re-enable them again when
4967  * there's only one port left. We maintain a list of ports for this purpose.
4968  */
4969 static void be_work_add_vxlan_port(struct work_struct *work)
4970 {
4971         struct be_cmd_work *cmd_work =
4972                                 container_of(work, struct be_cmd_work, work);
4973         struct be_adapter *adapter = cmd_work->adapter;
4974         struct device *dev = &adapter->pdev->dev;
4975         __be16 port = cmd_work->info.vxlan_port;
4976         struct be_vxlan_port *vxlan_port;
4977         int status;
4978
4979         /* Bump up the alias count if it is an existing port */
4980         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4981                 if (vxlan_port->port == port) {
4982                         vxlan_port->port_aliases++;
4983                         goto done;
4984                 }
4985         }
4986
4987         /* Add a new port to our list. We don't need a lock here since port
4988          * add/delete are done only in the context of a single-threaded work
4989          * queue (be_wq).
4990          */
4991         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
4992         if (!vxlan_port)
4993                 goto done;
4994
4995         vxlan_port->port = port;
4996         INIT_LIST_HEAD(&vxlan_port->list);
4997         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
4998         adapter->vxlan_port_count++;
4999
5000         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5001                 dev_info(dev,
5002                          "Only one UDP port supported for VxLAN offloads\n");
5003                 dev_info(dev, "Disabling VxLAN offloads\n");
5004                 goto err;
5005         }
5006
5007         if (adapter->vxlan_port_count > 1)
5008                 goto done;
5009
5010         status = be_enable_vxlan_offloads(adapter);
5011         if (!status)
5012                 goto done;
5013
5014 err:
5015         be_disable_vxlan_offloads(adapter);
5016 done:
5017         kfree(cmd_work);
5018         return;
5019 }
5020
5021 static void be_work_del_vxlan_port(struct work_struct *work)
5022 {
5023         struct be_cmd_work *cmd_work =
5024                                 container_of(work, struct be_cmd_work, work);
5025         struct be_adapter *adapter = cmd_work->adapter;
5026         __be16 port = cmd_work->info.vxlan_port;
5027         struct be_vxlan_port *vxlan_port;
5028
5029         /* Nothing to be done if a port alias is being deleted */
5030         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5031                 if (vxlan_port->port == port) {
5032                         if (vxlan_port->port_aliases) {
5033                                 vxlan_port->port_aliases--;
5034                                 goto done;
5035                         }
5036                         break;
5037                 }
5038         }
5039
5040         /* No port aliases left; delete the port from the list */
5041         list_del(&vxlan_port->list);
5042         adapter->vxlan_port_count--;
5043
5044         /* Disable VxLAN offload if this is the offloaded port */
5045         if (adapter->vxlan_port == vxlan_port->port) {
5046                 WARN_ON(adapter->vxlan_port_count);
5047                 be_disable_vxlan_offloads(adapter);
5048                 dev_info(&adapter->pdev->dev,
5049                          "Disabled VxLAN offloads for UDP port %d\n",
5050                          be16_to_cpu(port));
5051                 goto out;
5052         }
5053
5054         /* If only 1 port is left, re-enable VxLAN offload */
5055         if (adapter->vxlan_port_count == 1)
5056                 be_enable_vxlan_offloads(adapter);
5057
5058 out:
5059         kfree(vxlan_port);
5060 done:
5061         kfree(cmd_work);
5062 }
5063
5064 static void be_cfg_vxlan_port(struct net_device *netdev,
5065                               struct udp_tunnel_info *ti,
5066                               void (*func)(struct work_struct *))
5067 {
5068         struct be_adapter *adapter = netdev_priv(netdev);
5069         struct be_cmd_work *cmd_work;
5070
5071         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5072                 return;
5073
5074         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5075                 return;
5076
5077         cmd_work = be_alloc_work(adapter, func);
5078         if (cmd_work) {
5079                 cmd_work->info.vxlan_port = ti->port;
5080                 queue_work(be_wq, &cmd_work->work);
5081         }
5082 }
5083
5084 static void be_del_vxlan_port(struct net_device *netdev,
5085                               struct udp_tunnel_info *ti)
5086 {
5087         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5088 }
5089
5090 static void be_add_vxlan_port(struct net_device *netdev,
5091                               struct udp_tunnel_info *ti)
5092 {
5093         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5094 }
5095
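/* Per-skb offload restrictions: drop TSO6/GSO where the HW cannot handle
 * the packet, and limit tunnel offloads to the single configured VxLAN port.
 */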
5096 static netdev_features_t be_features_check(struct sk_buff *skb,
5097                                            struct net_device *dev,
5098                                            netdev_features_t features)
5099 {
5100         struct be_adapter *adapter = netdev_priv(dev);
5101         u8 l4_hdr = 0;
5102
5103         if (skb_is_gso(skb)) {
5104                 /* IPv6 TSO requests with extension hdrs are a problem
5105                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5106                  */
5107                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5108                         features &= ~NETIF_F_TSO6;
5109
5110                 /* Lancer cannot handle packets with MSS less than 256.
5111                  * Disable GSO support in such cases.
5112                  */
5113                 if (lancer_chip(adapter) && skb_shinfo(skb)->gso_size < 256)
5114                         features &= ~NETIF_F_GSO_MASK;
5115         }
5116
5117         /* The code below restricts offload features for some tunneled and
5118          * Q-in-Q packets.
5119          * Offload features for normal (non-tunnel) packets are unchanged.
5120          */
5121         features = vlan_features_check(skb, features);
5122         if (!skb->encapsulation ||
5123             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5124                 return features;
5125
5126         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5127          * should disable tunnel offload features if it's not a VxLAN packet,
5128          * as tunnel offloads have been enabled only for VxLAN. This is done to
5129          * allow other tunneled traffic like GRE to work fine while VxLAN
5130          * offloads are configured in Skyhawk-R.
5131          */
5132         switch (vlan_get_protocol(skb)) {
5133         case htons(ETH_P_IP):
5134                 l4_hdr = ip_hdr(skb)->protocol;
5135                 break;
5136         case htons(ETH_P_IPV6):
5137                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5138                 break;
5139         default:
5140                 return features;
5141         }
5142
5143         if (l4_hdr != IPPROTO_UDP ||
5144             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5145             skb->inner_protocol != htons(ETH_P_TEB) ||
5146             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5147                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5148             !adapter->vxlan_port ||
5149             udp_hdr(skb)->dest != adapter->vxlan_port)
5150                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5151
5152         return features;
5153 }
5154
5155 static int be_get_phys_port_id(struct net_device *dev,
5156                                struct netdev_phys_item_id *ppid)
5157 {
5158         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5159         struct be_adapter *adapter = netdev_priv(dev);
5160         u8 *id;
5161
5162         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5163                 return -ENOSPC;
5164
5165         ppid->id[0] = adapter->hba_port_num + 1;
5166         id = &ppid->id[1];
5167         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5168              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5169                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5170
5171         ppid->id_len = id_len;
5172
5173         return 0;
5174 }
5175
5176 static void be_set_rx_mode(struct net_device *dev)
5177 {
5178         struct be_adapter *adapter = netdev_priv(dev);
5179         struct be_cmd_work *work;
5180
5181         work = be_alloc_work(adapter, be_work_set_rx_mode);
5182         if (work)
5183                 queue_work(be_wq, &work->work);
5184 }
5185
5186 static const struct net_device_ops be_netdev_ops = {
5187         .ndo_open               = be_open,
5188         .ndo_stop               = be_close,
5189         .ndo_start_xmit         = be_xmit,
5190         .ndo_set_rx_mode        = be_set_rx_mode,
5191         .ndo_set_mac_address    = be_mac_addr_set,
5192         .ndo_get_stats64        = be_get_stats64,
5193         .ndo_validate_addr      = eth_validate_addr,
5194         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5195         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5196         .ndo_set_vf_mac         = be_set_vf_mac,
5197         .ndo_set_vf_vlan        = be_set_vf_vlan,
5198         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5199         .ndo_get_vf_config      = be_get_vf_config,
5200         .ndo_set_vf_link_state  = be_set_vf_link_state,
5201         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5202 #ifdef CONFIG_NET_POLL_CONTROLLER
5203         .ndo_poll_controller    = be_netpoll,
5204 #endif
5205         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5206         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5207         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5208         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5209         .ndo_features_check     = be_features_check,
5210         .ndo_get_phys_port_id   = be_get_phys_port_id,
5211 };
5212
5213 static void be_netdev_init(struct net_device *netdev)
5214 {
5215         struct be_adapter *adapter = netdev_priv(netdev);
5216
5217         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5218                 NETIF_F_GSO_UDP_TUNNEL |
5219                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5220                 NETIF_F_HW_VLAN_CTAG_TX;
5221         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5222                 netdev->hw_features |= NETIF_F_RXHASH;
5223
5224         netdev->features |= netdev->hw_features |
5225                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5226
5227         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5228                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5229
5230         netdev->priv_flags |= IFF_UNICAST_FLT;
5231
5232         netdev->flags |= IFF_MULTICAST;
5233
5234         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5235
5236         netdev->netdev_ops = &be_netdev_ops;
5237
5238         netdev->ethtool_ops = &be_ethtool_ops;
5239
5240         /* MTU range: 256 - 9000 */
5241         netdev->min_mtu = BE_MIN_MTU;
5242         netdev->max_mtu = BE_MAX_MTU;
5243 }
5244
5245 static void be_cleanup(struct be_adapter *adapter)
5246 {
5247         struct net_device *netdev = adapter->netdev;
5248
5249         rtnl_lock();
5250         netif_device_detach(netdev);
5251         if (netif_running(netdev))
5252                 be_close(netdev);
5253         rtnl_unlock();
5254
5255         be_clear(adapter);
5256 }
5257
5258 static int be_resume(struct be_adapter *adapter)
5259 {
5260         struct net_device *netdev = adapter->netdev;
5261         int status;
5262
5263         status = be_setup(adapter);
5264         if (status)
5265                 return status;
5266
5267         rtnl_lock();
5268         if (netif_running(netdev))
5269                 status = be_open(netdev);
5270         rtnl_unlock();
5271
5272         if (status)
5273                 return status;
5274
5275         netif_device_attach(netdev);
5276
5277         return 0;
5278 }
5279
5280 static void be_soft_reset(struct be_adapter *adapter)
5281 {
5282         u32 val;
5283
5284         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5285         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5286         val |= SLIPORT_SOFTRESET_SR_MASK;
5287         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5288 }
5289
5290 static bool be_err_is_recoverable(struct be_adapter *adapter)
5291 {
5292         struct be_error_recovery *err_rec = &adapter->error_recovery;
5293         unsigned long initial_idle_time =
5294                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5295         unsigned long recovery_interval =
5296                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5297         u16 ue_err_code;
5298         u32 val;
5299
5300         val = be_POST_stage_get(adapter);
5301         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5302                 return false;
5303         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5304         if (ue_err_code == 0)
5305                 return false;
5306
5307         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5308                 ue_err_code);
5309
5310         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5311                 dev_err(&adapter->pdev->dev,
5312                         "Cannot recover within %lu sec from driver load\n",
5313                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5314                 return false;
5315         }
5316
5317         if (err_rec->last_recovery_time && time_before_eq(
5318                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5319                 dev_err(&adapter->pdev->dev,
5320                         "Cannot recover within %lu sec from last recovery\n",
5321                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5322                 return false;
5323         }
5324
5325         if (ue_err_code == err_rec->last_err_code) {
5326                 dev_err(&adapter->pdev->dev,
5327                         "Cannot recover from a consecutive TPE error\n");
5328                 return false;
5329         }
5330
5331         err_rec->last_recovery_time = jiffies;
5332         err_rec->last_err_code = ue_err_code;
5333         return true;
5334 }
5335
5336 static int be_tpe_recover(struct be_adapter *adapter)
5337 {
5338         struct be_error_recovery *err_rec = &adapter->error_recovery;
5339         int status = -EAGAIN;
5340         u32 val;
5341
5342         switch (err_rec->recovery_state) {
5343         case ERR_RECOVERY_ST_NONE:
5344                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5345                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5346                 break;
5347
5348         case ERR_RECOVERY_ST_DETECT:
5349                 val = be_POST_stage_get(adapter);
5350                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5351                     POST_STAGE_RECOVERABLE_ERR) {
5352                         dev_err(&adapter->pdev->dev,
5353                                 "Unrecoverable HW error detected: 0x%x\n", val);
5354                         status = -EINVAL;
5355                         err_rec->resched_delay = 0;
5356                         break;
5357                 }
5358
5359                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5360
5361                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5362                  * milliseconds before it checks for final error status in
5363                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5364                  * If they are, PF0 initiates a Soft Reset.
5365                  */
5366                 if (adapter->pf_num == 0) {
5367                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5368                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5369                                         ERR_RECOVERY_UE_DETECT_DURATION;
5370                         break;
5371                 }
5372
5373                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5374                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5375                                         ERR_RECOVERY_UE_DETECT_DURATION;
5376                 break;
5377
5378         case ERR_RECOVERY_ST_RESET:
5379                 if (!be_err_is_recoverable(adapter)) {
5380                         dev_err(&adapter->pdev->dev,
5381                                 "Failed to meet recovery criteria\n");
5382                         status = -EIO;
5383                         err_rec->resched_delay = 0;
5384                         break;
5385                 }
5386                 be_soft_reset(adapter);
5387                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5388                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5389                                         err_rec->ue_to_reset_time;
5390                 break;
5391
5392         case ERR_RECOVERY_ST_PRE_POLL:
5393                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5394                 err_rec->resched_delay = 0;
5395                 status = 0;                     /* done */
5396                 break;
5397
5398         default:
5399                 status = -EINVAL;
5400                 err_rec->resched_delay = 0;
5401                 break;
5402         }
5403
5404         return status;
5405 }
5406
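/* Recover the function after a HW error: on BE/Skyhawk run the TPE recovery
 * state machine first, then wait for FW readiness, tear down the function
 * and bring it back up via be_resume().
 */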
5407 static int be_err_recover(struct be_adapter *adapter)
5408 {
5409         int status;
5410
5411         if (!lancer_chip(adapter)) {
5412                 if (!adapter->error_recovery.recovery_supported ||
5413                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5414                         return -EIO;
5415                 status = be_tpe_recover(adapter);
5416                 if (status)
5417                         goto err;
5418         }
5419
5420         /* Wait for adapter to reach quiescent state before
5421          * destroying queues
5422          */
5423         status = be_fw_wait_ready(adapter);
5424         if (status)
5425                 goto err;
5426
5427         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5428
5429         be_cleanup(adapter);
5430
5431         status = be_resume(adapter);
5432         if (status)
5433                 goto err;
5434
5435         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5436
5437 err:
5438         return status;
5439 }
5440
5441 static void be_err_detection_task(struct work_struct *work)
5442 {
5443         struct be_error_recovery *err_rec =
5444                         container_of(work, struct be_error_recovery,
5445                                      err_detection_work.work);
5446         struct be_adapter *adapter =
5447                         container_of(err_rec, struct be_adapter,
5448                                      error_recovery);
5449         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5450         struct device *dev = &adapter->pdev->dev;
5451         int recovery_status;
5452
5453         be_detect_error(adapter);
5454         if (!be_check_error(adapter, BE_ERROR_HW))
5455                 goto reschedule_task;
5456
5457         recovery_status = be_err_recover(adapter);
5458         if (!recovery_status) {
5459                 err_rec->recovery_retries = 0;
5460                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5461                 dev_info(dev, "Adapter recovery successful\n");
5462                 goto reschedule_task;
5463         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5464                 /* BEx/SH recovery state machine */
5465                 if (adapter->pf_num == 0 &&
5466                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5467                         dev_err(&adapter->pdev->dev,
5468                                 "Adapter recovery in progress\n");
5469                 resched_delay = err_rec->resched_delay;
5470                 goto reschedule_task;
5471         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5472                 /* For VFs, check every second whether the PF has
5473                  * allocated resources.
5474                  */
5475                 dev_err(dev, "Re-trying adapter recovery\n");
5476                 goto reschedule_task;
5477         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5478                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5479                 /* In case of another error during recovery, it takes 30 sec
5480                  * for the adapter to come out of error. Retry error recovery after
5481                  * this time interval.
5482                  */
5483                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5484                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5485                 goto reschedule_task;
5486         } else {
5487                 dev_err(dev, "Adapter recovery failed\n");
5488                 dev_err(dev, "Please reboot server to recover\n");
5489         }
5490
5491         return;
5492
5493 reschedule_task:
5494         be_schedule_err_detection(adapter, resched_delay);
5495 }
5496
5497 static void be_log_sfp_info(struct be_adapter *adapter)
5498 {
5499         int status;
5500
5501         status = be_cmd_query_sfp_info(adapter);
5502         if (!status) {
5503                 dev_err(&adapter->pdev->dev,
5504                         "Port %c: %s Vendor: %s part no: %s",
5505                         adapter->port_name,
5506                         be_misconfig_evt_port_state[adapter->phy_state],
5507                         adapter->phy.vendor_name,
5508                         adapter->phy.vendor_pn);
5509         }
5510         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5511 }
5512
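/* Periodic (1 sec) housekeeping: die-temperature query, stats refresh,
 * replenishing starved RX queues, EQ-delay updates and SFP state logging.
 */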
5513 static void be_worker(struct work_struct *work)
5514 {
5515         struct be_adapter *adapter =
5516                 container_of(work, struct be_adapter, work.work);
5517         struct be_rx_obj *rxo;
5518         int i;
5519
5520         if (be_physfn(adapter) &&
5521             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5522                 be_cmd_get_die_temperature(adapter);
5523
5524         /* when interrupts are not yet enabled, just reap any pending
5525          * mcc completions
5526          */
5527         if (!netif_running(adapter->netdev)) {
5528                 local_bh_disable();
5529                 be_process_mcc(adapter);
5530                 local_bh_enable();
5531                 goto reschedule;
5532         }
5533
5534         if (!adapter->stats_cmd_sent) {
5535                 if (lancer_chip(adapter))
5536                         lancer_cmd_get_pport_stats(adapter,
5537                                                    &adapter->stats_cmd);
5538                 else
5539                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5540         }
5541
5542         for_all_rx_queues(adapter, rxo, i) {
5543                 /* Replenish RX-queues starved due to memory
5544                  * allocation failures.
5545                  */
5546                 if (rxo->rx_post_starved)
5547                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5548         }
5549
5550         /* EQ-delay update for Skyhawk is done while notifying EQ */
5551         if (!skyhawk_chip(adapter))
5552                 be_eqd_update(adapter, false);
5553
5554         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5555                 be_log_sfp_info(adapter);
5556
5557 reschedule:
5558         adapter->work_counter++;
5559         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5560 }
5561
5562 static void be_unmap_pci_bars(struct be_adapter *adapter)
5563 {
5564         if (adapter->csr)
5565                 pci_iounmap(adapter->pdev, adapter->csr);
5566         if (adapter->db)
5567                 pci_iounmap(adapter->pdev, adapter->db);
5568         if (adapter->pcicfg && adapter->pcicfg_mapped)
5569                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5570 }
5571
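/* Doorbell BAR: BAR 0 on Lancer and on VFs, BAR 4 on other PFs */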
5572 static int db_bar(struct be_adapter *adapter)
5573 {
5574         if (lancer_chip(adapter) || be_virtfn(adapter))
5575                 return 0;
5576         else
5577                 return 4;
5578 }
5579
5580 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5581 {
5582         if (skyhawk_chip(adapter)) {
5583                 adapter->roce_db.size = 4096;
5584                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5585                                                               db_bar(adapter));
5586                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5587                                                                db_bar(adapter));
5588         }
5589         return 0;
5590 }
5591
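/* ioremap the CSR, doorbell and PCICFG BARs as applicable to the chip
 * family and function type (PF vs VF).
 */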
5592 static int be_map_pci_bars(struct be_adapter *adapter)
5593 {
5594         struct pci_dev *pdev = adapter->pdev;
5595         u8 __iomem *addr;
5596         u32 sli_intf;
5597
5598         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5599         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5600                                 SLI_INTF_FAMILY_SHIFT;
5601         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5602
5603         if (BEx_chip(adapter) && be_physfn(adapter)) {
5604                 adapter->csr = pci_iomap(pdev, 2, 0);
5605                 if (!adapter->csr)
5606                         return -ENOMEM;
5607         }
5608
5609         addr = pci_iomap(pdev, db_bar(adapter), 0);
5610         if (!addr)
5611                 goto pci_map_err;
5612         adapter->db = addr;
5613
5614         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5615                 if (be_physfn(adapter)) {
5616                         /* PCICFG is the 2nd BAR in BE2 */
5617                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5618                         if (!addr)
5619                                 goto pci_map_err;
5620                         adapter->pcicfg = addr;
5621                         adapter->pcicfg_mapped = true;
5622                 } else {
5623                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5624                         adapter->pcicfg_mapped = false;
5625                 }
5626         }
5627
5628         be_roce_map_pci_bars(adapter);
5629         return 0;
5630
5631 pci_map_err:
5632         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5633         be_unmap_pci_bars(adapter);
5634         return -ENOMEM;
5635 }
5636
5637 static void be_drv_cleanup(struct be_adapter *adapter)
5638 {
5639         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5640         struct device *dev = &adapter->pdev->dev;
5641
5642         if (mem->va)
5643                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5644
5645         mem = &adapter->rx_filter;
5646         if (mem->va)
5647                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5648
5649         mem = &adapter->stats_cmd;
5650         if (mem->va)
5651                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5652 }
5653
5654 /* Allocate and initialize various fields in be_adapter struct */
5655 static int be_drv_init(struct be_adapter *adapter)
5656 {
5657         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5658         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5659         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5660         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5661         struct device *dev = &adapter->pdev->dev;
5662         int status = 0;
5663
5664         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5665         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5666                                                  &mbox_mem_alloc->dma,
5667                                                  GFP_KERNEL);
5668         if (!mbox_mem_alloc->va)
5669                 return -ENOMEM;
5670
5671         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5672         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5673         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5674
5675         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5676         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5677                                             &rx_filter->dma, GFP_KERNEL);
5678         if (!rx_filter->va) {
5679                 status = -ENOMEM;
5680                 goto free_mbox;
5681         }
5682
5683         if (lancer_chip(adapter))
5684                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5685         else if (BE2_chip(adapter))
5686                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5687         else if (BE3_chip(adapter))
5688                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5689         else
5690                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5691         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5692                                             &stats_cmd->dma, GFP_KERNEL);
5693         if (!stats_cmd->va) {
5694                 status = -ENOMEM;
5695                 goto free_rx_filter;
5696         }
5697
5698         mutex_init(&adapter->mbox_lock);
5699         mutex_init(&adapter->mcc_lock);
5700         mutex_init(&adapter->rx_filter_lock);
5701         spin_lock_init(&adapter->mcc_cq_lock);
5702         init_completion(&adapter->et_cmd_compl);
5703
5704         pci_save_state(adapter->pdev);
5705
5706         INIT_DELAYED_WORK(&adapter->work, be_worker);
5707
5708         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5709         adapter->error_recovery.resched_delay = 0;
5710         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5711                           be_err_detection_task);
5712
5713         adapter->rx_fc = true;
5714         adapter->tx_fc = true;
5715
5716         /* Must be a power of 2 or else MODULO will BUG_ON */
5717         adapter->be_get_temp_freq = 64;
5718
5719         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5720         return 0;
5721
5722 free_rx_filter:
5723         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5724 free_mbox:
5725         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5726                           mbox_mem_alloc->dma);
5727         return status;
5728 }
5729
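/* PCI remove callback: tear down in roughly the reverse order of be_probe():
 * detach RoCE, disable interrupts, stop error detection, unregister the
 * netdev, reset the function (unless VFs are still assigned) and release
 * driver, BAR and PCI resources.
 */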
5730 static void be_remove(struct pci_dev *pdev)
5731 {
5732         struct be_adapter *adapter = pci_get_drvdata(pdev);
5733
5734         if (!adapter)
5735                 return;
5736
5737         be_roce_dev_remove(adapter);
5738         be_intr_set(adapter, false);
5739
5740         be_cancel_err_detection(adapter);
5741
5742         unregister_netdev(adapter->netdev);
5743
5744         be_clear(adapter);
5745
5746         if (!pci_vfs_assigned(adapter->pdev))
5747                 be_cmd_reset_function(adapter);
5748
5749         /* Tell the FW we are done issuing commands */
5750         be_cmd_fw_clean(adapter);
5751
5752         be_unmap_pci_bars(adapter);
5753         be_drv_cleanup(adapter);
5754
5755         pci_disable_pcie_error_reporting(pdev);
5756
5757         pci_release_regions(pdev);
5758         pci_disable_device(pdev);
5759
5760         free_netdev(adapter->netdev);
5761 }
5762
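/* hwmon show callback for temp1_input: report the most recently read
 * on-die temperature in millidegrees Celsius, or -EIO if no valid
 * reading is available.
 */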
5763 static ssize_t be_hwmon_show_temp(struct device *dev,
5764                                   struct device_attribute *dev_attr,
5765                                   char *buf)
5766 {
5767         struct be_adapter *adapter = dev_get_drvdata(dev);
5768
5769         /* Unit: millidegree Celsius */
5770         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5771                 return -EIO;
5772         else
5773                 return sprintf(buf, "%u\n",
5774                                adapter->hwmon_info.be_on_die_temp * 1000);
5775 }
5776
5777 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5778                           be_hwmon_show_temp, NULL, 1);
5779
5780 static struct attribute *be_hwmon_attrs[] = {
5781         &sensor_dev_attr_temp1_input.dev_attr.attr,
5782         NULL
5783 };
5784
5785 ATTRIBUTE_GROUPS(be_hwmon);
5786
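/* Return a printable name for the multi-channel mode this function is
 * provisioned in (UMC, FLEX10, vNIC, nPAR, UFP), or "" for a plain function.
 */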
5787 static char *mc_name(struct be_adapter *adapter)
5788 {
5789         char *str = ""; /* default */
5790
5791         switch (adapter->mc_type) {
5792         case UMC:
5793                 str = "UMC";
5794                 break;
5795         case FLEX10:
5796                 str = "FLEX10";
5797                 break;
5798         case vNIC1:
5799                 str = "vNIC-1";
5800                 break;
5801         case nPAR:
5802                 str = "nPAR";
5803                 break;
5804         case UFP:
5805                 str = "UFP";
5806                 break;
5807         case vNIC2:
5808                 str = "vNIC-2";
5809                 break;
5810         default:
5811                 str = "";
5812         }
5813
5814         return str;
5815 }
5816
5817 static inline char *func_name(struct be_adapter *adapter)
5818 {
5819         return be_physfn(adapter) ? "PF" : "VF";
5820 }
5821
5822 static inline char *nic_name(struct pci_dev *pdev)
5823 {
5824         switch (pdev->device) {
5825         case OC_DEVICE_ID1:
5826                 return OC_NAME;
5827         case OC_DEVICE_ID2:
5828                 return OC_NAME_BE;
5829         case OC_DEVICE_ID3:
5830         case OC_DEVICE_ID4:
5831                 return OC_NAME_LANCER;
5832         case BE_DEVICE_ID2:
5833                 return BE3_NAME;
5834         case OC_DEVICE_ID5:
5835         case OC_DEVICE_ID6:
5836                 return OC_NAME_SH;
5837         default:
5838                 return BE_NAME;
5839         }
5840 }
5841
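/* PCI probe callback: enable the device, set the DMA mask, map BARs,
 * allocate driver state, bring the adapter up via be_setup() and register
 * the net_device. Error paths unwind in reverse order.
 */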
5842 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5843 {
5844         struct be_adapter *adapter;
5845         struct net_device *netdev;
5846         int status = 0;
5847
5848         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5849
5850         status = pci_enable_device(pdev);
5851         if (status)
5852                 goto do_none;
5853
5854         status = pci_request_regions(pdev, DRV_NAME);
5855         if (status)
5856                 goto disable_dev;
5857         pci_set_master(pdev);
5858
5859         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5860         if (!netdev) {
5861                 status = -ENOMEM;
5862                 goto rel_reg;
5863         }
5864         adapter = netdev_priv(netdev);
5865         adapter->pdev = pdev;
5866         pci_set_drvdata(pdev, adapter);
5867         adapter->netdev = netdev;
5868         SET_NETDEV_DEV(netdev, &pdev->dev);
5869
5870         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5871         if (!status) {
5872                 netdev->features |= NETIF_F_HIGHDMA;
5873         } else {
5874                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5875                 if (status) {
5876                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5877                         goto free_netdev;
5878                 }
5879         }
5880
5881         status = pci_enable_pcie_error_reporting(pdev);
5882         if (!status)
5883                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5884
5885         status = be_map_pci_bars(adapter);
5886         if (status)
5887                 goto free_netdev;
5888
5889         status = be_drv_init(adapter);
5890         if (status)
5891                 goto unmap_bars;
5892
5893         status = be_setup(adapter);
5894         if (status)
5895                 goto drv_cleanup;
5896
5897         be_netdev_init(netdev);
5898         status = register_netdev(netdev);
5899         if (status != 0)
5900                 goto unsetup;
5901
5902         be_roce_dev_add(adapter);
5903
5904         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5905         adapter->error_recovery.probe_time = jiffies;
5906
5907         /* On-die temperature is not supported on VFs */
5908         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5909                 adapter->hwmon_info.hwmon_dev =
5910                         devm_hwmon_device_register_with_groups(&pdev->dev,
5911                                                                DRV_NAME,
5912                                                                adapter,
5913                                                                be_hwmon_groups);
5914                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5915         }
5916
5917         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5918                  func_name(adapter), mc_name(adapter), adapter->port_name);
5919
5920         return 0;
5921
5922 unsetup:
5923         be_clear(adapter);
5924 drv_cleanup:
5925         be_drv_cleanup(adapter);
5926 unmap_bars:
5927         be_unmap_pci_bars(adapter);
5928 free_netdev:
5929         pci_disable_pcie_error_reporting(pdev);
5930         free_netdev(netdev);
5931 rel_reg:
5932         pci_release_regions(pdev);
5933 disable_dev:
5934         pci_disable_device(pdev);
5935 do_none:
5936         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5937         return status;
5938 }
5939
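/* Legacy PCI power-management suspend: quiesce the adapter, save PCI state
 * and move the device to the requested low-power state.
 */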
5940 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5941 {
5942         struct be_adapter *adapter = pci_get_drvdata(pdev);
5943
5944         be_intr_set(adapter, false);
5945         be_cancel_err_detection(adapter);
5946
5947         be_cleanup(adapter);
5948
5949         pci_save_state(pdev);
5950         pci_disable_device(pdev);
5951         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5952         return 0;
5953 }
5954
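/* Legacy PCI power-management resume: re-enable the device, restore its
 * config space, bring the adapter back up and re-arm error detection.
 */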
5955 static int be_pci_resume(struct pci_dev *pdev)
5956 {
5957         struct be_adapter *adapter = pci_get_drvdata(pdev);
5958         int status = 0;
5959
5960         status = pci_enable_device(pdev);
5961         if (status)
5962                 return status;
5963
5964         pci_restore_state(pdev);
5965
5966         status = be_resume(adapter);
5967         if (status)
5968                 return status;
5969
5970         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5971
5972         return 0;
5973 }
5974
5975 /*
5976  * An FLR (function-level reset) stops the adapter from DMAing any further data.
5977  */
5978 static void be_shutdown(struct pci_dev *pdev)
5979 {
5980         struct be_adapter *adapter = pci_get_drvdata(pdev);
5981
5982         if (!adapter)
5983                 return;
5984
5985         be_roce_dev_shutdown(adapter);
5986         cancel_delayed_work_sync(&adapter->work);
5987         be_cancel_err_detection(adapter);
5988
5989         netif_device_detach(adapter->netdev);
5990
5991         be_cmd_reset_function(adapter);
5992
5993         pci_disable_device(pdev);
5994 }
5995
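/* AER/EEH error_detected callback: quiesce the adapter on a PCI channel
 * error and tell the PCI core whether a slot reset should be attempted.
 */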
5996 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5997                                             pci_channel_state_t state)
5998 {
5999         struct be_adapter *adapter = pci_get_drvdata(pdev);
6000
6001         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6002
6003         be_roce_dev_remove(adapter);
6004
6005         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6006                 be_set_error(adapter, BE_ERROR_EEH);
6007
6008                 be_cancel_err_detection(adapter);
6009
6010                 be_cleanup(adapter);
6011         }
6012
6013         if (state == pci_channel_io_perm_failure)
6014                 return PCI_ERS_RESULT_DISCONNECT;
6015
6016         pci_disable_device(pdev);
6017
6018         /* The error could cause the FW to trigger a flash debug dump.
6019          * Resetting the card while a flash dump is in progress can
6020          * leave it unrecoverable, so wait for the dump to finish.
6021          * Only the first function needs to wait, since the dump happens
6022          * only once per adapter.
6023          */
6024         if (pdev->devfn == 0)
6025                 ssleep(30);
6026
6027         return PCI_ERS_RESULT_NEED_RESET;
6028 }
6029
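/* AER/EEH slot_reset callback: re-enable the device after the link reset
 * and wait for the firmware to become ready before reporting recovery.
 */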
6030 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6031 {
6032         struct be_adapter *adapter = pci_get_drvdata(pdev);
6033         int status;
6034
6035         dev_info(&adapter->pdev->dev, "EEH reset\n");
6036
6037         status = pci_enable_device(pdev);
6038         if (status)
6039                 return PCI_ERS_RESULT_DISCONNECT;
6040
6041         pci_set_master(pdev);
6042         pci_restore_state(pdev);
6043
6044         /* Check if card is ok and fw is ready */
6045         dev_info(&adapter->pdev->dev,
6046                  "Waiting for FW to be ready after EEH reset\n");
6047         status = be_fw_wait_ready(adapter);
6048         if (status)
6049                 return PCI_ERS_RESULT_DISCONNECT;
6050
6051         pci_cleanup_aer_uncorrect_error_status(pdev);
6052         be_clear_error(adapter, BE_CLEAR_ALL);
6053         return PCI_ERS_RESULT_RECOVERED;
6054 }
6055
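/* AER/EEH resume callback: traffic can flow again; bring the adapter back
 * up and re-arm error detection.
 */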
6056 static void be_eeh_resume(struct pci_dev *pdev)
6057 {
6058         int status = 0;
6059         struct be_adapter *adapter = pci_get_drvdata(pdev);
6060
6061         dev_info(&adapter->pdev->dev, "EEH resume\n");
6062
6063         pci_save_state(pdev);
6064
6065         status = be_resume(adapter);
6066         if (status)
6067                 goto err;
6068
6069         be_roce_dev_add(adapter);
6070
6071         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6072         return;
6073 err:
6074         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6075 }
6076
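/* sriov_configure callback (driven by the sriov_numvfs sysfs attribute):
 * enable or disable num_vfs VFs, redistributing PF-pool resources where
 * the hardware supports it.
 */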
6077 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6078 {
6079         struct be_adapter *adapter = pci_get_drvdata(pdev);
6080         struct be_resources vft_res = {0};
6081         int status;
6082
6083         if (!num_vfs)
6084                 be_vf_clear(adapter);
6085
6086         adapter->num_vfs = num_vfs;
6087
6088         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6089                 dev_warn(&pdev->dev,
6090                          "Cannot disable VFs while they are assigned\n");
6091                 return -EBUSY;
6092         }
6093
6094         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6095          * resources are distributed equally across the maximum number of VFs.
6096          * The user may request that only a subset of the max VFs be enabled;
6097          * based on num_vfs, redistribute the resources across num_vfs so that
6098          * each VF gets a larger share of them.
6099          * This facility is not available in BE3 FW; on Lancer chips the FW
6100          * does the redistribution itself.
6101          */
6102         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6103                 be_calculate_vf_res(adapter, adapter->num_vfs,
6104                                     &vft_res);
6105                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6106                                                  adapter->num_vfs, &vft_res);
6107                 if (status)
6108                         dev_err(&pdev->dev,
6109                                 "Failed to optimize SR-IOV resources\n");
6110         }
6111
6112         status = be_get_resources(adapter);
6113         if (status)
6114                 return be_cmd_status(status);
6115
6116         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6117         rtnl_lock();
6118         status = be_update_queues(adapter);
6119         rtnl_unlock();
6120         if (status)
6121                 return be_cmd_status(status);
6122
6123         if (adapter->num_vfs)
6124                 status = be_vf_setup(adapter);
6125
6126         if (!status)
6127                 return adapter->num_vfs;
6128
6129         return 0;
6130 }
6131
6132 static const struct pci_error_handlers be_eeh_handlers = {
6133         .error_detected = be_eeh_err_detected,
6134         .slot_reset = be_eeh_reset,
6135         .resume = be_eeh_resume,
6136 };
6137
6138 static struct pci_driver be_driver = {
6139         .name = DRV_NAME,
6140         .id_table = be_dev_ids,
6141         .probe = be_probe,
6142         .remove = be_remove,
6143         .suspend = be_suspend,
6144         .resume = be_pci_resume,
6145         .shutdown = be_shutdown,
6146         .sriov_configure = be_pci_sriov_configure,
6147         .err_handler = &be_eeh_handlers
6148 };
6149
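/* Module init: validate module parameters, create the shared workqueues
 * and register the PCI driver.
 */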
6150 static int __init be_init_module(void)
6151 {
6152         int status;
6153
6154         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6155             rx_frag_size != 2048) {
6156                 printk(KERN_WARNING DRV_NAME
6157                         " : Module param rx_frag_size must be 2048/4096/8192."
6158                         " Using 2048\n");
6159                 rx_frag_size = 2048;
6160         }
6161
6162         if (num_vfs > 0) {
6163                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6164                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6165         }
6166
6167         be_wq = create_singlethread_workqueue("be_wq");
6168         if (!be_wq) {
6169                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6170                 return -ENOMEM;
6171         }
6172
6173         be_err_recovery_workq =
6174                 create_singlethread_workqueue("be_err_recover");
6175         if (!be_err_recovery_workq)
6176                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6177
6178         status = pci_register_driver(&be_driver);
6179         if (status) {
6180                 destroy_workqueue(be_wq);
6181                 be_destroy_err_recovery_workq();
6182         }
6183         return status;
6184 }
6185 module_init(be_init_module);
6186
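/* Module exit: unregister the PCI driver and destroy the workqueues. */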
6187 static void __exit be_exit_module(void)
6188 {
6189         pci_unregister_driver(&be_driver);
6190
6191         be_destroy_err_recovery_workq();
6192
6193         if (be_wq)
6194                 destroy_workqueue(be_wq);
6195 }
6196 module_exit(be_exit_module);