GNU Linux-libre 4.19.264-gnu1
[releases.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66         { 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69
70 /* Workqueue used by all functions for defering cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75         "CEV",
76         "CTX",
77         "DBUF",
78         "ERX",
79         "Host",
80         "MPU",
81         "NDMA",
82         "PTC ",
83         "RDMA ",
84         "RXF ",
85         "RXIPS ",
86         "RXULP0 ",
87         "RXULP1 ",
88         "RXULP2 ",
89         "TIM ",
90         "TPOST ",
91         "TPRE ",
92         "TXIPS ",
93         "TXULP0 ",
94         "TXULP1 ",
95         "UC ",
96         "WDMA ",
97         "TXULP2 ",
98         "HOST1 ",
99         "P0_OB_LINK ",
100         "P1_OB_LINK ",
101         "HOST_GPIO ",
102         "MBOX ",
103         "ERX2 ",
104         "SPARE ",
105         "JTAG ",
106         "MPU_INTPEND "
107 };
108
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111         "LPCMEMHOST",
112         "MGMT_MAC",
113         "PCS0ONLINE",
114         "MPU_IRAM",
115         "PCS1ONLINE",
116         "PCTL0",
117         "PCTL1",
118         "PMEM",
119         "RR",
120         "TXPB",
121         "RXPP",
122         "XAUI",
123         "TXP",
124         "ARM",
125         "IPC",
126         "HOST2",
127         "HOST3",
128         "HOST4",
129         "HOST5",
130         "HOST6",
131         "HOST7",
132         "ECRC",
133         "Poison TLP",
134         "NETC",
135         "PERIPH",
136         "LLTXULP",
137         "D2P",
138         "RCON",
139         "LDMA",
140         "LLTXP",
141         "LLTXPB",
142         "Unknown"
143 };
144
145 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
146                                  BE_IF_FLAGS_BROADCAST | \
147                                  BE_IF_FLAGS_MULTICAST | \
148                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
149
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152         struct be_dma_mem *mem = &q->dma_mem;
153
154         if (mem->va) {
155                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156                                   mem->dma);
157                 mem->va = NULL;
158         }
159 }
160
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162                           u16 len, u16 entry_size)
163 {
164         struct be_dma_mem *mem = &q->dma_mem;
165
166         memset(q, 0, sizeof(*q));
167         q->len = len;
168         q->entry_size = entry_size;
169         mem->size = len * entry_size;
170         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
171                                       GFP_KERNEL);
172         if (!mem->va)
173                 return -ENOMEM;
174         return 0;
175 }
176
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179         u32 reg, enabled;
180
181         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182                               &reg);
183         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184
185         if (!enabled && enable)
186                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187         else if (enabled && !enable)
188                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189         else
190                 return;
191
192         pci_write_config_dword(adapter->pdev,
193                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198         int status = 0;
199
200         /* On lancer interrupts can't be controlled via this register */
201         if (lancer_chip(adapter))
202                 return;
203
204         if (be_check_error(adapter, BE_ERROR_EEH))
205                 return;
206
207         status = be_cmd_intr_set(adapter, enable);
208         if (status)
209                 be_reg_intr_set(adapter, enable);
210 }
211
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214         u32 val = 0;
215
216         if (be_check_error(adapter, BE_ERROR_HW))
217                 return;
218
219         val |= qid & DB_RQ_RING_ID_MASK;
220         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221
222         wmb();
223         iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227                           u16 posted)
228 {
229         u32 val = 0;
230
231         if (be_check_error(adapter, BE_ERROR_HW))
232                 return;
233
234         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236
237         wmb();
238         iowrite32(val, adapter->db + txo->db_offset);
239 }
240
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242                          bool arm, bool clear_int, u16 num_popped,
243                          u32 eq_delay_mult_enc)
244 {
245         u32 val = 0;
246
247         val |= qid & DB_EQ_RING_ID_MASK;
248         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249
250         if (be_check_error(adapter, BE_ERROR_HW))
251                 return;
252
253         if (arm)
254                 val |= 1 << DB_EQ_REARM_SHIFT;
255         if (clear_int)
256                 val |= 1 << DB_EQ_CLR_SHIFT;
257         val |= 1 << DB_EQ_EVNT_SHIFT;
258         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260         iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265         u32 val = 0;
266
267         val |= qid & DB_CQ_RING_ID_MASK;
268         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
270
271         if (be_check_error(adapter, BE_ERROR_HW))
272                 return;
273
274         if (arm)
275                 val |= 1 << DB_CQ_REARM_SHIFT;
276         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277         iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282         int i;
283
284         /* Check if mac has already been added as part of uc-list */
285         for (i = 0; i < adapter->uc_macs; i++) {
286                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287                         /* mac already added, skip addition */
288                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289                         return 0;
290                 }
291         }
292
293         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294                                &adapter->pmac_id[0], 0);
295 }
296
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299         int i;
300
301         /* Skip deletion if the programmed mac is
302          * being used in uc-list
303          */
304         for (i = 0; i < adapter->uc_macs; i++) {
305                 if (adapter->pmac_id[i + 1] == pmac_id)
306                         return;
307         }
308         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313         struct be_adapter *adapter = netdev_priv(netdev);
314         struct device *dev = &adapter->pdev->dev;
315         struct sockaddr *addr = p;
316         int status;
317         u8 mac[ETH_ALEN];
318         u32 old_pmac_id = adapter->pmac_id[0];
319
320         if (!is_valid_ether_addr(addr->sa_data))
321                 return -EADDRNOTAVAIL;
322
323         /* Proceed further only if, User provided MAC is different
324          * from active MAC
325          */
326         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327                 return 0;
328
329         /* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC
330          * address
331          */
332         if (BEx_chip(adapter) && be_virtfn(adapter) &&
333             !check_privilege(adapter, BE_PRIV_FILTMGMT))
334                 return -EPERM;
335
336         /* if device is not running, copy MAC to netdev->dev_addr */
337         if (!netif_running(netdev))
338                 goto done;
339
340         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
341          * privilege or if PF did not provision the new MAC address.
342          * On BE3, this cmd will always fail if the VF doesn't have the
343          * FILTMGMT privilege. This failure is OK, only if the PF programmed
344          * the MAC for the VF.
345          */
346         mutex_lock(&adapter->rx_filter_lock);
347         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348         if (!status) {
349
350                 /* Delete the old programmed MAC. This call may fail if the
351                  * old MAC was already deleted by the PF driver.
352                  */
353                 if (adapter->pmac_id[0] != old_pmac_id)
354                         be_dev_mac_del(adapter, old_pmac_id);
355         }
356
357         mutex_unlock(&adapter->rx_filter_lock);
358         /* Decide if the new MAC is successfully activated only after
359          * querying the FW
360          */
361         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362                                        adapter->if_handle, true, 0);
363         if (status)
364                 goto err;
365
366         /* The MAC change did not happen, either due to lack of privilege
367          * or PF didn't pre-provision.
368          */
369         if (!ether_addr_equal(addr->sa_data, mac)) {
370                 status = -EPERM;
371                 goto err;
372         }
373
374         /* Remember currently programmed MAC */
375         ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377         ether_addr_copy(netdev->dev_addr, addr->sa_data);
378         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379         return 0;
380 err:
381         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382         return status;
383 }
384
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388         if (BE2_chip(adapter)) {
389                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         } else if (BE3_chip(adapter)) {
393                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394
395                 return &cmd->hw_stats;
396         } else {
397                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398
399                 return &cmd->hw_stats;
400         }
401 }
402
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406         if (BE2_chip(adapter)) {
407                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         } else if (BE3_chip(adapter)) {
411                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412
413                 return &hw_stats->erx;
414         } else {
415                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416
417                 return &hw_stats->erx;
418         }
419 }
420
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426         struct be_port_rxf_stats_v0 *port_stats =
427                                         &rxf_stats->port[adapter->port_num];
428         struct be_drv_stats *drvs = &adapter->drv_stats;
429
430         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431         drvs->rx_pause_frames = port_stats->rx_pause_frames;
432         drvs->rx_crc_errors = port_stats->rx_crc_errors;
433         drvs->rx_control_frames = port_stats->rx_control_frames;
434         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446         drvs->rx_dropped_header_too_small =
447                 port_stats->rx_dropped_header_too_small;
448         drvs->rx_address_filtered =
449                                         port_stats->rx_address_filtered +
450                                         port_stats->rx_vlan_filtered;
451         drvs->rx_alignment_symbol_errors =
452                 port_stats->rx_alignment_symbol_errors;
453
454         drvs->tx_pauseframes = port_stats->tx_pauseframes;
455         drvs->tx_controlframes = port_stats->tx_controlframes;
456
457         if (adapter->port_num)
458                 drvs->jabber_events = rxf_stats->port1_jabber_events;
459         else
460                 drvs->jabber_events = rxf_stats->port0_jabber_events;
461         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463         drvs->forwarded_packets = rxf_stats->forwarded_packets;
464         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475         struct be_port_rxf_stats_v1 *port_stats =
476                                         &rxf_stats->port[adapter->port_num];
477         struct be_drv_stats *drvs = &adapter->drv_stats;
478
479         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482         drvs->rx_pause_frames = port_stats->rx_pause_frames;
483         drvs->rx_crc_errors = port_stats->rx_crc_errors;
484         drvs->rx_control_frames = port_stats->rx_control_frames;
485         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495         drvs->rx_dropped_header_too_small =
496                 port_stats->rx_dropped_header_too_small;
497         drvs->rx_input_fifo_overflow_drop =
498                 port_stats->rx_input_fifo_overflow_drop;
499         drvs->rx_address_filtered = port_stats->rx_address_filtered;
500         drvs->rx_alignment_symbol_errors =
501                 port_stats->rx_alignment_symbol_errors;
502         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503         drvs->tx_pauseframes = port_stats->tx_pauseframes;
504         drvs->tx_controlframes = port_stats->tx_controlframes;
505         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506         drvs->jabber_events = port_stats->jabber_events;
507         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509         drvs->forwarded_packets = rxf_stats->forwarded_packets;
510         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521         struct be_port_rxf_stats_v2 *port_stats =
522                                         &rxf_stats->port[adapter->port_num];
523         struct be_drv_stats *drvs = &adapter->drv_stats;
524
525         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528         drvs->rx_pause_frames = port_stats->rx_pause_frames;
529         drvs->rx_crc_errors = port_stats->rx_crc_errors;
530         drvs->rx_control_frames = port_stats->rx_control_frames;
531         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541         drvs->rx_dropped_header_too_small =
542                 port_stats->rx_dropped_header_too_small;
543         drvs->rx_input_fifo_overflow_drop =
544                 port_stats->rx_input_fifo_overflow_drop;
545         drvs->rx_address_filtered = port_stats->rx_address_filtered;
546         drvs->rx_alignment_symbol_errors =
547                 port_stats->rx_alignment_symbol_errors;
548         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549         drvs->tx_pauseframes = port_stats->tx_pauseframes;
550         drvs->tx_controlframes = port_stats->tx_controlframes;
551         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552         drvs->jabber_events = port_stats->jabber_events;
553         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555         drvs->forwarded_packets = rxf_stats->forwarded_packets;
556         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560         if (be_roce_supported(adapter)) {
561                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563                 drvs->rx_roce_frames = port_stats->roce_frames_received;
564                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
565                 drvs->roce_drops_payload_len =
566                         port_stats->roce_drops_payload_len;
567         }
568 }
569
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572         struct be_drv_stats *drvs = &adapter->drv_stats;
573         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574
575         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585         drvs->rx_dropped_tcp_length =
586                                 pport_stats->rx_dropped_invalid_tcp_length;
587         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590         drvs->rx_dropped_header_too_small =
591                                 pport_stats->rx_dropped_header_too_small;
592         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->rx_address_filtered =
594                                         pport_stats->rx_address_filtered +
595                                         pport_stats->rx_vlan_filtered;
596         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600         drvs->jabber_events = pport_stats->rx_jabbers;
601         drvs->forwarded_packets = pport_stats->num_forwards_lo;
602         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603         drvs->rx_drops_too_many_frags =
604                                 pport_stats->rx_drops_too_many_frags_lo;
605 }
606
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x)                   (x & 0xFFFF)
610 #define hi(x)                   (x & 0xFFFF0000)
611         bool wrapped = val < lo(*acc);
612         u32 newacc = hi(*acc) + val;
613
614         if (wrapped)
615                 newacc += 65536;
616         WRITE_ONCE(*acc, newacc);
617 }
618
619 static void populate_erx_stats(struct be_adapter *adapter,
620                                struct be_rx_obj *rxo, u32 erx_stat)
621 {
622         if (!BEx_chip(adapter))
623                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624         else
625                 /* below erx HW counter can actually wrap around after
626                  * 65535. Driver accumulates a 32-bit value
627                  */
628                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629                                      (u16)erx_stat);
630 }
631
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635         struct be_rx_obj *rxo;
636         int i;
637         u32 erx_stat;
638
639         if (lancer_chip(adapter)) {
640                 populate_lancer_stats(adapter);
641         } else {
642                 if (BE2_chip(adapter))
643                         populate_be_v0_stats(adapter);
644                 else if (BE3_chip(adapter))
645                         /* for BE3 */
646                         populate_be_v1_stats(adapter);
647                 else
648                         populate_be_v2_stats(adapter);
649
650                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
651                 for_all_rx_queues(adapter, rxo, i) {
652                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653                         populate_erx_stats(adapter, rxo, erx_stat);
654                 }
655         }
656 }
657
658 static void be_get_stats64(struct net_device *netdev,
659                            struct rtnl_link_stats64 *stats)
660 {
661         struct be_adapter *adapter = netdev_priv(netdev);
662         struct be_drv_stats *drvs = &adapter->drv_stats;
663         struct be_rx_obj *rxo;
664         struct be_tx_obj *txo;
665         u64 pkts, bytes;
666         unsigned int start;
667         int i;
668
669         for_all_rx_queues(adapter, rxo, i) {
670                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
671
672                 do {
673                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674                         pkts = rx_stats(rxo)->rx_pkts;
675                         bytes = rx_stats(rxo)->rx_bytes;
676                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677                 stats->rx_packets += pkts;
678                 stats->rx_bytes += bytes;
679                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681                                         rx_stats(rxo)->rx_drops_no_frags;
682         }
683
684         for_all_tx_queues(adapter, txo, i) {
685                 const struct be_tx_stats *tx_stats = tx_stats(txo);
686
687                 do {
688                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689                         pkts = tx_stats(txo)->tx_pkts;
690                         bytes = tx_stats(txo)->tx_bytes;
691                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692                 stats->tx_packets += pkts;
693                 stats->tx_bytes += bytes;
694         }
695
696         /* bad pkts received */
697         stats->rx_errors = drvs->rx_crc_errors +
698                 drvs->rx_alignment_symbol_errors +
699                 drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long +
702                 drvs->rx_dropped_too_small +
703                 drvs->rx_dropped_too_short +
704                 drvs->rx_dropped_header_too_small +
705                 drvs->rx_dropped_tcp_length +
706                 drvs->rx_dropped_runt;
707
708         /* detailed rx errors */
709         stats->rx_length_errors = drvs->rx_in_range_errors +
710                 drvs->rx_out_range_errors +
711                 drvs->rx_frame_too_long;
712
713         stats->rx_crc_errors = drvs->rx_crc_errors;
714
715         /* frame alignment errors */
716         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717
718         /* receiver fifo overrun */
719         /* drops_no_pbuf is no per i/f, it's per BE card */
720         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721                                 drvs->rx_input_fifo_overflow_drop +
722                                 drvs->rx_drops_no_pbuf;
723 }
724
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727         struct net_device *netdev = adapter->netdev;
728
729         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730                 netif_carrier_off(netdev);
731                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732         }
733
734         if (link_status)
735                 netif_carrier_on(netdev);
736         else
737                 netif_carrier_off(netdev);
738
739         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744         if (skb->encapsulation)
745                 return skb_inner_transport_offset(skb) +
746                        inner_tcp_hdrlen(skb);
747         return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752         struct be_tx_stats *stats = tx_stats(txo);
753         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754         /* Account for headers which get duplicated in TSO pkt */
755         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756
757         u64_stats_update_begin(&stats->sync);
758         stats->tx_reqs++;
759         stats->tx_bytes += skb->len + dup_hdr_len;
760         stats->tx_pkts += tx_pkts;
761         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762                 stats->tx_vxlan_offload_pkts += tx_pkts;
763         u64_stats_update_end(&stats->sync);
764 }
765
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769         /* +1 for the header wrb */
770         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778         wrb->rsvd0 = 0;
779 }
780
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782  * to avoid the swap and shift/mask operations in wrb_fill().
783  */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786         wrb->frag_pa_hi = 0;
787         wrb->frag_pa_lo = 0;
788         wrb->frag_len = 0;
789         wrb->rsvd0 = 0;
790 }
791
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793                                      struct sk_buff *skb)
794 {
795         u8 vlan_prio;
796         u16 vlan_tag;
797
798         vlan_tag = skb_vlan_tag_get(skb);
799         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
800         /* If vlan priority provided by OS is NOT in available bmap */
801         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803                                 adapter->recommended_prio_bits;
804
805         return vlan_tag;
806 }
807
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811         return (inner_ip_hdr(skb)->version == 4) ?
812                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817         return (ip_hdr(skb)->version == 4) ?
818                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837                                        struct sk_buff *skb,
838                                        struct be_wrb_params *wrb_params)
839 {
840         u16 proto;
841
842         if (skb_is_gso(skb)) {
843                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
844                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848                 if (skb->encapsulation) {
849                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850                         proto = skb_inner_ip_proto(skb);
851                 } else {
852                         proto = skb_ip_proto(skb);
853                 }
854                 if (proto == IPPROTO_TCP)
855                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856                 else if (proto == IPPROTO_UDP)
857                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858         }
859
860         if (skb_vlan_tag_present(skb)) {
861                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863         }
864
865         BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869                          struct be_eth_hdr_wrb *hdr,
870                          struct be_wrb_params *wrb_params,
871                          struct sk_buff *skb)
872 {
873         memset(hdr, 0, sizeof(*hdr));
874
875         SET_TX_WRB_HDR_BITS(crc, hdr,
876                             BE_WRB_F_GET(wrb_params->features, CRC));
877         SET_TX_WRB_HDR_BITS(ipcs, hdr,
878                             BE_WRB_F_GET(wrb_params->features, IPCS));
879         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880                             BE_WRB_F_GET(wrb_params->features, TCPCS));
881         SET_TX_WRB_HDR_BITS(udpcs, hdr,
882                             BE_WRB_F_GET(wrb_params->features, UDPCS));
883
884         SET_TX_WRB_HDR_BITS(lso, hdr,
885                             BE_WRB_F_GET(wrb_params->features, LSO));
886         SET_TX_WRB_HDR_BITS(lso6, hdr,
887                             BE_WRB_F_GET(wrb_params->features, LSO6));
888         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889
890         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891          * hack is not needed, the evt bit is set while ringing DB.
892          */
893         SET_TX_WRB_HDR_BITS(event, hdr,
894                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895         SET_TX_WRB_HDR_BITS(vlan, hdr,
896                             BE_WRB_F_GET(wrb_params->features, VLAN));
897         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898
899         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901         SET_TX_WRB_HDR_BITS(mgmt, hdr,
902                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906                           bool unmap_single)
907 {
908         dma_addr_t dma;
909         u32 frag_len = le32_to_cpu(wrb->frag_len);
910
911
912         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913                 (u64)le32_to_cpu(wrb->frag_pa_lo);
914         if (frag_len) {
915                 if (unmap_single)
916                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917                 else
918                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919         }
920 }
921
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925         u32 head = txo->q.head;
926
927         queue_head_inc(&txo->q);
928         return head;
929 }
930
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933                                 struct be_tx_obj *txo,
934                                 struct be_wrb_params *wrb_params,
935                                 struct sk_buff *skb, u16 head)
936 {
937         u32 num_frags = skb_wrb_cnt(skb);
938         struct be_queue_info *txq = &txo->q;
939         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940
941         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942         be_dws_cpu_to_le(hdr, sizeof(*hdr));
943
944         BUG_ON(txo->sent_skb_list[head]);
945         txo->sent_skb_list[head] = skb;
946         txo->last_req_hdr = head;
947         atomic_add(num_frags, &txq->used);
948         txo->last_req_wrb_cnt = num_frags;
949         txo->pend_wrb_cnt += num_frags;
950 }
951
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954                                  int len)
955 {
956         struct be_eth_wrb *wrb;
957         struct be_queue_info *txq = &txo->q;
958
959         wrb = queue_head_node(txq);
960         wrb_fill(wrb, busaddr, len);
961         queue_head_inc(txq);
962 }
963
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965  * was invoked. The producer index is restored to the previous packet and the
966  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967  */
968 static void be_xmit_restore(struct be_adapter *adapter,
969                             struct be_tx_obj *txo, u32 head, bool map_single,
970                             u32 copied)
971 {
972         struct device *dev;
973         struct be_eth_wrb *wrb;
974         struct be_queue_info *txq = &txo->q;
975
976         dev = &adapter->pdev->dev;
977         txq->head = head;
978
979         /* skip the first wrb (hdr); it's not mapped */
980         queue_head_inc(txq);
981         while (copied) {
982                 wrb = queue_head_node(txq);
983                 unmap_tx_frag(dev, wrb, map_single);
984                 map_single = false;
985                 copied -= le32_to_cpu(wrb->frag_len);
986                 queue_head_inc(txq);
987         }
988
989         txq->head = head;
990 }
991
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994  * of WRBs used up by the packet.
995  */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997                            struct sk_buff *skb,
998                            struct be_wrb_params *wrb_params)
999 {
1000         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001         struct device *dev = &adapter->pdev->dev;
1002         bool map_single = false;
1003         u32 head;
1004         dma_addr_t busaddr;
1005         int len;
1006
1007         head = be_tx_get_wrb_hdr(txo);
1008
1009         if (skb->len > skb->data_len) {
1010                 len = skb_headlen(skb);
1011
1012                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013                 if (dma_mapping_error(dev, busaddr))
1014                         goto dma_err;
1015                 map_single = true;
1016                 be_tx_setup_wrb_frag(txo, busaddr, len);
1017                 copied += len;
1018         }
1019
1020         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022                 len = skb_frag_size(frag);
1023
1024                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025                 if (dma_mapping_error(dev, busaddr))
1026                         goto dma_err;
1027                 be_tx_setup_wrb_frag(txo, busaddr, len);
1028                 copied += len;
1029         }
1030
1031         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033         be_tx_stats_update(txo, skb);
1034         return wrb_cnt;
1035
1036 dma_err:
1037         adapter->drv_stats.dma_map_errors++;
1038         be_xmit_restore(adapter, txo, head, map_single, copied);
1039         return 0;
1040 }
1041
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048                                              struct sk_buff *skb,
1049                                              struct be_wrb_params
1050                                              *wrb_params)
1051 {
1052         u16 vlan_tag = 0;
1053
1054         skb = skb_share_check(skb, GFP_ATOMIC);
1055         if (unlikely(!skb))
1056                 return skb;
1057
1058         if (skb_vlan_tag_present(skb))
1059                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1060
1061         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1062                 if (!vlan_tag)
1063                         vlan_tag = adapter->pvid;
1064                 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1065                  * skip VLAN insertion
1066                  */
1067                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068         }
1069
1070         if (vlan_tag) {
1071                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                 vlan_tag);
1073                 if (unlikely(!skb))
1074                         return skb;
1075                 skb->vlan_tci = 0;
1076         }
1077
1078         /* Insert the outer VLAN, if any */
1079         if (adapter->qnq_vid) {
1080                 vlan_tag = adapter->qnq_vid;
1081                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                 vlan_tag);
1083                 if (unlikely(!skb))
1084                         return skb;
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086         }
1087
1088         return skb;
1089 }
1090
1091 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092 {
1093         struct ethhdr *eh = (struct ethhdr *)skb->data;
1094         u16 offset = ETH_HLEN;
1095
1096         if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                 offset += sizeof(struct ipv6hdr);
1100                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                     ip6h->nexthdr != NEXTHDR_UDP) {
1102                         struct ipv6_opt_hdr *ehdr =
1103                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                         if (ehdr->hdrlen == 0xff)
1107                                 return true;
1108                 }
1109         }
1110         return false;
1111 }
1112
1113 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114 {
1115         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116 }
1117
1118 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121 }
1122
1123 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                   struct sk_buff *skb,
1125                                                   struct be_wrb_params
1126                                                   *wrb_params)
1127 {
1128         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129         unsigned int eth_hdr_len;
1130         struct iphdr *ip;
1131
1132         /* For padded packets, BE HW modifies tot_len field in IP header
1133          * incorrecly when VLAN tag is inserted by HW.
1134          * For padded packets, Lancer computes incorrect checksum.
1135          */
1136         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                 VLAN_ETH_HLEN : ETH_HLEN;
1138         if (skb->len <= 60 &&
1139             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140             is_ipv4_pkt(skb)) {
1141                 ip = (struct iphdr *)ip_hdr(skb);
1142                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even though it is disabled.
1154          * Manually insert VLAN in pkt.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lockup when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent:
1173          * ASIC lockup when the ASIC inserts VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32b or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36b.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there are odd set of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_broadcast_packet(eh, adapter)        \
1269                 (is_multicast_ether_addr(eh->h_dest) && \
1270                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1271
1272 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1273
1274 #define is_arp_filt_enabled(adapter)    \
1275                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1276
1277 #define is_dhcp_client_filt_enabled(adapter)    \
1278                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1279
1280 #define is_dhcp_srvr_filt_enabled(adapter)      \
1281                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1282
1283 #define is_nbios_filt_enabled(adapter)  \
1284                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1285
1286 #define is_ipv6_na_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask &       \
1288                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1289
1290 #define is_ipv6_ra_filt_enabled(adapter)        \
1291                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1292
1293 #define is_ipv6_ras_filt_enabled(adapter)       \
1294                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1295
1296 #define is_broadcast_filt_enabled(adapter)      \
1297                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1298
1299 #define is_multicast_filt_enabled(adapter)      \
1300                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1301
1302 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1303                                struct sk_buff **skb)
1304 {
1305         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1306         bool os2bmc = false;
1307
1308         if (!be_is_os2bmc_enabled(adapter))
1309                 goto done;
1310
1311         if (!is_multicast_ether_addr(eh->h_dest))
1312                 goto done;
1313
1314         if (is_mc_allowed_on_bmc(adapter, eh) ||
1315             is_bc_allowed_on_bmc(adapter, eh) ||
1316             is_arp_allowed_on_bmc(adapter, (*skb))) {
1317                 os2bmc = true;
1318                 goto done;
1319         }
1320
1321         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1322                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1323                 u8 nexthdr = hdr->nexthdr;
1324
1325                 if (nexthdr == IPPROTO_ICMPV6) {
1326                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1327
1328                         switch (icmp6->icmp6_type) {
1329                         case NDISC_ROUTER_ADVERTISEMENT:
1330                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1331                                 goto done;
1332                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1333                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1334                                 goto done;
1335                         default:
1336                                 break;
1337                         }
1338                 }
1339         }
1340
1341         if (is_udp_pkt((*skb))) {
1342                 struct udphdr *udp = udp_hdr((*skb));
1343
1344                 switch (ntohs(udp->dest)) {
1345                 case DHCP_CLIENT_PORT:
1346                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCP_SERVER_PORT:
1349                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1350                         goto done;
1351                 case NET_BIOS_PORT1:
1352                 case NET_BIOS_PORT2:
1353                         os2bmc = is_nbios_filt_enabled(adapter);
1354                         goto done;
1355                 case DHCPV6_RAS_PORT:
1356                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1357                         goto done;
1358                 default:
1359                         break;
1360                 }
1361         }
1362 done:
1363         /* For packets over a vlan, which are destined
1364          * to BMC, asic expects the vlan to be inline in the packet.
1365          */
1366         if (os2bmc)
1367                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1368
1369         return os2bmc;
1370 }
1371
1372 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1373 {
1374         struct be_adapter *adapter = netdev_priv(netdev);
1375         u16 q_idx = skb_get_queue_mapping(skb);
1376         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1377         struct be_wrb_params wrb_params = { 0 };
1378         bool flush = !skb->xmit_more;
1379         u16 wrb_cnt;
1380
1381         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1382         if (unlikely(!skb))
1383                 goto drop;
1384
1385         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1386
1387         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1388         if (unlikely(!wrb_cnt)) {
1389                 dev_kfree_skb_any(skb);
1390                 goto drop;
1391         }
1392
1393         /* if os2bmc is enabled and if the pkt is destined to bmc,
1394          * enqueue the pkt a 2nd time with mgmt bit set.
1395          */
1396         if (be_send_pkt_to_bmc(adapter, &skb)) {
1397                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1398                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1399                 if (unlikely(!wrb_cnt))
1400                         goto drop;
1401                 else
1402                         skb_get(skb);
1403         }
1404
1405         if (be_is_txq_full(txo)) {
1406                 netif_stop_subqueue(netdev, q_idx);
1407                 tx_stats(txo)->tx_stops++;
1408         }
1409
1410         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 drop:
1415         tx_stats(txo)->tx_drv_drops++;
1416         /* Flush the already enqueued tx requests */
1417         if (flush && txo->pend_wrb_cnt)
1418                 be_xmit_flush(adapter, txo);
1419
1420         return NETDEV_TX_OK;
1421 }
1422
1423 static void be_tx_timeout(struct net_device *netdev)
1424 {
1425         struct be_adapter *adapter = netdev_priv(netdev);
1426         struct device *dev = &adapter->pdev->dev;
1427         struct be_tx_obj *txo;
1428         struct sk_buff *skb;
1429         struct tcphdr *tcphdr;
1430         struct udphdr *udphdr;
1431         u32 *entry;
1432         int status;
1433         int i, j;
1434
1435         for_all_tx_queues(adapter, txo, i) {
1436                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1437                          i, txo->q.head, txo->q.tail,
1438                          atomic_read(&txo->q.used), txo->q.id);
1439
1440                 entry = txo->q.dma_mem.va;
1441                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1442                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1443                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1444                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1445                                          j, entry[j], entry[j + 1],
1446                                          entry[j + 2], entry[j + 3]);
1447                         }
1448                 }
1449
1450                 entry = txo->cq.dma_mem.va;
1451                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1452                          i, txo->cq.head, txo->cq.tail,
1453                          atomic_read(&txo->cq.used));
1454                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1455                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1456                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1457                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1458                                          j, entry[j], entry[j + 1],
1459                                          entry[j + 2], entry[j + 3]);
1460                         }
1461                 }
1462
1463                 for (j = 0; j < TX_Q_LEN; j++) {
1464                         if (txo->sent_skb_list[j]) {
1465                                 skb = txo->sent_skb_list[j];
1466                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1467                                         tcphdr = tcp_hdr(skb);
1468                                         dev_info(dev, "TCP source port %d\n",
1469                                                  ntohs(tcphdr->source));
1470                                         dev_info(dev, "TCP dest port %d\n",
1471                                                  ntohs(tcphdr->dest));
1472                                         dev_info(dev, "TCP sequence num %d\n",
1473                                                  ntohs(tcphdr->seq));
1474                                         dev_info(dev, "TCP ack_seq %d\n",
1475                                                  ntohs(tcphdr->ack_seq));
1476                                 } else if (ip_hdr(skb)->protocol ==
1477                                            IPPROTO_UDP) {
1478                                         udphdr = udp_hdr(skb);
1479                                         dev_info(dev, "UDP source port %d\n",
1480                                                  ntohs(udphdr->source));
1481                                         dev_info(dev, "UDP dest port %d\n",
1482                                                  ntohs(udphdr->dest));
1483                                 }
1484                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1485                                          j, skb, skb->len, skb->protocol);
1486                         }
1487                 }
1488         }
1489
1490         if (lancer_chip(adapter)) {
1491                 dev_info(dev, "Initiating reset due to tx timeout\n");
1492                 dev_info(dev, "Resetting adapter\n");
1493                 status = lancer_physdev_ctrl(adapter,
1494                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1495                 if (status)
1496                         dev_err(dev, "Reset failed .. Reboot server\n");
1497         }
1498 }
1499
1500 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1501 {
1502         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1503                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1504 }
1505
1506 static int be_set_vlan_promisc(struct be_adapter *adapter)
1507 {
1508         struct device *dev = &adapter->pdev->dev;
1509         int status;
1510
1511         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1512                 return 0;
1513
1514         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1515         if (!status) {
1516                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1517                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1518         } else {
1519                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1520         }
1521         return status;
1522 }
1523
1524 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1525 {
1526         struct device *dev = &adapter->pdev->dev;
1527         int status;
1528
1529         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1530         if (!status) {
1531                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1532                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1533         }
1534         return status;
1535 }
1536
1537 /*
1538  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1539  * If the user configures more, place BE in vlan promiscuous mode.
1540  */
1541 static int be_vid_config(struct be_adapter *adapter)
1542 {
1543         struct device *dev = &adapter->pdev->dev;
1544         u16 vids[BE_NUM_VLANS_SUPPORTED];
1545         u16 num = 0, i = 0;
1546         int status = 0;
1547
1548         /* No need to change the VLAN state if the I/F is in promiscuous */
1549         if (adapter->netdev->flags & IFF_PROMISC)
1550                 return 0;
1551
1552         if (adapter->vlans_added > be_max_vlans(adapter))
1553                 return be_set_vlan_promisc(adapter);
1554
1555         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1556                 status = be_clear_vlan_promisc(adapter);
1557                 if (status)
1558                         return status;
1559         }
1560         /* Construct VLAN Table to give to HW */
1561         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1562                 vids[num++] = cpu_to_le16(i);
1563
1564         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1565         if (status) {
1566                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1567                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1568                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1569                     addl_status(status) ==
1570                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1571                         return be_set_vlan_promisc(adapter);
1572         }
1573         return status;
1574 }
1575
1576 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1577 {
1578         struct be_adapter *adapter = netdev_priv(netdev);
1579         int status = 0;
1580
1581         mutex_lock(&adapter->rx_filter_lock);
1582
1583         /* Packets with VID 0 are always received by Lancer by default */
1584         if (lancer_chip(adapter) && vid == 0)
1585                 goto done;
1586
1587         if (test_bit(vid, adapter->vids))
1588                 goto done;
1589
1590         set_bit(vid, adapter->vids);
1591         adapter->vlans_added++;
1592
1593         status = be_vid_config(adapter);
1594 done:
1595         mutex_unlock(&adapter->rx_filter_lock);
1596         return status;
1597 }
1598
1599 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1600 {
1601         struct be_adapter *adapter = netdev_priv(netdev);
1602         int status = 0;
1603
1604         mutex_lock(&adapter->rx_filter_lock);
1605
1606         /* Packets with VID 0 are always received by Lancer by default */
1607         if (lancer_chip(adapter) && vid == 0)
1608                 goto done;
1609
1610         if (!test_bit(vid, adapter->vids))
1611                 goto done;
1612
1613         clear_bit(vid, adapter->vids);
1614         adapter->vlans_added--;
1615
1616         status = be_vid_config(adapter);
1617 done:
1618         mutex_unlock(&adapter->rx_filter_lock);
1619         return status;
1620 }
1621
1622 static void be_set_all_promisc(struct be_adapter *adapter)
1623 {
1624         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1625         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1626 }
1627
1628 static void be_set_mc_promisc(struct be_adapter *adapter)
1629 {
1630         int status;
1631
1632         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1633                 return;
1634
1635         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1636         if (!status)
1637                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1638 }
1639
1640 static void be_set_uc_promisc(struct be_adapter *adapter)
1641 {
1642         int status;
1643
1644         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1645                 return;
1646
1647         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1648         if (!status)
1649                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1650 }
1651
1652 static void be_clear_uc_promisc(struct be_adapter *adapter)
1653 {
1654         int status;
1655
1656         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1657                 return;
1658
1659         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1660         if (!status)
1661                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1662 }
1663
1664 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1665  * We use a single callback function for both sync and unsync. We really don't
1666  * add/remove addresses through this callback. But, we use it to detect changes
1667  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1668  */
1669 static int be_uc_list_update(struct net_device *netdev,
1670                              const unsigned char *addr)
1671 {
1672         struct be_adapter *adapter = netdev_priv(netdev);
1673
1674         adapter->update_uc_list = true;
1675         return 0;
1676 }
1677
1678 static int be_mc_list_update(struct net_device *netdev,
1679                              const unsigned char *addr)
1680 {
1681         struct be_adapter *adapter = netdev_priv(netdev);
1682
1683         adapter->update_mc_list = true;
1684         return 0;
1685 }
1686
1687 static void be_set_mc_list(struct be_adapter *adapter)
1688 {
1689         struct net_device *netdev = adapter->netdev;
1690         struct netdev_hw_addr *ha;
1691         bool mc_promisc = false;
1692         int status;
1693
1694         netif_addr_lock_bh(netdev);
1695         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1696
1697         if (netdev->flags & IFF_PROMISC) {
1698                 adapter->update_mc_list = false;
1699         } else if (netdev->flags & IFF_ALLMULTI ||
1700                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1701                 /* Enable multicast promisc if num configured exceeds
1702                  * what we support
1703                  */
1704                 mc_promisc = true;
1705                 adapter->update_mc_list = false;
1706         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1707                 /* Update mc-list unconditionally if the iface was previously
1708                  * in mc-promisc mode and now is out of that mode.
1709                  */
1710                 adapter->update_mc_list = true;
1711         }
1712
1713         if (adapter->update_mc_list) {
1714                 int i = 0;
1715
1716                 /* cache the mc-list in adapter */
1717                 netdev_for_each_mc_addr(ha, netdev) {
1718                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1719                         i++;
1720                 }
1721                 adapter->mc_count = netdev_mc_count(netdev);
1722         }
1723         netif_addr_unlock_bh(netdev);
1724
1725         if (mc_promisc) {
1726                 be_set_mc_promisc(adapter);
1727         } else if (adapter->update_mc_list) {
1728                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1729                 if (!status)
1730                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1731                 else
1732                         be_set_mc_promisc(adapter);
1733
1734                 adapter->update_mc_list = false;
1735         }
1736 }
1737
1738 static void be_clear_mc_list(struct be_adapter *adapter)
1739 {
1740         struct net_device *netdev = adapter->netdev;
1741
1742         __dev_mc_unsync(netdev, NULL);
1743         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1744         adapter->mc_count = 0;
1745 }
1746
1747 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1748 {
1749         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1750                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1751                 return 0;
1752         }
1753
1754         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1755                                adapter->if_handle,
1756                                &adapter->pmac_id[uc_idx + 1], 0);
1757 }
1758
1759 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1760 {
1761         if (pmac_id == adapter->pmac_id[0])
1762                 return;
1763
1764         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1765 }
1766
1767 static void be_set_uc_list(struct be_adapter *adapter)
1768 {
1769         struct net_device *netdev = adapter->netdev;
1770         struct netdev_hw_addr *ha;
1771         bool uc_promisc = false;
1772         int curr_uc_macs = 0, i;
1773
1774         netif_addr_lock_bh(netdev);
1775         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1776
1777         if (netdev->flags & IFF_PROMISC) {
1778                 adapter->update_uc_list = false;
1779         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1780                 uc_promisc = true;
1781                 adapter->update_uc_list = false;
1782         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1783                 /* Update uc-list unconditionally if the iface was previously
1784                  * in uc-promisc mode and now is out of that mode.
1785                  */
1786                 adapter->update_uc_list = true;
1787         }
1788
1789         if (adapter->update_uc_list) {
1790                 /* cache the uc-list in adapter array */
1791                 i = 0;
1792                 netdev_for_each_uc_addr(ha, netdev) {
1793                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1794                         i++;
1795                 }
1796                 curr_uc_macs = netdev_uc_count(netdev);
1797         }
1798         netif_addr_unlock_bh(netdev);
1799
1800         if (uc_promisc) {
1801                 be_set_uc_promisc(adapter);
1802         } else if (adapter->update_uc_list) {
1803                 be_clear_uc_promisc(adapter);
1804
1805                 for (i = 0; i < adapter->uc_macs; i++)
1806                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1807
1808                 for (i = 0; i < curr_uc_macs; i++)
1809                         be_uc_mac_add(adapter, i);
1810                 adapter->uc_macs = curr_uc_macs;
1811                 adapter->update_uc_list = false;
1812         }
1813 }
1814
1815 static void be_clear_uc_list(struct be_adapter *adapter)
1816 {
1817         struct net_device *netdev = adapter->netdev;
1818         int i;
1819
1820         __dev_uc_unsync(netdev, NULL);
1821         for (i = 0; i < adapter->uc_macs; i++)
1822                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1823
1824         adapter->uc_macs = 0;
1825 }
1826
1827 static void __be_set_rx_mode(struct be_adapter *adapter)
1828 {
1829         struct net_device *netdev = adapter->netdev;
1830
1831         mutex_lock(&adapter->rx_filter_lock);
1832
1833         if (netdev->flags & IFF_PROMISC) {
1834                 if (!be_in_all_promisc(adapter))
1835                         be_set_all_promisc(adapter);
1836         } else if (be_in_all_promisc(adapter)) {
1837                 /* We need to re-program the vlan-list or clear
1838                  * vlan-promisc mode (if needed) when the interface
1839                  * comes out of promisc mode.
1840                  */
1841                 be_vid_config(adapter);
1842         }
1843
1844         be_set_uc_list(adapter);
1845         be_set_mc_list(adapter);
1846
1847         mutex_unlock(&adapter->rx_filter_lock);
1848 }
1849
1850 static void be_work_set_rx_mode(struct work_struct *work)
1851 {
1852         struct be_cmd_work *cmd_work =
1853                                 container_of(work, struct be_cmd_work, work);
1854
1855         __be_set_rx_mode(cmd_work->adapter);
1856         kfree(cmd_work);
1857 }
1858
1859 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1860 {
1861         struct be_adapter *adapter = netdev_priv(netdev);
1862         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1863         int status;
1864
1865         if (!sriov_enabled(adapter))
1866                 return -EPERM;
1867
1868         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1869                 return -EINVAL;
1870
1871         /* Proceed further only if user provided MAC is different
1872          * from active MAC
1873          */
1874         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1875                 return 0;
1876
1877         if (BEx_chip(adapter)) {
1878                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1879                                 vf + 1);
1880
1881                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1882                                          &vf_cfg->pmac_id, vf + 1);
1883         } else {
1884                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1885                                         vf + 1);
1886         }
1887
1888         if (status) {
1889                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1890                         mac, vf, status);
1891                 return be_cmd_status(status);
1892         }
1893
1894         ether_addr_copy(vf_cfg->mac_addr, mac);
1895
1896         return 0;
1897 }
1898
1899 static int be_get_vf_config(struct net_device *netdev, int vf,
1900                             struct ifla_vf_info *vi)
1901 {
1902         struct be_adapter *adapter = netdev_priv(netdev);
1903         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1904
1905         if (!sriov_enabled(adapter))
1906                 return -EPERM;
1907
1908         if (vf >= adapter->num_vfs)
1909                 return -EINVAL;
1910
1911         vi->vf = vf;
1912         vi->max_tx_rate = vf_cfg->tx_rate;
1913         vi->min_tx_rate = 0;
1914         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1915         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1916         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1917         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1918         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1919
1920         return 0;
1921 }
1922
1923 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1924 {
1925         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1926         u16 vids[BE_NUM_VLANS_SUPPORTED];
1927         int vf_if_id = vf_cfg->if_handle;
1928         int status;
1929
1930         /* Enable Transparent VLAN Tagging */
1931         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1932         if (status)
1933                 return status;
1934
1935         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1936         vids[0] = 0;
1937         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1938         if (!status)
1939                 dev_info(&adapter->pdev->dev,
1940                          "Cleared guest VLANs on VF%d", vf);
1941
1942         /* After TVT is enabled, disallow VFs to program VLAN filters */
1943         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1944                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1945                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1946                 if (!status)
1947                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1948         }
1949         return 0;
1950 }
1951
1952 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1953 {
1954         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1955         struct device *dev = &adapter->pdev->dev;
1956         int status;
1957
1958         /* Reset Transparent VLAN Tagging. */
1959         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1960                                        vf_cfg->if_handle, 0, 0);
1961         if (status)
1962                 return status;
1963
1964         /* Allow VFs to program VLAN filtering */
1965         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1966                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1967                                                   BE_PRIV_FILTMGMT, vf + 1);
1968                 if (!status) {
1969                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1970                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1971                 }
1972         }
1973
1974         dev_info(dev,
1975                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1976         return 0;
1977 }
1978
1979 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1980                           __be16 vlan_proto)
1981 {
1982         struct be_adapter *adapter = netdev_priv(netdev);
1983         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1984         int status;
1985
1986         if (!sriov_enabled(adapter))
1987                 return -EPERM;
1988
1989         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1990                 return -EINVAL;
1991
1992         if (vlan_proto != htons(ETH_P_8021Q))
1993                 return -EPROTONOSUPPORT;
1994
1995         if (vlan || qos) {
1996                 vlan |= qos << VLAN_PRIO_SHIFT;
1997                 status = be_set_vf_tvt(adapter, vf, vlan);
1998         } else {
1999                 status = be_clear_vf_tvt(adapter, vf);
2000         }
2001
2002         if (status) {
2003                 dev_err(&adapter->pdev->dev,
2004                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2005                         status);
2006                 return be_cmd_status(status);
2007         }
2008
2009         vf_cfg->vlan_tag = vlan;
2010         return 0;
2011 }
2012
2013 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2014                              int min_tx_rate, int max_tx_rate)
2015 {
2016         struct be_adapter *adapter = netdev_priv(netdev);
2017         struct device *dev = &adapter->pdev->dev;
2018         int percent_rate, status = 0;
2019         u16 link_speed = 0;
2020         u8 link_status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (min_tx_rate)
2029                 return -EINVAL;
2030
2031         if (!max_tx_rate)
2032                 goto config_qos;
2033
2034         status = be_cmd_link_status_query(adapter, &link_speed,
2035                                           &link_status, 0);
2036         if (status)
2037                 goto err;
2038
2039         if (!link_status) {
2040                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2041                 status = -ENETDOWN;
2042                 goto err;
2043         }
2044
2045         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2046                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2047                         link_speed);
2048                 status = -EINVAL;
2049                 goto err;
2050         }
2051
2052         /* On Skyhawk the QOS setting must be done only as a % value */
2053         percent_rate = link_speed / 100;
2054         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2055                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2056                         percent_rate);
2057                 status = -EINVAL;
2058                 goto err;
2059         }
2060
2061 config_qos:
2062         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2063         if (status)
2064                 goto err;
2065
2066         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2067         return 0;
2068
2069 err:
2070         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2071                 max_tx_rate, vf);
2072         return be_cmd_status(status);
2073 }
2074
2075 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2076                                 int link_state)
2077 {
2078         struct be_adapter *adapter = netdev_priv(netdev);
2079         int status;
2080
2081         if (!sriov_enabled(adapter))
2082                 return -EPERM;
2083
2084         if (vf >= adapter->num_vfs)
2085                 return -EINVAL;
2086
2087         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2088         if (status) {
2089                 dev_err(&adapter->pdev->dev,
2090                         "Link state change on VF %d failed: %#x\n", vf, status);
2091                 return be_cmd_status(status);
2092         }
2093
2094         adapter->vf_cfg[vf].plink_tracking = link_state;
2095
2096         return 0;
2097 }
2098
2099 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2100 {
2101         struct be_adapter *adapter = netdev_priv(netdev);
2102         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2103         u8 spoofchk;
2104         int status;
2105
2106         if (!sriov_enabled(adapter))
2107                 return -EPERM;
2108
2109         if (vf >= adapter->num_vfs)
2110                 return -EINVAL;
2111
2112         if (BEx_chip(adapter))
2113                 return -EOPNOTSUPP;
2114
2115         if (enable == vf_cfg->spoofchk)
2116                 return 0;
2117
2118         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2119
2120         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2121                                        0, spoofchk);
2122         if (status) {
2123                 dev_err(&adapter->pdev->dev,
2124                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2125                 return be_cmd_status(status);
2126         }
2127
2128         vf_cfg->spoofchk = enable;
2129         return 0;
2130 }
2131
2132 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2133                           ulong now)
2134 {
2135         aic->rx_pkts_prev = rx_pkts;
2136         aic->tx_reqs_prev = tx_pkts;
2137         aic->jiffies = now;
2138 }
2139
2140 static int be_get_new_eqd(struct be_eq_obj *eqo)
2141 {
2142         struct be_adapter *adapter = eqo->adapter;
2143         int eqd, start;
2144         struct be_aic_obj *aic;
2145         struct be_rx_obj *rxo;
2146         struct be_tx_obj *txo;
2147         u64 rx_pkts = 0, tx_pkts = 0;
2148         ulong now;
2149         u32 pps, delta;
2150         int i;
2151
2152         aic = &adapter->aic_obj[eqo->idx];
2153         if (!aic->enable) {
2154                 if (aic->jiffies)
2155                         aic->jiffies = 0;
2156                 eqd = aic->et_eqd;
2157                 return eqd;
2158         }
2159
2160         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2161                 do {
2162                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2163                         rx_pkts += rxo->stats.rx_pkts;
2164                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2165         }
2166
2167         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2168                 do {
2169                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2170                         tx_pkts += txo->stats.tx_reqs;
2171                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2172         }
2173
2174         /* Skip, if wrapped around or first calculation */
2175         now = jiffies;
2176         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2177             rx_pkts < aic->rx_pkts_prev ||
2178             tx_pkts < aic->tx_reqs_prev) {
2179                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2180                 return aic->prev_eqd;
2181         }
2182
2183         delta = jiffies_to_msecs(now - aic->jiffies);
2184         if (delta == 0)
2185                 return aic->prev_eqd;
2186
2187         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2188                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2189         eqd = (pps / 15000) << 2;
2190
2191         if (eqd < 8)
2192                 eqd = 0;
2193         eqd = min_t(u32, eqd, aic->max_eqd);
2194         eqd = max_t(u32, eqd, aic->min_eqd);
2195
2196         be_aic_update(aic, rx_pkts, tx_pkts, now);
2197
2198         return eqd;
2199 }
2200
2201 /* For Skyhawk-R only */
2202 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2203 {
2204         struct be_adapter *adapter = eqo->adapter;
2205         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2206         ulong now = jiffies;
2207         int eqd;
2208         u32 mult_enc;
2209
2210         if (!aic->enable)
2211                 return 0;
2212
2213         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2214                 eqd = aic->prev_eqd;
2215         else
2216                 eqd = be_get_new_eqd(eqo);
2217
2218         if (eqd > 100)
2219                 mult_enc = R2I_DLY_ENC_1;
2220         else if (eqd > 60)
2221                 mult_enc = R2I_DLY_ENC_2;
2222         else if (eqd > 20)
2223                 mult_enc = R2I_DLY_ENC_3;
2224         else
2225                 mult_enc = R2I_DLY_ENC_0;
2226
2227         aic->prev_eqd = eqd;
2228
2229         return mult_enc;
2230 }
2231
2232 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2233 {
2234         struct be_set_eqd set_eqd[MAX_EVT_QS];
2235         struct be_aic_obj *aic;
2236         struct be_eq_obj *eqo;
2237         int i, num = 0, eqd;
2238
2239         for_all_evt_queues(adapter, eqo, i) {
2240                 aic = &adapter->aic_obj[eqo->idx];
2241                 eqd = be_get_new_eqd(eqo);
2242                 if (force_update || eqd != aic->prev_eqd) {
2243                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2244                         set_eqd[num].eq_id = eqo->q.id;
2245                         aic->prev_eqd = eqd;
2246                         num++;
2247                 }
2248         }
2249
2250         if (num)
2251                 be_cmd_modify_eqd(adapter, set_eqd, num);
2252 }
2253
2254 static void be_rx_stats_update(struct be_rx_obj *rxo,
2255                                struct be_rx_compl_info *rxcp)
2256 {
2257         struct be_rx_stats *stats = rx_stats(rxo);
2258
2259         u64_stats_update_begin(&stats->sync);
2260         stats->rx_compl++;
2261         stats->rx_bytes += rxcp->pkt_size;
2262         stats->rx_pkts++;
2263         if (rxcp->tunneled)
2264                 stats->rx_vxlan_offload_pkts++;
2265         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2266                 stats->rx_mcast_pkts++;
2267         if (rxcp->err)
2268                 stats->rx_compl_err++;
2269         u64_stats_update_end(&stats->sync);
2270 }
2271
2272 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2273 {
2274         /* L4 checksum is not reliable for non TCP/UDP packets.
2275          * Also ignore ipcksm for ipv6 pkts
2276          */
2277         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2278                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2279 }
2280
2281 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2282 {
2283         struct be_adapter *adapter = rxo->adapter;
2284         struct be_rx_page_info *rx_page_info;
2285         struct be_queue_info *rxq = &rxo->q;
2286         u32 frag_idx = rxq->tail;
2287
2288         rx_page_info = &rxo->page_info_tbl[frag_idx];
2289         BUG_ON(!rx_page_info->page);
2290
2291         if (rx_page_info->last_frag) {
2292                 dma_unmap_page(&adapter->pdev->dev,
2293                                dma_unmap_addr(rx_page_info, bus),
2294                                adapter->big_page_size, DMA_FROM_DEVICE);
2295                 rx_page_info->last_frag = false;
2296         } else {
2297                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2298                                         dma_unmap_addr(rx_page_info, bus),
2299                                         rx_frag_size, DMA_FROM_DEVICE);
2300         }
2301
2302         queue_tail_inc(rxq);
2303         atomic_dec(&rxq->used);
2304         return rx_page_info;
2305 }
2306
2307 /* Throwaway the data in the Rx completion */
2308 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2309                                 struct be_rx_compl_info *rxcp)
2310 {
2311         struct be_rx_page_info *page_info;
2312         u16 i, num_rcvd = rxcp->num_rcvd;
2313
2314         for (i = 0; i < num_rcvd; i++) {
2315                 page_info = get_rx_page_info(rxo);
2316                 put_page(page_info->page);
2317                 memset(page_info, 0, sizeof(*page_info));
2318         }
2319 }
2320
2321 /*
2322  * skb_fill_rx_data forms a complete skb for an ether frame
2323  * indicated by rxcp.
2324  */
2325 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2326                              struct be_rx_compl_info *rxcp)
2327 {
2328         struct be_rx_page_info *page_info;
2329         u16 i, j;
2330         u16 hdr_len, curr_frag_len, remaining;
2331         u8 *start;
2332
2333         page_info = get_rx_page_info(rxo);
2334         start = page_address(page_info->page) + page_info->page_offset;
2335         prefetch(start);
2336
2337         /* Copy data in the first descriptor of this completion */
2338         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2339
2340         skb->len = curr_frag_len;
2341         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2342                 memcpy(skb->data, start, curr_frag_len);
2343                 /* Complete packet has now been moved to data */
2344                 put_page(page_info->page);
2345                 skb->data_len = 0;
2346                 skb->tail += curr_frag_len;
2347         } else {
2348                 hdr_len = ETH_HLEN;
2349                 memcpy(skb->data, start, hdr_len);
2350                 skb_shinfo(skb)->nr_frags = 1;
2351                 skb_frag_set_page(skb, 0, page_info->page);
2352                 skb_shinfo(skb)->frags[0].page_offset =
2353                                         page_info->page_offset + hdr_len;
2354                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2355                                   curr_frag_len - hdr_len);
2356                 skb->data_len = curr_frag_len - hdr_len;
2357                 skb->truesize += rx_frag_size;
2358                 skb->tail += hdr_len;
2359         }
2360         page_info->page = NULL;
2361
2362         if (rxcp->pkt_size <= rx_frag_size) {
2363                 BUG_ON(rxcp->num_rcvd != 1);
2364                 return;
2365         }
2366
2367         /* More frags present for this completion */
2368         remaining = rxcp->pkt_size - curr_frag_len;
2369         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2370                 page_info = get_rx_page_info(rxo);
2371                 curr_frag_len = min(remaining, rx_frag_size);
2372
2373                 /* Coalesce all frags from the same physical page in one slot */
2374                 if (page_info->page_offset == 0) {
2375                         /* Fresh page */
2376                         j++;
2377                         skb_frag_set_page(skb, j, page_info->page);
2378                         skb_shinfo(skb)->frags[j].page_offset =
2379                                                         page_info->page_offset;
2380                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2381                         skb_shinfo(skb)->nr_frags++;
2382                 } else {
2383                         put_page(page_info->page);
2384                 }
2385
2386                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2387                 skb->len += curr_frag_len;
2388                 skb->data_len += curr_frag_len;
2389                 skb->truesize += rx_frag_size;
2390                 remaining -= curr_frag_len;
2391                 page_info->page = NULL;
2392         }
2393         BUG_ON(j > MAX_SKB_FRAGS);
2394 }
2395
2396 /* Process the RX completion indicated by rxcp when GRO is disabled */
2397 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2398                                 struct be_rx_compl_info *rxcp)
2399 {
2400         struct be_adapter *adapter = rxo->adapter;
2401         struct net_device *netdev = adapter->netdev;
2402         struct sk_buff *skb;
2403
2404         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2405         if (unlikely(!skb)) {
2406                 rx_stats(rxo)->rx_drops_no_skbs++;
2407                 be_rx_compl_discard(rxo, rxcp);
2408                 return;
2409         }
2410
2411         skb_fill_rx_data(rxo, skb, rxcp);
2412
2413         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2414                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2415         else
2416                 skb_checksum_none_assert(skb);
2417
2418         skb->protocol = eth_type_trans(skb, netdev);
2419         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2420         if (netdev->features & NETIF_F_RXHASH)
2421                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2422
2423         skb->csum_level = rxcp->tunneled;
2424         skb_mark_napi_id(skb, napi);
2425
2426         if (rxcp->vlanf)
2427                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2428
2429         netif_receive_skb(skb);
2430 }
2431
2432 /* Process the RX completion indicated by rxcp when GRO is enabled */
2433 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2434                                     struct napi_struct *napi,
2435                                     struct be_rx_compl_info *rxcp)
2436 {
2437         struct be_adapter *adapter = rxo->adapter;
2438         struct be_rx_page_info *page_info;
2439         struct sk_buff *skb = NULL;
2440         u16 remaining, curr_frag_len;
2441         u16 i, j;
2442
2443         skb = napi_get_frags(napi);
2444         if (!skb) {
2445                 be_rx_compl_discard(rxo, rxcp);
2446                 return;
2447         }
2448
2449         remaining = rxcp->pkt_size;
2450         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2451                 page_info = get_rx_page_info(rxo);
2452
2453                 curr_frag_len = min(remaining, rx_frag_size);
2454
2455                 /* Coalesce all frags from the same physical page in one slot */
2456                 if (i == 0 || page_info->page_offset == 0) {
2457                         /* First frag or Fresh page */
2458                         j++;
2459                         skb_frag_set_page(skb, j, page_info->page);
2460                         skb_shinfo(skb)->frags[j].page_offset =
2461                                                         page_info->page_offset;
2462                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2463                 } else {
2464                         put_page(page_info->page);
2465                 }
2466                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2467                 skb->truesize += rx_frag_size;
2468                 remaining -= curr_frag_len;
2469                 memset(page_info, 0, sizeof(*page_info));
2470         }
2471         BUG_ON(j > MAX_SKB_FRAGS);
2472
2473         skb_shinfo(skb)->nr_frags = j + 1;
2474         skb->len = rxcp->pkt_size;
2475         skb->data_len = rxcp->pkt_size;
2476         skb->ip_summed = CHECKSUM_UNNECESSARY;
2477         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2478         if (adapter->netdev->features & NETIF_F_RXHASH)
2479                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2480
2481         skb->csum_level = rxcp->tunneled;
2482
2483         if (rxcp->vlanf)
2484                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2485
2486         napi_gro_frags(napi);
2487 }
2488
2489 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2490                                  struct be_rx_compl_info *rxcp)
2491 {
2492         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2493         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2494         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2495         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2496         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2497         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2498         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2499         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2500         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2501         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2502         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2503         if (rxcp->vlanf) {
2504                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2505                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2506         }
2507         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2508         rxcp->tunneled =
2509                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2510 }
2511
2512 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2513                                  struct be_rx_compl_info *rxcp)
2514 {
2515         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2516         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2517         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2518         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2519         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2520         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2521         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2522         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2523         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2524         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2525         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2526         if (rxcp->vlanf) {
2527                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2528                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2529         }
2530         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2531         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2532 }
2533
2534 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2535 {
2536         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2537         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2538         struct be_adapter *adapter = rxo->adapter;
2539
2540         /* For checking the valid bit it is Ok to use either definition as the
2541          * valid bit is at the same position in both v0 and v1 Rx compl */
2542         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2543                 return NULL;
2544
2545         rmb();
2546         be_dws_le_to_cpu(compl, sizeof(*compl));
2547
2548         if (adapter->be3_native)
2549                 be_parse_rx_compl_v1(compl, rxcp);
2550         else
2551                 be_parse_rx_compl_v0(compl, rxcp);
2552
2553         if (rxcp->ip_frag)
2554                 rxcp->l4_csum = 0;
2555
2556         if (rxcp->vlanf) {
2557                 /* In QNQ modes, if qnq bit is not set, then the packet was
2558                  * tagged only with the transparent outer vlan-tag and must
2559                  * not be treated as a vlan packet by host
2560                  */
2561                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2562                         rxcp->vlanf = 0;
2563
2564                 if (!lancer_chip(adapter))
2565                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2566
2567                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2568                     !test_bit(rxcp->vlan_tag, adapter->vids))
2569                         rxcp->vlanf = 0;
2570         }
2571
2572         /* As the compl has been parsed, reset it; we wont touch it again */
2573         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2574
2575         queue_tail_inc(&rxo->cq);
2576         return rxcp;
2577 }
2578
2579 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2580 {
2581         u32 order = get_order(size);
2582
2583         if (order > 0)
2584                 gfp |= __GFP_COMP;
2585         return  alloc_pages(gfp, order);
2586 }
2587
2588 /*
2589  * Allocate a page, split it to fragments of size rx_frag_size and post as
2590  * receive buffers to BE
2591  */
2592 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2593 {
2594         struct be_adapter *adapter = rxo->adapter;
2595         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2596         struct be_queue_info *rxq = &rxo->q;
2597         struct page *pagep = NULL;
2598         struct device *dev = &adapter->pdev->dev;
2599         struct be_eth_rx_d *rxd;
2600         u64 page_dmaaddr = 0, frag_dmaaddr;
2601         u32 posted, page_offset = 0, notify = 0;
2602
2603         page_info = &rxo->page_info_tbl[rxq->head];
2604         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2605                 if (!pagep) {
2606                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2607                         if (unlikely(!pagep)) {
2608                                 rx_stats(rxo)->rx_post_fail++;
2609                                 break;
2610                         }
2611                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2612                                                     adapter->big_page_size,
2613                                                     DMA_FROM_DEVICE);
2614                         if (dma_mapping_error(dev, page_dmaaddr)) {
2615                                 put_page(pagep);
2616                                 pagep = NULL;
2617                                 adapter->drv_stats.dma_map_errors++;
2618                                 break;
2619                         }
2620                         page_offset = 0;
2621                 } else {
2622                         get_page(pagep);
2623                         page_offset += rx_frag_size;
2624                 }
2625                 page_info->page_offset = page_offset;
2626                 page_info->page = pagep;
2627
2628                 rxd = queue_head_node(rxq);
2629                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2630                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2631                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2632
2633                 /* Any space left in the current big page for another frag? */
2634                 if ((page_offset + rx_frag_size + rx_frag_size) >
2635                                         adapter->big_page_size) {
2636                         pagep = NULL;
2637                         page_info->last_frag = true;
2638                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2639                 } else {
2640                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2641                 }
2642
2643                 prev_page_info = page_info;
2644                 queue_head_inc(rxq);
2645                 page_info = &rxo->page_info_tbl[rxq->head];
2646         }
2647
2648         /* Mark the last frag of a page when we break out of the above loop
2649          * with no more slots available in the RXQ
2650          */
2651         if (pagep) {
2652                 prev_page_info->last_frag = true;
2653                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2654         }
2655
2656         if (posted) {
2657                 atomic_add(posted, &rxq->used);
2658                 if (rxo->rx_post_starved)
2659                         rxo->rx_post_starved = false;
2660                 do {
2661                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2662                         be_rxq_notify(adapter, rxq->id, notify);
2663                         posted -= notify;
2664                 } while (posted);
2665         } else if (atomic_read(&rxq->used) == 0) {
2666                 /* Let be_worker replenish when memory is available */
2667                 rxo->rx_post_starved = true;
2668         }
2669 }
2670
2671 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2672 {
2673         switch (status) {
2674         case BE_TX_COMP_HDR_PARSE_ERR:
2675                 tx_stats(txo)->tx_hdr_parse_err++;
2676                 break;
2677         case BE_TX_COMP_NDMA_ERR:
2678                 tx_stats(txo)->tx_dma_err++;
2679                 break;
2680         case BE_TX_COMP_ACL_ERR:
2681                 tx_stats(txo)->tx_spoof_check_err++;
2682                 break;
2683         }
2684 }
2685
2686 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2687 {
2688         switch (status) {
2689         case LANCER_TX_COMP_LSO_ERR:
2690                 tx_stats(txo)->tx_tso_err++;
2691                 break;
2692         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2693         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2694                 tx_stats(txo)->tx_spoof_check_err++;
2695                 break;
2696         case LANCER_TX_COMP_QINQ_ERR:
2697                 tx_stats(txo)->tx_qinq_err++;
2698                 break;
2699         case LANCER_TX_COMP_PARITY_ERR:
2700                 tx_stats(txo)->tx_internal_parity_err++;
2701                 break;
2702         case LANCER_TX_COMP_DMA_ERR:
2703                 tx_stats(txo)->tx_dma_err++;
2704                 break;
2705         case LANCER_TX_COMP_SGE_ERR:
2706                 tx_stats(txo)->tx_sge_err++;
2707                 break;
2708         }
2709 }
2710
2711 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2712                                                 struct be_tx_obj *txo)
2713 {
2714         struct be_queue_info *tx_cq = &txo->cq;
2715         struct be_tx_compl_info *txcp = &txo->txcp;
2716         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2717
2718         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2719                 return NULL;
2720
2721         /* Ensure load ordering of valid bit dword and other dwords below */
2722         rmb();
2723         be_dws_le_to_cpu(compl, sizeof(*compl));
2724
2725         txcp->status = GET_TX_COMPL_BITS(status, compl);
2726         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2727
2728         if (txcp->status) {
2729                 if (lancer_chip(adapter)) {
2730                         lancer_update_tx_err(txo, txcp->status);
2731                         /* Reset the adapter incase of TSO,
2732                          * SGE or Parity error
2733                          */
2734                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2735                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2736                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2737                                 be_set_error(adapter, BE_ERROR_TX);
2738                 } else {
2739                         be_update_tx_err(txo, txcp->status);
2740                 }
2741         }
2742
2743         if (be_check_error(adapter, BE_ERROR_TX))
2744                 return NULL;
2745
2746         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2747         queue_tail_inc(tx_cq);
2748         return txcp;
2749 }
2750
2751 static u16 be_tx_compl_process(struct be_adapter *adapter,
2752                                struct be_tx_obj *txo, u16 last_index)
2753 {
2754         struct sk_buff **sent_skbs = txo->sent_skb_list;
2755         struct be_queue_info *txq = &txo->q;
2756         struct sk_buff *skb = NULL;
2757         bool unmap_skb_hdr = false;
2758         struct be_eth_wrb *wrb;
2759         u16 num_wrbs = 0;
2760         u32 frag_index;
2761
2762         do {
2763                 if (sent_skbs[txq->tail]) {
2764                         /* Free skb from prev req */
2765                         if (skb)
2766                                 dev_consume_skb_any(skb);
2767                         skb = sent_skbs[txq->tail];
2768                         sent_skbs[txq->tail] = NULL;
2769                         queue_tail_inc(txq);  /* skip hdr wrb */
2770                         num_wrbs++;
2771                         unmap_skb_hdr = true;
2772                 }
2773                 wrb = queue_tail_node(txq);
2774                 frag_index = txq->tail;
2775                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2776                               (unmap_skb_hdr && skb_headlen(skb)));
2777                 unmap_skb_hdr = false;
2778                 queue_tail_inc(txq);
2779                 num_wrbs++;
2780         } while (frag_index != last_index);
2781         dev_consume_skb_any(skb);
2782
2783         return num_wrbs;
2784 }
2785
2786 /* Return the number of events in the event queue */
2787 static inline int events_get(struct be_eq_obj *eqo)
2788 {
2789         struct be_eq_entry *eqe;
2790         int num = 0;
2791
2792         do {
2793                 eqe = queue_tail_node(&eqo->q);
2794                 if (eqe->evt == 0)
2795                         break;
2796
2797                 rmb();
2798                 eqe->evt = 0;
2799                 num++;
2800                 queue_tail_inc(&eqo->q);
2801         } while (true);
2802
2803         return num;
2804 }
2805
2806 /* Leaves the EQ is disarmed state */
2807 static void be_eq_clean(struct be_eq_obj *eqo)
2808 {
2809         int num = events_get(eqo);
2810
2811         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2812 }
2813
2814 /* Free posted rx buffers that were not used */
2815 static void be_rxq_clean(struct be_rx_obj *rxo)
2816 {
2817         struct be_queue_info *rxq = &rxo->q;
2818         struct be_rx_page_info *page_info;
2819
2820         while (atomic_read(&rxq->used) > 0) {
2821                 page_info = get_rx_page_info(rxo);
2822                 put_page(page_info->page);
2823                 memset(page_info, 0, sizeof(*page_info));
2824         }
2825         BUG_ON(atomic_read(&rxq->used));
2826         rxq->tail = 0;
2827         rxq->head = 0;
2828 }
2829
2830 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2831 {
2832         struct be_queue_info *rx_cq = &rxo->cq;
2833         struct be_rx_compl_info *rxcp;
2834         struct be_adapter *adapter = rxo->adapter;
2835         int flush_wait = 0;
2836
2837         /* Consume pending rx completions.
2838          * Wait for the flush completion (identified by zero num_rcvd)
2839          * to arrive. Notify CQ even when there are no more CQ entries
2840          * for HW to flush partially coalesced CQ entries.
2841          * In Lancer, there is no need to wait for flush compl.
2842          */
2843         for (;;) {
2844                 rxcp = be_rx_compl_get(rxo);
2845                 if (!rxcp) {
2846                         if (lancer_chip(adapter))
2847                                 break;
2848
2849                         if (flush_wait++ > 50 ||
2850                             be_check_error(adapter,
2851                                            BE_ERROR_HW)) {
2852                                 dev_warn(&adapter->pdev->dev,
2853                                          "did not receive flush compl\n");
2854                                 break;
2855                         }
2856                         be_cq_notify(adapter, rx_cq->id, true, 0);
2857                         mdelay(1);
2858                 } else {
2859                         be_rx_compl_discard(rxo, rxcp);
2860                         be_cq_notify(adapter, rx_cq->id, false, 1);
2861                         if (rxcp->num_rcvd == 0)
2862                                 break;
2863                 }
2864         }
2865
2866         /* After cleanup, leave the CQ in unarmed state */
2867         be_cq_notify(adapter, rx_cq->id, false, 0);
2868 }
2869
2870 static void be_tx_compl_clean(struct be_adapter *adapter)
2871 {
2872         struct device *dev = &adapter->pdev->dev;
2873         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2874         struct be_tx_compl_info *txcp;
2875         struct be_queue_info *txq;
2876         u32 end_idx, notified_idx;
2877         struct be_tx_obj *txo;
2878         int i, pending_txqs;
2879
2880         /* Stop polling for compls when HW has been silent for 10ms */
2881         do {
2882                 pending_txqs = adapter->num_tx_qs;
2883
2884                 for_all_tx_queues(adapter, txo, i) {
2885                         cmpl = 0;
2886                         num_wrbs = 0;
2887                         txq = &txo->q;
2888                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2889                                 num_wrbs +=
2890                                         be_tx_compl_process(adapter, txo,
2891                                                             txcp->end_index);
2892                                 cmpl++;
2893                         }
2894                         if (cmpl) {
2895                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2896                                 atomic_sub(num_wrbs, &txq->used);
2897                                 timeo = 0;
2898                         }
2899                         if (!be_is_tx_compl_pending(txo))
2900                                 pending_txqs--;
2901                 }
2902
2903                 if (pending_txqs == 0 || ++timeo > 10 ||
2904                     be_check_error(adapter, BE_ERROR_HW))
2905                         break;
2906
2907                 mdelay(1);
2908         } while (true);
2909
2910         /* Free enqueued TX that was never notified to HW */
2911         for_all_tx_queues(adapter, txo, i) {
2912                 txq = &txo->q;
2913
2914                 if (atomic_read(&txq->used)) {
2915                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2916                                  i, atomic_read(&txq->used));
2917                         notified_idx = txq->tail;
2918                         end_idx = txq->tail;
2919                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2920                                   txq->len);
2921                         /* Use the tx-compl process logic to handle requests
2922                          * that were not sent to the HW.
2923                          */
2924                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2925                         atomic_sub(num_wrbs, &txq->used);
2926                         BUG_ON(atomic_read(&txq->used));
2927                         txo->pend_wrb_cnt = 0;
2928                         /* Since hw was never notified of these requests,
2929                          * reset TXQ indices
2930                          */
2931                         txq->head = notified_idx;
2932                         txq->tail = notified_idx;
2933                 }
2934         }
2935 }
2936
2937 static void be_evt_queues_destroy(struct be_adapter *adapter)
2938 {
2939         struct be_eq_obj *eqo;
2940         int i;
2941
2942         for_all_evt_queues(adapter, eqo, i) {
2943                 if (eqo->q.created) {
2944                         be_eq_clean(eqo);
2945                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2946                         netif_napi_del(&eqo->napi);
2947                         free_cpumask_var(eqo->affinity_mask);
2948                 }
2949                 be_queue_free(adapter, &eqo->q);
2950         }
2951 }
2952
2953 static int be_evt_queues_create(struct be_adapter *adapter)
2954 {
2955         struct be_queue_info *eq;
2956         struct be_eq_obj *eqo;
2957         struct be_aic_obj *aic;
2958         int i, rc;
2959
2960         /* need enough EQs to service both RX and TX queues */
2961         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2962                                     max(adapter->cfg_num_rx_irqs,
2963                                         adapter->cfg_num_tx_irqs));
2964
2965         for_all_evt_queues(adapter, eqo, i) {
2966                 int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968                 aic = &adapter->aic_obj[i];
2969                 eqo->adapter = adapter;
2970                 eqo->idx = i;
2971                 aic->max_eqd = BE_MAX_EQD;
2972                 aic->enable = true;
2973
2974                 eq = &eqo->q;
2975                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2976                                     sizeof(struct be_eq_entry));
2977                 if (rc)
2978                         return rc;
2979
2980                 rc = be_cmd_eq_create(adapter, eqo);
2981                 if (rc)
2982                         return rc;
2983
2984                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2985                         return -ENOMEM;
2986                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2987                                 eqo->affinity_mask);
2988                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2989                                BE_NAPI_WEIGHT);
2990         }
2991         return 0;
2992 }
2993
2994 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2995 {
2996         struct be_queue_info *q;
2997
2998         q = &adapter->mcc_obj.q;
2999         if (q->created)
3000                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3001         be_queue_free(adapter, q);
3002
3003         q = &adapter->mcc_obj.cq;
3004         if (q->created)
3005                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3006         be_queue_free(adapter, q);
3007 }
3008
3009 /* Must be called only after TX qs are created as MCC shares TX EQ */
3010 static int be_mcc_queues_create(struct be_adapter *adapter)
3011 {
3012         struct be_queue_info *q, *cq;
3013
3014         cq = &adapter->mcc_obj.cq;
3015         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3016                            sizeof(struct be_mcc_compl)))
3017                 goto err;
3018
3019         /* Use the default EQ for MCC completions */
3020         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3021                 goto mcc_cq_free;
3022
3023         q = &adapter->mcc_obj.q;
3024         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3025                 goto mcc_cq_destroy;
3026
3027         if (be_cmd_mccq_create(adapter, q, cq))
3028                 goto mcc_q_free;
3029
3030         return 0;
3031
3032 mcc_q_free:
3033         be_queue_free(adapter, q);
3034 mcc_cq_destroy:
3035         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3036 mcc_cq_free:
3037         be_queue_free(adapter, cq);
3038 err:
3039         return -1;
3040 }
3041
3042 static void be_tx_queues_destroy(struct be_adapter *adapter)
3043 {
3044         struct be_queue_info *q;
3045         struct be_tx_obj *txo;
3046         u8 i;
3047
3048         for_all_tx_queues(adapter, txo, i) {
3049                 q = &txo->q;
3050                 if (q->created)
3051                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3052                 be_queue_free(adapter, q);
3053
3054                 q = &txo->cq;
3055                 if (q->created)
3056                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3057                 be_queue_free(adapter, q);
3058         }
3059 }
3060
3061 static int be_tx_qs_create(struct be_adapter *adapter)
3062 {
3063         struct be_queue_info *cq;
3064         struct be_tx_obj *txo;
3065         struct be_eq_obj *eqo;
3066         int status, i;
3067
3068         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3069
3070         for_all_tx_queues(adapter, txo, i) {
3071                 cq = &txo->cq;
3072                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3073                                         sizeof(struct be_eth_tx_compl));
3074                 if (status)
3075                         return status;
3076
3077                 u64_stats_init(&txo->stats.sync);
3078                 u64_stats_init(&txo->stats.sync_compl);
3079
3080                 /* If num_evt_qs is less than num_tx_qs, then more than
3081                  * one txq share an eq
3082                  */
3083                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3084                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3085                 if (status)
3086                         return status;
3087
3088                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3089                                         sizeof(struct be_eth_wrb));
3090                 if (status)
3091                         return status;
3092
3093                 status = be_cmd_txq_create(adapter, txo);
3094                 if (status)
3095                         return status;
3096
3097                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3098                                     eqo->idx);
3099         }
3100
3101         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3102                  adapter->num_tx_qs);
3103         return 0;
3104 }
3105
3106 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3107 {
3108         struct be_queue_info *q;
3109         struct be_rx_obj *rxo;
3110         int i;
3111
3112         for_all_rx_queues(adapter, rxo, i) {
3113                 q = &rxo->cq;
3114                 if (q->created)
3115                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3116                 be_queue_free(adapter, q);
3117         }
3118 }
3119
3120 static int be_rx_cqs_create(struct be_adapter *adapter)
3121 {
3122         struct be_queue_info *eq, *cq;
3123         struct be_rx_obj *rxo;
3124         int rc, i;
3125
3126         adapter->num_rss_qs =
3127                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3128
3129         /* We'll use RSS only if atleast 2 RSS rings are supported. */
3130         if (adapter->num_rss_qs < 2)
3131                 adapter->num_rss_qs = 0;
3132
3133         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3134
3135         /* When the interface is not capable of RSS rings (and there is no
3136          * need to create a default RXQ) we'll still need one RXQ
3137          */
3138         if (adapter->num_rx_qs == 0)
3139                 adapter->num_rx_qs = 1;
3140
3141         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3142         for_all_rx_queues(adapter, rxo, i) {
3143                 rxo->adapter = adapter;
3144                 cq = &rxo->cq;
3145                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3146                                     sizeof(struct be_eth_rx_compl));
3147                 if (rc)
3148                         return rc;
3149
3150                 u64_stats_init(&rxo->stats.sync);
3151                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3152                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3153                 if (rc)
3154                         return rc;
3155         }
3156
3157         dev_info(&adapter->pdev->dev,
3158                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3159         return 0;
3160 }
3161
3162 static irqreturn_t be_intx(int irq, void *dev)
3163 {
3164         struct be_eq_obj *eqo = dev;
3165         struct be_adapter *adapter = eqo->adapter;
3166         int num_evts = 0;
3167
3168         /* IRQ is not expected when NAPI is scheduled as the EQ
3169          * will not be armed.
3170          * But, this can happen on Lancer INTx where it takes
3171          * a while to de-assert INTx or in BE2 where occasionaly
3172          * an interrupt may be raised even when EQ is unarmed.
3173          * If NAPI is already scheduled, then counting & notifying
3174          * events will orphan them.
3175          */
3176         if (napi_schedule_prep(&eqo->napi)) {
3177                 num_evts = events_get(eqo);
3178                 __napi_schedule(&eqo->napi);
3179                 if (num_evts)
3180                         eqo->spurious_intr = 0;
3181         }
3182         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3183
3184         /* Return IRQ_HANDLED only for the the first spurious intr
3185          * after a valid intr to stop the kernel from branding
3186          * this irq as a bad one!
3187          */
3188         if (num_evts || eqo->spurious_intr++ == 0)
3189                 return IRQ_HANDLED;
3190         else
3191                 return IRQ_NONE;
3192 }
3193
3194 static irqreturn_t be_msix(int irq, void *dev)
3195 {
3196         struct be_eq_obj *eqo = dev;
3197
3198         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3199         napi_schedule(&eqo->napi);
3200         return IRQ_HANDLED;
3201 }
3202
3203 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3204 {
3205         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3206 }
3207
3208 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3209                          int budget)
3210 {
3211         struct be_adapter *adapter = rxo->adapter;
3212         struct be_queue_info *rx_cq = &rxo->cq;
3213         struct be_rx_compl_info *rxcp;
3214         u32 work_done;
3215         u32 frags_consumed = 0;
3216
3217         for (work_done = 0; work_done < budget; work_done++) {
3218                 rxcp = be_rx_compl_get(rxo);
3219                 if (!rxcp)
3220                         break;
3221
3222                 /* Is it a flush compl that has no data */
3223                 if (unlikely(rxcp->num_rcvd == 0))
3224                         goto loop_continue;
3225
3226                 /* Discard compl with partial DMA Lancer B0 */
3227                 if (unlikely(!rxcp->pkt_size)) {
3228                         be_rx_compl_discard(rxo, rxcp);
3229                         goto loop_continue;
3230                 }
3231
3232                 /* On BE drop pkts that arrive due to imperfect filtering in
3233                  * promiscuous mode on some skews
3234                  */
3235                 if (unlikely(rxcp->port != adapter->port_num &&
3236                              !lancer_chip(adapter))) {
3237                         be_rx_compl_discard(rxo, rxcp);
3238                         goto loop_continue;
3239                 }
3240
3241                 if (do_gro(rxcp))
3242                         be_rx_compl_process_gro(rxo, napi, rxcp);
3243                 else
3244                         be_rx_compl_process(rxo, napi, rxcp);
3245
3246 loop_continue:
3247                 frags_consumed += rxcp->num_rcvd;
3248                 be_rx_stats_update(rxo, rxcp);
3249         }
3250
3251         if (work_done) {
3252                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3253
3254                 /* When an rx-obj gets into post_starved state, just
3255                  * let be_worker do the posting.
3256                  */
3257                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3258                     !rxo->rx_post_starved)
3259                         be_post_rx_frags(rxo, GFP_ATOMIC,
3260                                          max_t(u32, MAX_RX_POST,
3261                                                frags_consumed));
3262         }
3263
3264         return work_done;
3265 }
3266
3267
3268 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3269                           int idx)
3270 {
3271         int num_wrbs = 0, work_done = 0;
3272         struct be_tx_compl_info *txcp;
3273
3274         while ((txcp = be_tx_compl_get(adapter, txo))) {
3275                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3276                 work_done++;
3277         }
3278
3279         if (work_done) {
3280                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3281                 atomic_sub(num_wrbs, &txo->q.used);
3282
3283                 /* As Tx wrbs have been freed up, wake up netdev queue
3284                  * if it was stopped due to lack of tx wrbs.  */
3285                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3286                     be_can_txq_wake(txo)) {
3287                         netif_wake_subqueue(adapter->netdev, idx);
3288                 }
3289
3290                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3291                 tx_stats(txo)->tx_compl += work_done;
3292                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3293         }
3294 }
3295
3296 int be_poll(struct napi_struct *napi, int budget)
3297 {
3298         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3299         struct be_adapter *adapter = eqo->adapter;
3300         int max_work = 0, work, i, num_evts;
3301         struct be_rx_obj *rxo;
3302         struct be_tx_obj *txo;
3303         u32 mult_enc = 0;
3304
3305         num_evts = events_get(eqo);
3306
3307         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3308                 be_process_tx(adapter, txo, i);
3309
3310         /* This loop will iterate twice for EQ0 in which
3311          * completions of the last RXQ (default one) are also processed
3312          * For other EQs the loop iterates only once
3313          */
3314         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3315                 work = be_process_rx(rxo, napi, budget);
3316                 max_work = max(work, max_work);
3317         }
3318
3319         if (is_mcc_eqo(eqo))
3320                 be_process_mcc(adapter);
3321
3322         if (max_work < budget) {
3323                 napi_complete_done(napi, max_work);
3324
3325                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3326                  * delay via a delay multiplier encoding value
3327                  */
3328                 if (skyhawk_chip(adapter))
3329                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3330
3331                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3332                              mult_enc);
3333         } else {
3334                 /* As we'll continue in polling mode, count and clear events */
3335                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3336         }
3337         return max_work;
3338 }
3339
3340 void be_detect_error(struct be_adapter *adapter)
3341 {
3342         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3343         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3344         struct device *dev = &adapter->pdev->dev;
3345         u16 val;
3346         u32 i;
3347
3348         if (be_check_error(adapter, BE_ERROR_HW))
3349                 return;
3350
3351         if (lancer_chip(adapter)) {
3352                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3353                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3354                         be_set_error(adapter, BE_ERROR_UE);
3355                         sliport_err1 = ioread32(adapter->db +
3356                                                 SLIPORT_ERROR1_OFFSET);
3357                         sliport_err2 = ioread32(adapter->db +
3358                                                 SLIPORT_ERROR2_OFFSET);
3359                         /* Do not log error messages if its a FW reset */
3360                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3361                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3362                                 dev_info(dev, "Reset is in progress\n");
3363                         } else {
3364                                 dev_err(dev, "Error detected in the card\n");
3365                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3366                                         sliport_status);
3367                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3368                                         sliport_err1);
3369                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3370                                         sliport_err2);
3371                         }
3372                 }
3373         } else {
3374                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3375                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3376                 ue_lo_mask = ioread32(adapter->pcicfg +
3377                                       PCICFG_UE_STATUS_LOW_MASK);
3378                 ue_hi_mask = ioread32(adapter->pcicfg +
3379                                       PCICFG_UE_STATUS_HI_MASK);
3380
3381                 ue_lo = (ue_lo & ~ue_lo_mask);
3382                 ue_hi = (ue_hi & ~ue_hi_mask);
3383
3384                 if (ue_lo || ue_hi) {
3385                         /* On certain platforms BE3 hardware can indicate
3386                          * spurious UEs. In case of a UE in the chip,
3387                          * the POST register correctly reports either a
3388                          * FAT_LOG_START state (FW is currently dumping
3389                          * FAT log data) or a ARMFW_UE state. Check for the
3390                          * above states to ascertain if the UE is valid or not.
3391                          */
3392                         if (BE3_chip(adapter)) {
3393                                 val = be_POST_stage_get(adapter);
3394                                 if ((val & POST_STAGE_FAT_LOG_START)
3395                                      != POST_STAGE_FAT_LOG_START &&
3396                                     (val & POST_STAGE_ARMFW_UE)
3397                                      != POST_STAGE_ARMFW_UE &&
3398                                     (val & POST_STAGE_RECOVERABLE_ERR)
3399                                      != POST_STAGE_RECOVERABLE_ERR)
3400                                         return;
3401                         }
3402
3403                         dev_err(dev, "Error detected in the adapter");
3404                         be_set_error(adapter, BE_ERROR_UE);
3405
3406                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3407                                 if (ue_lo & 1)
3408                                         dev_err(dev, "UE: %s bit set\n",
3409                                                 ue_status_low_desc[i]);
3410                         }
3411                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3412                                 if (ue_hi & 1)
3413                                         dev_err(dev, "UE: %s bit set\n",
3414                                                 ue_status_hi_desc[i]);
3415                         }
3416                 }
3417         }
3418 }
3419
3420 static void be_msix_disable(struct be_adapter *adapter)
3421 {
3422         if (msix_enabled(adapter)) {
3423                 pci_disable_msix(adapter->pdev);
3424                 adapter->num_msix_vec = 0;
3425                 adapter->num_msix_roce_vec = 0;
3426         }
3427 }
3428
3429 static int be_msix_enable(struct be_adapter *adapter)
3430 {
3431         unsigned int i, max_roce_eqs;
3432         struct device *dev = &adapter->pdev->dev;
3433         int num_vec;
3434
3435         /* If RoCE is supported, program the max number of vectors that
3436          * could be used for NIC and RoCE, else, just program the number
3437          * we'll use initially.
3438          */
3439         if (be_roce_supported(adapter)) {
3440                 max_roce_eqs =
3441                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3442                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3443                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3444         } else {
3445                 num_vec = max(adapter->cfg_num_rx_irqs,
3446                               adapter->cfg_num_tx_irqs);
3447         }
3448
3449         for (i = 0; i < num_vec; i++)
3450                 adapter->msix_entries[i].entry = i;
3451
3452         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3453                                         MIN_MSIX_VECTORS, num_vec);
3454         if (num_vec < 0)
3455                 goto fail;
3456
3457         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3458                 adapter->num_msix_roce_vec = num_vec / 2;
3459                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3460                          adapter->num_msix_roce_vec);
3461         }
3462
3463         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3464
3465         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3466                  adapter->num_msix_vec);
3467         return 0;
3468
3469 fail:
3470         dev_warn(dev, "MSIx enable failed\n");
3471
3472         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3473         if (be_virtfn(adapter))
3474                 return num_vec;
3475         return 0;
3476 }
3477
3478 static inline int be_msix_vec_get(struct be_adapter *adapter,
3479                                   struct be_eq_obj *eqo)
3480 {
3481         return adapter->msix_entries[eqo->msix_idx].vector;
3482 }
3483
3484 static int be_msix_register(struct be_adapter *adapter)
3485 {
3486         struct net_device *netdev = adapter->netdev;
3487         struct be_eq_obj *eqo;
3488         int status, i, vec;
3489
3490         for_all_evt_queues(adapter, eqo, i) {
3491                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3492                 vec = be_msix_vec_get(adapter, eqo);
3493                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3494                 if (status)
3495                         goto err_msix;
3496
3497                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3498         }
3499
3500         return 0;
3501 err_msix:
3502         for (i--; i >= 0; i--) {
3503                 eqo = &adapter->eq_obj[i];
3504                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3505         }
3506         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3507                  status);
3508         be_msix_disable(adapter);
3509         return status;
3510 }
3511
3512 static int be_irq_register(struct be_adapter *adapter)
3513 {
3514         struct net_device *netdev = adapter->netdev;
3515         int status;
3516
3517         if (msix_enabled(adapter)) {
3518                 status = be_msix_register(adapter);
3519                 if (status == 0)
3520                         goto done;
3521                 /* INTx is not supported for VF */
3522                 if (be_virtfn(adapter))
3523                         return status;
3524         }
3525
3526         /* INTx: only the first EQ is used */
3527         netdev->irq = adapter->pdev->irq;
3528         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3529                              &adapter->eq_obj[0]);
3530         if (status) {
3531                 dev_err(&adapter->pdev->dev,
3532                         "INTx request IRQ failed - err %d\n", status);
3533                 return status;
3534         }
3535 done:
3536         adapter->isr_registered = true;
3537         return 0;
3538 }
3539
3540 static void be_irq_unregister(struct be_adapter *adapter)
3541 {
3542         struct net_device *netdev = adapter->netdev;
3543         struct be_eq_obj *eqo;
3544         int i, vec;
3545
3546         if (!adapter->isr_registered)
3547                 return;
3548
3549         /* INTx */
3550         if (!msix_enabled(adapter)) {
3551                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3552                 goto done;
3553         }
3554
3555         /* MSIx */
3556         for_all_evt_queues(adapter, eqo, i) {
3557                 vec = be_msix_vec_get(adapter, eqo);
3558                 irq_set_affinity_hint(vec, NULL);
3559                 free_irq(vec, eqo);
3560         }
3561
3562 done:
3563         adapter->isr_registered = false;
3564 }
3565
3566 static void be_rx_qs_destroy(struct be_adapter *adapter)
3567 {
3568         struct rss_info *rss = &adapter->rss_info;
3569         struct be_queue_info *q;
3570         struct be_rx_obj *rxo;
3571         int i;
3572
3573         for_all_rx_queues(adapter, rxo, i) {
3574                 q = &rxo->q;
3575                 if (q->created) {
3576                         /* If RXQs are destroyed while in an "out of buffer"
3577                          * state, there is a possibility of an HW stall on
3578                          * Lancer. So, post 64 buffers to each queue to relieve
3579                          * the "out of buffer" condition.
3580                          * Make sure there's space in the RXQ before posting.
3581                          */
3582                         if (lancer_chip(adapter)) {
3583                                 be_rx_cq_clean(rxo);
3584                                 if (atomic_read(&q->used) == 0)
3585                                         be_post_rx_frags(rxo, GFP_KERNEL,
3586                                                          MAX_RX_POST);
3587                         }
3588
3589                         be_cmd_rxq_destroy(adapter, q);
3590                         be_rx_cq_clean(rxo);
3591                         be_rxq_clean(rxo);
3592                 }
3593                 be_queue_free(adapter, q);
3594         }
3595
3596         if (rss->rss_flags) {
3597                 rss->rss_flags = RSS_ENABLE_NONE;
3598                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3599                                   128, rss->rss_hkey);
3600         }
3601 }
3602
3603 static void be_disable_if_filters(struct be_adapter *adapter)
3604 {
3605         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3606         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3607             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3608                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3609                 eth_zero_addr(adapter->dev_mac);
3610         }
3611
3612         be_clear_uc_list(adapter);
3613         be_clear_mc_list(adapter);
3614
3615         /* The IFACE flags are enabled in the open path and cleared
3616          * in the close path. When a VF gets detached from the host and
3617          * assigned to a VM the following happens:
3618          *      - VF's IFACE flags get cleared in the detach path
3619          *      - IFACE create is issued by the VF in the attach path
3620          * Due to a bug in the BE3/Skyhawk-R FW
3621          * (Lancer FW doesn't have the bug), the IFACE capability flags
3622          * specified along with the IFACE create cmd issued by a VF are not
3623          * honoured by FW.  As a consequence, if a *new* driver
3624          * (that enables/disables IFACE flags in open/close)
3625          * is loaded in the host and an *old* driver is * used by a VM/VF,
3626          * the IFACE gets created *without* the needed flags.
3627          * To avoid this, disable RX-filter flags only for Lancer.
3628          */
3629         if (lancer_chip(adapter)) {
3630                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3631                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3632         }
3633 }
3634
3635 static int be_close(struct net_device *netdev)
3636 {
3637         struct be_adapter *adapter = netdev_priv(netdev);
3638         struct be_eq_obj *eqo;
3639         int i;
3640
3641         /* This protection is needed as be_close() may be called even when the
3642          * adapter is in cleared state (after eeh perm failure)
3643          */
3644         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3645                 return 0;
3646
3647         /* Before attempting cleanup ensure all the pending cmds in the
3648          * config_wq have finished execution
3649          */
3650         flush_workqueue(be_wq);
3651
3652         be_disable_if_filters(adapter);
3653
3654         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3655                 for_all_evt_queues(adapter, eqo, i) {
3656                         napi_disable(&eqo->napi);
3657                 }
3658                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3659         }
3660
3661         be_async_mcc_disable(adapter);
3662
3663         /* Wait for all pending tx completions to arrive so that
3664          * all tx skbs are freed.
3665          */
3666         netif_tx_disable(netdev);
3667         be_tx_compl_clean(adapter);
3668
3669         be_rx_qs_destroy(adapter);
3670
3671         for_all_evt_queues(adapter, eqo, i) {
3672                 if (msix_enabled(adapter))
3673                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3674                 else
3675                         synchronize_irq(netdev->irq);
3676                 be_eq_clean(eqo);
3677         }
3678
3679         be_irq_unregister(adapter);
3680
3681         return 0;
3682 }
3683
3684 static int be_rx_qs_create(struct be_adapter *adapter)
3685 {
3686         struct rss_info *rss = &adapter->rss_info;
3687         u8 rss_key[RSS_HASH_KEY_LEN];
3688         struct be_rx_obj *rxo;
3689         int rc, i, j;
3690
3691         for_all_rx_queues(adapter, rxo, i) {
3692                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3693                                     sizeof(struct be_eth_rx_d));
3694                 if (rc)
3695                         return rc;
3696         }
3697
3698         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3699                 rxo = default_rxo(adapter);
3700                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3701                                        rx_frag_size, adapter->if_handle,
3702                                        false, &rxo->rss_id);
3703                 if (rc)
3704                         return rc;
3705         }
3706
3707         for_all_rss_queues(adapter, rxo, i) {
3708                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3709                                        rx_frag_size, adapter->if_handle,
3710                                        true, &rxo->rss_id);
3711                 if (rc)
3712                         return rc;
3713         }
3714
3715         if (be_multi_rxq(adapter)) {
3716                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3717                         for_all_rss_queues(adapter, rxo, i) {
3718                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3719                                         break;
3720                                 rss->rsstable[j + i] = rxo->rss_id;
3721                                 rss->rss_queue[j + i] = i;
3722                         }
3723                 }
3724                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3725                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3726
3727                 if (!BEx_chip(adapter))
3728                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3729                                 RSS_ENABLE_UDP_IPV6;
3730
3731                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3732                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3733                                        RSS_INDIR_TABLE_LEN, rss_key);
3734                 if (rc) {
3735                         rss->rss_flags = RSS_ENABLE_NONE;
3736                         return rc;
3737                 }
3738
3739                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3740         } else {
3741                 /* Disable RSS, if only default RX Q is created */
3742                 rss->rss_flags = RSS_ENABLE_NONE;
3743         }
3744
3745
3746         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3747          * which is a queue empty condition
3748          */
3749         for_all_rx_queues(adapter, rxo, i)
3750                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3751
3752         return 0;
3753 }
3754
3755 static int be_enable_if_filters(struct be_adapter *adapter)
3756 {
3757         int status;
3758
3759         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3760         if (status)
3761                 return status;
3762
3763         /* Normally this condition usually true as the ->dev_mac is zeroed.
3764          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3765          * subsequent be_dev_mac_add() can fail (after fresh boot)
3766          */
3767         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3768                 int old_pmac_id = -1;
3769
3770                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3771                 if (!is_zero_ether_addr(adapter->dev_mac))
3772                         old_pmac_id = adapter->pmac_id[0];
3773
3774                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3775                 if (status)
3776                         return status;
3777
3778                 /* Delete the old programmed MAC as we successfully programmed
3779                  * a new MAC
3780                  */
3781                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3782                         be_dev_mac_del(adapter, old_pmac_id);
3783
3784                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3785         }
3786
3787         if (adapter->vlans_added)
3788                 be_vid_config(adapter);
3789
3790         __be_set_rx_mode(adapter);
3791
3792         return 0;
3793 }
3794
3795 static int be_open(struct net_device *netdev)
3796 {
3797         struct be_adapter *adapter = netdev_priv(netdev);
3798         struct be_eq_obj *eqo;
3799         struct be_rx_obj *rxo;
3800         struct be_tx_obj *txo;
3801         u8 link_status;
3802         int status, i;
3803
3804         status = be_rx_qs_create(adapter);
3805         if (status)
3806                 goto err;
3807
3808         status = be_enable_if_filters(adapter);
3809         if (status)
3810                 goto err;
3811
3812         status = be_irq_register(adapter);
3813         if (status)
3814                 goto err;
3815
3816         for_all_rx_queues(adapter, rxo, i)
3817                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3818
3819         for_all_tx_queues(adapter, txo, i)
3820                 be_cq_notify(adapter, txo->cq.id, true, 0);
3821
3822         be_async_mcc_enable(adapter);
3823
3824         for_all_evt_queues(adapter, eqo, i) {
3825                 napi_enable(&eqo->napi);
3826                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3827         }
3828         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3829
3830         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3831         if (!status)
3832                 be_link_status_update(adapter, link_status);
3833
3834         netif_tx_start_all_queues(netdev);
3835         if (skyhawk_chip(adapter))
3836                 udp_tunnel_get_rx_info(netdev);
3837
3838         return 0;
3839 err:
3840         be_close(adapter->netdev);
3841         return -EIO;
3842 }
3843
3844 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3845 {
3846         u32 addr;
3847
3848         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3849
3850         mac[5] = (u8)(addr & 0xFF);
3851         mac[4] = (u8)((addr >> 8) & 0xFF);
3852         mac[3] = (u8)((addr >> 16) & 0xFF);
3853         /* Use the OUI from the current MAC address */
3854         memcpy(mac, adapter->netdev->dev_addr, 3);
3855 }
3856
3857 /*
3858  * Generate a seed MAC address from the PF MAC Address using jhash.
3859  * MAC Address for VFs are assigned incrementally starting from the seed.
3860  * These addresses are programmed in the ASIC by the PF and the VF driver
3861  * queries for the MAC address during its probe.
3862  */
3863 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3864 {
3865         u32 vf;
3866         int status = 0;
3867         u8 mac[ETH_ALEN];
3868         struct be_vf_cfg *vf_cfg;
3869
3870         be_vf_eth_addr_generate(adapter, mac);
3871
3872         for_all_vfs(adapter, vf_cfg, vf) {
3873                 if (BEx_chip(adapter))
3874                         status = be_cmd_pmac_add(adapter, mac,
3875                                                  vf_cfg->if_handle,
3876                                                  &vf_cfg->pmac_id, vf + 1);
3877                 else
3878                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3879                                                 vf + 1);
3880
3881                 if (status)
3882                         dev_err(&adapter->pdev->dev,
3883                                 "Mac address assignment failed for VF %d\n",
3884                                 vf);
3885                 else
3886                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3887
3888                 mac[5] += 1;
3889         }
3890         return status;
3891 }
3892
3893 static int be_vfs_mac_query(struct be_adapter *adapter)
3894 {
3895         int status, vf;
3896         u8 mac[ETH_ALEN];
3897         struct be_vf_cfg *vf_cfg;
3898
3899         for_all_vfs(adapter, vf_cfg, vf) {
3900                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3901                                                mac, vf_cfg->if_handle,
3902                                                false, vf+1);
3903                 if (status)
3904                         return status;
3905                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3906         }
3907         return 0;
3908 }
3909
3910 static void be_vf_clear(struct be_adapter *adapter)
3911 {
3912         struct be_vf_cfg *vf_cfg;
3913         u32 vf;
3914
3915         if (pci_vfs_assigned(adapter->pdev)) {
3916                 dev_warn(&adapter->pdev->dev,
3917                          "VFs are assigned to VMs: not disabling VFs\n");
3918                 goto done;
3919         }
3920
3921         pci_disable_sriov(adapter->pdev);
3922
3923         for_all_vfs(adapter, vf_cfg, vf) {
3924                 if (BEx_chip(adapter))
3925                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3926                                         vf_cfg->pmac_id, vf + 1);
3927                 else
3928                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3929                                        vf + 1);
3930
3931                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3932         }
3933
3934         if (BE3_chip(adapter))
3935                 be_cmd_set_hsw_config(adapter, 0, 0,
3936                                       adapter->if_handle,
3937                                       PORT_FWD_TYPE_PASSTHRU, 0);
3938 done:
3939         kfree(adapter->vf_cfg);
3940         adapter->num_vfs = 0;
3941         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3942 }
3943
3944 static void be_clear_queues(struct be_adapter *adapter)
3945 {
3946         be_mcc_queues_destroy(adapter);
3947         be_rx_cqs_destroy(adapter);
3948         be_tx_queues_destroy(adapter);
3949         be_evt_queues_destroy(adapter);
3950 }
3951
3952 static void be_cancel_worker(struct be_adapter *adapter)
3953 {
3954         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3955                 cancel_delayed_work_sync(&adapter->work);
3956                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3957         }
3958 }
3959
3960 static void be_cancel_err_detection(struct be_adapter *adapter)
3961 {
3962         struct be_error_recovery *err_rec = &adapter->error_recovery;
3963
3964         if (!be_err_recovery_workq)
3965                 return;
3966
3967         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3968                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3969                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3970         }
3971 }
3972
3973 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3974 {
3975         struct net_device *netdev = adapter->netdev;
3976         struct device *dev = &adapter->pdev->dev;
3977         struct be_vxlan_port *vxlan_port;
3978         __be16 port;
3979         int status;
3980
3981         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3982                                       struct be_vxlan_port, list);
3983         port = vxlan_port->port;
3984
3985         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3986                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3987         if (status) {
3988                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3989                 return status;
3990         }
3991         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3992
3993         status = be_cmd_set_vxlan_port(adapter, port);
3994         if (status) {
3995                 dev_warn(dev, "Failed to add VxLAN port\n");
3996                 return status;
3997         }
3998         adapter->vxlan_port = port;
3999
4000         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4001                                    NETIF_F_TSO | NETIF_F_TSO6 |
4002                                    NETIF_F_GSO_UDP_TUNNEL;
4003
4004         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4005                  be16_to_cpu(port));
4006         return 0;
4007 }
4008
4009 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4010 {
4011         struct net_device *netdev = adapter->netdev;
4012
4013         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4014                 be_cmd_manage_iface(adapter, adapter->if_handle,
4015                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4016
4017         if (adapter->vxlan_port)
4018                 be_cmd_set_vxlan_port(adapter, 0);
4019
4020         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4021         adapter->vxlan_port = 0;
4022
4023         netdev->hw_enc_features = 0;
4024 }
4025
4026 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4027                                 struct be_resources *vft_res)
4028 {
4029         struct be_resources res = adapter->pool_res;
4030         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4031         struct be_resources res_mod = {0};
4032         u16 num_vf_qs = 1;
4033
4034         /* Distribute the queue resources among the PF and it's VFs */
4035         if (num_vfs) {
4036                 /* Divide the rx queues evenly among the VFs and the PF, capped
4037                  * at VF-EQ-count. Any remainder queues belong to the PF.
4038                  */
4039                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4040                                 res.max_rss_qs / (num_vfs + 1));
4041
4042                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4043                  * RSS Tables per port. Provide RSS on VFs, only if number of
4044                  * VFs requested is less than it's PF Pool's RSS Tables limit.
4045                  */
4046                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4047                         num_vf_qs = 1;
4048         }
4049
4050         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4051          * which are modifiable using SET_PROFILE_CONFIG cmd.
4052          */
4053         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4054                                   RESOURCE_MODIFIABLE, 0);
4055
4056         /* If RSS IFACE capability flags are modifiable for a VF, set the
4057          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4058          * more than 1 RSSQ is available for a VF.
4059          * Otherwise, provision only 1 queue pair for VF.
4060          */
4061         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4062                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4063                 if (num_vf_qs > 1) {
4064                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4065                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4066                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4067                 } else {
4068                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4069                                              BE_IF_FLAGS_DEFQ_RSS);
4070                 }
4071         } else {
4072                 num_vf_qs = 1;
4073         }
4074
4075         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4076                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4077                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4078         }
4079
4080         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4081         vft_res->max_rx_qs = num_vf_qs;
4082         vft_res->max_rss_qs = num_vf_qs;
4083         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4084         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4085
4086         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4087          * among the PF and it's VFs, if the fields are changeable
4088          */
4089         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4090                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4091
4092         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4093                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4094
4095         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4096                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4097
4098         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4099                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4100 }
4101
4102 static void be_if_destroy(struct be_adapter *adapter)
4103 {
4104         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4105
4106         kfree(adapter->pmac_id);
4107         adapter->pmac_id = NULL;
4108
4109         kfree(adapter->mc_list);
4110         adapter->mc_list = NULL;
4111
4112         kfree(adapter->uc_list);
4113         adapter->uc_list = NULL;
4114 }
4115
4116 static int be_clear(struct be_adapter *adapter)
4117 {
4118         struct pci_dev *pdev = adapter->pdev;
4119         struct  be_resources vft_res = {0};
4120
4121         be_cancel_worker(adapter);
4122
4123         flush_workqueue(be_wq);
4124
4125         if (sriov_enabled(adapter))
4126                 be_vf_clear(adapter);
4127
4128         /* Re-configure FW to distribute resources evenly across max-supported
4129          * number of VFs, only when VFs are not already enabled.
4130          */
4131         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4132             !pci_vfs_assigned(pdev)) {
4133                 be_calculate_vf_res(adapter,
4134                                     pci_sriov_get_totalvfs(pdev),
4135                                     &vft_res);
4136                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4137                                         pci_sriov_get_totalvfs(pdev),
4138                                         &vft_res);
4139         }
4140
4141         be_disable_vxlan_offloads(adapter);
4142
4143         be_if_destroy(adapter);
4144
4145         be_clear_queues(adapter);
4146
4147         be_msix_disable(adapter);
4148         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4149         return 0;
4150 }
4151
4152 static int be_vfs_if_create(struct be_adapter *adapter)
4153 {
4154         struct be_resources res = {0};
4155         u32 cap_flags, en_flags, vf;
4156         struct be_vf_cfg *vf_cfg;
4157         int status;
4158
4159         /* If a FW profile exists, then cap_flags are updated */
4160         cap_flags = BE_VF_IF_EN_FLAGS;
4161
4162         for_all_vfs(adapter, vf_cfg, vf) {
4163                 if (!BE3_chip(adapter)) {
4164                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4165                                                            ACTIVE_PROFILE_TYPE,
4166                                                            RESOURCE_LIMITS,
4167                                                            vf + 1);
4168                         if (!status) {
4169                                 cap_flags = res.if_cap_flags;
4170                                 /* Prevent VFs from enabling VLAN promiscuous
4171                                  * mode
4172                                  */
4173                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4174                         }
4175                 }
4176
4177                 /* PF should enable IF flags during proxy if_create call */
4178                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4179                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4180                                           &vf_cfg->if_handle, vf + 1);
4181                 if (status)
4182                         return status;
4183         }
4184
4185         return 0;
4186 }
4187
4188 static int be_vf_setup_init(struct be_adapter *adapter)
4189 {
4190         struct be_vf_cfg *vf_cfg;
4191         int vf;
4192
4193         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4194                                   GFP_KERNEL);
4195         if (!adapter->vf_cfg)
4196                 return -ENOMEM;
4197
4198         for_all_vfs(adapter, vf_cfg, vf) {
4199                 vf_cfg->if_handle = -1;
4200                 vf_cfg->pmac_id = -1;
4201         }
4202         return 0;
4203 }
4204
4205 static int be_vf_setup(struct be_adapter *adapter)
4206 {
4207         struct device *dev = &adapter->pdev->dev;
4208         struct be_vf_cfg *vf_cfg;
4209         int status, old_vfs, vf;
4210         bool spoofchk;
4211
4212         old_vfs = pci_num_vf(adapter->pdev);
4213
4214         status = be_vf_setup_init(adapter);
4215         if (status)
4216                 goto err;
4217
4218         if (old_vfs) {
4219                 for_all_vfs(adapter, vf_cfg, vf) {
4220                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4221                         if (status)
4222                                 goto err;
4223                 }
4224
4225                 status = be_vfs_mac_query(adapter);
4226                 if (status)
4227                         goto err;
4228         } else {
4229                 status = be_vfs_if_create(adapter);
4230                 if (status)
4231                         goto err;
4232
4233                 status = be_vf_eth_addr_config(adapter);
4234                 if (status)
4235                         goto err;
4236         }
4237
4238         for_all_vfs(adapter, vf_cfg, vf) {
4239                 /* Allow VFs to programs MAC/VLAN filters */
4240                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4241                                                   vf + 1);
4242                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4243                         status = be_cmd_set_fn_privileges(adapter,
4244                                                           vf_cfg->privileges |
4245                                                           BE_PRIV_FILTMGMT,
4246                                                           vf + 1);
4247                         if (!status) {
4248                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4249                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4250                                          vf);
4251                         }
4252                 }
4253
4254                 /* Allow full available bandwidth */
4255                 if (!old_vfs)
4256                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4257
4258                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4259                                                vf_cfg->if_handle, NULL,
4260                                                &spoofchk);
4261                 if (!status)
4262                         vf_cfg->spoofchk = spoofchk;
4263
4264                 if (!old_vfs) {
4265                         be_cmd_enable_vf(adapter, vf + 1);
4266                         be_cmd_set_logical_link_config(adapter,
4267                                                        IFLA_VF_LINK_STATE_AUTO,
4268                                                        vf+1);
4269                 }
4270         }
4271
4272         if (!old_vfs) {
4273                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4274                 if (status) {
4275                         dev_err(dev, "SRIOV enable failed\n");
4276                         adapter->num_vfs = 0;
4277                         goto err;
4278                 }
4279         }
4280
4281         if (BE3_chip(adapter)) {
4282                 /* On BE3, enable VEB only when SRIOV is enabled */
4283                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4284                                                adapter->if_handle,
4285                                                PORT_FWD_TYPE_VEB, 0);
4286                 if (status)
4287                         goto err;
4288         }
4289
4290         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4291         return 0;
4292 err:
4293         dev_err(dev, "VF setup failed\n");
4294         be_vf_clear(adapter);
4295         return status;
4296 }
4297
4298 /* Converting function_mode bits on BE3 to SH mc_type enums */
4299
4300 static u8 be_convert_mc_type(u32 function_mode)
4301 {
4302         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4303                 return vNIC1;
4304         else if (function_mode & QNQ_MODE)
4305                 return FLEX10;
4306         else if (function_mode & VNIC_MODE)
4307                 return vNIC2;
4308         else if (function_mode & UMC_ENABLED)
4309                 return UMC;
4310         else
4311                 return MC_NONE;
4312 }
4313
4314 /* On BE2/BE3 FW does not suggest the supported limits */
4315 static void BEx_get_resources(struct be_adapter *adapter,
4316                               struct be_resources *res)
4317 {
4318         bool use_sriov = adapter->num_vfs ? 1 : 0;
4319
4320         if (be_physfn(adapter))
4321                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4322         else
4323                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4324
4325         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4326
4327         if (be_is_mc(adapter)) {
4328                 /* Assuming that there are 4 channels per port,
4329                  * when multi-channel is enabled
4330                  */
4331                 if (be_is_qnq_mode(adapter))
4332                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4333                 else
4334                         /* In a non-qnq multichannel mode, the pvid
4335                          * takes up one vlan entry
4336                          */
4337                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4338         } else {
4339                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4340         }
4341
4342         res->max_mcast_mac = BE_MAX_MC;
4343
4344         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4345          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4346          *    *only* if it is RSS-capable.
4347          */
4348         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4349             be_virtfn(adapter) ||
4350             (be_is_mc(adapter) &&
4351              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4352                 res->max_tx_qs = 1;
4353         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4354                 struct be_resources super_nic_res = {0};
4355
4356                 /* On a SuperNIC profile, the driver needs to use the
4357                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4358                  */
4359                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4360                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4361                                           0);
4362                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4363                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4364         } else {
4365                 res->max_tx_qs = BE3_MAX_TX_QS;
4366         }
4367
4368         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4369             !use_sriov && be_physfn(adapter))
4370                 res->max_rss_qs = (adapter->be3_native) ?
4371                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4372         res->max_rx_qs = res->max_rss_qs + 1;
4373
4374         if (be_physfn(adapter))
4375                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4376                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4377         else
4378                 res->max_evt_qs = 1;
4379
4380         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4381         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4382         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4383                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4384 }
4385
4386 static void be_setup_init(struct be_adapter *adapter)
4387 {
4388         adapter->vlan_prio_bmap = 0xff;
4389         adapter->phy.link_speed = -1;
4390         adapter->if_handle = -1;
4391         adapter->be3_native = false;
4392         adapter->if_flags = 0;
4393         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4394         if (be_physfn(adapter))
4395                 adapter->cmd_privileges = MAX_PRIVILEGES;
4396         else
4397                 adapter->cmd_privileges = MIN_PRIVILEGES;
4398 }
4399
4400 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4401  * However, this HW limitation is not exposed to the host via any SLI cmd.
4402  * As a result, in the case of SRIOV and in particular multi-partition configs
4403  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4404  * for distribution between the VFs. This self-imposed limit will determine the
4405  * no: of VFs for which RSS can be enabled.
4406  */
4407 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4408 {
4409         struct be_port_resources port_res = {0};
4410         u8 rss_tables_on_port;
4411         u16 max_vfs = be_max_vfs(adapter);
4412
4413         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4414                                   RESOURCE_LIMITS, 0);
4415
4416         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4417
4418         /* Each PF Pool's RSS Tables limit =
4419          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4420          */
4421         adapter->pool_res.max_rss_tables =
4422                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4423 }
4424
4425 static int be_get_sriov_config(struct be_adapter *adapter)
4426 {
4427         struct be_resources res = {0};
4428         int max_vfs, old_vfs;
4429
4430         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4431                                   RESOURCE_LIMITS, 0);
4432
4433         /* Some old versions of BE3 FW don't report max_vfs value */
4434         if (BE3_chip(adapter) && !res.max_vfs) {
4435                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4436                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4437         }
4438
4439         adapter->pool_res = res;
4440
4441         /* If during previous unload of the driver, the VFs were not disabled,
4442          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4443          * Instead use the TotalVFs value stored in the pci-dev struct.
4444          */
4445         old_vfs = pci_num_vf(adapter->pdev);
4446         if (old_vfs) {
4447                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4448                          old_vfs);
4449
4450                 adapter->pool_res.max_vfs =
4451                         pci_sriov_get_totalvfs(adapter->pdev);
4452                 adapter->num_vfs = old_vfs;
4453         }
4454
4455         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4456                 be_calculate_pf_pool_rss_tables(adapter);
4457                 dev_info(&adapter->pdev->dev,
4458                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4459                          be_max_pf_pool_rss_tables(adapter));
4460         }
4461         return 0;
4462 }
4463
4464 static void be_alloc_sriov_res(struct be_adapter *adapter)
4465 {
4466         int old_vfs = pci_num_vf(adapter->pdev);
4467         struct  be_resources vft_res = {0};
4468         int status;
4469
4470         be_get_sriov_config(adapter);
4471
4472         if (!old_vfs)
4473                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4474
4475         /* When the HW is in SRIOV capable configuration, the PF-pool
4476          * resources are given to PF during driver load, if there are no
4477          * old VFs. This facility is not available in BE3 FW.
4478          * Also, this is done by FW in Lancer chip.
4479          */
4480         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4481                 be_calculate_vf_res(adapter, 0, &vft_res);
4482                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4483                                                  &vft_res);
4484                 if (status)
4485                         dev_err(&adapter->pdev->dev,
4486                                 "Failed to optimize SRIOV resources\n");
4487         }
4488 }
4489
4490 static int be_get_resources(struct be_adapter *adapter)
4491 {
4492         struct device *dev = &adapter->pdev->dev;
4493         struct be_resources res = {0};
4494         int status;
4495
4496         /* For Lancer, SH etc read per-function resource limits from FW.
4497          * GET_FUNC_CONFIG returns per function guaranteed limits.
4498          * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4499          */
4500         if (BEx_chip(adapter)) {
4501                 BEx_get_resources(adapter, &res);
4502         } else {
4503                 status = be_cmd_get_func_config(adapter, &res);
4504                 if (status)
4505                         return status;
4506
4507                 /* If a deafault RXQ must be created, we'll use up one RSSQ*/
4508                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4509                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4510                         res.max_rss_qs -= 1;
4511         }
4512
4513         /* If RoCE is supported stash away half the EQs for RoCE */
4514         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4515                                 res.max_evt_qs / 2 : res.max_evt_qs;
4516         adapter->res = res;
4517
4518         /* If FW supports RSS default queue, then skip creating non-RSS
4519          * queue for non-IP traffic.
4520          */
4521         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4522                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4523
4524         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4525                  be_max_txqs(adapter), be_max_rxqs(adapter),
4526                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4527                  be_max_vfs(adapter));
4528         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4529                  be_max_uc(adapter), be_max_mc(adapter),
4530                  be_max_vlans(adapter));
4531
4532         /* Ensure RX and TX queues are created in pairs at init time */
4533         adapter->cfg_num_rx_irqs =
4534                                 min_t(u16, netif_get_num_default_rss_queues(),
4535                                       be_max_qp_irqs(adapter));
4536         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4537         return 0;
4538 }
4539
4540 static int be_get_config(struct be_adapter *adapter)
4541 {
4542         int status, level;
4543         u16 profile_id;
4544
4545         status = be_cmd_get_cntl_attributes(adapter);
4546         if (status)
4547                 return status;
4548
4549         status = be_cmd_query_fw_cfg(adapter);
4550         if (status)
4551                 return status;
4552
4553         if (!lancer_chip(adapter) && be_physfn(adapter))
4554                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4555
4556         if (BEx_chip(adapter)) {
4557                 level = be_cmd_get_fw_log_level(adapter);
4558                 adapter->msg_enable =
4559                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4560         }
4561
4562         be_cmd_get_acpi_wol_cap(adapter);
4563         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4564         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4565
4566         be_cmd_query_port_name(adapter);
4567
4568         if (be_physfn(adapter)) {
4569                 status = be_cmd_get_active_profile(adapter, &profile_id);
4570                 if (!status)
4571                         dev_info(&adapter->pdev->dev,
4572                                  "Using profile 0x%x\n", profile_id);
4573         }
4574
4575         return 0;
4576 }
4577
4578 static int be_mac_setup(struct be_adapter *adapter)
4579 {
4580         u8 mac[ETH_ALEN];
4581         int status;
4582
4583         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4584                 status = be_cmd_get_perm_mac(adapter, mac);
4585                 if (status)
4586                         return status;
4587
4588                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4589                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4590
4591                 /* Initial MAC for BE3 VFs is already programmed by PF */
4592                 if (BEx_chip(adapter) && be_virtfn(adapter))
4593                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4594         }
4595
4596         return 0;
4597 }
4598
4599 static void be_schedule_worker(struct be_adapter *adapter)
4600 {
4601         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4602         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4603 }
4604
4605 static void be_destroy_err_recovery_workq(void)
4606 {
4607         if (!be_err_recovery_workq)
4608                 return;
4609
4610         flush_workqueue(be_err_recovery_workq);
4611         destroy_workqueue(be_err_recovery_workq);
4612         be_err_recovery_workq = NULL;
4613 }
4614
4615 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4616 {
4617         struct be_error_recovery *err_rec = &adapter->error_recovery;
4618
4619         if (!be_err_recovery_workq)
4620                 return;
4621
4622         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4623                            msecs_to_jiffies(delay));
4624         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4625 }
4626
4627 static int be_setup_queues(struct be_adapter *adapter)
4628 {
4629         struct net_device *netdev = adapter->netdev;
4630         int status;
4631
4632         status = be_evt_queues_create(adapter);
4633         if (status)
4634                 goto err;
4635
4636         status = be_tx_qs_create(adapter);
4637         if (status)
4638                 goto err;
4639
4640         status = be_rx_cqs_create(adapter);
4641         if (status)
4642                 goto err;
4643
4644         status = be_mcc_queues_create(adapter);
4645         if (status)
4646                 goto err;
4647
4648         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4649         if (status)
4650                 goto err;
4651
4652         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4653         if (status)
4654                 goto err;
4655
4656         return 0;
4657 err:
4658         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4659         return status;
4660 }
4661
4662 static int be_if_create(struct be_adapter *adapter)
4663 {
4664         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4665         u32 cap_flags = be_if_cap_flags(adapter);
4666         int status;
4667
4668         /* alloc required memory for other filtering fields */
4669         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4670                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4671         if (!adapter->pmac_id)
4672                 return -ENOMEM;
4673
4674         adapter->mc_list = kcalloc(be_max_mc(adapter),
4675                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4676         if (!adapter->mc_list)
4677                 return -ENOMEM;
4678
4679         adapter->uc_list = kcalloc(be_max_uc(adapter),
4680                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4681         if (!adapter->uc_list)
4682                 return -ENOMEM;
4683
4684         if (adapter->cfg_num_rx_irqs == 1)
4685                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4686
4687         en_flags &= cap_flags;
4688         /* will enable all the needed filter flags in be_open() */
4689         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4690                                   &adapter->if_handle, 0);
4691
4692         if (status)
4693                 return status;
4694
4695         return 0;
4696 }
4697
4698 int be_update_queues(struct be_adapter *adapter)
4699 {
4700         struct net_device *netdev = adapter->netdev;
4701         int status;
4702
4703         if (netif_running(netdev)) {
4704                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4705                 netif_carrier_off(netdev);
4706
4707                 be_close(netdev);
4708         }
4709
4710         be_cancel_worker(adapter);
4711
4712         /* If any vectors have been shared with RoCE we cannot re-program
4713          * the MSIx table.
4714          */
4715         if (!adapter->num_msix_roce_vec)
4716                 be_msix_disable(adapter);
4717
4718         be_clear_queues(adapter);
4719         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4720         if (status)
4721                 return status;
4722
4723         if (!msix_enabled(adapter)) {
4724                 status = be_msix_enable(adapter);
4725                 if (status)
4726                         return status;
4727         }
4728
4729         status = be_if_create(adapter);
4730         if (status)
4731                 return status;
4732
4733         status = be_setup_queues(adapter);
4734         if (status)
4735                 return status;
4736
4737         be_schedule_worker(adapter);
4738
4739         /* The IF was destroyed and re-created. We need to clear
4740          * all promiscuous flags valid for the destroyed IF.
4741          * Without this promisc mode is not restored during
4742          * be_open() because the driver thinks that it is
4743          * already enabled in HW.
4744          */
4745         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4746
4747         if (netif_running(netdev))
4748                 status = be_open(netdev);
4749
4750         return status;
4751 }
4752
4753 static inline int fw_major_num(const char *fw_ver)
4754 {
4755         int fw_major = 0, i;
4756
4757         i = sscanf(fw_ver, "%d.", &fw_major);
4758         if (i != 1)
4759                 return 0;
4760
4761         return fw_major;
4762 }
4763
4764 /* If it is error recovery, FLR the PF
4765  * Else if any VFs are already enabled don't FLR the PF
4766  */
4767 static bool be_reset_required(struct be_adapter *adapter)
4768 {
4769         if (be_error_recovering(adapter))
4770                 return true;
4771         else
4772                 return pci_num_vf(adapter->pdev) == 0;
4773 }
4774
4775 /* Wait for the FW to be ready and perform the required initialization */
4776 static int be_func_init(struct be_adapter *adapter)
4777 {
4778         int status;
4779
4780         status = be_fw_wait_ready(adapter);
4781         if (status)
4782                 return status;
4783
4784         /* FW is now ready; clear errors to allow cmds/doorbell */
4785         be_clear_error(adapter, BE_CLEAR_ALL);
4786
4787         if (be_reset_required(adapter)) {
4788                 status = be_cmd_reset_function(adapter);
4789                 if (status)
4790                         return status;
4791
4792                 /* Wait for interrupts to quiesce after an FLR */
4793                 msleep(100);
4794         }
4795
4796         /* Tell FW we're ready to fire cmds */
4797         status = be_cmd_fw_init(adapter);
4798         if (status)
4799                 return status;
4800
4801         /* Allow interrupts for other ULPs running on NIC function */
4802         be_intr_set(adapter, true);
4803
4804         return 0;
4805 }
4806
4807 static int be_setup(struct be_adapter *adapter)
4808 {
4809         struct device *dev = &adapter->pdev->dev;
4810         int status;
4811
4812         status = be_func_init(adapter);
4813         if (status)
4814                 return status;
4815
4816         be_setup_init(adapter);
4817
4818         if (!lancer_chip(adapter))
4819                 be_cmd_req_native_mode(adapter);
4820
4821         /* invoke this cmd first to get pf_num and vf_num which are needed
4822          * for issuing profile related cmds
4823          */
4824         if (!BEx_chip(adapter)) {
4825                 status = be_cmd_get_func_config(adapter, NULL);
4826                 if (status)
4827                         return status;
4828         }
4829
4830         status = be_get_config(adapter);
4831         if (status)
4832                 goto err;
4833
4834         if (!BE2_chip(adapter) && be_physfn(adapter))
4835                 be_alloc_sriov_res(adapter);
4836
4837         status = be_get_resources(adapter);
4838         if (status)
4839                 goto err;
4840
4841         status = be_msix_enable(adapter);
4842         if (status)
4843                 goto err;
4844
4845         /* will enable all the needed filter flags in be_open() */
4846         status = be_if_create(adapter);
4847         if (status)
4848                 goto err;
4849
4850         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4851         rtnl_lock();
4852         status = be_setup_queues(adapter);
4853         rtnl_unlock();
4854         if (status)
4855                 goto err;
4856
4857         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4858
4859         status = be_mac_setup(adapter);
4860         if (status)
4861                 goto err;
4862
4863         be_cmd_get_fw_ver(adapter);
4864         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4865
4866         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4867                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4868                         adapter->fw_ver);
4869                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4870         }
4871
4872         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4873                                          adapter->rx_fc);
4874         if (status)
4875                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4876                                         &adapter->rx_fc);
4877
4878         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4879                  adapter->tx_fc, adapter->rx_fc);
4880
4881         if (be_physfn(adapter))
4882                 be_cmd_set_logical_link_config(adapter,
4883                                                IFLA_VF_LINK_STATE_AUTO, 0);
4884
4885         /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4886          * confusing a linux bridge or OVS that it might be connected to.
4887          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4888          * when SRIOV is not enabled.
4889          */
4890         if (BE3_chip(adapter))
4891                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4892                                       PORT_FWD_TYPE_PASSTHRU, 0);
4893
4894         if (adapter->num_vfs)
4895                 be_vf_setup(adapter);
4896
4897         status = be_cmd_get_phy_info(adapter);
4898         if (!status && be_pause_supported(adapter))
4899                 adapter->phy.fc_autoneg = 1;
4900
4901         if (be_physfn(adapter) && !lancer_chip(adapter))
4902                 be_cmd_set_features(adapter);
4903
4904         be_schedule_worker(adapter);
4905         adapter->flags |= BE_FLAGS_SETUP_DONE;
4906         return 0;
4907 err:
4908         be_clear(adapter);
4909         return status;
4910 }
4911
4912 #ifdef CONFIG_NET_POLL_CONTROLLER
4913 static void be_netpoll(struct net_device *netdev)
4914 {
4915         struct be_adapter *adapter = netdev_priv(netdev);
4916         struct be_eq_obj *eqo;
4917         int i;
4918
4919         for_all_evt_queues(adapter, eqo, i) {
4920                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4921                 napi_schedule(&eqo->napi);
4922         }
4923 }
4924 #endif
4925
4926 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4927 {
4928         const struct firmware *fw;
4929         int status;
4930
4931         if (!netif_running(adapter->netdev)) {
4932                 dev_err(&adapter->pdev->dev,
4933                         "Firmware load not allowed (interface is down)\n");
4934                 return -ENETDOWN;
4935         }
4936
4937         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4938         if (status)
4939                 goto fw_exit;
4940
4941         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4942
4943         if (lancer_chip(adapter))
4944                 status = lancer_fw_download(adapter, fw);
4945         else
4946                 status = be_fw_download(adapter, fw);
4947
4948         if (!status)
4949                 be_cmd_get_fw_ver(adapter);
4950
4951 fw_exit:
4952         release_firmware(fw);
4953         return status;
4954 }
4955
4956 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4957                                  u16 flags)
4958 {
4959         struct be_adapter *adapter = netdev_priv(dev);
4960         struct nlattr *attr, *br_spec;
4961         int rem;
4962         int status = 0;
4963         u16 mode = 0;
4964
4965         if (!sriov_enabled(adapter))
4966                 return -EOPNOTSUPP;
4967
4968         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4969         if (!br_spec)
4970                 return -EINVAL;
4971
4972         nla_for_each_nested(attr, br_spec, rem) {
4973                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4974                         continue;
4975
4976                 if (nla_len(attr) < sizeof(mode))
4977                         return -EINVAL;
4978
4979                 mode = nla_get_u16(attr);
4980                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4981                         return -EOPNOTSUPP;
4982
4983                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4984                         return -EINVAL;
4985
4986                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4987                                                adapter->if_handle,
4988                                                mode == BRIDGE_MODE_VEPA ?
4989                                                PORT_FWD_TYPE_VEPA :
4990                                                PORT_FWD_TYPE_VEB, 0);
4991                 if (status)
4992                         goto err;
4993
4994                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4995                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4996
4997                 return status;
4998         }
4999 err:
5000         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5001                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5002
5003         return status;
5004 }
5005
5006 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5007                                  struct net_device *dev, u32 filter_mask,
5008                                  int nlflags)
5009 {
5010         struct be_adapter *adapter = netdev_priv(dev);
5011         int status = 0;
5012         u8 hsw_mode;
5013
5014         /* BE and Lancer chips support VEB mode only */
5015         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5016                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5017                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5018                         return 0;
5019                 hsw_mode = PORT_FWD_TYPE_VEB;
5020         } else {
5021                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5022                                                adapter->if_handle, &hsw_mode,
5023                                                NULL);
5024                 if (status)
5025                         return 0;
5026
5027                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5028                         return 0;
5029         }
5030
5031         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5032                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5033                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5034                                        0, 0, nlflags, filter_mask, NULL);
5035 }
5036
5037 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5038                                          void (*func)(struct work_struct *))
5039 {
5040         struct be_cmd_work *work;
5041
5042         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5043         if (!work) {
5044                 dev_err(&adapter->pdev->dev,
5045                         "be_work memory allocation failed\n");
5046                 return NULL;
5047         }
5048
5049         INIT_WORK(&work->work, func);
5050         work->adapter = adapter;
5051         return work;
5052 }
5053
5054 /* VxLAN offload Notes:
5055  *
5056  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5057  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5058  * is expected to work across all types of IP tunnels once exported. Skyhawk
5059  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5060  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5061  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5062  * those other tunnels are unexported on the fly through ndo_features_check().
5063  *
5064  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5065  * adds more than one port, disable offloads and re-enable them again when
5066  * there's only one port left. We maintain a list of ports for this purpose.
5067  */
5068 static void be_work_add_vxlan_port(struct work_struct *work)
5069 {
5070         struct be_cmd_work *cmd_work =
5071                                 container_of(work, struct be_cmd_work, work);
5072         struct be_adapter *adapter = cmd_work->adapter;
5073         struct device *dev = &adapter->pdev->dev;
5074         __be16 port = cmd_work->info.vxlan_port;
5075         struct be_vxlan_port *vxlan_port;
5076         int status;
5077
5078         /* Bump up the alias count if it is an existing port */
5079         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5080                 if (vxlan_port->port == port) {
5081                         vxlan_port->port_aliases++;
5082                         goto done;
5083                 }
5084         }
5085
5086         /* Add a new port to our list. We don't need a lock here since port
5087          * add/delete are done only in the context of a single-threaded work
5088          * queue (be_wq).
5089          */
5090         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5091         if (!vxlan_port)
5092                 goto done;
5093
5094         vxlan_port->port = port;
5095         INIT_LIST_HEAD(&vxlan_port->list);
5096         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5097         adapter->vxlan_port_count++;
5098
5099         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5100                 dev_info(dev,
5101                          "Only one UDP port supported for VxLAN offloads\n");
5102                 dev_info(dev, "Disabling VxLAN offloads\n");
5103                 goto err;
5104         }
5105
5106         if (adapter->vxlan_port_count > 1)
5107                 goto done;
5108
5109         status = be_enable_vxlan_offloads(adapter);
5110         if (!status)
5111                 goto done;
5112
5113 err:
5114         be_disable_vxlan_offloads(adapter);
5115 done:
5116         kfree(cmd_work);
5117         return;
5118 }
5119
5120 static void be_work_del_vxlan_port(struct work_struct *work)
5121 {
5122         struct be_cmd_work *cmd_work =
5123                                 container_of(work, struct be_cmd_work, work);
5124         struct be_adapter *adapter = cmd_work->adapter;
5125         __be16 port = cmd_work->info.vxlan_port;
5126         struct be_vxlan_port *vxlan_port;
5127
5128         /* Nothing to be done if a port alias is being deleted */
5129         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5130                 if (vxlan_port->port == port) {
5131                         if (vxlan_port->port_aliases) {
5132                                 vxlan_port->port_aliases--;
5133                                 goto done;
5134                         }
5135                         break;
5136                 }
5137         }
5138
5139         /* No port aliases left; delete the port from the list */
5140         list_del(&vxlan_port->list);
5141         adapter->vxlan_port_count--;
5142
5143         /* Disable VxLAN offload if this is the offloaded port */
5144         if (adapter->vxlan_port == vxlan_port->port) {
5145                 WARN_ON(adapter->vxlan_port_count);
5146                 be_disable_vxlan_offloads(adapter);
5147                 dev_info(&adapter->pdev->dev,
5148                          "Disabled VxLAN offloads for UDP port %d\n",
5149                          be16_to_cpu(port));
5150                 goto out;
5151         }
5152
5153         /* If only 1 port is left, re-enable VxLAN offload */
5154         if (adapter->vxlan_port_count == 1)
5155                 be_enable_vxlan_offloads(adapter);
5156
5157 out:
5158         kfree(vxlan_port);
5159 done:
5160         kfree(cmd_work);
5161 }
5162
5163 static void be_cfg_vxlan_port(struct net_device *netdev,
5164                               struct udp_tunnel_info *ti,
5165                               void (*func)(struct work_struct *))
5166 {
5167         struct be_adapter *adapter = netdev_priv(netdev);
5168         struct be_cmd_work *cmd_work;
5169
5170         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5171                 return;
5172
5173         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5174                 return;
5175
5176         cmd_work = be_alloc_work(adapter, func);
5177         if (cmd_work) {
5178                 cmd_work->info.vxlan_port = ti->port;
5179                 queue_work(be_wq, &cmd_work->work);
5180         }
5181 }
5182
5183 static void be_del_vxlan_port(struct net_device *netdev,
5184                               struct udp_tunnel_info *ti)
5185 {
5186         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5187 }
5188
5189 static void be_add_vxlan_port(struct net_device *netdev,
5190                               struct udp_tunnel_info *ti)
5191 {
5192         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5193 }
5194
5195 static netdev_features_t be_features_check(struct sk_buff *skb,
5196                                            struct net_device *dev,
5197                                            netdev_features_t features)
5198 {
5199         struct be_adapter *adapter = netdev_priv(dev);
5200         u8 l4_hdr = 0;
5201
5202         if (skb_is_gso(skb)) {
5203                 /* IPv6 TSO requests with extension hdrs are a problem
5204                  * to Lancer and BE3 HW. Disable TSO6 feature.
5205                  */
5206                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5207                         features &= ~NETIF_F_TSO6;
5208
5209                 /* Lancer cannot handle the packet with MSS less than 256.
5210                  * Also it can't handle a TSO packet with a single segment
5211                  * Disable the GSO support in such cases
5212                  */
5213                 if (lancer_chip(adapter) &&
5214                     (skb_shinfo(skb)->gso_size < 256 ||
5215                      skb_shinfo(skb)->gso_segs == 1))
5216                         features &= ~NETIF_F_GSO_MASK;
5217         }
5218
5219         /* The code below restricts offload features for some tunneled and
5220          * Q-in-Q packets.
5221          * Offload features for normal (non tunnel) packets are unchanged.
5222          */
5223         features = vlan_features_check(skb, features);
5224         if (!skb->encapsulation ||
5225             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5226                 return features;
5227
5228         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5229          * should disable tunnel offload features if it's not a VxLAN packet,
5230          * as tunnel offloads have been enabled only for VxLAN. This is done to
5231          * allow other tunneled traffic like GRE work fine while VxLAN
5232          * offloads are configured in Skyhawk-R.
5233          */
5234         switch (vlan_get_protocol(skb)) {
5235         case htons(ETH_P_IP):
5236                 l4_hdr = ip_hdr(skb)->protocol;
5237                 break;
5238         case htons(ETH_P_IPV6):
5239                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5240                 break;
5241         default:
5242                 return features;
5243         }
5244
5245         if (l4_hdr != IPPROTO_UDP ||
5246             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5247             skb->inner_protocol != htons(ETH_P_TEB) ||
5248             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5249                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5250             !adapter->vxlan_port ||
5251             udp_hdr(skb)->dest != adapter->vxlan_port)
5252                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5253
5254         return features;
5255 }
5256
5257 static int be_get_phys_port_id(struct net_device *dev,
5258                                struct netdev_phys_item_id *ppid)
5259 {
5260         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5261         struct be_adapter *adapter = netdev_priv(dev);
5262         u8 *id;
5263
5264         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5265                 return -ENOSPC;
5266
5267         ppid->id[0] = adapter->hba_port_num + 1;
5268         id = &ppid->id[1];
5269         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5270              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5271                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5272
5273         ppid->id_len = id_len;
5274
5275         return 0;
5276 }
5277
5278 static void be_set_rx_mode(struct net_device *dev)
5279 {
5280         struct be_adapter *adapter = netdev_priv(dev);
5281         struct be_cmd_work *work;
5282
5283         work = be_alloc_work(adapter, be_work_set_rx_mode);
5284         if (work)
5285                 queue_work(be_wq, &work->work);
5286 }
5287
5288 static const struct net_device_ops be_netdev_ops = {
5289         .ndo_open               = be_open,
5290         .ndo_stop               = be_close,
5291         .ndo_start_xmit         = be_xmit,
5292         .ndo_set_rx_mode        = be_set_rx_mode,
5293         .ndo_set_mac_address    = be_mac_addr_set,
5294         .ndo_get_stats64        = be_get_stats64,
5295         .ndo_validate_addr      = eth_validate_addr,
5296         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5297         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5298         .ndo_set_vf_mac         = be_set_vf_mac,
5299         .ndo_set_vf_vlan        = be_set_vf_vlan,
5300         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5301         .ndo_get_vf_config      = be_get_vf_config,
5302         .ndo_set_vf_link_state  = be_set_vf_link_state,
5303         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5304         .ndo_tx_timeout         = be_tx_timeout,
5305 #ifdef CONFIG_NET_POLL_CONTROLLER
5306         .ndo_poll_controller    = be_netpoll,
5307 #endif
5308         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5309         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5310         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5311         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5312         .ndo_features_check     = be_features_check,
5313         .ndo_get_phys_port_id   = be_get_phys_port_id,
5314 };
5315
5316 static void be_netdev_init(struct net_device *netdev)
5317 {
5318         struct be_adapter *adapter = netdev_priv(netdev);
5319
5320         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5321                 NETIF_F_GSO_UDP_TUNNEL |
5322                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5323                 NETIF_F_HW_VLAN_CTAG_TX;
5324         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5325                 netdev->hw_features |= NETIF_F_RXHASH;
5326
5327         netdev->features |= netdev->hw_features |
5328                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5329
5330         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5331                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5332
5333         netdev->priv_flags |= IFF_UNICAST_FLT;
5334
5335         netdev->flags |= IFF_MULTICAST;
5336
5337         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5338
5339         netdev->netdev_ops = &be_netdev_ops;
5340
5341         netdev->ethtool_ops = &be_ethtool_ops;
5342
5343         /* MTU range: 256 - 9000 */
5344         netdev->min_mtu = BE_MIN_MTU;
5345         netdev->max_mtu = BE_MAX_MTU;
5346 }
5347
5348 static void be_cleanup(struct be_adapter *adapter)
5349 {
5350         struct net_device *netdev = adapter->netdev;
5351
5352         rtnl_lock();
5353         netif_device_detach(netdev);
5354         if (netif_running(netdev))
5355                 be_close(netdev);
5356         rtnl_unlock();
5357
5358         be_clear(adapter);
5359 }
5360
5361 static int be_resume(struct be_adapter *adapter)
5362 {
5363         struct net_device *netdev = adapter->netdev;
5364         int status;
5365
5366         status = be_setup(adapter);
5367         if (status)
5368                 return status;
5369
5370         rtnl_lock();
5371         if (netif_running(netdev))
5372                 status = be_open(netdev);
5373         rtnl_unlock();
5374
5375         if (status)
5376                 return status;
5377
5378         netif_device_attach(netdev);
5379
5380         return 0;
5381 }
5382
5383 static void be_soft_reset(struct be_adapter *adapter)
5384 {
5385         u32 val;
5386
5387         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5388         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5389         val |= SLIPORT_SOFTRESET_SR_MASK;
5390         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5391 }
5392
5393 static bool be_err_is_recoverable(struct be_adapter *adapter)
5394 {
5395         struct be_error_recovery *err_rec = &adapter->error_recovery;
5396         unsigned long initial_idle_time =
5397                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5398         unsigned long recovery_interval =
5399                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5400         u16 ue_err_code;
5401         u32 val;
5402
5403         val = be_POST_stage_get(adapter);
5404         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5405                 return false;
5406         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5407         if (ue_err_code == 0)
5408                 return false;
5409
5410         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5411                 ue_err_code);
5412
5413         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5414                 dev_err(&adapter->pdev->dev,
5415                         "Cannot recover within %lu sec from driver load\n",
5416                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5417                 return false;
5418         }
5419
5420         if (err_rec->last_recovery_time && time_before_eq(
5421                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5422                 dev_err(&adapter->pdev->dev,
5423                         "Cannot recover within %lu sec from last recovery\n",
5424                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5425                 return false;
5426         }
5427
5428         if (ue_err_code == err_rec->last_err_code) {
5429                 dev_err(&adapter->pdev->dev,
5430                         "Cannot recover from a consecutive TPE error\n");
5431                 return false;
5432         }
5433
5434         err_rec->last_recovery_time = jiffies;
5435         err_rec->last_err_code = ue_err_code;
5436         return true;
5437 }
5438
5439 static int be_tpe_recover(struct be_adapter *adapter)
5440 {
5441         struct be_error_recovery *err_rec = &adapter->error_recovery;
5442         int status = -EAGAIN;
5443         u32 val;
5444
5445         switch (err_rec->recovery_state) {
5446         case ERR_RECOVERY_ST_NONE:
5447                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5448                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5449                 break;
5450
5451         case ERR_RECOVERY_ST_DETECT:
5452                 val = be_POST_stage_get(adapter);
5453                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5454                     POST_STAGE_RECOVERABLE_ERR) {
5455                         dev_err(&adapter->pdev->dev,
5456                                 "Unrecoverable HW error detected: 0x%x\n", val);
5457                         status = -EINVAL;
5458                         err_rec->resched_delay = 0;
5459                         break;
5460                 }
5461
5462                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5463
5464                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5465                  * milliseconds before it checks for final error status in
5466                  * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5467                  * If it does, then PF0 initiates a Soft Reset.
5468                  */
5469                 if (adapter->pf_num == 0) {
5470                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5471                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5472                                         ERR_RECOVERY_UE_DETECT_DURATION;
5473                         break;
5474                 }
5475
5476                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5477                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5478                                         ERR_RECOVERY_UE_DETECT_DURATION;
5479                 break;
5480
5481         case ERR_RECOVERY_ST_RESET:
5482                 if (!be_err_is_recoverable(adapter)) {
5483                         dev_err(&adapter->pdev->dev,
5484                                 "Failed to meet recovery criteria\n");
5485                         status = -EIO;
5486                         err_rec->resched_delay = 0;
5487                         break;
5488                 }
5489                 be_soft_reset(adapter);
5490                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5491                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5492                                         err_rec->ue_to_reset_time;
5493                 break;
5494
5495         case ERR_RECOVERY_ST_PRE_POLL:
5496                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5497                 err_rec->resched_delay = 0;
5498                 status = 0;                     /* done */
5499                 break;
5500
5501         default:
5502                 status = -EINVAL;
5503                 err_rec->resched_delay = 0;
5504                 break;
5505         }
5506
5507         return status;
5508 }
5509
5510 static int be_err_recover(struct be_adapter *adapter)
5511 {
5512         int status;
5513
5514         if (!lancer_chip(adapter)) {
5515                 if (!adapter->error_recovery.recovery_supported ||
5516                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5517                         return -EIO;
5518                 status = be_tpe_recover(adapter);
5519                 if (status)
5520                         goto err;
5521         }
5522
5523         /* Wait for adapter to reach quiescent state before
5524          * destroying queues
5525          */
5526         status = be_fw_wait_ready(adapter);
5527         if (status)
5528                 goto err;
5529
5530         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5531
5532         be_cleanup(adapter);
5533
5534         status = be_resume(adapter);
5535         if (status)
5536                 goto err;
5537
5538         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5539
5540 err:
5541         return status;
5542 }
5543
5544 static void be_err_detection_task(struct work_struct *work)
5545 {
5546         struct be_error_recovery *err_rec =
5547                         container_of(work, struct be_error_recovery,
5548                                      err_detection_work.work);
5549         struct be_adapter *adapter =
5550                         container_of(err_rec, struct be_adapter,
5551                                      error_recovery);
5552         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5553         struct device *dev = &adapter->pdev->dev;
5554         int recovery_status;
5555
5556         be_detect_error(adapter);
5557         if (!be_check_error(adapter, BE_ERROR_HW))
5558                 goto reschedule_task;
5559
5560         recovery_status = be_err_recover(adapter);
5561         if (!recovery_status) {
5562                 err_rec->recovery_retries = 0;
5563                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5564                 dev_info(dev, "Adapter recovery successful\n");
5565                 goto reschedule_task;
5566         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5567                 /* BEx/SH recovery state machine */
5568                 if (adapter->pf_num == 0 &&
5569                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5570                         dev_err(&adapter->pdev->dev,
5571                                 "Adapter recovery in progress\n");
5572                 resched_delay = err_rec->resched_delay;
5573                 goto reschedule_task;
5574         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5575                 /* For VFs, check if PF have allocated resources
5576                  * every second.
5577                  */
5578                 dev_err(dev, "Re-trying adapter recovery\n");
5579                 goto reschedule_task;
5580         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5581                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5582                 /* In case of another error during recovery, it takes 30 sec
5583                  * for adapter to come out of error. Retry error recovery after
5584                  * this time interval.
5585                  */
5586                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5587                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5588                 goto reschedule_task;
5589         } else {
5590                 dev_err(dev, "Adapter recovery failed\n");
5591                 dev_err(dev, "Please reboot server to recover\n");
5592         }
5593
5594         return;
5595
5596 reschedule_task:
5597         be_schedule_err_detection(adapter, resched_delay);
5598 }
5599
5600 static void be_log_sfp_info(struct be_adapter *adapter)
5601 {
5602         int status;
5603
5604         status = be_cmd_query_sfp_info(adapter);
5605         if (!status) {
5606                 dev_err(&adapter->pdev->dev,
5607                         "Port %c: %s Vendor: %s part no: %s",
5608                         adapter->port_name,
5609                         be_misconfig_evt_port_state[adapter->phy_state],
5610                         adapter->phy.vendor_name,
5611                         adapter->phy.vendor_pn);
5612         }
5613         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5614 }
5615
5616 static void be_worker(struct work_struct *work)
5617 {
5618         struct be_adapter *adapter =
5619                 container_of(work, struct be_adapter, work.work);
5620         struct be_rx_obj *rxo;
5621         int i;
5622
5623         if (be_physfn(adapter) &&
5624             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5625                 be_cmd_get_die_temperature(adapter);
5626
5627         /* when interrupts are not yet enabled, just reap any pending
5628          * mcc completions
5629          */
5630         if (!netif_running(adapter->netdev)) {
5631                 local_bh_disable();
5632                 be_process_mcc(adapter);
5633                 local_bh_enable();
5634                 goto reschedule;
5635         }
5636
5637         if (!adapter->stats_cmd_sent) {
5638                 if (lancer_chip(adapter))
5639                         lancer_cmd_get_pport_stats(adapter,
5640                                                    &adapter->stats_cmd);
5641                 else
5642                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5643         }
5644
5645         for_all_rx_queues(adapter, rxo, i) {
5646                 /* Replenish RX-queues starved due to memory
5647                  * allocation failures.
5648                  */
5649                 if (rxo->rx_post_starved)
5650                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5651         }
5652
5653         /* EQ-delay update for Skyhawk is done while notifying EQ */
5654         if (!skyhawk_chip(adapter))
5655                 be_eqd_update(adapter, false);
5656
5657         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5658                 be_log_sfp_info(adapter);
5659
5660 reschedule:
5661         adapter->work_counter++;
5662         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5663 }
5664
5665 static void be_unmap_pci_bars(struct be_adapter *adapter)
5666 {
5667         if (adapter->csr)
5668                 pci_iounmap(adapter->pdev, adapter->csr);
5669         if (adapter->db)
5670                 pci_iounmap(adapter->pdev, adapter->db);
5671         if (adapter->pcicfg && adapter->pcicfg_mapped)
5672                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5673 }
5674
5675 static int db_bar(struct be_adapter *adapter)
5676 {
5677         if (lancer_chip(adapter) || be_virtfn(adapter))
5678                 return 0;
5679         else
5680                 return 4;
5681 }
5682
5683 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5684 {
5685         if (skyhawk_chip(adapter)) {
5686                 adapter->roce_db.size = 4096;
5687                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5688                                                               db_bar(adapter));
5689                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5690                                                                db_bar(adapter));
5691         }
5692         return 0;
5693 }
5694
5695 static int be_map_pci_bars(struct be_adapter *adapter)
5696 {
5697         struct pci_dev *pdev = adapter->pdev;
5698         u8 __iomem *addr;
5699         u32 sli_intf;
5700
5701         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5702         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5703                                 SLI_INTF_FAMILY_SHIFT;
5704         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5705
5706         if (BEx_chip(adapter) && be_physfn(adapter)) {
5707                 adapter->csr = pci_iomap(pdev, 2, 0);
5708                 if (!adapter->csr)
5709                         return -ENOMEM;
5710         }
5711
5712         addr = pci_iomap(pdev, db_bar(adapter), 0);
5713         if (!addr)
5714                 goto pci_map_err;
5715         adapter->db = addr;
5716
5717         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5718                 if (be_physfn(adapter)) {
5719                         /* PCICFG is the 2nd BAR in BE2 */
5720                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5721                         if (!addr)
5722                                 goto pci_map_err;
5723                         adapter->pcicfg = addr;
5724                         adapter->pcicfg_mapped = true;
5725                 } else {
5726                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5727                         adapter->pcicfg_mapped = false;
5728                 }
5729         }
5730
5731         be_roce_map_pci_bars(adapter);
5732         return 0;
5733
5734 pci_map_err:
5735         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5736         be_unmap_pci_bars(adapter);
5737         return -ENOMEM;
5738 }
5739
5740 static void be_drv_cleanup(struct be_adapter *adapter)
5741 {
5742         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5743         struct device *dev = &adapter->pdev->dev;
5744
5745         if (mem->va)
5746                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5747
5748         mem = &adapter->rx_filter;
5749         if (mem->va)
5750                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5751
5752         mem = &adapter->stats_cmd;
5753         if (mem->va)
5754                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5755 }
5756
5757 /* Allocate and initialize various fields in be_adapter struct */
5758 static int be_drv_init(struct be_adapter *adapter)
5759 {
5760         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5761         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5762         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5763         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5764         struct device *dev = &adapter->pdev->dev;
5765         int status = 0;
5766
5767         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5768         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5769                                                  &mbox_mem_alloc->dma,
5770                                                  GFP_KERNEL);
5771         if (!mbox_mem_alloc->va)
5772                 return -ENOMEM;
5773
5774         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5775         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5776         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5777
5778         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5779         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5780                                             &rx_filter->dma, GFP_KERNEL);
5781         if (!rx_filter->va) {
5782                 status = -ENOMEM;
5783                 goto free_mbox;
5784         }
5785
5786         if (lancer_chip(adapter))
5787                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5788         else if (BE2_chip(adapter))
5789                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5790         else if (BE3_chip(adapter))
5791                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5792         else
5793                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5794         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5795                                             &stats_cmd->dma, GFP_KERNEL);
5796         if (!stats_cmd->va) {
5797                 status = -ENOMEM;
5798                 goto free_rx_filter;
5799         }
5800
5801         mutex_init(&adapter->mbox_lock);
5802         mutex_init(&adapter->mcc_lock);
5803         mutex_init(&adapter->rx_filter_lock);
5804         spin_lock_init(&adapter->mcc_cq_lock);
5805         init_completion(&adapter->et_cmd_compl);
5806
5807         pci_save_state(adapter->pdev);
5808
5809         INIT_DELAYED_WORK(&adapter->work, be_worker);
5810
5811         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5812         adapter->error_recovery.resched_delay = 0;
5813         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5814                           be_err_detection_task);
5815
5816         adapter->rx_fc = true;
5817         adapter->tx_fc = true;
5818
5819         /* Must be a power of 2 or else MODULO will BUG_ON */
5820         adapter->be_get_temp_freq = 64;
5821
5822         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5823         return 0;
5824
5825 free_rx_filter:
5826         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5827 free_mbox:
5828         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5829                           mbox_mem_alloc->dma);
5830         return status;
5831 }
5832
5833 static void be_remove(struct pci_dev *pdev)
5834 {
5835         struct be_adapter *adapter = pci_get_drvdata(pdev);
5836
5837         if (!adapter)
5838                 return;
5839
5840         be_roce_dev_remove(adapter);
5841         be_intr_set(adapter, false);
5842
5843         be_cancel_err_detection(adapter);
5844
5845         unregister_netdev(adapter->netdev);
5846
5847         be_clear(adapter);
5848
5849         if (!pci_vfs_assigned(adapter->pdev))
5850                 be_cmd_reset_function(adapter);
5851
5852         /* tell fw we're done with firing cmds */
5853         be_cmd_fw_clean(adapter);
5854
5855         be_unmap_pci_bars(adapter);
5856         be_drv_cleanup(adapter);
5857
5858         pci_disable_pcie_error_reporting(pdev);
5859
5860         pci_release_regions(pdev);
5861         pci_disable_device(pdev);
5862
5863         free_netdev(adapter->netdev);
5864 }
5865
5866 static ssize_t be_hwmon_show_temp(struct device *dev,
5867                                   struct device_attribute *dev_attr,
5868                                   char *buf)
5869 {
5870         struct be_adapter *adapter = dev_get_drvdata(dev);
5871
5872         /* Unit: millidegree Celsius */
5873         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5874                 return -EIO;
5875         else
5876                 return sprintf(buf, "%u\n",
5877                                adapter->hwmon_info.be_on_die_temp * 1000);
5878 }
5879
5880 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5881                           be_hwmon_show_temp, NULL, 1);
5882
5883 static struct attribute *be_hwmon_attrs[] = {
5884         &sensor_dev_attr_temp1_input.dev_attr.attr,
5885         NULL
5886 };
5887
5888 ATTRIBUTE_GROUPS(be_hwmon);
5889
5890 static char *mc_name(struct be_adapter *adapter)
5891 {
5892         char *str = ""; /* default */
5893
5894         switch (adapter->mc_type) {
5895         case UMC:
5896                 str = "UMC";
5897                 break;
5898         case FLEX10:
5899                 str = "FLEX10";
5900                 break;
5901         case vNIC1:
5902                 str = "vNIC-1";
5903                 break;
5904         case nPAR:
5905                 str = "nPAR";
5906                 break;
5907         case UFP:
5908                 str = "UFP";
5909                 break;
5910         case vNIC2:
5911                 str = "vNIC-2";
5912                 break;
5913         default:
5914                 str = "";
5915         }
5916
5917         return str;
5918 }
5919
5920 static inline char *func_name(struct be_adapter *adapter)
5921 {
5922         return be_physfn(adapter) ? "PF" : "VF";
5923 }
5924
5925 static inline char *nic_name(struct pci_dev *pdev)
5926 {
5927         switch (pdev->device) {
5928         case OC_DEVICE_ID1:
5929                 return OC_NAME;
5930         case OC_DEVICE_ID2:
5931                 return OC_NAME_BE;
5932         case OC_DEVICE_ID3:
5933         case OC_DEVICE_ID4:
5934                 return OC_NAME_LANCER;
5935         case BE_DEVICE_ID2:
5936                 return BE3_NAME;
5937         case OC_DEVICE_ID5:
5938         case OC_DEVICE_ID6:
5939                 return OC_NAME_SH;
5940         default:
5941                 return BE_NAME;
5942         }
5943 }
5944
5945 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5946 {
5947         struct be_adapter *adapter;
5948         struct net_device *netdev;
5949         int status = 0;
5950
5951         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5952
5953         status = pci_enable_device(pdev);
5954         if (status)
5955                 goto do_none;
5956
5957         status = pci_request_regions(pdev, DRV_NAME);
5958         if (status)
5959                 goto disable_dev;
5960         pci_set_master(pdev);
5961
5962         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5963         if (!netdev) {
5964                 status = -ENOMEM;
5965                 goto rel_reg;
5966         }
5967         adapter = netdev_priv(netdev);
5968         adapter->pdev = pdev;
5969         pci_set_drvdata(pdev, adapter);
5970         adapter->netdev = netdev;
5971         SET_NETDEV_DEV(netdev, &pdev->dev);
5972
5973         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5974         if (!status) {
5975                 netdev->features |= NETIF_F_HIGHDMA;
5976         } else {
5977                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5978                 if (status) {
5979                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5980                         goto free_netdev;
5981                 }
5982         }
5983
5984         status = pci_enable_pcie_error_reporting(pdev);
5985         if (!status)
5986                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5987
5988         status = be_map_pci_bars(adapter);
5989         if (status)
5990                 goto free_netdev;
5991
5992         status = be_drv_init(adapter);
5993         if (status)
5994                 goto unmap_bars;
5995
5996         status = be_setup(adapter);
5997         if (status)
5998                 goto drv_cleanup;
5999
6000         be_netdev_init(netdev);
6001         status = register_netdev(netdev);
6002         if (status != 0)
6003                 goto unsetup;
6004
6005         be_roce_dev_add(adapter);
6006
6007         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6008         adapter->error_recovery.probe_time = jiffies;
6009
6010         /* On Die temperature not supported for VF. */
6011         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6012                 adapter->hwmon_info.hwmon_dev =
6013                         devm_hwmon_device_register_with_groups(&pdev->dev,
6014                                                                DRV_NAME,
6015                                                                adapter,
6016                                                                be_hwmon_groups);
6017                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6018         }
6019
6020         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6021                  func_name(adapter), mc_name(adapter), adapter->port_name);
6022
6023         return 0;
6024
6025 unsetup:
6026         be_clear(adapter);
6027 drv_cleanup:
6028         be_drv_cleanup(adapter);
6029 unmap_bars:
6030         be_unmap_pci_bars(adapter);
6031 free_netdev:
6032         pci_disable_pcie_error_reporting(pdev);
6033         free_netdev(netdev);
6034 rel_reg:
6035         pci_release_regions(pdev);
6036 disable_dev:
6037         pci_disable_device(pdev);
6038 do_none:
6039         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6040         return status;
6041 }
6042
6043 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6044 {
6045         struct be_adapter *adapter = pci_get_drvdata(pdev);
6046
6047         be_intr_set(adapter, false);
6048         be_cancel_err_detection(adapter);
6049
6050         be_cleanup(adapter);
6051
6052         pci_save_state(pdev);
6053         pci_disable_device(pdev);
6054         pci_set_power_state(pdev, pci_choose_state(pdev, state));
6055         return 0;
6056 }
6057
6058 static int be_pci_resume(struct pci_dev *pdev)
6059 {
6060         struct be_adapter *adapter = pci_get_drvdata(pdev);
6061         int status = 0;
6062
6063         status = pci_enable_device(pdev);
6064         if (status)
6065                 return status;
6066
6067         pci_restore_state(pdev);
6068
6069         status = be_resume(adapter);
6070         if (status)
6071                 return status;
6072
6073         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6074
6075         return 0;
6076 }
6077
6078 /*
6079  * An FLR will stop BE from DMAing any data.
6080  */
6081 static void be_shutdown(struct pci_dev *pdev)
6082 {
6083         struct be_adapter *adapter = pci_get_drvdata(pdev);
6084
6085         if (!adapter)
6086                 return;
6087
6088         be_roce_dev_shutdown(adapter);
6089         cancel_delayed_work_sync(&adapter->work);
6090         be_cancel_err_detection(adapter);
6091
6092         netif_device_detach(adapter->netdev);
6093
6094         be_cmd_reset_function(adapter);
6095
6096         pci_disable_device(pdev);
6097 }
6098
6099 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6100                                             pci_channel_state_t state)
6101 {
6102         struct be_adapter *adapter = pci_get_drvdata(pdev);
6103
6104         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6105
6106         be_roce_dev_remove(adapter);
6107
6108         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6109                 be_set_error(adapter, BE_ERROR_EEH);
6110
6111                 be_cancel_err_detection(adapter);
6112
6113                 be_cleanup(adapter);
6114         }
6115
6116         if (state == pci_channel_io_perm_failure)
6117                 return PCI_ERS_RESULT_DISCONNECT;
6118
6119         pci_disable_device(pdev);
6120
6121         /* The error could cause the FW to trigger a flash debug dump.
6122          * Resetting the card while flash dump is in progress
6123          * can cause it not to recover; wait for it to finish.
6124          * Wait only for first function as it is needed only once per
6125          * adapter.
6126          */
6127         if (pdev->devfn == 0)
6128                 ssleep(30);
6129
6130         return PCI_ERS_RESULT_NEED_RESET;
6131 }
6132
6133 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6134 {
6135         struct be_adapter *adapter = pci_get_drvdata(pdev);
6136         int status;
6137
6138         dev_info(&adapter->pdev->dev, "EEH reset\n");
6139
6140         status = pci_enable_device(pdev);
6141         if (status)
6142                 return PCI_ERS_RESULT_DISCONNECT;
6143
6144         pci_set_master(pdev);
6145         pci_restore_state(pdev);
6146
6147         /* Check if card is ok and fw is ready */
6148         dev_info(&adapter->pdev->dev,
6149                  "Waiting for FW to be ready after EEH reset\n");
6150         status = be_fw_wait_ready(adapter);
6151         if (status)
6152                 return PCI_ERS_RESULT_DISCONNECT;
6153
6154         pci_cleanup_aer_uncorrect_error_status(pdev);
6155         be_clear_error(adapter, BE_CLEAR_ALL);
6156         return PCI_ERS_RESULT_RECOVERED;
6157 }
6158
6159 static void be_eeh_resume(struct pci_dev *pdev)
6160 {
6161         int status = 0;
6162         struct be_adapter *adapter = pci_get_drvdata(pdev);
6163
6164         dev_info(&adapter->pdev->dev, "EEH resume\n");
6165
6166         pci_save_state(pdev);
6167
6168         status = be_resume(adapter);
6169         if (status)
6170                 goto err;
6171
6172         be_roce_dev_add(adapter);
6173
6174         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6175         return;
6176 err:
6177         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6178 }
6179
6180 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6181 {
6182         struct be_adapter *adapter = pci_get_drvdata(pdev);
6183         struct be_resources vft_res = {0};
6184         int status;
6185
6186         if (!num_vfs)
6187                 be_vf_clear(adapter);
6188
6189         adapter->num_vfs = num_vfs;
6190
6191         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6192                 dev_warn(&pdev->dev,
6193                          "Cannot disable VFs while they are assigned\n");
6194                 return -EBUSY;
6195         }
6196
6197         /* When the HW is in SRIOV capable configuration, the PF-pool resources
6198          * are equally distributed across the max-number of VFs. The user may
6199          * request only a subset of the max-vfs to be enabled.
6200          * Based on num_vfs, redistribute the resources across num_vfs so that
6201          * each VF will have access to more number of resources.
6202          * This facility is not available in BE3 FW.
6203          * Also, this is done by FW in Lancer chip.
6204          */
6205         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6206                 be_calculate_vf_res(adapter, adapter->num_vfs,
6207                                     &vft_res);
6208                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6209                                                  adapter->num_vfs, &vft_res);
6210                 if (status)
6211                         dev_err(&pdev->dev,
6212                                 "Failed to optimize SR-IOV resources\n");
6213         }
6214
6215         status = be_get_resources(adapter);
6216         if (status)
6217                 return be_cmd_status(status);
6218
6219         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6220         rtnl_lock();
6221         status = be_update_queues(adapter);
6222         rtnl_unlock();
6223         if (status)
6224                 return be_cmd_status(status);
6225
6226         if (adapter->num_vfs)
6227                 status = be_vf_setup(adapter);
6228
6229         if (!status)
6230                 return adapter->num_vfs;
6231
6232         return 0;
6233 }
6234
6235 static const struct pci_error_handlers be_eeh_handlers = {
6236         .error_detected = be_eeh_err_detected,
6237         .slot_reset = be_eeh_reset,
6238         .resume = be_eeh_resume,
6239 };
6240
6241 static struct pci_driver be_driver = {
6242         .name = DRV_NAME,
6243         .id_table = be_dev_ids,
6244         .probe = be_probe,
6245         .remove = be_remove,
6246         .suspend = be_suspend,
6247         .resume = be_pci_resume,
6248         .shutdown = be_shutdown,
6249         .sriov_configure = be_pci_sriov_configure,
6250         .err_handler = &be_eeh_handlers
6251 };
6252
6253 static int __init be_init_module(void)
6254 {
6255         int status;
6256
6257         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6258             rx_frag_size != 2048) {
6259                 printk(KERN_WARNING DRV_NAME
6260                         " : Module param rx_frag_size must be 2048/4096/8192."
6261                         " Using 2048\n");
6262                 rx_frag_size = 2048;
6263         }
6264
6265         if (num_vfs > 0) {
6266                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6267                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6268         }
6269
6270         be_wq = create_singlethread_workqueue("be_wq");
6271         if (!be_wq) {
6272                 pr_warn(DRV_NAME "workqueue creation failed\n");
6273                 return -1;
6274         }
6275
6276         be_err_recovery_workq =
6277                 create_singlethread_workqueue("be_err_recover");
6278         if (!be_err_recovery_workq)
6279                 pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6280
6281         status = pci_register_driver(&be_driver);
6282         if (status) {
6283                 destroy_workqueue(be_wq);
6284                 be_destroy_err_recovery_workq();
6285         }
6286         return status;
6287 }
6288 module_init(be_init_module);
6289
6290 static void __exit be_exit_module(void)
6291 {
6292         pci_unregister_driver(&be_driver);
6293
6294         be_destroy_err_recovery_workq();
6295
6296         if (be_wq)
6297                 destroy_workqueue(be_wq);
6298 }
6299 module_exit(be_exit_module);