drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c

   1 /*
   2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
   3  * driver for Linux.
   4  *
   5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35
  36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  37
  38 #include <linux/module.h>
  39 #include <linux/moduleparam.h>
  40 #include <linux/init.h>
  41 #include <linux/pci.h>
  42 #include <linux/dma-mapping.h>
  43 #include <linux/netdevice.h>
  44 #include <linux/etherdevice.h>
  45 #include <linux/debugfs.h>
  46 #include <linux/ethtool.h>
  47 #include <linux/mdio.h>
  48
  49 #include "t4vf_common.h"
  50 #include "t4vf_defs.h"
  51
  52 #include "../cxgb4/t4_regs.h"
  53 #include "../cxgb4/t4_msg.h"
  54
  55 /*
  56  * Generic information about the driver.
  57  */
  58 #define DRV_VERSION "2.0.0-ko"
  59 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
  60
  61 /*
  62  * Module Parameters.
  63  * ==================
  64  */
  65
  66 /*
  67  * Default ethtool "message level" for adapters.
  68  */
  69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
  70                          NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
  71                          NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
  72
  73 /*
  74  * The driver uses the best interrupt scheme available on a platform in the
  75  * order MSI-X then MSI.  This parameter determines which of these schemes the
  76  * driver may consider as follows:
  77  *
  78  *     msi = 2: choose from among MSI-X and MSI
  79  *     msi = 1: only consider MSI interrupts
  80  *
  81  * Note that unlike the Physical Function driver, this Virtual Function driver
  82  * does _not_ support legacy INTx interrupts (this limitation is mandated by
  83  * the PCI-E SR-IOV standard).
  84  */
  85 #define MSI_MSIX        2
  86 #define MSI_MSI         1
  87 #define MSI_DEFAULT     MSI_MSIX
  88
  89 static int msi = MSI_DEFAULT;
  90
  91 module_param(msi, int, 0644);
  92 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
  93
  94 /*
  95  * Fundamental constants.
  96  * ======================
  97  */
  98
  99 enum {
 100         MAX_TXQ_ENTRIES         = 16384,
 101         MAX_RSPQ_ENTRIES        = 16384,
 102         MAX_RX_BUFFERS          = 16384,
 103
 104         MIN_TXQ_ENTRIES         = 32,
 105         MIN_RSPQ_ENTRIES        = 128,
 106         MIN_FL_ENTRIES          = 16,
 107
 108         /*
 109          * For purposes of manipulating the Free List size we need to
 110          * recognize that Free Lists are actually Egress Queues (the host
 111          * produces free buffers which the hardware consumes), Egress Queues
 112          * indices are all in units of Egress Context Units bytes, and free
 113          * list entries are 64-bit PCI DMA addresses.  And since the state of
 114          * the Producer Index == the Consumer Index implies an EMPTY list, we
 115          * always have at least one Egress Unit's worth of Free List entries
 116          * unused.  See sge.c for more details ...
 117          */
 118         EQ_UNIT = SGE_EQ_IDXSIZE,
 119         FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
 120         MIN_FL_RESID = FL_PER_EQ_UNIT,
 121 };
 122
 123 /*
 124  * Global driver state.
 125  * ====================
 126  */
 127
 128 static struct dentry *cxgb4vf_debugfs_root;
 129
 130 /*
 131  * OS "Callback" functions.
 132  * ========================
 133  */
 134
 135 /*
 136  * The link status has changed on the indicated "port" (Virtual Interface).
 137  */
 138 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 139 {
 140         struct net_device *dev = adapter->port[pidx];
 141
 142         /*
 143          * If the port is disabled or the current recorded "link up"
 144          * status matches the new status, just return.
 145          */
 146         if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
 147                 return;
 148
 149         /*
 150          * Tell the OS that the link status has changed and print a short
 151          * informative message on the console about the event.
 152          */
 153         if (link_ok) {
 154                 const char *s;
 155                 const char *fc;
 156                 const struct port_info *pi = netdev_priv(dev);
 157
 158                 netif_carrier_on(dev);
 159
 160                 switch (pi->link_cfg.speed) {
 161                 case 100:
 162                         s = "100Mbps";
 163                         break;
 164                 case 1000:
 165                         s = "1Gbps";
 166                         break;
 167                 case 10000:
 168                         s = "10Gbps";
 169                         break;
 170                 case 25000:
 171                         s = "25Gbps";
 172                         break;
 173                 case 40000:
 174                         s = "40Gbps";
 175                         break;
 176                 case 100000:
 177                         s = "100Gbps";
 178                         break;
 179
 180                 default:
 181                         s = "unknown";
 182                         break;
 183                 }
 184
 185                 switch ((int)pi->link_cfg.fc) {
 186                 case PAUSE_RX:
 187                         fc = "RX";
 188                         break;
 189
 190                 case PAUSE_TX:
 191                         fc = "TX";
 192                         break;
 193
 194                 case PAUSE_RX | PAUSE_TX:
 195                         fc = "RX/TX";
 196                         break;
 197
 198                 default:
 199                         fc = "no";
 200                         break;
 201                 }
 202
 203                 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
 204         } else {
 205                 netif_carrier_off(dev);
 206                 netdev_info(dev, "link down\n");
 207         }
 208 }
 209
 210 /*
 211  * THe port module type has changed on the indicated "port" (Virtual
 212  * Interface).
 213  */
 214 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
 215 {
 216         static const char * const mod_str[] = {
 217                 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
 218         };
 219         const struct net_device *dev = adapter->port[pidx];
 220         const struct port_info *pi = netdev_priv(dev);
 221
 222         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
 223                 dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
 224                          dev->name);
 225         else if (pi->mod_type < ARRAY_SIZE(mod_str))
 226                 dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
 227                          dev->name, mod_str[pi->mod_type]);
 228         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
 229                 dev_info(adapter->pdev_dev, "%s: unsupported optical port "
 230                          "module inserted\n", dev->name);
 231         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
 232                 dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
 233                          "forcing TWINAX\n", dev->name);
 234         else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
 235                 dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
 236                          dev->name);
 237         else
 238                 dev_info(adapter->pdev_dev, "%s: unknown module type %d "
 239                          "inserted\n", dev->name, pi->mod_type);
 240 }
 241
 242 /*
 243  * Net device operations.
 244  * ======================
 245  */
 246
 247
 248
 249
 250 /*
 251  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 252  * Interface).
 253  */
 254 static int link_start(struct net_device *dev)
 255 {
 256         int ret;
 257         struct port_info *pi = netdev_priv(dev);
 258
 259         /*
 260          * We do not set address filters and promiscuity here, the stack does
 261          * that step explicitly. Enable vlan accel.
 262          */
 263         ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
 264                               true);
 265         if (ret == 0) {
 266                 ret = t4vf_change_mac(pi->adapter, pi->viid,
 267                                       pi->xact_addr_filt, dev->dev_addr, true);
 268                 if (ret >= 0) {
 269                         pi->xact_addr_filt = ret;
 270                         ret = 0;
 271                 }
 272         }
 273
 274         /*
 275          * We don't need to actually "start the link" itself since the
 276          * firmware will do that for us when the first Virtual Interface
 277          * is enabled on a port.
 278          */
 279         if (ret == 0)
 280                 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
 281         return ret;
 282 }
 283
 284 /*
 285  * Name the MSI-X interrupts.
 286  */
 287 static void name_msix_vecs(struct adapter *adapter)
 288 {
 289         int namelen = sizeof(adapter->msix_info[0].desc) - 1;
 290         int pidx;
 291
 292         /*
 293          * Firmware events.
 294          */
 295         snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
 296                  "%s-FWeventq", adapter->name);
 297         adapter->msix_info[MSIX_FW].desc[namelen] = 0;
 298
 299         /*
 300          * Ethernet queues.
 301          */
 302         for_each_port(adapter, pidx) {
 303                 struct net_device *dev = adapter->port[pidx];
 304                 const struct port_info *pi = netdev_priv(dev);
 305                 int qs, msi;
 306
 307                 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
 308                         snprintf(adapter->msix_info[msi].desc, namelen,
 309                                  "%s-%d", dev->name, qs);
 310                         adapter->msix_info[msi].desc[namelen] = 0;
 311                 }
 312         }
 313 }
 314
 315 /*
 316  * Request all of our MSI-X resources.
 317  */
 318 static int request_msix_queue_irqs(struct adapter *adapter)
 319 {
 320         struct sge *s = &adapter->sge;
 321         int rxq, msi, err;
 322
 323         /*
 324          * Firmware events.
 325          */
 326         err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
 327                           0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
 328         if (err)
 329                 return err;
 330
 331         /*
 332          * Ethernet queues.
 333          */
 334         msi = MSIX_IQFLINT;
 335         for_each_ethrxq(s, rxq) {
 336                 err = request_irq(adapter->msix_info[msi].vec,
 337                                   t4vf_sge_intr_msix, 0,
 338                                   adapter->msix_info[msi].desc,
 339                                   &s->ethrxq[rxq].rspq);
 340                 if (err)
 341                         goto err_free_irqs;
 342                 msi++;
 343         }
 344         return 0;
 345
 346 err_free_irqs:
 347         while (--rxq >= 0)
 348                 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
 349         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 350         return err;
 351 }
 352
 353 /*
 354  * Free our MSI-X resources.
 355  */
 356 static void free_msix_queue_irqs(struct adapter *adapter)
 357 {
 358         struct sge *s = &adapter->sge;
 359         int rxq, msi;
 360
 361         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 362         msi = MSIX_IQFLINT;
 363         for_each_ethrxq(s, rxq)
 364                 free_irq(adapter->msix_info[msi++].vec,
 365                          &s->ethrxq[rxq].rspq);
 366 }
 367
 368 /*
 369  * Turn on NAPI and start up interrupts on a response queue.
 370  */
 371 static void qenable(struct sge_rspq *rspq)
 372 {
 373         napi_enable(&rspq->napi);
 374
 375         /*
 376          * 0-increment the Going To Sleep register to start the timer and
 377          * enable interrupts.
 378          */
 379         t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 380                      CIDXINC_V(0) |
 381                      SEINTARM_V(rspq->intr_params) |
 382                      INGRESSQID_V(rspq->cntxt_id));
 383 }
 384
 385 /*
 386  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 387  */
 388 static void enable_rx(struct adapter *adapter)
 389 {
 390         int rxq;
 391         struct sge *s = &adapter->sge;
 392
 393         for_each_ethrxq(s, rxq)
 394                 qenable(&s->ethrxq[rxq].rspq);
 395         qenable(&s->fw_evtq);
 396
 397         /*
 398          * The interrupt queue doesn't use NAPI so we do the 0-increment of
 399          * its Going To Sleep register here to get it started.
 400          */
 401         if (adapter->flags & USING_MSI)
 402                 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 403                              CIDXINC_V(0) |
 404                              SEINTARM_V(s->intrq.intr_params) |
 405                              INGRESSQID_V(s->intrq.cntxt_id));
 406
 407 }
 408
 409 /*
 410  * Wait until all NAPI handlers are descheduled.
 411  */
 412 static void quiesce_rx(struct adapter *adapter)
 413 {
 414         struct sge *s = &adapter->sge;
 415         int rxq;
 416
 417         for_each_ethrxq(s, rxq)
 418                 napi_disable(&s->ethrxq[rxq].rspq.napi);
 419         napi_disable(&s->fw_evtq.napi);
 420 }
 421
 422 /*
 423  * Response queue handler for the firmware event queue.
 424  */
 425 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
 426                           const struct pkt_gl *gl)
 427 {
 428         /*
 429          * Extract response opcode and get pointer to CPL message body.
 430          */
 431         struct adapter *adapter = rspq->adapter;
 432         u8 opcode = ((const struct rss_header *)rsp)->opcode;
 433         void *cpl = (void *)(rsp + 1);
 434
 435         switch (opcode) {
 436         case CPL_FW6_MSG: {
 437                 /*
 438                  * We've received an asynchronous message from the firmware.
 439                  */
 440                 const struct cpl_fw6_msg *fw_msg = cpl;
 441                 if (fw_msg->type == FW6_TYPE_CMD_RPL)
 442                         t4vf_handle_fw_rpl(adapter, fw_msg->data);
 443                 break;
 444         }
 445
 446         case CPL_FW4_MSG: {
 447                 /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
 448                  */
 449                 const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
 450                 opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
 451                 if (opcode != CPL_SGE_EGR_UPDATE) {
 452                         dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
 453                                 , opcode);
 454                         break;
 455                 }
 456                 cpl = (void *)p;
 457                 /*FALLTHROUGH*/
 458         }
 459
 460         case CPL_SGE_EGR_UPDATE: {
 461                 /*
 462                  * We've received an Egress Queue Status Update message.  We
 463                  * get these, if the SGE is configured to send these when the
 464                  * firmware passes certain points in processing our TX
 465                  * Ethernet Queue or if we make an explicit request for one.
 466                  * We use these updates to determine when we may need to
 467                  * restart a TX Ethernet Queue which was stopped for lack of
 468                  * free TX Queue Descriptors ...
 469                  */
 470                 const struct cpl_sge_egr_update *p = cpl;
 471                 unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
 472                 struct sge *s = &adapter->sge;
 473                 struct sge_txq *tq;
 474                 struct sge_eth_txq *txq;
 475                 unsigned int eq_idx;
 476
 477                 /*
 478                  * Perform sanity checking on the Queue ID to make sure it
 479                  * really refers to one of our TX Ethernet Egress Queues which
 480                  * is active and matches the queue's ID.  None of these error
 481                  * conditions should ever happen so we may want to either make
 482                  * them fatal and/or conditionalized under DEBUG.
 483                  */
 484                 eq_idx = EQ_IDX(s, qid);
 485                 if (unlikely(eq_idx >= MAX_EGRQ)) {
 486                         dev_err(adapter->pdev_dev,
 487                                 "Egress Update QID %d out of range\n", qid);
 488                         break;
 489                 }
 490                 tq = s->egr_map[eq_idx];
 491                 if (unlikely(tq == NULL)) {
 492                         dev_err(adapter->pdev_dev,
 493                                 "Egress Update QID %d TXQ=NULL\n", qid);
 494                         break;
 495                 }
 496                 txq = container_of(tq, struct sge_eth_txq, q);
 497                 if (unlikely(tq->abs_id != qid)) {
 498                         dev_err(adapter->pdev_dev,
 499                                 "Egress Update QID %d refers to TXQ %d\n",
 500                                 qid, tq->abs_id);
 501                         break;
 502                 }
 503
 504                 /*
 505                  * Restart a stopped TX Queue which has less than half of its
 506                  * TX ring in use ...
 507                  */
 508                 txq->q.restarts++;
 509                 netif_tx_wake_queue(txq->txq);
 510                 break;
 511         }
 512
 513         default:
 514                 dev_err(adapter->pdev_dev,
 515                         "unexpected CPL %#x on FW event queue\n", opcode);
 516         }
 517
 518         return 0;
 519 }
 520
 521 /*
 522  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 523  * to use and initializes them.  We support multiple "Queue Sets" per port if
 524  * we have MSI-X, otherwise just one queue set per port.
 525  */
 526 static int setup_sge_queues(struct adapter *adapter)
 527 {
 528         struct sge *s = &adapter->sge;
 529         int err, pidx, msix;
 530
 531         /*
 532          * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
 533          * state.
 534          */
 535         bitmap_zero(s->starving_fl, MAX_EGRQ);
 536
 537         /*
 538          * If we're using MSI interrupt mode we need to set up a "forwarded
 539          * interrupt" queue which we'll set up with our MSI vector.  The rest
 540          * of the ingress queues will be set up to forward their interrupts to
 541          * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
 542          * the intrq's queue ID as the interrupt forwarding queue for the
 543          * subsequent calls ...
 544          */
 545         if (adapter->flags & USING_MSI) {
 546                 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 547                                          adapter->port[0], 0, NULL, NULL);
 548                 if (err)
 549                         goto err_free_queues;
 550         }
 551
 552         /*
 553          * Allocate our ingress queue for asynchronous firmware messages.
 554          */
 555         err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
 556                                  MSIX_FW, NULL, fwevtq_handler);
 557         if (err)
 558                 goto err_free_queues;
 559
 560         /*
 561          * Allocate each "port"'s initial Queue Sets.  These can be changed
 562          * later on ... up to the point where any interface on the adapter is
 563          * brought up at which point lots of things get nailed down
 564          * permanently ...
 565          */
 566         msix = MSIX_IQFLINT;
 567         for_each_port(adapter, pidx) {
 568                 struct net_device *dev = adapter->port[pidx];
 569                 struct port_info *pi = netdev_priv(dev);
 570                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 571                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 572                 int qs;
 573
 574                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 575                         err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
 576                                                  dev, msix++,
 577                                                  &rxq->fl, t4vf_ethrx_handler);
 578                         if (err)
 579                                 goto err_free_queues;
 580
 581                         err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
 582                                              netdev_get_tx_queue(dev, qs),
 583                                              s->fw_evtq.cntxt_id);
 584                         if (err)
 585                                 goto err_free_queues;
 586
 587                         rxq->rspq.idx = qs;
 588                         memset(&rxq->stats, 0, sizeof(rxq->stats));
 589                 }
 590         }
 591
 592         /*
 593          * Create the reverse mappings for the queues.
 594          */
 595         s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
 596         s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
 597         IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
 598         for_each_port(adapter, pidx) {
 599                 struct net_device *dev = adapter->port[pidx];
 600                 struct port_info *pi = netdev_priv(dev);
 601                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 602                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 603                 int qs;
 604
 605                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 606                         IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
 607                         EQ_MAP(s, txq->q.abs_id) = &txq->q;
 608
 609                         /*
 610                          * The FW_IQ_CMD doesn't return the Absolute Queue IDs
 611                          * for Free Lists but since all of the Egress Queues
 612                          * (including Free Lists) have Relative Queue IDs
 613                          * which are computed as Absolute - Base Queue ID, we
 614                          * can synthesize the Absolute Queue IDs for the Free
 615                          * Lists.  This is useful for debugging purposes when
 616                          * we want to dump Queue Contexts via the PF Driver.
 617                          */
 618                         rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
 619                         EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
 620                 }
 621         }
 622         return 0;
 623
 624 err_free_queues:
 625         t4vf_free_sge_resources(adapter);
 626         return err;
 627 }
 628
 629 /*
 630  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 631  * queues.  We configure the RSS CPU lookup table to distribute to the number
 632  * of HW receive queues, and the response queue lookup table to narrow that
 633  * down to the response queues actually configured for each "port" (Virtual
 634  * Interface).  We always configure the RSS mapping for all ports since the
 635  * mapping table has plenty of entries.
 636  */
 637 static int setup_rss(struct adapter *adapter)
 638 {
 639         int pidx;
 640
 641         for_each_port(adapter, pidx) {
 642                 struct port_info *pi = adap2pinfo(adapter, pidx);
 643                 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
 644                 u16 rss[MAX_PORT_QSETS];
 645                 int qs, err;
 646
 647                 for (qs = 0; qs < pi->nqsets; qs++)
 648                         rss[qs] = rxq[qs].rspq.abs_id;
 649
 650                 err = t4vf_config_rss_range(adapter, pi->viid,
 651                                             0, pi->rss_size, rss, pi->nqsets);
 652                 if (err)
 653                         return err;
 654
 655                 /*
 656                  * Perform Global RSS Mode-specific initialization.
 657                  */
 658                 switch (adapter->params.rss.mode) {
 659                 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
 660                         /*
 661                          * If Tunnel All Lookup isn't specified in the global
 662                          * RSS Configuration, then we need to specify a
 663                          * default Ingress Queue for any ingress packets which
 664                          * aren't hashed.  We'll use our first ingress queue
 665                          * ...
 666                          */
 667                         if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
 668                                 union rss_vi_config config;
 669                                 err = t4vf_read_rss_vi_config(adapter,
 670                                                               pi->viid,
 671                                                               &config);
 672                                 if (err)
 673                                         return err;
 674                                 config.basicvirtual.defaultq =
 675                                         rxq[0].rspq.abs_id;
 676                                 err = t4vf_write_rss_vi_config(adapter,
 677                                                                pi->viid,
 678                                                                &config);
 679                                 if (err)
 680                                         return err;
 681                         }
 682                         break;
 683                 }
 684         }
 685
 686         return 0;
 687 }
 688
 689 /*
 690  * Bring the adapter up.  Called whenever we go from no "ports" open to having
 691  * one open.  This function performs the actions necessary to make an adapter
 692  * operational, such as completing the initialization of HW modules, and
 693  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 694  * this is called "cxgb_up" in the PF Driver.)
 695  */
 696 static int adapter_up(struct adapter *adapter)
 697 {
 698         int err;
 699
 700         /*
 701          * If this is the first time we've been called, perform basic
 702          * adapter setup.  Once we've done this, many of our adapter
 703          * parameters can no longer be changed ...
 704          */
 705         if ((adapter->flags & FULL_INIT_DONE) == 0) {
 706                 err = setup_sge_queues(adapter);
 707                 if (err)
 708                         return err;
 709                 err = setup_rss(adapter);
 710                 if (err) {
 711                         t4vf_free_sge_resources(adapter);
 712                         return err;
 713                 }
 714
 715                 if (adapter->flags & USING_MSIX)
 716                         name_msix_vecs(adapter);
 717
 718                 adapter->flags |= FULL_INIT_DONE;
 719         }
 720
 721         /*
 722          * Acquire our interrupt resources.  We only support MSI-X and MSI.
 723          */
 724         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
 725         if (adapter->flags & USING_MSIX)
 726                 err = request_msix_queue_irqs(adapter);
 727         else
 728                 err = request_irq(adapter->pdev->irq,
 729                                   t4vf_intr_handler(adapter), 0,
 730                                   adapter->name, adapter);
 731         if (err) {
 732                 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
 733                         err);
 734                 return err;
 735         }
 736
 737         /*
 738          * Enable NAPI ingress processing and return success.
 739          */
 740         enable_rx(adapter);
 741         t4vf_sge_start(adapter);
 742
 743         return 0;
 744 }
 745
 746 /*
 747  * Bring the adapter down.  Called whenever the last "port" (Virtual
 748  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 749  * Driver.)
 750  */
 751 static void adapter_down(struct adapter *adapter)
 752 {
 753         /*
 754          * Free interrupt resources.
 755          */
 756         if (adapter->flags & USING_MSIX)
 757                 free_msix_queue_irqs(adapter);
 758         else
 759                 free_irq(adapter->pdev->irq, adapter);
 760
 761         /*
 762          * Wait for NAPI handlers to finish.
 763          */
 764         quiesce_rx(adapter);
 765 }
 766
 767 /*
 768  * Start up a net device.
 769  */
 770 static int cxgb4vf_open(struct net_device *dev)
 771 {
 772         int err;
 773         struct port_info *pi = netdev_priv(dev);
 774         struct adapter *adapter = pi->adapter;
 775
 776         /*
 777          * If this is the first interface that we're opening on the "adapter",
 778          * bring the "adapter" up now.
 779          */
 780         if (adapter->open_device_map == 0) {
 781                 err = adapter_up(adapter);
 782                 if (err)
 783                         return err;
 784         }
 785
 786         /*
 787          * Note that this interface is up and start everything up ...
 788          */
 789         err = link_start(dev);
 790         if (err)
 791                 goto err_unwind;
 792
 793         netif_tx_start_all_queues(dev);
 794         set_bit(pi->port_id, &adapter->open_device_map);
 795         return 0;
 796
 797 err_unwind:
 798         if (adapter->open_device_map == 0)
 799                 adapter_down(adapter);
 800         return err;
 801 }
 802
 803 /*
 804  * Shut down a net device.  This routine is called "cxgb_close" in the PF
 805  * Driver ...
 806  */
 807 static int cxgb4vf_stop(struct net_device *dev)
 808 {
 809         struct port_info *pi = netdev_priv(dev);
 810         struct adapter *adapter = pi->adapter;
 811
 812         netif_tx_stop_all_queues(dev);
 813         netif_carrier_off(dev);
 814         t4vf_enable_vi(adapter, pi->viid, false, false);
 815         pi->link_cfg.link_ok = 0;
 816
 817         clear_bit(pi->port_id, &adapter->open_device_map);
 818         if (adapter->open_device_map == 0)
 819                 adapter_down(adapter);
 820         return 0;
 821 }
 822
 823 /*
 824  * Translate our basic statistics into the standard "ifconfig" statistics.
 825  */
 826 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 827 {
 828         struct t4vf_port_stats stats;
 829         struct port_info *pi = netdev2pinfo(dev);
 830         struct adapter *adapter = pi->adapter;
 831         struct net_device_stats *ns = &dev->stats;
 832         int err;
 833
 834         spin_lock(&adapter->stats_lock);
 835         err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
 836         spin_unlock(&adapter->stats_lock);
 837
 838         memset(ns, 0, sizeof(*ns));
 839         if (err)
 840                 return ns;
 841
 842         ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
 843                         stats.tx_ucast_bytes + stats.tx_offload_bytes);
 844         ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
 845                           stats.tx_ucast_frames + stats.tx_offload_frames);
 846         ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
 847                         stats.rx_ucast_bytes);
 848         ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
 849                           stats.rx_ucast_frames);
 850         ns->multicast = stats.rx_mcast_frames;
 851         ns->tx_errors = stats.tx_drop_frames;
 852         ns->rx_errors = stats.rx_err_frames;
 853
 854         return ns;
 855 }
 856
 857 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
 858 {
 859         struct adapter *adapter = pi->adapter;
 860         u64 vec = 0;
 861         bool ucast = false;
 862         struct hash_mac_addr *entry;
 863
 864         /* Calculate the hash vector for the updated list and program it */
 865         list_for_each_entry(entry, &adapter->mac_hlist, list) {
 866                 ucast |= is_unicast_ether_addr(entry->addr);
 867                 vec |= (1ULL << hash_mac_addr(entry->addr));
 868         }
 869         return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
 870 }
 871
 872 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
 873 {
 874         struct port_info *pi = netdev_priv(netdev);
 875         struct adapter *adapter = pi->adapter;
 876         int ret;
 877         u64 mhash = 0;
 878         u64 uhash = 0;
 879         bool free = false;
 880         bool ucast = is_unicast_ether_addr(mac_addr);
 881         const u8 *maclist[1] = {mac_addr};
 882         struct hash_mac_addr *new_entry;
 883
 884         ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
 885                                   NULL, ucast ? &uhash : &mhash, false);
 886         if (ret < 0)
 887                 goto out;
 888         /* if hash != 0, then add the addr to hash addr list
 889          * so on the end we will calculate the hash for the
 890          * list and program it
 891          */
 892         if (uhash || mhash) {
 893                 new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
 894                 if (!new_entry)
 895                         return -ENOMEM;
 896                 ether_addr_copy(new_entry->addr, mac_addr);
 897                 list_add_tail(&new_entry->list, &adapter->mac_hlist);
 898                 ret = cxgb4vf_set_addr_hash(pi);
 899         }
 900 out:
 901         return ret < 0 ? ret : 0;
 902 }
 903
 904 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
 905 {
 906         struct port_info *pi = netdev_priv(netdev);
 907         struct adapter *adapter = pi->adapter;
 908         int ret;
 909         const u8 *maclist[1] = {mac_addr};
 910         struct hash_mac_addr *entry, *tmp;
 911
 912         /* If the MAC address to be removed is in the hash addr
 913          * list, delete it from the list and update hash vector
 914          */
 915         list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
 916                 if (ether_addr_equal(entry->addr, mac_addr)) {
 917                         list_del(&entry->list);
 918                         kfree(entry);
 919                         return cxgb4vf_set_addr_hash(pi);
 920                 }
 921         }
 922
 923         ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
 924         return ret < 0 ? -EINVAL : 0;
 925 }
 926
 927 /*
 928  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
 929  * If @mtu is -1 it is left unchanged.
 930  */
 931 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 932 {
 933         struct port_info *pi = netdev_priv(dev);
 934
 935         __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 936         __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
 937         return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
 938                                (dev->flags & IFF_PROMISC) != 0,
 939                                (dev->flags & IFF_ALLMULTI) != 0,
 940                                1, -1, sleep_ok);
 941 }
 942
 943 /*
 944  * Set the current receive modes on the device.
 945  */
 946 static void cxgb4vf_set_rxmode(struct net_device *dev)
 947 {
 948         /* unfortunately we can't return errors to the stack */
 949         set_rxmode(dev, -1, false);
 950 }
 951
 952 /*
 953  * Find the entry in the interrupt holdoff timer value array which comes
 954  * closest to the specified interrupt holdoff value.
 955  */
 956 static int closest_timer(const struct sge *s, int us)
 957 {
 958         int i, timer_idx = 0, min_delta = INT_MAX;
 959
 960         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
 961                 int delta = us - s->timer_val[i];
 962                 if (delta < 0)
 963                         delta = -delta;
 964                 if (delta < min_delta) {
 965                         min_delta = delta;
 966                         timer_idx = i;
 967                 }
 968         }
 969         return timer_idx;
 970 }
 971
 972 static int closest_thres(const struct sge *s, int thres)
 973 {
 974         int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
 975
 976         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
 977                 delta = thres - s->counter_val[i];
 978                 if (delta < 0)
 979                         delta = -delta;
 980                 if (delta < min_delta) {
 981                         min_delta = delta;
 982                         pktcnt_idx = i;
 983                 }
 984         }
 985         return pktcnt_idx;
 986 }
 987
 988 /*
 989  * Return a queue's interrupt hold-off time in us.  0 means no timer.
 990  */
 991 static unsigned int qtimer_val(const struct adapter *adapter,
 992                                const struct sge_rspq *rspq)
 993 {
 994         unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
 995
 996         return timer_idx < SGE_NTIMERS
 997                 ? adapter->sge.timer_val[timer_idx]
 998                 : 0;
 999 }
1000
1001 /**
1002  *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
1003  *      @adapter: the adapter
1004  *      @rspq: the RX response queue
1005  *      @us: the hold-off time in us, or 0 to disable timer
1006  *      @cnt: the hold-off packet count, or 0 to disable counter
1007  *
1008  *      Sets an RX response queue's interrupt hold-off time and packet count.
1009  *      At least one of the two needs to be enabled for the queue to generate
1010  *      interrupts.
1011  */
1012 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1013                                unsigned int us, unsigned int cnt)
1014 {
1015         unsigned int timer_idx;
1016
1017         /*
1018          * If both the interrupt holdoff timer and count are specified as
1019          * zero, default to a holdoff count of 1 ...
1020          */
1021         if ((us | cnt) == 0)
1022                 cnt = 1;
1023
1024         /*
1025          * If an interrupt holdoff count has been specified, then find the
1026          * closest configured holdoff count and use that.  If the response
1027          * queue has already been created, then update its queue context
1028          * parameters ...
1029          */
1030         if (cnt) {
1031                 int err;
1032                 u32 v, pktcnt_idx;
1033
1034                 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1035                 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1036                         v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1037                             FW_PARAMS_PARAM_X_V(
1038                                         FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1039                             FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1040                         err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1041                         if (err)
1042                                 return err;
1043                 }
1044                 rspq->pktcnt_idx = pktcnt_idx;
1045         }
1046
1047         /*
1048          * Compute the closest holdoff timer index from the supplied holdoff
1049          * timer value.
1050          */
1051         timer_idx = (us == 0
1052                      ? SGE_TIMER_RSTRT_CNTR
1053                      : closest_timer(&adapter->sge, us));
1054
1055         /*
1056          * Update the response queue's interrupt coalescing parameters and
1057          * return success.
1058          */
1059         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1060                              QINTR_CNT_EN_V(cnt > 0));
1061         return 0;
1062 }
1063
1064 /*
1065  * Return a version number to identify the type of adapter.  The scheme is:
1066  * - bits 0..9: chip version
1067  * - bits 10..15: chip revision
1068  */
1069 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1070 {
1071         /*
1072          * Chip version 4, revision 0x3f (cxgb4vf).
1073          */
1074         return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1075 }
1076
1077 /*
1078  * Execute the specified ioctl command.
1079  */
1080 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1081 {
1082         int ret = 0;
1083
1084         switch (cmd) {
1085             /*
1086              * The VF Driver doesn't have access to any of the other
1087              * common Ethernet device ioctl()'s (like reading/writing
1088              * PHY registers, etc.
1089              */
1090
1091         default:
1092                 ret = -EOPNOTSUPP;
1093                 break;
1094         }
1095         return ret;
1096 }
1097
1098 /*
1099  * Change the device's MTU.
1100  */
1101 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1102 {
1103         int ret;
1104         struct port_info *pi = netdev_priv(dev);
1105
1106         ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1107                               -1, -1, -1, -1, true);
1108         if (!ret)
1109                 dev->mtu = new_mtu;
1110         return ret;
1111 }
1112
1113 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1114         netdev_features_t features)
1115 {
1116         /*
1117          * Since there is no support for separate rx/tx vlan accel
1118          * enable/disable make sure tx flag is always in same state as rx.
1119          */
1120         if (features & NETIF_F_HW_VLAN_CTAG_RX)
1121                 features |= NETIF_F_HW_VLAN_CTAG_TX;
1122         else
1123                 features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1124
1125         return features;
1126 }
1127
1128 static int cxgb4vf_set_features(struct net_device *dev,
1129         netdev_features_t features)
1130 {
1131         struct port_info *pi = netdev_priv(dev);
1132         netdev_features_t changed = dev->features ^ features;
1133
1134         if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1135                 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1136                                 features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1137
1138         return 0;
1139 }
1140
1141 /*
1142  * Change the devices MAC address.
1143  */
1144 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1145 {
1146         int ret;
1147         struct sockaddr *addr = _addr;
1148         struct port_info *pi = netdev_priv(dev);
1149
1150         if (!is_valid_ether_addr(addr->sa_data))
1151                 return -EADDRNOTAVAIL;
1152
1153         ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1154                               addr->sa_data, true);
1155         if (ret < 0)
1156                 return ret;
1157
1158         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1159         pi->xact_addr_filt = ret;
1160         return 0;
1161 }
1162
1163 #ifdef CONFIG_NET_POLL_CONTROLLER
1164 /*
1165  * Poll all of our receive queues.  This is called outside of normal interrupt
1166  * context.
1167  */
1168 static void cxgb4vf_poll_controller(struct net_device *dev)
1169 {
1170         struct port_info *pi = netdev_priv(dev);
1171         struct adapter *adapter = pi->adapter;
1172
1173         if (adapter->flags & USING_MSIX) {
1174                 struct sge_eth_rxq *rxq;
1175                 int nqsets;
1176
1177                 rxq = &adapter->sge.ethrxq[pi->first_qset];
1178                 for (nqsets = pi->nqsets; nqsets; nqsets--) {
1179                         t4vf_sge_intr_msix(0, &rxq->rspq);
1180                         rxq++;
1181                 }
1182         } else
1183                 t4vf_intr_handler(adapter)(0, adapter);
1184 }
1185 #endif
1186
1187 /*
1188  * Ethtool operations.
1189  * ===================
1190  *
1191  * Note that we don't support any ethtool operations which change the physical
1192  * state of the port to which we're linked.
1193  */
1194
1195 /**
1196  *      from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1197  *      @port_type: Firmware Port Type
1198  *      @mod_type: Firmware Module Type
1199  *
1200  *      Translate Firmware Port/Module type to Ethtool Port Type.
1201  */
1202 static int from_fw_port_mod_type(enum fw_port_type port_type,
1203                                  enum fw_port_module_type mod_type)
1204 {
1205         if (port_type == FW_PORT_TYPE_BT_SGMII ||
1206             port_type == FW_PORT_TYPE_BT_XFI ||
1207             port_type == FW_PORT_TYPE_BT_XAUI) {
1208                 return PORT_TP;
1209         } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1210                    port_type == FW_PORT_TYPE_FIBER_XAUI) {
1211                 return PORT_FIBRE;
1212         } else if (port_type == FW_PORT_TYPE_SFP ||
1213                    port_type == FW_PORT_TYPE_QSFP_10G ||
1214                    port_type == FW_PORT_TYPE_QSA ||
1215                    port_type == FW_PORT_TYPE_QSFP ||
1216                    port_type == FW_PORT_TYPE_CR4_QSFP ||
1217                    port_type == FW_PORT_TYPE_CR_QSFP ||
1218                    port_type == FW_PORT_TYPE_CR2_QSFP ||
1219                    port_type == FW_PORT_TYPE_SFP28) {
1220                 if (mod_type == FW_PORT_MOD_TYPE_LR ||
1221                     mod_type == FW_PORT_MOD_TYPE_SR ||
1222                     mod_type == FW_PORT_MOD_TYPE_ER ||
1223                     mod_type == FW_PORT_MOD_TYPE_LRM)
1224                         return PORT_FIBRE;
1225                 else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1226                          mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1227                         return PORT_DA;
1228                 else
1229                         return PORT_OTHER;
1230         } else if (port_type == FW_PORT_TYPE_KR4_100G ||
1231                    port_type == FW_PORT_TYPE_KR_SFP28) {
1232                 return PORT_NONE;
1233         }
1234
1235         return PORT_OTHER;
1236 }
1237
1238 /**
1239  *      fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1240  *      @port_type: Firmware Port Type
1241  *      @fw_caps: Firmware Port Capabilities
1242  *      @link_mode_mask: ethtool Link Mode Mask
1243  *
1244  *      Translate a Firmware Port Capabilities specification to an ethtool
1245  *      Link Mode Mask.
1246  */
1247 static void fw_caps_to_lmm(enum fw_port_type port_type,
1248                            unsigned int fw_caps,
1249                            unsigned long *link_mode_mask)
1250 {
1251         #define SET_LMM(__lmm_name) \
1252                 __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
1253                           link_mode_mask)
1254
1255         #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1256                 do { \
1257                         if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1258                                 SET_LMM(__lmm_name); \
1259                 } while (0)
1260
1261         switch (port_type) {
1262         case FW_PORT_TYPE_BT_SGMII:
1263         case FW_PORT_TYPE_BT_XFI:
1264         case FW_PORT_TYPE_BT_XAUI:
1265                 SET_LMM(TP);
1266                 FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1267                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1268                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1269                 break;
1270
1271         case FW_PORT_TYPE_KX4:
1272         case FW_PORT_TYPE_KX:
1273                 SET_LMM(Backplane);
1274                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1275                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1276                 break;
1277
1278         case FW_PORT_TYPE_KR:
1279                 SET_LMM(Backplane);
1280                 SET_LMM(10000baseKR_Full);
1281                 break;
1282
1283         case FW_PORT_TYPE_BP_AP:
1284                 SET_LMM(Backplane);
1285                 SET_LMM(10000baseR_FEC);
1286                 SET_LMM(10000baseKR_Full);
1287                 SET_LMM(1000baseKX_Full);
1288                 break;
1289
1290         case FW_PORT_TYPE_BP4_AP:
1291                 SET_LMM(Backplane);
1292                 SET_LMM(10000baseR_FEC);
1293                 SET_LMM(10000baseKR_Full);
1294                 SET_LMM(1000baseKX_Full);
1295                 SET_LMM(10000baseKX4_Full);
1296                 break;
1297
1298         case FW_PORT_TYPE_FIBER_XFI:
1299         case FW_PORT_TYPE_FIBER_XAUI:
1300         case FW_PORT_TYPE_SFP:
1301         case FW_PORT_TYPE_QSFP_10G:
1302         case FW_PORT_TYPE_QSA:
1303                 SET_LMM(FIBRE);
1304                 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1305                 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1306                 break;
1307
1308         case FW_PORT_TYPE_BP40_BA:
1309         case FW_PORT_TYPE_QSFP:
1310                 SET_LMM(FIBRE);
1311                 SET_LMM(40000baseSR4_Full);
1312                 break;
1313
1314         case FW_PORT_TYPE_CR_QSFP:
1315         case FW_PORT_TYPE_SFP28:
1316                 SET_LMM(FIBRE);
1317                 SET_LMM(25000baseCR_Full);
1318                 break;
1319
1320         case FW_PORT_TYPE_KR_SFP28:
1321                 SET_LMM(Backplane);
1322                 SET_LMM(25000baseKR_Full);
1323                 break;
1324
1325         case FW_PORT_TYPE_CR2_QSFP:
1326                 SET_LMM(FIBRE);
1327                 SET_LMM(50000baseSR2_Full);
1328                 break;
1329
1330         case FW_PORT_TYPE_KR4_100G:
1331         case FW_PORT_TYPE_CR4_QSFP:
1332                 SET_LMM(FIBRE);
1333                 SET_LMM(100000baseCR4_Full);
1334                 break;
1335
1336         default:
1337                 break;
1338         }
1339
1340         FW_CAPS_TO_LMM(ANEG, Autoneg);
1341         FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1342         FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1343
1344         #undef FW_CAPS_TO_LMM
1345         #undef SET_LMM
1346 }
1347
1348 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1349                                   struct ethtool_link_ksettings *link_ksettings)
1350 {
1351         struct port_info *pi = netdev_priv(dev);
1352         struct ethtool_link_settings *base = &link_ksettings->base;
1353
1354         /* For the nonce, the Firmware doesn't send up Port State changes
1355          * when the Virtual Interface attached to the Port is down.  So
1356          * if it's down, let's grab any changes.
1357          */
1358         if (!netif_running(dev))
1359                 (void)t4vf_update_port_info(pi);
1360
1361         ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1362         ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1363         ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1364
1365         base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1366
1367         if (pi->mdio_addr >= 0) {
1368                 base->phy_address = pi->mdio_addr;
1369                 base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1370                                       ? ETH_MDIO_SUPPORTS_C22
1371                                       : ETH_MDIO_SUPPORTS_C45);
1372         } else {
1373                 base->phy_address = 255;
1374                 base->mdio_support = 0;
1375         }
1376
1377         fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
1378                        link_ksettings->link_modes.supported);
1379         fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
1380                        link_ksettings->link_modes.advertising);
1381         fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
1382                        link_ksettings->link_modes.lp_advertising);
1383
1384         if (netif_carrier_ok(dev)) {
1385                 base->speed = pi->link_cfg.speed;
1386                 base->duplex = DUPLEX_FULL;
1387         } else {
1388                 base->speed = SPEED_UNKNOWN;
1389                 base->duplex = DUPLEX_UNKNOWN;
1390         }
1391
1392         base->autoneg = pi->link_cfg.autoneg;
1393         if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
1394                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1395                                                      supported, Autoneg);
1396         if (pi->link_cfg.autoneg)
1397                 ethtool_link_ksettings_add_link_mode(link_ksettings,
1398                                                      advertising, Autoneg);
1399
1400         return 0;
1401 }
1402
1403 /*
1404  * Return our driver information.
1405  */
1406 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1407                                 struct ethtool_drvinfo *drvinfo)
1408 {
1409         struct adapter *adapter = netdev2adap(dev);
1410
1411         strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1412         strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1413         strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1414                 sizeof(drvinfo->bus_info));
1415         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1416                  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1417                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1418                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1419                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1420                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1421                  FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1422                  FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1423                  FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1424                  FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1425 }
1426
1427 /*
1428  * Return current adapter message level.
1429  */
1430 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1431 {
1432         return netdev2adap(dev)->msg_enable;
1433 }
1434
1435 /*
1436  * Set current adapter message level.
1437  */
1438 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1439 {
1440         netdev2adap(dev)->msg_enable = msglevel;
1441 }
1442
1443 /*
1444  * Return the device's current Queue Set ring size parameters along with the
1445  * allowed maximum values.  Since ethtool doesn't understand the concept of
1446  * multi-queue devices, we just return the current values associated with the
1447  * first Queue Set.
1448  */
1449 static void cxgb4vf_get_ringparam(struct net_device *dev,
1450                                   struct ethtool_ringparam *rp)
1451 {
1452         const struct port_info *pi = netdev_priv(dev);
1453         const struct sge *s = &pi->adapter->sge;
1454
1455         rp->rx_max_pending = MAX_RX_BUFFERS;
1456         rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1457         rp->rx_jumbo_max_pending = 0;
1458         rp->tx_max_pending = MAX_TXQ_ENTRIES;
1459
1460         rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1461         rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1462         rp->rx_jumbo_pending = 0;
1463         rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1464 }
1465
1466 /*
1467  * Set the Queue Set ring size parameters for the device.  Again, since
1468  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1469  * apply these new values across all of the Queue Sets associated with the
1470  * device -- after vetting them of course!
1471  */
1472 static int cxgb4vf_set_ringparam(struct net_device *dev,
1473                                  struct ethtool_ringparam *rp)
1474 {
1475         const struct port_info *pi = netdev_priv(dev);
1476         struct adapter *adapter = pi->adapter;
1477         struct sge *s = &adapter->sge;
1478         int qs;
1479
1480         if (rp->rx_pending > MAX_RX_BUFFERS ||
1481             rp->rx_jumbo_pending ||
1482             rp->tx_pending > MAX_TXQ_ENTRIES ||
1483             rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1484             rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1485             rp->rx_pending < MIN_FL_ENTRIES ||
1486             rp->tx_pending < MIN_TXQ_ENTRIES)
1487                 return -EINVAL;
1488
1489         if (adapter->flags & FULL_INIT_DONE)
1490                 return -EBUSY;
1491
1492         for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1493                 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1494                 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1495                 s->ethtxq[qs].q.size = rp->tx_pending;
1496         }
1497         return 0;
1498 }
1499
1500 /*
1501  * Return the interrupt holdoff timer and count for the first Queue Set on the
1502  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1503  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1504  */
1505 static int cxgb4vf_get_coalesce(struct net_device *dev,
1506                                 struct ethtool_coalesce *coalesce)
1507 {
1508         const struct port_info *pi = netdev_priv(dev);
1509         const struct adapter *adapter = pi->adapter;
1510         const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1511
1512         coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1513         coalesce->rx_max_coalesced_frames =
1514                 ((rspq->intr_params & QINTR_CNT_EN_F)
1515                  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1516                  : 0);
1517         return 0;
1518 }
1519
1520 /*
1521  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1522  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1523  * the interrupt holdoff timer on any of the device's Queue Sets.
1524  */
1525 static int cxgb4vf_set_coalesce(struct net_device *dev,
1526                                 struct ethtool_coalesce *coalesce)
1527 {
1528         const struct port_info *pi = netdev_priv(dev);
1529         struct adapter *adapter = pi->adapter;
1530
1531         return set_rxq_intr_params(adapter,
1532                                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1533                                    coalesce->rx_coalesce_usecs,
1534                                    coalesce->rx_max_coalesced_frames);
1535 }
1536
1537 /*
1538  * Report current port link pause parameter settings.
1539  */
1540 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1541                                    struct ethtool_pauseparam *pauseparam)
1542 {
1543         struct port_info *pi = netdev_priv(dev);
1544
1545         pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1546         pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1547         pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1548 }
1549
1550 /*
1551  * Identify the port by blinking the port's LED.
1552  */
1553 static int cxgb4vf_phys_id(struct net_device *dev,
1554                            enum ethtool_phys_id_state state)
1555 {
1556         unsigned int val;
1557         struct port_info *pi = netdev_priv(dev);
1558
1559         if (state == ETHTOOL_ID_ACTIVE)
1560                 val = 0xffff;
1561         else if (state == ETHTOOL_ID_INACTIVE)
1562                 val = 0;
1563         else
1564                 return -EINVAL;
1565
1566         return t4vf_identify_port(pi->adapter, pi->viid, val);
1567 }
1568
1569 /*
1570  * Per-port sums of per-queue counters; see collect_sge_port_stats().
1571  */
1572 struct queue_port_stats {
1573         u64 tso;                /* sum of the TX queues' tso */
1574         u64 tx_csum;            /* sum of the TX queues' tx_cso */
1575         u64 rx_csum;            /* sum of the RX queues' stats.rx_cso */
1576         u64 vlan_ex;            /* sum of the RX queues' stats.vlan_ex */
1577         u64 vlan_ins;           /* sum of the TX queues' vlan_ins */
1578         u64 lro_pkts;           /* sum of the RX queues' stats.lro_pkts */
1579         u64 lro_merged;         /* sum of the RX queues' stats.lro_merged */
1580 };
1581
1582 /*
1583  * Names for the ETH_SS_STATS statistics set ("ethtool -S"), copied out by
1584  * cxgb4vf_get_strings() and counted by cxgb4vf_get_sset_count().  The
1585  * order here must match the order in which the values are reported.
1586  */
1587 static const char stats_strings[][ETH_GSTRING_LEN] = {
1588         /*
1589          * Port statistics: must match the t4vf_port_stats structure layout.
1590          */
1591         "TxBroadcastBytes  ",
1592         "TxBroadcastFrames ",
1593         "TxMulticastBytes  ",
1594         "TxMulticastFrames ",
1595         "TxUnicastBytes    ",
1596         "TxUnicastFrames   ",
1597         "TxDroppedFrames   ",
1598         "TxOffloadBytes    ",
1599         "TxOffloadFrames   ",
1600         "RxBroadcastBytes  ",
1601         "RxBroadcastFrames ",
1602         "RxMulticastBytes  ",
1603         "RxMulticastFrames ",
1604         "RxUnicastBytes    ",
1605         "RxUnicastFrames   ",
1606         "RxErrorFrames     ",
1607
1608         /*
1609          * Statistics accumulated over the port's queues: must match the
1610          * order of the fields in the queue_port_stats structure.
1611          */
1612         "TSO               ",
1613         "TxCsumOffload     ",
1614         "RxCsumGood        ",
1615         "VLANextractions   ",
1616         "VLANinsertions    ",
1617         "GROPackets        ",
1618         "GROMerged         ",
1619 };
1620
1621 /*
1622  * Return the number of statistics in the specified statistics set.
1623  */
1624 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1625 {
1626         switch (sset) {
1627         case ETH_SS_STATS:
1628                 return ARRAY_SIZE(stats_strings);
1629         default:
1630                 return -EOPNOTSUPP;
1631         }
1632         /*NOTREACHED*/
1633 }
1634
1635 /*
1636  * Return the strings for the specified statistics set.
1637  */
1638 static void cxgb4vf_get_strings(struct net_device *dev,
1639                                 u32 sset,
1640                                 u8 *data)
1641 {
1642         switch (sset) {
1643         case ETH_SS_STATS:
1644                 memcpy(data, stats_strings, sizeof(stats_strings));
1645                 break;
1646         }
1647 }
1648
1649 /*
1650  * Small utility routine to accumulate queue statistics across the queues of
1651  * a "port".
1652  */
1653 static void collect_sge_port_stats(const struct adapter *adapter,
1654                                    const struct port_info *pi,
1655                                    struct queue_port_stats *stats)
1656 {
1657         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1658         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1659         int qs;
1660
1661         memset(stats, 0, sizeof(*stats));
1662         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1663                 stats->tso += txq->tso;
1664                 stats->tx_csum += txq->tx_cso;
1665                 stats->rx_csum += rxq->stats.rx_cso;
1666                 stats->vlan_ex += rxq->stats.vlan_ex;
1667                 stats->vlan_ins += txq->vlan_ins;
1668                 stats->lro_pkts += rxq->stats.lro_pkts;
1669                 stats->lro_merged += rxq->stats.lro_merged;
1670         }
1671 }
1672
1673 /*
1674  * Return the ETH_SS_STATS statistics set.
1675  */
1676 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1677                                       struct ethtool_stats *stats,
1678                                       u64 *data)
1679 {
1680         struct port_info *pi = netdev2pinfo(dev);
1681         struct adapter *adapter = pi->adapter;
1682         int err = t4vf_get_port_stats(adapter, pi->pidx,
1683                                       (struct t4vf_port_stats *)data);
1684         if (err)
1685                 memset(data, 0, sizeof(struct t4vf_port_stats));
1686
1687         data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1688         collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1689 }
1690
/*
 * Return the size of our register map.
 *
 * This is T4VF_REGMAP_SIZE, the same number of bytes that
 * cxgb4vf_get_regs() clears before filling in the register blocks.
 */
static int cxgb4vf_get_regs_len(struct net_device *dev)
{
        return T4VF_REGMAP_SIZE;
}
1698
1699 /*
1700  * Dump a block of registers, start to end inclusive, into a buffer.
1701  */
1702 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1703                            unsigned int start, unsigned int end)
1704 {
1705         u32 *bp = regbuf + start - T4VF_REGMAP_START;
1706
1707         for ( ; start <= end; start += sizeof(u32)) {
1708                 /*
1709                  * Avoid reading the Mailbox Control register since that
1710                  * can trigger a Mailbox Ownership Arbitration cycle and
1711                  * interfere with communication with the firmware.
1712                  */
1713                 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1714                         *bp++ = 0xffff;
1715                 else
1716                         *bp++ = t4_read_reg(adapter, start);
1717         }
1718 }
1719
1720 /*
1721  * Copy our entire register map into the provided buffer.
1722  */
1723 static void cxgb4vf_get_regs(struct net_device *dev,
1724                              struct ethtool_regs *regs,
1725                              void *regbuf)
1726 {
1727         struct adapter *adapter = netdev2adap(dev);
1728
1729         regs->version = mk_adap_vers(adapter);
1730
1731         /*
1732          * Fill in register buffer with our register map.
1733          */
1734         memset(regbuf, 0, T4VF_REGMAP_SIZE);
1735
1736         reg_block_dump(adapter, regbuf,
1737                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1738                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1739         reg_block_dump(adapter, regbuf,
1740                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1741                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1742
1743         /* T5 adds new registers in the PL Register map.
1744          */
1745         reg_block_dump(adapter, regbuf,
1746                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1747                        T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1748                        ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1749         reg_block_dump(adapter, regbuf,
1750                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1751                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1752
1753         reg_block_dump(adapter, regbuf,
1754                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1755                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1756 }
1757
1758 /*
1759  * Report current Wake On LAN settings.
1760  */
1761 static void cxgb4vf_get_wol(struct net_device *dev,
1762                             struct ethtool_wolinfo *wol)
1763 {
1764         wol->supported = 0;
1765         wol->wolopts = 0;
1766         memset(&wol->sopass, 0, sizeof(wol->sopass));
1767 }
1768
1769 /*
1770  * TCP Segmentation Offload flags which we support.
1771  */
1772 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1773
1774 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1775         .get_link_ksettings     = cxgb4vf_get_link_ksettings,
1776         .get_drvinfo            = cxgb4vf_get_drvinfo,
1777         .get_msglevel           = cxgb4vf_get_msglevel,
1778         .set_msglevel           = cxgb4vf_set_msglevel,
1779         .get_ringparam          = cxgb4vf_get_ringparam,
1780         .set_ringparam          = cxgb4vf_set_ringparam,
1781         .get_coalesce           = cxgb4vf_get_coalesce,
1782         .set_coalesce           = cxgb4vf_set_coalesce,
1783         .get_pauseparam         = cxgb4vf_get_pauseparam,
1784         .get_link               = ethtool_op_get_link,
1785         .get_strings            = cxgb4vf_get_strings,
1786         .set_phys_id            = cxgb4vf_phys_id,
1787         .get_sset_count         = cxgb4vf_get_sset_count,
1788         .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1789         .get_regs_len           = cxgb4vf_get_regs_len,
1790         .get_regs               = cxgb4vf_get_regs,
1791         .get_wol                = cxgb4vf_get_wol,
1792 };
1793
1794 /*
1795  * /sys/kernel/debug/cxgb4vf support code and data.
1796  * ================================================
1797  */
1798
1799 /*
1800  * Show Firmware Mailbox Command/Reply Log
1801  *
1802  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1803  * it's possible that we can catch things during a log update and therefore
1804  * see partially corrupted log entries.  But i9t's probably Good Enough(tm).
1805  * If we ever decide that we want to make sure that we're dumping a coherent
1806  * log, we'd need to perform locking in the mailbox logging and in
1807  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1808  * like we do for the Firmware Device Log.  But as stated above, meh ...
1809  */
1810 static int mboxlog_show(struct seq_file *seq, void *v)
1811 {
1812         struct adapter *adapter = seq->private;
1813         struct mbox_cmd_log *log = adapter->mbox_log;
1814         struct mbox_cmd *entry;
1815         int entry_idx, i;
1816
1817         if (v == SEQ_START_TOKEN) {
1818                 seq_printf(seq,
1819                            "%10s  %15s  %5s  %5s  %s\n",
1820                            "Seq#", "Tstamp", "Atime", "Etime",
1821                            "Command/Reply");
1822                 return 0;
1823         }
1824
1825         entry_idx = log->cursor + ((uintptr_t)v - 2);
1826         if (entry_idx >= log->size)
1827                 entry_idx -= log->size;
1828         entry = mbox_cmd_log_entry(log, entry_idx);
1829
1830         /* skip over unused entries */
1831         if (entry->timestamp == 0)
1832                 return 0;
1833
1834         seq_printf(seq, "%10u  %15llu  %5d  %5d",
1835                    entry->seqno, entry->timestamp,
1836                    entry->access, entry->execute);
1837         for (i = 0; i < MBOX_LEN / 8; i++) {
1838                 u64 flit = entry->cmd[i];
1839                 u32 hi = (u32)(flit >> 32);
1840                 u32 lo = (u32)flit;
1841
1842                 seq_printf(seq, "  %08x %08x", hi, lo);
1843         }
1844         seq_puts(seq, "\n");
1845         return 0;
1846 }
1847
1848 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1849 {
1850         struct adapter *adapter = seq->private;
1851         struct mbox_cmd_log *log = adapter->mbox_log;
1852
1853         return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1854 }
1855
1856 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1857 {
1858         return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1859 }
1860
1861 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1862 {
1863         ++*pos;
1864         return mboxlog_get_idx(seq, *pos);
1865 }
1866
/*
 * seq_file stop callback for the mailbox log.  Intentionally empty:
 * mboxlog_start()/mboxlog_next() only hand out integer cookies, so there is
 * nothing to release here.
 */
static void mboxlog_stop(struct seq_file *seq, void *v)
{
}
1870
1871 static const struct seq_operations mboxlog_seq_ops = {
1872         .start = mboxlog_start,
1873         .next  = mboxlog_next,
1874         .stop  = mboxlog_stop,
1875         .show  = mboxlog_show
1876 };
1877
1878 static int mboxlog_open(struct inode *inode, struct file *file)
1879 {
1880         int res = seq_open(file, &mboxlog_seq_ops);
1881
1882         if (!res) {
1883                 struct seq_file *seq = file->private_data;
1884
1885                 seq->private = inode->i_private;
1886         }
1887         return res;
1888 }
1889
1890 static const struct file_operations mboxlog_fops = {
1891         .owner   = THIS_MODULE,
1892         .open    = mboxlog_open,
1893         .read    = seq_read,
1894         .llseek  = seq_lseek,
1895         .release = seq_release,
1896 };
1897
1898 /*
1899  * Show SGE Queue Set information.  We display QPL Queues Sets per line.
1900  */
1901 #define QPL     4
1902
1903 static int sge_qinfo_show(struct seq_file *seq, void *v)
1904 {
1905         struct adapter *adapter = seq->private;
1906         int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1907         int qs, r = (uintptr_t)v - 1;
1908
1909         if (r)
1910                 seq_putc(seq, '\n');
1911
1912         #define S3(fmt_spec, s, v) \
1913                 do {\
1914                         seq_printf(seq, "%-12s", s); \
1915                         for (qs = 0; qs < n; ++qs) \
1916                                 seq_printf(seq, " %16" fmt_spec, v); \
1917                         seq_putc(seq, '\n'); \
1918                 } while (0)
1919         #define S(s, v)         S3("s", s, v)
1920         #define T(s, v)         S3("u", s, txq[qs].v)
1921         #define R(s, v)         S3("u", s, rxq[qs].v)
1922
1923         if (r < eth_entries) {
1924                 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1925                 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1926                 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1927
1928                 S("QType:", "Ethernet");
1929                 S("Interface:",
1930                   (rxq[qs].rspq.netdev
1931                    ? rxq[qs].rspq.netdev->name
1932                    : "N/A"));
1933                 S3("d", "Port:",
1934                    (rxq[qs].rspq.netdev
1935                     ? ((struct port_info *)
1936                        netdev_priv(rxq[qs].rspq.netdev))->port_id
1937                     : -1));
1938                 T("TxQ ID:", q.abs_id);
1939                 T("TxQ size:", q.size);
1940                 T("TxQ inuse:", q.in_use);
1941                 T("TxQ PIdx:", q.pidx);
1942                 T("TxQ CIdx:", q.cidx);
1943                 R("RspQ ID:", rspq.abs_id);
1944                 R("RspQ size:", rspq.size);
1945                 R("RspQE size:", rspq.iqe_len);
1946                 S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1947                 S3("u", "Intr pktcnt:",
1948                    adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1949                 R("RspQ CIdx:", rspq.cidx);
1950                 R("RspQ Gen:", rspq.gen);
1951                 R("FL ID:", fl.abs_id);
1952                 R("FL size:", fl.size - MIN_FL_RESID);
1953                 R("FL avail:", fl.avail);
1954                 R("FL PIdx:", fl.pidx);
1955                 R("FL CIdx:", fl.cidx);
1956                 return 0;
1957         }
1958
1959         r -= eth_entries;
1960         if (r == 0) {
1961                 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1962
1963                 seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1964                 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1965                 seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1966                            qtimer_val(adapter, evtq));
1967                 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1968                            adapter->sge.counter_val[evtq->pktcnt_idx]);
1969                 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1970                 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1971         } else if (r == 1) {
1972                 const struct sge_rspq *intrq = &adapter->sge.intrq;
1973
1974                 seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1975                 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1976                 seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1977                            qtimer_val(adapter, intrq));
1978                 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1979                            adapter->sge.counter_val[intrq->pktcnt_idx]);
1980                 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1981                 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1982         }
1983
1984         #undef R
1985         #undef T
1986         #undef S
1987         #undef S3
1988
1989         return 0;
1990 }
1991
1992 /*
1993  * Return the number of "entries" in our "file".  We group the multi-Queue
1994  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1995  *
1996  *     Ethernet RX/TX Queue Sets
1997  *     Firmware Event Queue
1998  *     Forwarded Interrupt Queue (if in MSI mode)
1999  */
2000 static int sge_queue_entries(const struct adapter *adapter)
2001 {
2002         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2003                 ((adapter->flags & USING_MSI) != 0);
2004 }
2005
2006 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2007 {
2008         int entries = sge_queue_entries(seq->private);
2009
2010         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2011 }
2012
/*
 * seq_file stop callback for "sge_qinfo".  Intentionally empty:
 * sge_queue_start()/sge_queue_next() only hand out integer cookies, so
 * there is nothing to release here.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
}
2016
2017 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2018 {
2019         int entries = sge_queue_entries(seq->private);
2020
2021         ++*pos;
2022         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2023 }
2024
2025 static const struct seq_operations sge_qinfo_seq_ops = {
2026         .start = sge_queue_start,
2027         .next  = sge_queue_next,
2028         .stop  = sge_queue_stop,
2029         .show  = sge_qinfo_show
2030 };
2031
2032 static int sge_qinfo_open(struct inode *inode, struct file *file)
2033 {
2034         int res = seq_open(file, &sge_qinfo_seq_ops);
2035
2036         if (!res) {
2037                 struct seq_file *seq = file->private_data;
2038                 seq->private = inode->i_private;
2039         }
2040         return res;
2041 }
2042
2043 static const struct file_operations sge_qinfo_debugfs_fops = {
2044         .owner   = THIS_MODULE,
2045         .open    = sge_qinfo_open,
2046         .read    = seq_read,
2047         .llseek  = seq_lseek,
2048         .release = seq_release,
2049 };
2050
2051 /*
2052  * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
2053  */
2054 #define QPL     4
2055
2056 static int sge_qstats_show(struct seq_file *seq, void *v)
2057 {
2058         struct adapter *adapter = seq->private;
2059         int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2060         int qs, r = (uintptr_t)v - 1;
2061
2062         if (r)
2063                 seq_putc(seq, '\n');
2064
2065         #define S3(fmt, s, v) \
2066                 do { \
2067                         seq_printf(seq, "%-16s", s); \
2068                         for (qs = 0; qs < n; ++qs) \
2069                                 seq_printf(seq, " %8" fmt, v); \
2070                         seq_putc(seq, '\n'); \
2071                 } while (0)
2072         #define S(s, v)         S3("s", s, v)
2073
2074         #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
2075         #define T(s, v)         T3("lu", s, v)
2076
2077         #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
2078         #define R(s, v)         R3("lu", s, v)
2079
2080         if (r < eth_entries) {
2081                 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2082                 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2083                 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2084
2085                 S("QType:", "Ethernet");
2086                 S("Interface:",
2087                   (rxq[qs].rspq.netdev
2088                    ? rxq[qs].rspq.netdev->name
2089                    : "N/A"));
2090                 R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2091                 R("RxPackets:", stats.pkts);
2092                 R("RxCSO:", stats.rx_cso);
2093                 R("VLANxtract:", stats.vlan_ex);
2094                 R("LROmerged:", stats.lro_merged);
2095                 R("LROpackets:", stats.lro_pkts);
2096                 R("RxDrops:", stats.rx_drops);
2097                 T("TSO:", tso);
2098                 T("TxCSO:", tx_cso);
2099                 T("VLANins:", vlan_ins);
2100                 T("TxQFull:", q.stops);
2101                 T("TxQRestarts:", q.restarts);
2102                 T("TxMapErr:", mapping_err);
2103                 R("FLAllocErr:", fl.alloc_failed);
2104                 R("FLLrgAlcErr:", fl.large_alloc_failed);
2105                 R("FLStarving:", fl.starving);
2106                 return 0;
2107         }
2108
2109         r -= eth_entries;
2110         if (r == 0) {
2111                 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2112
2113                 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
2114                 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2115                            evtq->unhandled_irqs);
2116                 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2117                 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2118         } else if (r == 1) {
2119                 const struct sge_rspq *intrq = &adapter->sge.intrq;
2120
2121                 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
2122                 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2123                            intrq->unhandled_irqs);
2124                 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2125                 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2126         }
2127
2128         #undef R
2129         #undef T
2130         #undef S
2131         #undef R3
2132         #undef T3
2133         #undef S3
2134
2135         return 0;
2136 }
2137
2138 /*
2139  * Return the number of "entries" in our "file".  We group the multi-Queue
2140  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2141  *
2142  *     Ethernet RX/TX Queue Sets
2143  *     Firmware Event Queue
2144  *     Forwarded Interrupt Queue (if in MSI mode)
2145  */
2146 static int sge_qstats_entries(const struct adapter *adapter)
2147 {
2148         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2149                 ((adapter->flags & USING_MSI) != 0);
2150 }
2151
2152 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2153 {
2154         int entries = sge_qstats_entries(seq->private);
2155
2156         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2157 }
2158
/*
 * seq_file stop callback for "sge_qstats".  Intentionally empty:
 * sge_qstats_start()/sge_qstats_next() only hand out integer cookies, so
 * there is nothing to release here.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}
2162
2163 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2164 {
2165         int entries = sge_qstats_entries(seq->private);
2166
2167         (*pos)++;
2168         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2169 }
2170
2171 static const struct seq_operations sge_qstats_seq_ops = {
2172         .start = sge_qstats_start,
2173         .next  = sge_qstats_next,
2174         .stop  = sge_qstats_stop,
2175         .show  = sge_qstats_show
2176 };
2177
2178 static int sge_qstats_open(struct inode *inode, struct file *file)
2179 {
2180         int res = seq_open(file, &sge_qstats_seq_ops);
2181
2182         if (res == 0) {
2183                 struct seq_file *seq = file->private_data;
2184                 seq->private = inode->i_private;
2185         }
2186         return res;
2187 }
2188
2189 static const struct file_operations sge_qstats_proc_fops = {
2190         .owner   = THIS_MODULE,
2191         .open    = sge_qstats_open,
2192         .read    = seq_read,
2193         .llseek  = seq_lseek,
2194         .release = seq_release,
2195 };
2196
2197 /*
2198  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2199  */
2200 static int resources_show(struct seq_file *seq, void *v)
2201 {
2202         struct adapter *adapter = seq->private;
2203         struct vf_resources *vfres = &adapter->params.vfres;
2204
2205         #define S(desc, fmt, var) \
2206                 seq_printf(seq, "%-60s " fmt "\n", \
2207                            desc " (" #var "):", vfres->var)
2208
2209         S("Virtual Interfaces", "%d", nvi);
2210         S("Egress Queues", "%d", neq);
2211         S("Ethernet Control", "%d", nethctrl);
2212         S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2213         S("Ingress Queues", "%d", niq);
2214         S("Traffic Class", "%d", tc);
2215         S("Port Access Rights Mask", "%#x", pmask);
2216         S("MAC Address Filters", "%d", nexactf);
2217         S("Firmware Command Read Capabilities", "%#x", r_caps);
2218         S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2219
2220         #undef S
2221
2222         return 0;
2223 }
2224
2225 static int resources_open(struct inode *inode, struct file *file)
2226 {
2227         return single_open(file, resources_show, inode->i_private);
2228 }
2229
2230 static const struct file_operations resources_proc_fops = {
2231         .owner   = THIS_MODULE,
2232         .open    = resources_open,
2233         .read    = seq_read,
2234         .llseek  = seq_lseek,
2235         .release = single_release,
2236 };
2237
2238 /*
2239  * Show Virtual Interfaces.
2240  */
2241 static int interfaces_show(struct seq_file *seq, void *v)
2242 {
2243         if (v == SEQ_START_TOKEN) {
2244                 seq_puts(seq, "Interface  Port   VIID\n");
2245         } else {
2246                 struct adapter *adapter = seq->private;
2247                 int pidx = (uintptr_t)v - 2;
2248                 struct net_device *dev = adapter->port[pidx];
2249                 struct port_info *pi = netdev_priv(dev);
2250
2251                 seq_printf(seq, "%9s  %4d  %#5x\n",
2252                            dev->name, pi->port_id, pi->viid);
2253         }
2254         return 0;
2255 }
2256
2257 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2258 {
2259         return pos <= adapter->params.nports
2260                 ? (void *)(uintptr_t)(pos + 1)
2261                 : NULL;
2262 }
2263
2264 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2265 {
2266         return *pos
2267                 ? interfaces_get_idx(seq->private, *pos)
2268                 : SEQ_START_TOKEN;
2269 }
2270
2271 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2272 {
2273         (*pos)++;
2274         return interfaces_get_idx(seq->private, *pos);
2275 }
2276
/*
 * seq_file stop callback for "interfaces".  Intentionally empty:
 * interfaces_start()/interfaces_next() only hand out integer cookies, so
 * there is nothing to release here.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
}
2280
2281 static const struct seq_operations interfaces_seq_ops = {
2282         .start = interfaces_start,
2283         .next  = interfaces_next,
2284         .stop  = interfaces_stop,
2285         .show  = interfaces_show
2286 };
2287
2288 static int interfaces_open(struct inode *inode, struct file *file)
2289 {
2290         int res = seq_open(file, &interfaces_seq_ops);
2291
2292         if (res == 0) {
2293                 struct seq_file *seq = file->private_data;
2294                 seq->private = inode->i_private;
2295         }
2296         return res;
2297 }
2298
2299 static const struct file_operations interfaces_proc_fops = {
2300         .owner   = THIS_MODULE,
2301         .open    = interfaces_open,
2302         .read    = seq_read,
2303         .llseek  = seq_lseek,
2304         .release = seq_release,
2305 };
2306
2307 /*
2308  * /sys/kernel/debugfs/cxgb4vf/ files list.
2309  */
2310 struct cxgb4vf_debugfs_entry {
2311         const char *name;               /* name of debugfs node */
2312         umode_t mode;                   /* file system mode */
2313         const struct file_operations *fops;
2314 };
2315
2316 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2317         { "mboxlog",    S_IRUGO, &mboxlog_fops },
2318         { "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2319         { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2320         { "resources",  S_IRUGO, &resources_proc_fops },
2321         { "interfaces", S_IRUGO, &interfaces_proc_fops },
2322 };
2323
2324 /*
2325  * Module and device initialization and cleanup code.
2326  * ==================================================
2327  */
2328
2329 /*
2330  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2331  * directory (debugfs_root) has already been set up.
2332  */
2333 static int setup_debugfs(struct adapter *adapter)
2334 {
2335         int i;
2336
2337         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2338
2339         /*
2340          * Debugfs support is best effort.
2341          */
2342         for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2343                 (void)debugfs_create_file(debugfs_files[i].name,
2344                                   debugfs_files[i].mode,
2345                                   adapter->debugfs_root,
2346                                   (void *)adapter,
2347                                   debugfs_files[i].fops);
2348
2349         return 0;
2350 }
2351
/*
 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
 * it to our caller to tear down the directory (debugfs_root).
 *
 * Apart from asserting (BUG_ON) that debugfs_root is valid, this currently
 * does nothing.
 */
static void cleanup_debugfs(struct adapter *adapter)
{
        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

        /*
         * Unlike our sister routine cleanup_proc(), we don't need to remove
         * individual entries because a call will be made to
         * debugfs_remove_recursive().  We just need to clean up any ancillary
         * persistent state.
         */
        /* nothing to do */
}
2368
2369 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2370  * knowing our Virtual Function Resources and may be called a second time if
2371  * we fall back from MSI-X to MSI Interrupt Mode.
2372  */
2373 static void size_nports_qsets(struct adapter *adapter)
2374 {
2375         struct vf_resources *vfres = &adapter->params.vfres;
2376         unsigned int ethqsets, pmask_nports;
2377
2378         /* The number of "ports" which we support is equal to the number of
2379          * Virtual Interfaces with which we've been provisioned.
2380          */
2381         adapter->params.nports = vfres->nvi;
2382         if (adapter->params.nports > MAX_NPORTS) {
2383                 dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2384                          " allowed virtual interfaces\n", MAX_NPORTS,
2385                          adapter->params.nports);
2386                 adapter->params.nports = MAX_NPORTS;
2387         }
2388
2389         /* We may have been provisioned with more VIs than the number of
2390          * ports we're allowed to access (our Port Access Rights Mask).
2391          * This is obviously a configuration conflict but we don't want to
2392          * crash the kernel or anything silly just because of that.
2393          */
2394         pmask_nports = hweight32(adapter->params.vfres.pmask);
2395         if (pmask_nports < adapter->params.nports) {
2396                 dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2397                          " virtual interfaces; limited by Port Access Rights"
2398                          " mask %#x\n", pmask_nports, adapter->params.nports,
2399                          adapter->params.vfres.pmask);
2400                 adapter->params.nports = pmask_nports;
2401         }
2402
2403         /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2404          * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2405          * reserve an Ingress Queue for a Forwarded Interrupts.
2406          *
2407          * The rest of the FL/Intr-capable ingress queues will be matched up
2408          * one-for-one with Ethernet/Control egress queues in order to form
2409          * "Queue Sets" which will be aportioned between the "ports".  For
2410          * each Queue Set, we'll need the ability to allocate two Egress
2411          * Contexts -- one for the Ingress Queue Free List and one for the TX
2412          * Ethernet Queue.
2413          *
2414          * Note that even if we're currently configured to use MSI-X
2415          * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2416          * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2417          * happens we'll need to adjust things later.
2418          */
2419         ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2420         if (vfres->nethctrl != ethqsets)
2421                 ethqsets = min(vfres->nethctrl, ethqsets);
2422         if (vfres->neq < ethqsets*2)
2423                 ethqsets = vfres->neq/2;
2424         if (ethqsets > MAX_ETH_QSETS)
2425                 ethqsets = MAX_ETH_QSETS;
2426         adapter->sge.max_ethqsets = ethqsets;
2427
2428         if (adapter->sge.max_ethqsets < adapter->params.nports) {
2429                 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2430                          " virtual interfaces (too few Queue Sets)\n",
2431                          adapter->sge.max_ethqsets, adapter->params.nports);
2432                 adapter->params.nports = adapter->sge.max_ethqsets;
2433         }
2434 }
2435
2436 /*
2437  * Perform early "adapter" initialization.  This is where we discover what
2438  * adapter parameters we're going to be using and initialize basic adapter
2439  * hardware support.
2440  */
2441 static int adap_init0(struct adapter *adapter)
2442 {
2443         struct sge_params *sge_params = &adapter->params.sge;
2444         struct sge *s = &adapter->sge;
2445         int err;
2446         u32 param, val = 0;
2447
2448         /*
2449          * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2450          * 2.6.31 and later we can't call pci_reset_function() in order to
2451          * issue an FLR because of a self- deadlock on the device semaphore.
2452          * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2453          * cases where they're needed -- for instance, some versions of KVM
2454          * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2455          * use the firmware based reset in order to reset any per function
2456          * state.
2457          */
2458         err = t4vf_fw_reset(adapter);
2459         if (err < 0) {
2460                 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2461                 return err;
2462         }
2463
2464         /*
2465          * Grab basic operational parameters.  These will predominantly have
2466          * been set up by the Physical Function Driver or will be hard coded
2467          * into the adapter.  We just have to live with them ...  Note that
2468          * we _must_ get our VPD parameters before our SGE parameters because
2469          * we need to know the adapter's core clock from the VPD in order to
2470          * properly decode the SGE Timer Values.
2471          */
2472         err = t4vf_get_dev_params(adapter);
2473         if (err) {
2474                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2475                         " device parameters: err=%d\n", err);
2476                 return err;
2477         }
2478         err = t4vf_get_vpd_params(adapter);
2479         if (err) {
2480                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2481                         " VPD parameters: err=%d\n", err);
2482                 return err;
2483         }
2484         err = t4vf_get_sge_params(adapter);
2485         if (err) {
2486                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2487                         " SGE parameters: err=%d\n", err);
2488                 return err;
2489         }
2490         err = t4vf_get_rss_glb_config(adapter);
2491         if (err) {
2492                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2493                         " RSS parameters: err=%d\n", err);
2494                 return err;
2495         }
2496         if (adapter->params.rss.mode !=
2497             FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2498                 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2499                         " mode %d\n", adapter->params.rss.mode);
2500                 return -EINVAL;
2501         }
2502         err = t4vf_sge_init(adapter);
2503         if (err) {
2504                 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2505                         " err=%d\n", err);
2506                 return err;
2507         }
2508
2509         /* If we're running on newer firmware, let it know that we're
2510          * prepared to deal with encapsulated CPL messages.  Older
2511          * firmware won't understand this and we'll just get
2512          * unencapsulated messages ...
2513          */
2514         param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2515                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2516         val = 1;
2517         (void) t4vf_set_params(adapter, 1, &param, &val);
2518
2519         /*
2520          * Retrieve our RX interrupt holdoff timer values and counter
2521          * threshold values from the SGE parameters.
2522          */
2523         s->timer_val[0] = core_ticks_to_us(adapter,
2524                 TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2525         s->timer_val[1] = core_ticks_to_us(adapter,
2526                 TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2527         s->timer_val[2] = core_ticks_to_us(adapter,
2528                 TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2529         s->timer_val[3] = core_ticks_to_us(adapter,
2530                 TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2531         s->timer_val[4] = core_ticks_to_us(adapter,
2532                 TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2533         s->timer_val[5] = core_ticks_to_us(adapter,
2534                 TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2535
2536         s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2537         s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2538         s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2539         s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2540
2541         /*
2542          * Grab our Virtual Interface resource allocation, extract the
2543          * features that we're interested in and do a bit of sanity testing on
2544          * what we discover.
2545          */
2546         err = t4vf_get_vfres(adapter);
2547         if (err) {
2548                 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2549                         " resources: err=%d\n", err);
2550                 return err;
2551         }
2552
2553         /* Check for various parameter sanity issues */
2554         if (adapter->params.vfres.pmask == 0) {
2555                 dev_err(adapter->pdev_dev, "no port access configured\n"
2556                         "usable!\n");
2557                 return -EINVAL;
2558         }
2559         if (adapter->params.vfres.nvi == 0) {
2560                 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2561                         "usable!\n");
2562                 return -EINVAL;
2563         }
2564
2565         /* Initialize nports and max_ethqsets now that we have our Virtual
2566          * Function Resources.
2567          */
2568         size_nports_qsets(adapter);
2569
2570         return 0;
2571 }
2572
2573 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2574                              u8 pkt_cnt_idx, unsigned int size,
2575                              unsigned int iqe_size)
2576 {
2577         rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2578                              (pkt_cnt_idx < SGE_NCOUNTERS ?
2579                               QINTR_CNT_EN_F : 0));
2580         rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2581                             ? pkt_cnt_idx
2582                             : 0);
2583         rspq->iqe_len = iqe_size;
2584         rspq->size = size;
2585 }
2586
2587 /*
2588  * Perform default configuration of DMA queues depending on the number and
2589  * type of ports we found and the number of available CPUs.  Most settings can
2590  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2591  * being brought up for the first time.
2592  */
2593 static void cfg_queues(struct adapter *adapter)
2594 {
2595         struct sge *s = &adapter->sge;
2596         int q10g, n10g, qidx, pidx, qs;
2597         size_t iqe_size;
2598
2599         /*
2600          * We should not be called till we know how many Queue Sets we can
2601          * support.  In particular, this means that we need to know what kind
2602          * of interrupts we'll be using ...
2603          */
2604         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2605
2606         /*
2607          * Count the number of 10GbE Virtual Interfaces that we have.
2608          */
2609         n10g = 0;
2610         for_each_port(adapter, pidx)
2611                 n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2612
2613         /*
2614          * We default to 1 queue per non-10G port and up to # of cores queues
2615          * per 10G port.
2616          */
2617         if (n10g == 0)
2618                 q10g = 0;
2619         else {
2620                 int n1g = (adapter->params.nports - n10g);
2621                 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2622                 if (q10g > num_online_cpus())
2623                         q10g = num_online_cpus();
2624         }
2625
2626         /*
2627          * Allocate the "Queue Sets" to the various Virtual Interfaces.
2628          * The layout will be established in setup_sge_queues() when the
2629          * adapter is brough up for the first time.
2630          */
2631         qidx = 0;
2632         for_each_port(adapter, pidx) {
2633                 struct port_info *pi = adap2pinfo(adapter, pidx);
2634
2635                 pi->first_qset = qidx;
2636                 pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2637                 qidx += pi->nqsets;
2638         }
2639         s->ethqsets = qidx;
2640
2641         /*
2642          * The Ingress Queue Entry Size for our various Response Queues needs
2643          * to be big enough to accommodate the largest message we can receive
2644          * from the chip/firmware; which is 64 bytes ...
2645          */
2646         iqe_size = 64;
2647
2648         /*
2649          * Set up default Queue Set parameters ...  Start off with the
2650          * shortest interrupt holdoff timer.
2651          */
2652         for (qs = 0; qs < s->max_ethqsets; qs++) {
2653                 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2654                 struct sge_eth_txq *txq = &s->ethtxq[qs];
2655
2656                 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2657                 rxq->fl.size = 72;
2658                 txq->q.size = 1024;
2659         }
2660
2661         /*
2662          * The firmware event queue is used for link state changes and
2663          * notifications of TX DMA completions.
2664          */
2665         init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2666
2667         /*
2668          * The forwarded interrupt queue is used when we're in MSI interrupt
2669          * mode.  In this mode all interrupts associated with RX queues will
2670          * be forwarded to a single queue which we'll associate with our MSI
2671          * interrupt vector.  The messages dropped in the forwarded interrupt
2672          * queue will indicate which ingress queue needs servicing ...  This
2673          * queue needs to be large enough to accommodate all of the ingress
2674          * queues which are forwarding their interrupt (+1 to prevent the PIDX
2675          * from equalling the CIDX if every ingress queue has an outstanding
2676          * interrupt).  The queue doesn't need to be any larger because no
2677          * ingress queue will ever have more than one outstanding interrupt at
2678          * any time ...
2679          */
2680         init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2681                   iqe_size);
2682 }
2683
2684 /*
2685  * Reduce the number of Ethernet queues across all ports to at most n.
2686  * n provides at least one queue per port.
2687  */
2688 static void reduce_ethqs(struct adapter *adapter, int n)
2689 {
2690         int i;
2691         struct port_info *pi;
2692
2693         /*
2694          * While we have too many active Ether Queue Sets, interate across the
2695          * "ports" and reduce their individual Queue Set allocations.
2696          */
2697         BUG_ON(n < adapter->params.nports);
2698         while (n < adapter->sge.ethqsets)
2699                 for_each_port(adapter, i) {
2700                         pi = adap2pinfo(adapter, i);
2701                         if (pi->nqsets > 1) {
2702                                 pi->nqsets--;
2703                                 adapter->sge.ethqsets--;
2704                                 if (adapter->sge.ethqsets <= n)
2705                                         break;
2706                         }
2707                 }
2708
2709         /*
2710          * Reassign the starting Queue Sets for each of the "ports" ...
2711          */
2712         n = 0;
2713         for_each_port(adapter, i) {
2714                 pi = adap2pinfo(adapter, i);
2715                 pi->first_qset = n;
2716                 n += pi->nqsets;
2717         }
2718 }
2719
2720 /*
2721  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2722  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2723  * need.  Minimally we need one for every Virtual Interface plus those needed
2724  * for our "extras".  Note that this process may lower the maximum number of
2725  * allowed Queue Sets ...
2726  */
2727 static int enable_msix(struct adapter *adapter)
2728 {
2729         int i, want, need, nqsets;
2730         struct msix_entry entries[MSIX_ENTRIES];
2731         struct sge *s = &adapter->sge;
2732
2733         for (i = 0; i < MSIX_ENTRIES; ++i)
2734                 entries[i].entry = i;
2735
2736         /*
2737          * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2738          * plus those needed for our "extras" (for example, the firmware
2739          * message queue).  We _need_ at least one "Queue Set" per Virtual
2740          * Interface plus those needed for our "extras".  So now we get to see
2741          * if the song is right ...
2742          */
2743         want = s->max_ethqsets + MSIX_EXTRAS;
2744         need = adapter->params.nports + MSIX_EXTRAS;
2745
2746         want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2747         if (want < 0)
2748                 return want;
2749
2750         nqsets = want - MSIX_EXTRAS;
2751         if (nqsets < s->max_ethqsets) {
2752                 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2753                          " for %d Queue Sets\n", nqsets);
2754                 s->max_ethqsets = nqsets;
2755                 if (nqsets < s->ethqsets)
2756                         reduce_ethqs(adapter, nqsets);
2757         }
2758         for (i = 0; i < want; ++i)
2759                 adapter->msix_info[i].vec = entries[i].vector;
2760
2761         return 0;
2762 }
2763
2764 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2765         .ndo_open               = cxgb4vf_open,
2766         .ndo_stop               = cxgb4vf_stop,
2767         .ndo_start_xmit         = t4vf_eth_xmit,
2768         .ndo_get_stats          = cxgb4vf_get_stats,
2769         .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2770         .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2771         .ndo_validate_addr      = eth_validate_addr,
2772         .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2773         .ndo_change_mtu         = cxgb4vf_change_mtu,
2774         .ndo_fix_features       = cxgb4vf_fix_features,
2775         .ndo_set_features       = cxgb4vf_set_features,
2776 #ifdef CONFIG_NET_POLL_CONTROLLER
2777         .ndo_poll_controller    = cxgb4vf_poll_controller,
2778 #endif
2779 };
2780
2781 /*
2782  * "Probe" a device: initialize a device and construct all kernel and driver
2783  * state needed to manage the device.  This routine is called "init_one" in
2784  * the PF Driver ...
2785  */
2786 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2787                              const struct pci_device_id *ent)
2788 {
2789         int pci_using_dac;
2790         int err, pidx;
2791         unsigned int pmask;
2792         struct adapter *adapter;
2793         struct port_info *pi;
2794         struct net_device *netdev;
2795         unsigned int pf;
2796
2797         /*
2798          * Print our driver banner the first time we're called to initialize a
2799          * device.
2800          */
2801         pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2802
2803         /*
2804          * Initialize generic PCI device state.
2805          */
2806         err = pci_enable_device(pdev);
2807         if (err) {
2808                 dev_err(&pdev->dev, "cannot enable PCI device\n");
2809                 return err;
2810         }
2811
2812         /*
2813          * Reserve PCI resources for the device.  If we can't get them some
2814          * other driver may have already claimed the device ...
2815          */
2816         err = pci_request_regions(pdev, KBUILD_MODNAME);
2817         if (err) {
2818                 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2819                 goto err_disable_device;
2820         }
2821
2822         /*
2823          * Set up our DMA mask: try for 64-bit address masking first and
2824          * fall back to 32-bit if we can't get 64 bits ...
2825          */
2826         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2827         if (err == 0) {
2828                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2829                 if (err) {
2830                         dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2831                                 " coherent allocations\n");
2832                         goto err_release_regions;
2833                 }
2834                 pci_using_dac = 1;
2835         } else {
2836                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2837                 if (err != 0) {
2838                         dev_err(&pdev->dev, "no usable DMA configuration\n");
2839                         goto err_release_regions;
2840                 }
2841                 pci_using_dac = 0;
2842         }
2843
2844         /*
2845          * Enable bus mastering for the device ...
2846          */
2847         pci_set_master(pdev);
2848
2849         /*
2850          * Allocate our adapter data structure and attach it to the device.
2851          */
2852         adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2853         if (!adapter) {
2854                 err = -ENOMEM;
2855                 goto err_release_regions;
2856         }
2857         pci_set_drvdata(pdev, adapter);
2858         adapter->pdev = pdev;
2859         adapter->pdev_dev = &pdev->dev;
2860
2861         adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2862                                     (sizeof(struct mbox_cmd) *
2863                                      T4VF_OS_LOG_MBOX_CMDS),
2864                                     GFP_KERNEL);
2865         if (!adapter->mbox_log) {
2866                 err = -ENOMEM;
2867                 goto err_free_adapter;
2868         }
2869         adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2870
2871         /*
2872          * Initialize SMP data synchronization resources.
2873          */
2874         spin_lock_init(&adapter->stats_lock);
2875         spin_lock_init(&adapter->mbox_lock);
2876         INIT_LIST_HEAD(&adapter->mlist.list);
2877
2878         /*
2879          * Map our I/O registers in BAR0.
2880          */
2881         adapter->regs = pci_ioremap_bar(pdev, 0);
2882         if (!adapter->regs) {
2883                 dev_err(&pdev->dev, "cannot map device registers\n");
2884                 err = -ENOMEM;
2885                 goto err_free_adapter;
2886         }
2887
2888         /* Wait for the device to become ready before proceeding ...
2889          */
2890         err = t4vf_prep_adapter(adapter);
2891         if (err) {
2892                 dev_err(adapter->pdev_dev, "device didn't become ready:"
2893                         " err=%d\n", err);
2894                 goto err_unmap_bar0;
2895         }
2896
2897         /* For T5 and later we want to use the new BAR-based User Doorbells,
2898          * so we need to map BAR2 here ...
2899          */
2900         if (!is_t4(adapter->params.chip)) {
2901                 adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2902                                            pci_resource_len(pdev, 2));
2903                 if (!adapter->bar2) {
2904                         dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2905                         err = -ENOMEM;
2906                         goto err_unmap_bar0;
2907                 }
2908         }
2909         /*
2910          * Initialize adapter level features.
2911          */
2912         adapter->name = pci_name(pdev);
2913         adapter->msg_enable = DFLT_MSG_ENABLE;
2914
2915         /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
2916          * Ingress Packet Data to Free List Buffers in order to allow for
2917          * chipset performance optimizations between the Root Complex and
2918          * Memory Controllers.  (Messages to the associated Ingress Queue
2919          * notifying new Packet Placement in the Free Lists Buffers will be
2920          * send without the Relaxed Ordering Attribute thus guaranteeing that
2921          * all preceding PCIe Transaction Layer Packets will be processed
2922          * first.)  But some Root Complexes have various issues with Upstream
2923          * Transaction Layer Packets with the Relaxed Ordering Attribute set.
2924          * The PCIe devices which under the Root Complexes will be cleared the
2925          * Relaxed Ordering bit in the configuration space, So we check our
2926          * PCIe configuration space to see if it's flagged with advice against
2927          * using Relaxed Ordering.
2928          */
2929         if (!pcie_relaxed_ordering_enabled(pdev))
2930                 adapter->flags |= ROOT_NO_RELAXED_ORDERING;
2931
2932         err = adap_init0(adapter);
2933         if (err)
2934                 goto err_unmap_bar;
2935
2936         /* Initialize hash mac addr list */
2937         INIT_LIST_HEAD(&adapter->mac_hlist);
2938
2939         /*
2940          * Allocate our "adapter ports" and stitch everything together.
2941          */
2942         pmask = adapter->params.vfres.pmask;
2943         pf = t4vf_get_pf_from_vf(adapter);
2944         for_each_port(adapter, pidx) {
2945                 int port_id, viid;
2946                 u8 mac[ETH_ALEN];
2947                 unsigned int naddr = 1;
2948
2949                 /*
2950                  * We simplistically allocate our virtual interfaces
2951                  * sequentially across the port numbers to which we have
2952                  * access rights.  This should be configurable in some manner
2953                  * ...
2954                  */
2955                 if (pmask == 0)
2956                         break;
2957                 port_id = ffs(pmask) - 1;
2958                 pmask &= ~(1 << port_id);
2959                 viid = t4vf_alloc_vi(adapter, port_id);
2960                 if (viid < 0) {
2961                         dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2962                                 " err=%d\n", port_id, viid);
2963                         err = viid;
2964                         goto err_free_dev;
2965                 }
2966
2967                 /*
2968                  * Allocate our network device and stitch things together.
2969                  */
2970                 netdev = alloc_etherdev_mq(sizeof(struct port_info),
2971                                            MAX_PORT_QSETS);
2972                 if (netdev == NULL) {
2973                         t4vf_free_vi(adapter, viid);
2974                         err = -ENOMEM;
2975                         goto err_free_dev;
2976                 }
2977                 adapter->port[pidx] = netdev;
2978                 SET_NETDEV_DEV(netdev, &pdev->dev);
2979                 pi = netdev_priv(netdev);
2980                 pi->adapter = adapter;
2981                 pi->pidx = pidx;
2982                 pi->port_id = port_id;
2983                 pi->viid = viid;
2984
2985                 /*
2986                  * Initialize the starting state of our "port" and register
2987                  * it.
2988                  */
2989                 pi->xact_addr_filt = -1;
2990                 netif_carrier_off(netdev);
2991                 netdev->irq = pdev->irq;
2992
2993                 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2994                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2995                         NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2996                 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2997                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2998                         NETIF_F_HIGHDMA;
2999                 netdev->features = netdev->hw_features |
3000                                    NETIF_F_HW_VLAN_CTAG_TX;
3001                 if (pci_using_dac)
3002                         netdev->features |= NETIF_F_HIGHDMA;
3003
3004                 netdev->priv_flags |= IFF_UNICAST_FLT;
3005                 netdev->min_mtu = 81;
3006                 netdev->max_mtu = ETH_MAX_MTU;
3007
3008                 netdev->netdev_ops = &cxgb4vf_netdev_ops;
3009                 netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3010                 netdev->dev_port = pi->port_id;
3011
3012                 /*
3013                  * Initialize the hardware/software state for the port.
3014                  */
3015                 err = t4vf_port_init(adapter, pidx);
3016                 if (err) {
3017                         dev_err(&pdev->dev, "cannot initialize port %d\n",
3018                                 pidx);
3019                         goto err_free_dev;
3020                 }
3021
3022                 err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
3023                 if (err) {
3024                         dev_err(&pdev->dev,
3025                                 "unable to determine MAC ACL address, "
3026                                 "continuing anyway.. (status %d)\n", err);
3027                 } else if (naddr && adapter->params.vfres.nvi == 1) {
3028                         struct sockaddr addr;
3029
3030                         ether_addr_copy(addr.sa_data, mac);
3031                         err = cxgb4vf_set_mac_addr(netdev, &addr);
3032                         if (err) {
3033                                 dev_err(&pdev->dev,
3034                                         "unable to set MAC address %pM\n",
3035                                         mac);
3036                                 goto err_free_dev;
3037                         }
3038                         dev_info(&pdev->dev,
3039                                  "Using assigned MAC ACL: %pM\n", mac);
3040                 }
3041         }
3042
3043         /* See what interrupts we'll be using.  If we've been configured to
3044          * use MSI-X interrupts, try to enable them but fall back to using
3045          * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3046          * get MSI interrupts we bail with the error.
3047          */
3048         if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3049                 adapter->flags |= USING_MSIX;
3050         else {
3051                 if (msi == MSI_MSIX) {
3052                         dev_info(adapter->pdev_dev,
3053                                  "Unable to use MSI-X Interrupts; falling "
3054                                  "back to MSI Interrupts\n");
3055
3056                         /* We're going to need a Forwarded Interrupt Queue so
3057                          * that may cut into how many Queue Sets we can
3058                          * support.
3059                          */
3060                         msi = MSI_MSI;
3061                         size_nports_qsets(adapter);
3062                 }
3063                 err = pci_enable_msi(pdev);
3064                 if (err) {
3065                         dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3066                                 " err=%d\n", err);
3067                         goto err_free_dev;
3068                 }
3069                 adapter->flags |= USING_MSI;
3070         }
3071
3072         /* Now that we know how many "ports" we have and what interrupt
3073          * mechanism we're going to use, we can configure our queue resources.
3074          */
3075         cfg_queues(adapter);
3076
3077         /*
3078          * The "card" is now ready to go.  If any errors occur during device
3079          * registration we do not fail the whole "card" but rather proceed
3080          * only with the ports we manage to register successfully.  However we
3081          * must register at least one net device.
3082          */
3083         for_each_port(adapter, pidx) {
3084                 struct port_info *pi = netdev_priv(adapter->port[pidx]);
3085                 netdev = adapter->port[pidx];
3086                 if (netdev == NULL)
3087                         continue;
3088
3089                 netif_set_real_num_tx_queues(netdev, pi->nqsets);
3090                 netif_set_real_num_rx_queues(netdev, pi->nqsets);
3091
3092                 err = register_netdev(netdev);
3093                 if (err) {
3094                         dev_warn(&pdev->dev, "cannot register net device %s,"
3095                                  " skipping\n", netdev->name);
3096                         continue;
3097                 }
3098
3099                 set_bit(pidx, &adapter->registered_device_map);
3100         }
3101         if (adapter->registered_device_map == 0) {
3102                 dev_err(&pdev->dev, "could not register any net devices\n");
3103                 goto err_disable_interrupts;
3104         }
3105
3106         /*
3107          * Set up our debugfs entries.
3108          */
3109         if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3110                 adapter->debugfs_root =
3111                         debugfs_create_dir(pci_name(pdev),
3112                                            cxgb4vf_debugfs_root);
3113                 if (IS_ERR_OR_NULL(adapter->debugfs_root))
3114                         dev_warn(&pdev->dev, "could not create debugfs"
3115                                  " directory");
3116                 else
3117                         setup_debugfs(adapter);
3118         }
3119
3120         /*
3121          * Print a short notice on the existence and configuration of the new
3122          * VF network device ...
3123          */
3124         for_each_port(adapter, pidx) {
3125                 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3126                          adapter->port[pidx]->name,
3127                          (adapter->flags & USING_MSIX) ? "MSI-X" :
3128                          (adapter->flags & USING_MSI)  ? "MSI" : "");
3129         }
3130
3131         /*
3132          * Return success!
3133          */
3134         return 0;
3135
3136         /*
3137          * Error recovery and exit code.  Unwind state that's been created
3138          * so far and return the error.
3139          */
3140 err_disable_interrupts:
3141         if (adapter->flags & USING_MSIX) {
3142                 pci_disable_msix(adapter->pdev);
3143                 adapter->flags &= ~USING_MSIX;
3144         } else if (adapter->flags & USING_MSI) {
3145                 pci_disable_msi(adapter->pdev);
3146                 adapter->flags &= ~USING_MSI;
3147         }
3148
3149 err_free_dev:
3150         for_each_port(adapter, pidx) {
3151                 netdev = adapter->port[pidx];
3152                 if (netdev == NULL)
3153                         continue;
3154                 pi = netdev_priv(netdev);
3155                 t4vf_free_vi(adapter, pi->viid);
3156                 if (test_bit(pidx, &adapter->registered_device_map))
3157                         unregister_netdev(netdev);
3158                 free_netdev(netdev);
3159         }
3160
3161 err_unmap_bar:
3162         if (!is_t4(adapter->params.chip))
3163                 iounmap(adapter->bar2);
3164
3165 err_unmap_bar0:
3166         iounmap(adapter->regs);
3167
3168 err_free_adapter:
3169         kfree(adapter->mbox_log);
3170         kfree(adapter);
3171
3172 err_release_regions:
3173         pci_release_regions(pdev);
3174         pci_clear_master(pdev);
3175
3176 err_disable_device:
3177         pci_disable_device(pdev);
3178
3179         return err;
3180 }
3181
3182 /*
3183  * "Remove" a device: tear down all kernel and driver state created in the
3184  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3185  * that this is called "remove_one" in the PF Driver.)
3186  */
3187 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3188 {
3189         struct adapter *adapter = pci_get_drvdata(pdev);
3190
3191         /*
3192          * Tear down driver state associated with device.
3193          */
3194         if (adapter) {
3195                 int pidx;
3196
3197                 /*
3198                  * Stop all of our activity.  Unregister network port,
3199                  * disable interrupts, etc.
3200                  */
3201                 for_each_port(adapter, pidx)
3202                         if (test_bit(pidx, &adapter->registered_device_map))
3203                                 unregister_netdev(adapter->port[pidx]);
3204                 t4vf_sge_stop(adapter);
3205                 if (adapter->flags & USING_MSIX) {
3206                         pci_disable_msix(adapter->pdev);
3207                         adapter->flags &= ~USING_MSIX;
3208                 } else if (adapter->flags & USING_MSI) {
3209                         pci_disable_msi(adapter->pdev);
3210                         adapter->flags &= ~USING_MSI;
3211                 }
3212
3213                 /*
3214                  * Tear down our debugfs entries.
3215                  */
3216                 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3217                         cleanup_debugfs(adapter);
3218                         debugfs_remove_recursive(adapter->debugfs_root);
3219                 }
3220
3221                 /*
3222                  * Free all of the various resources which we've acquired ...
3223                  */
3224                 t4vf_free_sge_resources(adapter);
3225                 for_each_port(adapter, pidx) {
3226                         struct net_device *netdev = adapter->port[pidx];
3227                         struct port_info *pi;
3228
3229                         if (netdev == NULL)
3230                                 continue;
3231
3232                         pi = netdev_priv(netdev);
3233                         t4vf_free_vi(adapter, pi->viid);
3234                         free_netdev(netdev);
3235                 }
3236                 iounmap(adapter->regs);
3237                 if (!is_t4(adapter->params.chip))
3238                         iounmap(adapter->bar2);
3239                 kfree(adapter->mbox_log);
3240                 kfree(adapter);
3241         }
3242
3243         /*
3244          * Disable the device and release its PCI resources.
3245          */
3246         pci_disable_device(pdev);
3247         pci_clear_master(pdev);
3248         pci_release_regions(pdev);
3249 }
3250
3251 /*
3252  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3253  * delivery.
3254  */
3255 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3256 {
3257         struct adapter *adapter;
3258         int pidx;
3259
3260         adapter = pci_get_drvdata(pdev);
3261         if (!adapter)
3262                 return;
3263
3264         /* Disable all Virtual Interfaces.  This will shut down the
3265          * delivery of all ingress packets into the chip for these
3266          * Virtual Interfaces.
3267          */
3268         for_each_port(adapter, pidx)
3269                 if (test_bit(pidx, &adapter->registered_device_map))
3270                         unregister_netdev(adapter->port[pidx]);
3271
3272         /* Free up all Queues which will prevent further DMA and
3273          * Interrupts allowing various internal pathways to drain.
3274          */
3275         t4vf_sge_stop(adapter);
3276         if (adapter->flags & USING_MSIX) {
3277                 pci_disable_msix(adapter->pdev);
3278                 adapter->flags &= ~USING_MSIX;
3279         } else if (adapter->flags & USING_MSI) {
3280                 pci_disable_msi(adapter->pdev);
3281                 adapter->flags &= ~USING_MSI;
3282         }
3283
3284         /*
3285          * Free up all Queues which will prevent further DMA and
3286          * Interrupts allowing various internal pathways to drain.
3287          */
3288         t4vf_free_sge_resources(adapter);
3289         pci_set_drvdata(pdev, NULL);
3290 }
3291
3292 /* Macros needed to support the PCI Device ID Table ...
3293  */
3294 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3295         static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3296 #define CH_PCI_DEVICE_ID_FUNCTION       0x8
3297
3298 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3299                 { PCI_VDEVICE(CHELSIO, (devid)), 0 }
3300
3301 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3302
3303 #include "../cxgb4/t4_pci_id_tbl.h"
3304
3305 MODULE_DESCRIPTION(DRV_DESC);
3306 MODULE_AUTHOR("Chelsio Communications");
3307 MODULE_LICENSE("Dual BSD/GPL");
3308 MODULE_VERSION(DRV_VERSION);
3309 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3310
3311 static struct pci_driver cxgb4vf_driver = {
3312         .name           = KBUILD_MODNAME,
3313         .id_table       = cxgb4vf_pci_tbl,
3314         .probe          = cxgb4vf_pci_probe,
3315         .remove         = cxgb4vf_pci_remove,
3316         .shutdown       = cxgb4vf_pci_shutdown,
3317 };
3318
3319 /*
3320  * Initialize global driver state.
3321  */
3322 static int __init cxgb4vf_module_init(void)
3323 {
3324         int ret;
3325
3326         /*
3327          * Vet our module parameters.
3328          */
3329         if (msi != MSI_MSIX && msi != MSI_MSI) {
3330                 pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3331                         msi, MSI_MSIX, MSI_MSI);
3332                 return -EINVAL;
3333         }
3334
3335         /* Debugfs support is optional, just warn if this fails */
3336         cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3337         if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3338                 pr_warn("could not create debugfs entry, continuing\n");
3339
3340         ret = pci_register_driver(&cxgb4vf_driver);
3341         if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3342                 debugfs_remove(cxgb4vf_debugfs_root);
3343         return ret;
3344 }
3345
3346 /*
3347  * Tear down global driver state.
3348  */
3349 static void __exit cxgb4vf_module_exit(void)
3350 {
3351         pci_unregister_driver(&cxgb4vf_driver);
3352         debugfs_remove(cxgb4vf_debugfs_root);
3353 }
3354
/* Hook the unload and load routines above into the module loader. */
module_exit(cxgb4vf_module_exit);
module_init(cxgb4vf_module_init);