1 /* sunvnet.c: Sun LDOM Virtual Network Driver.
2  *
3  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4  * Copyright (C) 2016-2017 Oracle. All rights reserved.
5  */
6
7 #include <linux/module.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/delay.h>
12 #include <linux/init.h>
13 #include <linux/netdevice.h>
14 #include <linux/ethtool.h>
15 #include <linux/etherdevice.h>
16 #include <linux/mutex.h>
17 #include <linux/highmem.h>
18 #include <linux/if_vlan.h>
19 #define CREATE_TRACE_POINTS
20 #include <trace/events/sunvnet.h>
21
22 #if IS_ENABLED(CONFIG_IPV6)
23 #include <linux/icmpv6.h>
24 #endif
25
26 #include <net/ip.h>
27 #include <net/icmp.h>
28 #include <net/route.h>
29
30 #include <asm/vio.h>
31 #include <asm/ldc.h>
32
33 #include "sunvnet_common.h"
34
35 /* Heuristic for the number of times to exponentially back off and
36  * retry sending an LDC trigger when EAGAIN is encountered.
37  */
38 #define VNET_MAX_RETRIES        10
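/* The retry loops below (vnet_send_ack() and __vnet_tx_trigger()) start with
 * a 1 usec delay, double it after each -EAGAIN up to a 128 usec cap, and give
 * up once the retry count exceeds VNET_MAX_RETRIES.
 */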
39
40 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
41 MODULE_DESCRIPTION("Sun LDOM virtual network support library");
42 MODULE_LICENSE("GPL");
43 MODULE_VERSION("1.1");
44
45 static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
46
47 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
48 {
49         return vio_dring_avail(dr, VNET_TX_RING_SIZE);
50 }
51
52 static int vnet_handle_unknown(struct vnet_port *port, void *arg)
53 {
54         struct vio_msg_tag *pkt = arg;
55
56         pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
57                pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
58         pr_err("Resetting connection\n");
59
60         ldc_disconnect(port->vio.lp);
61
62         return -ECONNRESET;
63 }
64
65 static int vnet_port_alloc_tx_ring(struct vnet_port *port);
66
67 int sunvnet_send_attr_common(struct vio_driver_state *vio)
68 {
69         struct vnet_port *port = to_vnet_port(vio);
70         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
71         struct vio_net_attr_info pkt;
72         int framelen = ETH_FRAME_LEN;
73         int i, err;
74
75         err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
76         if (err)
77                 return err;
78
79         memset(&pkt, 0, sizeof(pkt));
80         pkt.tag.type = VIO_TYPE_CTRL;
81         pkt.tag.stype = VIO_SUBTYPE_INFO;
82         pkt.tag.stype_env = VIO_ATTR_INFO;
83         pkt.tag.sid = vio_send_sid(vio);
84         if (vio_version_before(vio, 1, 2))
85                 pkt.xfer_mode = VIO_DRING_MODE;
86         else
87                 pkt.xfer_mode = VIO_NEW_DRING_MODE;
88         pkt.addr_type = VNET_ADDR_ETHERMAC;
89         pkt.ack_freq = 0;
90         for (i = 0; i < 6; i++)
91                 pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
92         if (vio_version_after(vio, 1, 3)) {
93                 if (port->rmtu) {
94                         port->rmtu = min(VNET_MAXPACKET, port->rmtu);
95                         pkt.mtu = port->rmtu;
96                 } else {
97                         port->rmtu = VNET_MAXPACKET;
98                         pkt.mtu = port->rmtu;
99                 }
100                 if (vio_version_after_eq(vio, 1, 6))
101                         pkt.options = VIO_TX_DRING;
102         } else if (vio_version_before(vio, 1, 3)) {
103                 pkt.mtu = framelen;
104         } else { /* v1.3 */
105                 pkt.mtu = framelen + VLAN_HLEN;
106         }
107
108         pkt.cflags = 0;
109         if (vio_version_after_eq(vio, 1, 7) && port->tso) {
110                 pkt.cflags |= VNET_LSO_IPV4_CAPAB;
111                 if (!port->tsolen)
112                         port->tsolen = VNET_MAXTSO;
113                 pkt.ipv4_lso_maxlen = port->tsolen;
114         }
115
116         pkt.plnk_updt = PHYSLINK_UPDATE_NONE;
117
118         viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
119                "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
120                "cflags[0x%04x] lso_max[%u]\n",
121                pkt.xfer_mode, pkt.addr_type,
122                (unsigned long long)pkt.addr,
123                pkt.ack_freq, pkt.plnk_updt, pkt.options,
124                (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);
125
126         return vio_ldc_send(vio, &pkt, sizeof(pkt));
127 }
128 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);
129
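/* Handle the peer's ATTR INFO: negotiate transfer mode, MTU and LSO
 * parameters, record the results in the port (rmtu, tso, tsolen), and reply
 * with an ACK, or with a NACK followed by -ECONNRESET when the peer's
 * settings cannot be accepted.
 */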
130 static int handle_attr_info(struct vio_driver_state *vio,
131                             struct vio_net_attr_info *pkt)
132 {
133         struct vnet_port *port = to_vnet_port(vio);
134         u64     localmtu;
135         u8      xfer_mode;
136
137         viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
138                "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
139                " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
140                pkt->xfer_mode, pkt->addr_type,
141                (unsigned long long)pkt->addr,
142                pkt->ack_freq, pkt->plnk_updt, pkt->options,
143                (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
144                pkt->ipv4_lso_maxlen);
145
146         pkt->tag.sid = vio_send_sid(vio);
147
148         xfer_mode = pkt->xfer_mode;
149         /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
150         if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
151                 xfer_mode = VIO_NEW_DRING_MODE;
152
153         /* MTU negotiation:
154          *      < v1.3 - ETH_FRAME_LEN exactly
155          *      > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
156          *                      pkt->mtu for ACK
157          *      = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
158          */
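        /* The negotiated value becomes port->rmtu below; it later bounds the
         * receive copy size in vnet_rx_one() and the non-GSO length check in
         * sunvnet_start_xmit_common().
         */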
159         if (vio_version_before(vio, 1, 3)) {
160                 localmtu = ETH_FRAME_LEN;
161         } else if (vio_version_after(vio, 1, 3)) {
162                 localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
163                 localmtu = min(pkt->mtu, localmtu);
164                 pkt->mtu = localmtu;
165         } else { /* v1.3 */
166                 localmtu = ETH_FRAME_LEN + VLAN_HLEN;
167         }
168         port->rmtu = localmtu;
169
170         /* LSO negotiation */
171         if (vio_version_after_eq(vio, 1, 7))
172                 port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
173         else
174                 port->tso = false;
175         if (port->tso) {
176                 if (!port->tsolen)
177                         port->tsolen = VNET_MAXTSO;
178                 port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
179                 if (port->tsolen < VNET_MINTSO) {
180                         port->tso = false;
181                         port->tsolen = 0;
182                         pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
183                 }
184                 pkt->ipv4_lso_maxlen = port->tsolen;
185         } else {
186                 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
187                 pkt->ipv4_lso_maxlen = 0;
188                 port->tsolen = 0;
189         }
190
191         /* for version >= 1.6, ACK with the packet mode we support */
192         if (vio_version_after_eq(vio, 1, 6)) {
193                 pkt->xfer_mode = VIO_NEW_DRING_MODE;
194                 pkt->options = VIO_TX_DRING;
195         }
196
197         if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
198             pkt->addr_type != VNET_ADDR_ETHERMAC ||
199             pkt->mtu != localmtu) {
200                 viodbg(HS, "SEND NET ATTR NACK\n");
201
202                 pkt->tag.stype = VIO_SUBTYPE_NACK;
203
204                 (void)vio_ldc_send(vio, pkt, sizeof(*pkt));
205
206                 return -ECONNRESET;
207         }
208
209         viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
210                "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
211                "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
212                pkt->xfer_mode, pkt->addr_type,
213                (unsigned long long)pkt->addr,
214                pkt->ack_freq, pkt->plnk_updt, pkt->options,
215                (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
216                pkt->ipv4_lso_maxlen);
217
218         pkt->tag.stype = VIO_SUBTYPE_ACK;
219
220         return vio_ldc_send(vio, pkt, sizeof(*pkt));
221 }
222
223 static int handle_attr_ack(struct vio_driver_state *vio,
224                            struct vio_net_attr_info *pkt)
225 {
226         viodbg(HS, "GOT NET ATTR ACK\n");
227
228         return 0;
229 }
230
231 static int handle_attr_nack(struct vio_driver_state *vio,
232                             struct vio_net_attr_info *pkt)
233 {
234         viodbg(HS, "GOT NET ATTR NACK\n");
235
236         return -ECONNRESET;
237 }
238
239 int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
240 {
241         struct vio_net_attr_info *pkt = arg;
242
243         switch (pkt->tag.stype) {
244         case VIO_SUBTYPE_INFO:
245                 return handle_attr_info(vio, pkt);
246
247         case VIO_SUBTYPE_ACK:
248                 return handle_attr_ack(vio, pkt);
249
250         case VIO_SUBTYPE_NACK:
251                 return handle_attr_nack(vio, pkt);
252
253         default:
254                 return -ECONNRESET;
255         }
256 }
257 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);
258
259 void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
260 {
261         struct vio_dring_state *dr;
262
263         dr = &vio->drings[VIO_DRIVER_RX_RING];
264         dr->rcv_nxt = 1;
265         dr->snd_nxt = 1;
266
267         dr = &vio->drings[VIO_DRIVER_TX_RING];
268         dr->rcv_nxt = 1;
269         dr->snd_nxt = 1;
270 }
271 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);
272
273 /* The hypervisor interface that implements copying to/from imported
274  * memory from another domain requires that copies are done to 8-byte
275  * aligned buffers, and that the lengths of such copies are also 8-byte
276  * multiples.
277  *
278  * So we align skb->data to an 8-byte multiple and pad out the data
279  * area so we can round the copy length up to the next multiple of
280  * 8 for the copy.
281  *
282  * The transmitter puts the actual start of the packet 6 bytes into
283  * the buffer it sends over, so that the IP headers after the ethernet
284  * header are aligned properly.  These 6 bytes are not in the descriptor
285  * length, they are simply implied.  This offset is represented using
286  * the VNET_PACKET_SKIP macro.
287  */
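/* With the 6-byte skip described above, a 60-byte frame is copied in as
 *   copy_len = (60 + VNET_PACKET_SKIP + 7) & ~7 = 72 bytes
 * (see vnet_rx_one()), and alloc_and_align_skb() below reserves enough slack
 * (len + VNET_PACKET_SKIP + 8 + 8) to both align skb->data and round the
 * copy length up.
 */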
288 static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
289                                            unsigned int len)
290 {
291         struct sk_buff *skb;
292         unsigned long addr, off;
293
294         skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
295         if (unlikely(!skb))
296                 return NULL;
297
298         addr = (unsigned long)skb->data;
299         off = ((addr + 7UL) & ~7UL) - addr;
300         if (off)
301                 skb_reserve(skb, off);
302
303         return skb;
304 }
305
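/* Recompute the full TCP or UDP checksum in software for an IPv4 frame.
 * Used on transmit when the skb still carries CHECKSUM_PARTIAL, and on
 * receive when the peer requested a full checksum via the v1.8 descriptor
 * extension flags.
 */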
306 static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
307 {
308         struct iphdr *iph = ip_hdr(skb);
309         int offset = skb_transport_offset(skb);
310
311         if (skb->protocol != htons(ETH_P_IP))
312                 return;
313         if (iph->protocol != IPPROTO_TCP &&
314             iph->protocol != IPPROTO_UDP)
315                 return;
316         skb->ip_summed = CHECKSUM_NONE;
317         skb->csum_level = 1;
318         skb->csum = 0;
319         if (iph->protocol == IPPROTO_TCP) {
320                 struct tcphdr *ptcp = tcp_hdr(skb);
321
322                 ptcp->check = 0;
323                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
324                 ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
325                                                 skb->len - offset, IPPROTO_TCP,
326                                                 skb->csum);
327         } else if (iph->protocol == IPPROTO_UDP) {
328                 struct udphdr *pudp = udp_hdr(skb);
329
330                 pudp->check = 0;
331                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
332                 pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
333                                                 skb->len - offset, IPPROTO_UDP,
334                                                 skb->csum);
335         }
336 }
337
338 #if IS_ENABLED(CONFIG_IPV6)
339 static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
340 {
341         struct ipv6hdr *ip6h = ipv6_hdr(skb);
342         int offset = skb_transport_offset(skb);
343
344         if (skb->protocol != htons(ETH_P_IPV6))
345                 return;
346         if (ip6h->nexthdr != IPPROTO_TCP &&
347             ip6h->nexthdr != IPPROTO_UDP)
348                 return;
349         skb->ip_summed = CHECKSUM_NONE;
350         skb->csum_level = 1;
351         skb->csum = 0;
352         if (ip6h->nexthdr == IPPROTO_TCP) {
353                 struct tcphdr *ptcp = tcp_hdr(skb);
354
355                 ptcp->check = 0;
356                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
357                 ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
358                                               skb->len - offset, IPPROTO_TCP,
359                                               skb->csum);
360         } else if (ip6h->nexthdr == IPPROTO_UDP) {
361                 struct udphdr *pudp = udp_hdr(skb);
362
363                 pudp->check = 0;
364                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
365                 pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
366                                               skb->len - offset, IPPROTO_UDP,
367                                               skb->csum);
368         }
369 }
370 #endif
371
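/* Receive one descriptor's worth of data: copy it in over the LDC channel
 * with an 8-byte-rounded length, strip the VNET_PACKET_SKIP bytes, apply any
 * v1.8 checksum-offload hints from the descriptor extension, and hand the
 * packet to GRO.
 */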
372 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
373 {
374         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
375         unsigned int len = desc->size;
376         unsigned int copy_len;
377         struct sk_buff *skb;
378         int maxlen;
379         int err;
380
381         err = -EMSGSIZE;
382         if (port->tso && port->tsolen > port->rmtu)
383                 maxlen = port->tsolen;
384         else
385                 maxlen = port->rmtu;
386         if (unlikely(len < ETH_ZLEN || len > maxlen)) {
387                 dev->stats.rx_length_errors++;
388                 goto out_dropped;
389         }
390
391         skb = alloc_and_align_skb(dev, len);
392         err = -ENOMEM;
393         if (unlikely(!skb)) {
394                 dev->stats.rx_missed_errors++;
395                 goto out_dropped;
396         }
397
398         copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
399         skb_put(skb, copy_len);
400         err = ldc_copy(port->vio.lp, LDC_COPY_IN,
401                        skb->data, copy_len, 0,
402                        desc->cookies, desc->ncookies);
403         if (unlikely(err < 0)) {
404                 dev->stats.rx_frame_errors++;
405                 goto out_free_skb;
406         }
407
408         skb_pull(skb, VNET_PACKET_SKIP);
409         skb_trim(skb, len);
410         skb->protocol = eth_type_trans(skb, dev);
411
412         if (vio_version_after_eq(&port->vio, 1, 8)) {
413                 struct vio_net_dext *dext = vio_net_ext(desc);
414
415                 skb_reset_network_header(skb);
416
417                 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
418                         if (skb->protocol == ETH_P_IP) {
419                                 struct iphdr *iph = ip_hdr(skb);
420
421                                 iph->check = 0;
422                                 ip_send_check(iph);
423                         }
424                 }
425                 if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
426                     skb->ip_summed == CHECKSUM_NONE) {
427                         if (skb->protocol == htons(ETH_P_IP)) {
428                                 struct iphdr *iph = ip_hdr(skb);
429                                 int ihl = iph->ihl * 4;
430
431                                 skb_set_transport_header(skb, ihl);
432                                 vnet_fullcsum_ipv4(skb);
433 #if IS_ENABLED(CONFIG_IPV6)
434                         } else if (skb->protocol == htons(ETH_P_IPV6)) {
435                                 skb_set_transport_header(skb,
436                                                          sizeof(struct ipv6hdr));
437                                 vnet_fullcsum_ipv6(skb);
438 #endif
439                         }
440                 }
441                 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
442                         skb->ip_summed = CHECKSUM_PARTIAL;
443                         skb->csum_level = 0;
444                         if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
445                                 skb->csum_level = 1;
446                 }
447         }
448
449         skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;
450
451         if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
452                 dev->stats.multicast++;
453         dev->stats.rx_packets++;
454         dev->stats.rx_bytes += len;
455         port->stats.rx_packets++;
456         port->stats.rx_bytes += len;
457         napi_gro_receive(&port->napi, skb);
458         return 0;
459
460 out_free_skb:
461         kfree_skb(skb);
462
463 out_dropped:
464         dev->stats.rx_dropped++;
465         return err;
466 }
467
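/* Send a dring ACK (ACTIVE or STOPPED) for descriptors [start, end],
 * retrying with bounded exponential backoff on -EAGAIN.  If a STOPPED ack
 * cannot be delivered, remember it in port->stop_rx so the next TX trigger
 * (__vnet_tx_trigger()) resends it first.
 */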
468 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
469                          u32 start, u32 end, u8 vio_dring_state)
470 {
471         struct vio_dring_data hdr = {
472                 .tag = {
473                         .type           = VIO_TYPE_DATA,
474                         .stype          = VIO_SUBTYPE_ACK,
475                         .stype_env      = VIO_DRING_DATA,
476                         .sid            = vio_send_sid(&port->vio),
477                 },
478                 .dring_ident            = dr->ident,
479                 .start_idx              = start,
480                 .end_idx                = end,
481                 .state                  = vio_dring_state,
482         };
483         int err, delay;
484         int retries = 0;
485
486         hdr.seq = dr->snd_nxt;
487         delay = 1;
488         do {
489                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
490                 if (err > 0) {
491                         dr->snd_nxt++;
492                         break;
493                 }
494                 udelay(delay);
495                 if ((delay <<= 1) > 128)
496                         delay = 128;
497                 if (retries++ > VNET_MAX_RETRIES) {
498                         pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
499                                 port->raddr[0], port->raddr[1],
500                                 port->raddr[2], port->raddr[3],
501                                 port->raddr[4], port->raddr[5]);
502                         break;
503                 }
504         } while (err == -EAGAIN);
505
506         if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
507                 port->stop_rx_idx = end;
508                 port->stop_rx = true;
509         } else {
510                 port->stop_rx_idx = 0;
511                 port->stop_rx = false;
512         }
513
514         return err;
515 }
516
517 static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
518                                         struct vio_dring_state *dr,
519                                         u32 index)
520 {
521         struct vio_net_desc *desc = port->vio.desc_buf;
522         int err;
523
524         err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
525                                   (index * dr->entry_size),
526                                   dr->cookies, dr->ncookies);
527         if (err < 0)
528                 return ERR_PTR(err);
529
530         return desc;
531 }
532
533 static int put_rx_desc(struct vnet_port *port,
534                        struct vio_dring_state *dr,
535                        struct vio_net_desc *desc,
536                        u32 index)
537 {
538         int err;
539
540         err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
541                                   (index * dr->entry_size),
542                                   dr->cookies, dr->ncookies);
543         if (err < 0)
544                 return err;
545
546         return 0;
547 }
548
549 static int vnet_walk_rx_one(struct vnet_port *port,
550                             struct vio_dring_state *dr,
551                             u32 index, int *needs_ack)
552 {
553         struct vio_net_desc *desc = get_rx_desc(port, dr, index);
554         struct vio_driver_state *vio = &port->vio;
555         int err;
556
557         BUG_ON(!desc);
558         if (IS_ERR(desc))
559                 return PTR_ERR(desc);
560
561         if (desc->hdr.state != VIO_DESC_READY)
562                 return 1;
563
564         dma_rmb();
565
566         viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
567                desc->hdr.state, desc->hdr.ack,
568                desc->size, desc->ncookies,
569                desc->cookies[0].cookie_addr,
570                desc->cookies[0].cookie_size);
571
572         err = vnet_rx_one(port, desc);
573         if (err == -ECONNRESET)
574                 return err;
575         trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
576                           index, desc->hdr.ack);
577         desc->hdr.state = VIO_DESC_DONE;
578         err = put_rx_desc(port, dr, desc, index);
579         if (err < 0)
580                 return err;
581         *needs_ack = desc->hdr.ack;
582         return 0;
583 }
584
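/* Walk the receive dring from start to end, processing descriptors and
 * sending intermediate ACTIVE acks when a descriptor requests one.  If the
 * NAPI budget is exhausted, defer the final STOPPED ack and record where to
 * resume (port->napi_stop_idx / port->napi_resume); otherwise send the
 * STOPPED ack now.
 */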
585 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
586                         u32 start, u32 end, int *npkts, int budget)
587 {
588         struct vio_driver_state *vio = &port->vio;
589         int ack_start = -1, ack_end = -1;
590         bool send_ack = true;
591
592         end = (end == (u32)-1) ? vio_dring_prev(dr, start)
593                                : vio_dring_next(dr, end);
594
595         viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
596
597         while (start != end) {
598                 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
599
600                 if (err == -ECONNRESET)
601                         return err;
602                 if (err != 0)
603                         break;
604                 (*npkts)++;
605                 if (ack_start == -1)
606                         ack_start = start;
607                 ack_end = start;
608                 start = vio_dring_next(dr, start);
609                 if (ack && start != end) {
610                         err = vnet_send_ack(port, dr, ack_start, ack_end,
611                                             VIO_DRING_ACTIVE);
612                         if (err == -ECONNRESET)
613                                 return err;
614                         ack_start = -1;
615                 }
616                 if ((*npkts) >= budget) {
617                         send_ack = false;
618                         break;
619                 }
620         }
621         if (unlikely(ack_start == -1)) {
622                 ack_end = vio_dring_prev(dr, start);
623                 ack_start = ack_end;
624         }
625         if (send_ack) {
626                 port->napi_resume = false;
627                 trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
628                                                port->vio._peer_sid,
629                                                ack_end, *npkts);
630                 return vnet_send_ack(port, dr, ack_start, ack_end,
631                                      VIO_DRING_STOPPED);
632         } else  {
633                 trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
634                                                 port->vio._peer_sid,
635                                                 ack_end, *npkts);
636                 port->napi_resume = true;
637                 port->napi_stop_idx = ack_end;
638                 return 1;
639         }
640 }
641
642 static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
643                    int budget)
644 {
645         struct vio_dring_data *pkt = msgbuf;
646         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
647         struct vio_driver_state *vio = &port->vio;
648
649         viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
650                pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
651
652         if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
653                 return 0;
654         if (unlikely(pkt->seq != dr->rcv_nxt)) {
655                 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
656                        pkt->seq, dr->rcv_nxt);
657                 return 0;
658         }
659
660         if (!port->napi_resume)
661                 dr->rcv_nxt++;
662
663         /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
664
665         return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
666                             npkts, budget);
667 }
668
669 static int idx_is_pending(struct vio_dring_state *dr, u32 end)
670 {
671         u32 idx = dr->cons;
672         int found = 0;
673
674         while (idx != dr->prod) {
675                 if (idx == end) {
676                         found = 1;
677                         break;
678                 }
679                 idx = vio_dring_next(dr, idx);
680         }
681         return found;
682 }
683
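/* Handle a dring ACK from the peer for our transmit ring: advance dr->cons,
 * send a missed "start" trigger if vnet_start_xmit() left a READY descriptor
 * behind, and return 1 if the stopped TX queue now has enough room to be
 * woken.
 */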
684 static int vnet_ack(struct vnet_port *port, void *msgbuf)
685 {
686         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
687         struct vio_dring_data *pkt = msgbuf;
688         struct net_device *dev;
689         u32 end;
690         struct vio_net_desc *desc;
691         struct netdev_queue *txq;
692
693         if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
694                 return 0;
695
696         end = pkt->end_idx;
697         dev = VNET_PORT_TO_NET_DEVICE(port);
698         netif_tx_lock(dev);
699         if (unlikely(!idx_is_pending(dr, end))) {
700                 netif_tx_unlock(dev);
701                 return 0;
702         }
703
704         /* sync for race conditions with vnet_start_xmit() and tell xmit it
705          * is time to send a trigger.
706          */
707         trace_vnet_rx_stopped_ack(port->vio._local_sid,
708                                   port->vio._peer_sid, end);
709         dr->cons = vio_dring_next(dr, end);
710         desc = vio_dring_entry(dr, dr->cons);
711         if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
712                 /* vnet_start_xmit() just populated this dring but missed
713                  * sending the "start" LDC message to the consumer.
714                  * Send a "start" trigger on its behalf.
715                  */
716                 if (__vnet_tx_trigger(port, dr->cons) > 0)
717                         port->start_cons = false;
718                 else
719                         port->start_cons = true;
720         } else {
721                 port->start_cons = true;
722         }
723         netif_tx_unlock(dev);
724
725         txq = netdev_get_tx_queue(dev, port->q_index);
726         if (unlikely(netif_tx_queue_stopped(txq) &&
727                      vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
728                 return 1;
729
730         return 0;
731 }
732
733 static int vnet_nack(struct vnet_port *port, void *msgbuf)
734 {
735         /* XXX just reset or similar XXX */
736         return 0;
737 }
738
739 static int handle_mcast(struct vnet_port *port, void *msgbuf)
740 {
741         struct vio_net_mcast_info *pkt = msgbuf;
742         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
743
744         if (pkt->tag.stype != VIO_SUBTYPE_ACK)
745                 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
746                        dev->name,
747                        pkt->tag.type,
748                        pkt->tag.stype,
749                        pkt->tag.stype_env,
750                        pkt->tag.sid);
751
752         return 0;
753 }
754
755 /* If the queue is stopped, wake it up so that we'll
756  * send out another START message at the next TX.
757  */
758 static void maybe_tx_wakeup(struct vnet_port *port)
759 {
760         struct netdev_queue *txq;
761
762         txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
763                                   port->q_index);
764         __netif_tx_lock(txq, smp_processor_id());
765         if (likely(netif_tx_queue_stopped(txq)))
766                 netif_tx_wake_queue(txq);
767         __netif_tx_unlock(txq);
768 }
769
770 bool sunvnet_port_is_up_common(struct vnet_port *vnet)
771 {
772         struct vio_driver_state *vio = &vnet->vio;
773
774         return !!(vio->hs_state & VIO_HS_COMPLETE);
775 }
776 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);
777
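/* NAPI worker: handle link RESET/UP events first, then either resume a
 * partially processed dring batch (napi_resume) or read LDC messages and
 * dispatch them until the budget is exhausted or the channel is drained.
 */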
778 static int vnet_event_napi(struct vnet_port *port, int budget)
779 {
780         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
781         struct vio_driver_state *vio = &port->vio;
782         int tx_wakeup, err;
783         int npkts = 0;
784
785         /* we don't expect any other bits */
786         BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
787                                   LDC_EVENT_RESET |
788                                   LDC_EVENT_UP));
789
790         /* RESET takes precedence over any other event */
791         if (port->rx_event & LDC_EVENT_RESET) {
792                 /* a link went down */
793
794                 if (port->vsw == 1) {
795                         netif_tx_stop_all_queues(dev);
796                         netif_carrier_off(dev);
797                 }
798
799                 vio_link_state_change(vio, LDC_EVENT_RESET);
800                 vnet_port_reset(port);
801                 vio_port_up(vio);
802
803                 /* If the device is running but its tx queue was
804                  * stopped (due to flow control), restart it.
805                  * This is necessary since vnet_port_reset()
806                  * clears the tx drings and thus we may never get
807                  * back a VIO_TYPE_DATA ACK packet - which is
808                  * the normal mechanism to restart the tx queue.
809                  */
810                 if (netif_running(dev))
811                         maybe_tx_wakeup(port);
812
813                 port->rx_event = 0;
814                 port->stats.event_reset++;
815                 return 0;
816         }
817
818         if (port->rx_event & LDC_EVENT_UP) {
819                 /* a link came up */
820
821                 if (port->vsw == 1) {
822                         netif_carrier_on(port->dev);
823                         netif_tx_start_all_queues(port->dev);
824                 }
825
826                 vio_link_state_change(vio, LDC_EVENT_UP);
827                 port->rx_event = 0;
828                 port->stats.event_up++;
829                 return 0;
830         }
831
832         err = 0;
833         tx_wakeup = 0;
834         while (1) {
835                 union {
836                         struct vio_msg_tag tag;
837                         u64 raw[8];
838                 } msgbuf;
839
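                /* When resuming after an exhausted budget, synthesize the
                 * VIO_DRING_DATA packet we stopped on instead of reading from
                 * the LDC channel, starting just past napi_stop_idx.
                 */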
840                 if (port->napi_resume) {
841                         struct vio_dring_data *pkt =
842                                 (struct vio_dring_data *)&msgbuf;
843                         struct vio_dring_state *dr =
844                                 &port->vio.drings[VIO_DRIVER_RX_RING];
845
846                         pkt->tag.type = VIO_TYPE_DATA;
847                         pkt->tag.stype = VIO_SUBTYPE_INFO;
848                         pkt->tag.stype_env = VIO_DRING_DATA;
849                         pkt->seq = dr->rcv_nxt;
850                         pkt->start_idx = vio_dring_next(dr,
851                                                         port->napi_stop_idx);
852                         pkt->end_idx = -1;
853                 } else {
854                         err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
855                         if (unlikely(err < 0)) {
856                                 if (err == -ECONNRESET)
857                                         vio_conn_reset(vio);
858                                 break;
859                         }
860                         if (err == 0)
861                                 break;
862                         viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
863                                msgbuf.tag.type,
864                                msgbuf.tag.stype,
865                                msgbuf.tag.stype_env,
866                                msgbuf.tag.sid);
867                         err = vio_validate_sid(vio, &msgbuf.tag);
868                         if (err < 0)
869                                 break;
870                 }
871
872                 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
873                         if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
874                                 if (!sunvnet_port_is_up_common(port)) {
875                                         /* failures like handshake_failure()
876                                          * may have cleaned up dring, but
877                                          * NAPI polling may bring us here.
878                                          */
879                                         err = -ECONNRESET;
880                                         break;
881                                 }
882                                 err = vnet_rx(port, &msgbuf, &npkts, budget);
883                                 if (npkts >= budget)
884                                         break;
885                                 if (npkts == 0)
886                                         break;
887                         } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
888                                 err = vnet_ack(port, &msgbuf);
889                                 if (err > 0)
890                                         tx_wakeup |= err;
891                         } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
892                                 err = vnet_nack(port, &msgbuf);
893                         }
894                 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
895                         if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
896                                 err = handle_mcast(port, &msgbuf);
897                         else
898                                 err = vio_control_pkt_engine(vio, &msgbuf);
899                         if (err)
900                                 break;
901                 } else {
902                         err = vnet_handle_unknown(port, &msgbuf);
903                 }
904                 if (err == -ECONNRESET)
905                         break;
906         }
907         if (unlikely(tx_wakeup && err != -ECONNRESET))
908                 maybe_tx_wakeup(port);
909         return npkts;
910 }
911
912 int sunvnet_poll_common(struct napi_struct *napi, int budget)
913 {
914         struct vnet_port *port = container_of(napi, struct vnet_port, napi);
915         struct vio_driver_state *vio = &port->vio;
916         int processed = vnet_event_napi(port, budget);
917
918         if (processed < budget) {
919                 napi_complete_done(napi, processed);
920                 port->rx_event &= ~LDC_EVENT_DATA_READY;
921                 vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
922         }
923         return processed;
924 }
925 EXPORT_SYMBOL_GPL(sunvnet_poll_common);
926
927 void sunvnet_event_common(void *arg, int event)
928 {
929         struct vnet_port *port = arg;
930         struct vio_driver_state *vio = &port->vio;
931
932         port->rx_event |= event;
933         vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
934         napi_schedule(&port->napi);
935 }
936 EXPORT_SYMBOL_GPL(sunvnet_event_common);
937
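/* Send the "start" DRING_DATA trigger telling the consumer that descriptors
 * beginning at @start are READY.  Any STOPPED ack still owed on the RX ring
 * (port->stop_rx) is flushed first, and the send is retried with bounded
 * exponential backoff on -EAGAIN.
 */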
938 static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
939 {
940         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
941         struct vio_dring_data hdr = {
942                 .tag = {
943                         .type           = VIO_TYPE_DATA,
944                         .stype          = VIO_SUBTYPE_INFO,
945                         .stype_env      = VIO_DRING_DATA,
946                         .sid            = vio_send_sid(&port->vio),
947                 },
948                 .dring_ident            = dr->ident,
949                 .start_idx              = start,
950                 .end_idx                = (u32)-1,
951         };
952         int err, delay;
953         int retries = 0;
954
955         if (port->stop_rx) {
956                 trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
957                                                   port->vio._peer_sid,
958                                                   port->stop_rx_idx, -1);
959                 err = vnet_send_ack(port,
960                                     &port->vio.drings[VIO_DRIVER_RX_RING],
961                                     port->stop_rx_idx, -1,
962                                     VIO_DRING_STOPPED);
963                 if (err <= 0)
964                         return err;
965         }
966
967         hdr.seq = dr->snd_nxt;
968         delay = 1;
969         do {
970                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
971                 if (err > 0) {
972                         dr->snd_nxt++;
973                         break;
974                 }
975                 udelay(delay);
976                 if ((delay <<= 1) > 128)
977                         delay = 128;
978                 if (retries++ > VNET_MAX_RETRIES)
979                         break;
980         } while (err == -EAGAIN);
981         trace_vnet_tx_trigger(port->vio._local_sid,
982                               port->vio._peer_sid, start, err);
983
984         return err;
985 }
986
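/* Walk the TX ring backwards from dr->prod, unmap completed (DONE) buffers
 * and chain their skbs for freeing outside the tx lock, and count descriptors
 * that are still READY in *pending so the cleanup timer can be rearmed.
 */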
987 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
988                                           unsigned *pending)
989 {
990         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
991         struct sk_buff *skb = NULL;
992         int i, txi;
993
994         *pending = 0;
995
996         txi = dr->prod;
997         for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
998                 struct vio_net_desc *d;
999
1000                 --txi;
1001                 if (txi < 0)
1002                         txi = VNET_TX_RING_SIZE - 1;
1003
1004                 d = vio_dring_entry(dr, txi);
1005
1006                 if (d->hdr.state == VIO_DESC_READY) {
1007                         (*pending)++;
1008                         continue;
1009                 }
1010                 if (port->tx_bufs[txi].skb) {
1011                         if (d->hdr.state != VIO_DESC_DONE)
1012                                 pr_notice("invalid ring buffer state %d\n",
1013                                           d->hdr.state);
1014                         BUG_ON(port->tx_bufs[txi].skb->next);
1015
1016                         port->tx_bufs[txi].skb->next = skb;
1017                         skb = port->tx_bufs[txi].skb;
1018                         port->tx_bufs[txi].skb = NULL;
1019
1020                         ldc_unmap(port->vio.lp,
1021                                   port->tx_bufs[txi].cookies,
1022                                   port->tx_bufs[txi].ncookies);
1023                 } else if (d->hdr.state == VIO_DESC_FREE) {
1024                         break;
1025                 }
1026                 d->hdr.state = VIO_DESC_FREE;
1027         }
1028         return skb;
1029 }
1030
1031 static inline void vnet_free_skbs(struct sk_buff *skb)
1032 {
1033         struct sk_buff *next;
1034
1035         while (skb) {
1036                 next = skb->next;
1037                 skb->next = NULL;
1038                 dev_kfree_skb(skb);
1039                 skb = next;
1040         }
1041 }
1042
1043 void sunvnet_clean_timer_expire_common(unsigned long port0)
1044 {
1045         struct vnet_port *port = (struct vnet_port *)port0;
1046         struct sk_buff *freeskbs;
1047         unsigned pending;
1048
1049         netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
1050         freeskbs = vnet_clean_tx_ring(port, &pending);
1051         netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));
1052
1053         vnet_free_skbs(freeskbs);
1054
1055         if (pending)
1056                 (void)mod_timer(&port->clean_timer,
1057                                 jiffies + VNET_CLEAN_TIMEOUT);
1058         else
1059                 del_timer(&port->clean_timer);
1060 }
1061 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);
1062
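/* Map the linear header (padded to at least ETH_ZLEN, plus VNET_PACKET_SKIP,
 * rounded to 8 bytes) and each page fragment into LDC cookies.  Returns the
 * number of cookies used, or a negative error after unmapping on failure.
 */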
1063 static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
1064                                struct ldc_trans_cookie *cookies, int ncookies,
1065                                unsigned int map_perm)
1066 {
1067         int i, nc, err, blen;
1068
1069         /* header */
1070         blen = skb_headlen(skb);
1071         if (blen < ETH_ZLEN)
1072                 blen = ETH_ZLEN;
1073         blen += VNET_PACKET_SKIP;
1074         blen += 8 - (blen & 7);
1075
1076         err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
1077                              ncookies, map_perm);
1078         if (err < 0)
1079                 return err;
1080         nc = err;
1081
1082         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1083                 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1084                 u8 *vaddr;
1085
1086                 if (nc < ncookies) {
1087                         vaddr = kmap_atomic(skb_frag_page(f));
1088                         blen = skb_frag_size(f);
1089                         blen += 8 - (blen & 7);
1090                         err = ldc_map_single(lp, vaddr + f->page_offset,
1091                                              blen, cookies + nc, ncookies - nc,
1092                                              map_perm);
1093                         kunmap_atomic(vaddr);
1094                 } else {
1095                         err = -EMSGSIZE;
1096                 }
1097
1098                 if (err < 0) {
1099                         ldc_unmap(lp, cookies, nc);
1100                         return err;
1101                 }
1102                 nc += err;
1103         }
1104         return nc;
1105 }
1106
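/* Ensure the skb satisfies the LDC copy constraints: correctly aligned
 * skb->data, enough head/tail room, 8-byte-aligned fragments, and no more
 * fragments than available cookies.  Otherwise copy into a freshly aligned
 * skb, folding the checksum computation into the copy when the original was
 * CHECKSUM_PARTIAL.
 */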
1107 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
1108 {
1109         struct sk_buff *nskb;
1110         int i, len, pad, docopy;
1111
1112         len = skb->len;
1113         pad = 0;
1114         if (len < ETH_ZLEN) {
1115                 pad += ETH_ZLEN - skb->len;
1116                 len += pad;
1117         }
1118         len += VNET_PACKET_SKIP;
1119         pad += 8 - (len & 7);
1120
1121         /* make sure we have enough cookies and alignment in every frag */
1122         docopy = skb_shinfo(skb)->nr_frags >= ncookies;
1123         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1124                 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1125
1126                 docopy |= f->page_offset & 7;
1127         }
1128         if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
1129             skb_tailroom(skb) < pad ||
1130             skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
1131                 int start = 0, offset;
1132                 __wsum csum;
1133
1134                 len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
1135                 nskb = alloc_and_align_skb(skb->dev, len);
1136                 if (!nskb) {
1137                         dev_kfree_skb(skb);
1138                         return NULL;
1139                 }
1140                 skb_reserve(nskb, VNET_PACKET_SKIP);
1141
1142                 nskb->protocol = skb->protocol;
1143                 offset = skb_mac_header(skb) - skb->data;
1144                 skb_set_mac_header(nskb, offset);
1145                 offset = skb_network_header(skb) - skb->data;
1146                 skb_set_network_header(nskb, offset);
1147                 offset = skb_transport_header(skb) - skb->data;
1148                 skb_set_transport_header(nskb, offset);
1149
1150                 offset = 0;
1151                 nskb->csum_offset = skb->csum_offset;
1152                 nskb->ip_summed = skb->ip_summed;
1153
1154                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1155                         start = skb_checksum_start_offset(skb);
1156                 if (start) {
1157                         int offset = start + nskb->csum_offset;
1158
1159                         /* copy the headers, no csum here */
1160                         if (skb_copy_bits(skb, 0, nskb->data, start)) {
1161                                 dev_kfree_skb(nskb);
1162                                 dev_kfree_skb(skb);
1163                                 return NULL;
1164                         }
1165
1166                         /* copy the rest, with csum calculation */
1167                         *(__sum16 *)(skb->data + offset) = 0;
1168                         csum = skb_copy_and_csum_bits(skb, start,
1169                                                       nskb->data + start,
1170                                                       skb->len - start, 0);
1171
1172                         /* add in the header checksums */
1173                         if (skb->protocol == htons(ETH_P_IP)) {
1174                                 struct iphdr *iph = ip_hdr(nskb);
1175
1176                                 if (iph->protocol == IPPROTO_TCP ||
1177                                     iph->protocol == IPPROTO_UDP) {
1178                                         csum = csum_tcpudp_magic(iph->saddr,
1179                                                                  iph->daddr,
1180                                                                  skb->len - start,
1181                                                                  iph->protocol,
1182                                                                  csum);
1183                                 }
1184                         } else if (skb->protocol == htons(ETH_P_IPV6)) {
1185                                 struct ipv6hdr *ip6h = ipv6_hdr(nskb);
1186
1187                                 if (ip6h->nexthdr == IPPROTO_TCP ||
1188                                     ip6h->nexthdr == IPPROTO_UDP) {
1189                                         csum = csum_ipv6_magic(&ip6h->saddr,
1190                                                                &ip6h->daddr,
1191                                                                skb->len - start,
1192                                                                ip6h->nexthdr,
1193                                                                csum);
1194                                 }
1195                         }
1196
1197                         /* save the final result */
1198                         *(__sum16 *)(nskb->data + offset) = csum;
1199
1200                         nskb->ip_summed = CHECKSUM_NONE;
1201                 } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
1202                         dev_kfree_skb(nskb);
1203                         dev_kfree_skb(skb);
1204                         return NULL;
1205                 }
1206                 (void)skb_put(nskb, skb->len);
1207                 if (skb_is_gso(skb)) {
1208                         skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
1209                         skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
1210                 }
1211                 nskb->queue_mapping = skb->queue_mapping;
1212                 dev_kfree_skb(skb);
1213                 skb = nskb;
1214         }
1215         return skb;
1216 }
1217
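/* Software GSO path for packets larger than the negotiated TSO limit:
 * re-segment the skb into port->tsolen-sized pieces (minus headers) and
 * submit each segment through sunvnet_start_xmit_common().
 */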
1218 static netdev_tx_t
1219 vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
1220                      struct vnet_port *(*vnet_tx_port)
1221                      (struct sk_buff *, struct net_device *))
1222 {
1223         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
1224         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1225         struct sk_buff *segs;
1226         int maclen, datalen;
1227         int status;
1228         int gso_size, gso_type, gso_segs;
1229         int hlen = skb_transport_header(skb) - skb_mac_header(skb);
1230         int proto = IPPROTO_IP;
1231
1232         if (skb->protocol == htons(ETH_P_IP))
1233                 proto = ip_hdr(skb)->protocol;
1234         else if (skb->protocol == htons(ETH_P_IPV6))
1235                 proto = ipv6_hdr(skb)->nexthdr;
1236
1237         if (proto == IPPROTO_TCP) {
1238                 hlen += tcp_hdr(skb)->doff * 4;
1239         } else if (proto == IPPROTO_UDP) {
1240                 hlen += sizeof(struct udphdr);
1241         } else {
1242                 pr_err("vnet_handle_offloads GSO with unknown transport "
1243                        "protocol %d tproto %d\n", skb->protocol, proto);
1244                 hlen = 128; /* XXX */
1245         }
1246         datalen = port->tsolen - hlen;
1247
1248         gso_size = skb_shinfo(skb)->gso_size;
1249         gso_type = skb_shinfo(skb)->gso_type;
1250         gso_segs = skb_shinfo(skb)->gso_segs;
1251
1252         if (port->tso && gso_size < datalen)
1253                 gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);
1254
1255         if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
1256                 struct netdev_queue *txq;
1257
1258                 txq  = netdev_get_tx_queue(dev, port->q_index);
1259                 netif_tx_stop_queue(txq);
1260                 if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
1261                         return NETDEV_TX_BUSY;
1262                 netif_tx_wake_queue(txq);
1263         }
1264
1265         maclen = skb_network_header(skb) - skb_mac_header(skb);
1266         skb_pull(skb, maclen);
1267
1268         if (port->tso && gso_size < datalen) {
1269                 if (skb_unclone(skb, GFP_ATOMIC))
1270                         goto out_dropped;
1271
1272                 /* segment to TSO size */
1273                 skb_shinfo(skb)->gso_size = datalen;
1274                 skb_shinfo(skb)->gso_segs = gso_segs;
1275         }
1276         segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
1277         if (IS_ERR(segs))
1278                 goto out_dropped;
1279
1280         skb_push(skb, maclen);
1281         skb_reset_mac_header(skb);
1282
1283         status = 0;
1284         while (segs) {
1285                 struct sk_buff *curr = segs;
1286
1287                 segs = segs->next;
1288                 curr->next = NULL;
1289                 if (port->tso && curr->len > dev->mtu) {
1290                         skb_shinfo(curr)->gso_size = gso_size;
1291                         skb_shinfo(curr)->gso_type = gso_type;
1292                         skb_shinfo(curr)->gso_segs =
1293                                 DIV_ROUND_UP(curr->len - hlen, gso_size);
1294                 } else {
1295                         skb_shinfo(curr)->gso_size = 0;
1296                 }
1297
1298                 skb_push(curr, maclen);
1299                 skb_reset_mac_header(curr);
1300                 memcpy(skb_mac_header(curr), skb_mac_header(skb),
1301                        maclen);
1302                 curr->csum_start = skb_transport_header(curr) - curr->head;
1303                 if (ip_hdr(curr)->protocol == IPPROTO_TCP)
1304                         curr->csum_offset = offsetof(struct tcphdr, check);
1305                 else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
1306                         curr->csum_offset = offsetof(struct udphdr, check);
1307
1308                 if (!(status & NETDEV_TX_MASK))
1309                         status = sunvnet_start_xmit_common(curr, dev,
1310                                                            vnet_tx_port);
1311                 if (status & NETDEV_TX_MASK)
1312                         dev_kfree_skb_any(curr);
1313         }
1314
1315         if (!(status & NETDEV_TX_MASK))
1316                 dev_kfree_skb_any(skb);
1317         return status;
1318 out_dropped:
1319         dev->stats.tx_dropped++;
1320         dev_kfree_skb_any(skb);
1321         return NETDEV_TX_OK;
1322 }
1323
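/* Common transmit entry point: pick the destination port under RCU, hand
 * oversized GSO packets to vnet_handle_offloads(), bounce oversized non-GSO
 * packets with an ICMP/ICMPv6 "packet too big" notification, shape and
 * checksum the skb, map it into the next TX descriptor, and mark it READY
 * behind a dma_wmb() before (possibly) sending the "start" trigger.
 */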
1324 netdev_tx_t
1325 sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
1326                           struct vnet_port *(*vnet_tx_port)
1327                           (struct sk_buff *, struct net_device *))
1328 {
1329         struct vnet_port *port = NULL;
1330         struct vio_dring_state *dr;
1331         struct vio_net_desc *d;
1332         unsigned int len;
1333         struct sk_buff *freeskbs = NULL;
1334         int i, err, txi;
1335         unsigned pending = 0;
1336         struct netdev_queue *txq;
1337
1338         rcu_read_lock();
1339         port = vnet_tx_port(skb, dev);
1340         if (unlikely(!port))
1341                 goto out_dropped;
1342
1343         if (skb_is_gso(skb) && skb->len > port->tsolen) {
1344                 err = vnet_handle_offloads(port, skb, vnet_tx_port);
1345                 rcu_read_unlock();
1346                 return err;
1347         }
1348
1349         if (!skb_is_gso(skb) && skb->len > port->rmtu) {
1350                 unsigned long localmtu = port->rmtu - ETH_HLEN;
1351
1352                 if (vio_version_after_eq(&port->vio, 1, 3))
1353                         localmtu -= VLAN_HLEN;
1354
1355                 if (skb->protocol == htons(ETH_P_IP))
1356                         icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
1357                                       htonl(localmtu));
1358 #if IS_ENABLED(CONFIG_IPV6)
1359                 else if (skb->protocol == htons(ETH_P_IPV6))
1360                         icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
1361 #endif
1362                 goto out_dropped;
1363         }
1364
1365         skb = vnet_skb_shape(skb, 2);
1366
1367         if (unlikely(!skb))
1368                 goto out_dropped;
1369
1370         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1371                 if (skb->protocol == htons(ETH_P_IP))
1372                         vnet_fullcsum_ipv4(skb);
1373 #if IS_ENABLED(CONFIG_IPV6)
1374                 else if (skb->protocol == htons(ETH_P_IPV6))
1375                         vnet_fullcsum_ipv6(skb);
1376 #endif
1377         }
1378
1379         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1380         i = skb_get_queue_mapping(skb);
1381         txq = netdev_get_tx_queue(dev, i);
1382         if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1383                 if (!netif_tx_queue_stopped(txq)) {
1384                         netif_tx_stop_queue(txq);
1385
1386                         /* This is a hard error, log it. */
1387                         netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
1388                         dev->stats.tx_errors++;
1389                 }
1390                 rcu_read_unlock();
1391                 return NETDEV_TX_BUSY;
1392         }
1393
1394         d = vio_dring_cur(dr);
1395
1396         txi = dr->prod;
1397
1398         freeskbs = vnet_clean_tx_ring(port, &pending);
1399
1400         BUG_ON(port->tx_bufs[txi].skb);
1401
1402         len = skb->len;
1403         if (len < ETH_ZLEN)
1404                 len = ETH_ZLEN;
1405
1406         err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
1407                            (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
1408         if (err < 0) {
1409                 netdev_info(dev, "tx buffer map error %d\n", err);
1410                 goto out_dropped;
1411         }
1412
1413         port->tx_bufs[txi].skb = skb;
1414         skb = NULL;
1415         port->tx_bufs[txi].ncookies = err;
1416
1417         /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1418          * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1419          * the protocol itself does not require it as long as the peer
1420          * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1421          *
1422          * An ACK for every packet in the ring is expensive as the
1423          * sending of LDC messages is slow and affects performance.
1424          */
1425         d->hdr.ack = VIO_ACK_DISABLE;
1426         d->size = len;
1427         d->ncookies = port->tx_bufs[txi].ncookies;
1428         for (i = 0; i < d->ncookies; i++)
1429                 d->cookies[i] = port->tx_bufs[txi].cookies[i];
1430         if (vio_version_after_eq(&port->vio, 1, 7)) {
1431                 struct vio_net_dext *dext = vio_net_ext(d);
1432
1433                 memset(dext, 0, sizeof(*dext));
1434                 if (skb_is_gso(port->tx_bufs[txi].skb)) {
1435                         dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
1436                                              ->gso_size;
1437                         dext->flags |= VNET_PKT_IPV4_LSO;
1438                 }
1439                 if (vio_version_after_eq(&port->vio, 1, 8) &&
1440                     !port->switch_port) {
1441                         dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
1442                         dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
1443                 }
1444         }
1445
1446         /* This has to be a non-SMP write barrier because we are writing
1447          * to memory which is shared with the peer LDOM.
1448          */
1449         dma_wmb();
1450
1451         d->hdr.state = VIO_DESC_READY;
1452
1453         /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1454          * to notify the consumer that some descriptors are READY.
1455          * After that "start" trigger, no additional triggers are needed until
1456          * a DRING_STOPPED is received from the consumer. The dr->cons field
1457          * (set up by vnet_ack()) has the value of the next dring index
1458          * that has not yet been ack-ed. We send a "start" trigger here
1459          * if, and only if, start_cons is true (reset it afterward). Conversely,
1460          * vnet_ack() should check if the dring corresponding to cons
1461          * is marked READY, but start_cons was false.
1462          * If so, vnet_ack() should send out the missed "start" trigger.
1463          *
1464          * Note that the dma_wmb() above makes sure the cookies et al. are
1465          * not globally visible before the VIO_DESC_READY, and that the
1466          * stores are ordered correctly by the compiler. The consumer will
1467          * not proceed until the VIO_DESC_READY is visible, assuring that
1468          * the consumer does not observe anything related to descriptors
1469          * out of order. The HV trap from the LDC start trigger is the
1470          * producer-to-consumer announcement that work is available to
1471          * the consumer.
1472          */
1473         if (!port->start_cons) { /* previous trigger suffices */
1474                 trace_vnet_skip_tx_trigger(port->vio._local_sid,
1475                                            port->vio._peer_sid, dr->cons);
1476                 goto ldc_start_done;
1477         }
1478
1479         err = __vnet_tx_trigger(port, dr->cons);
1480         if (unlikely(err < 0)) {
1481                 netdev_info(dev, "TX trigger error %d\n", err);
1482                 d->hdr.state = VIO_DESC_FREE;
1483                 skb = port->tx_bufs[txi].skb;
1484                 port->tx_bufs[txi].skb = NULL;
1485                 dev->stats.tx_carrier_errors++;
1486                 goto out_dropped;
1487         }
1488
1489 ldc_start_done:
1490         port->start_cons = false;
1491
1492         dev->stats.tx_packets++;
1493         dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1494         port->stats.tx_packets++;
1495         port->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1496
1497         dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
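             /* If that filled the ring, stop the queue, then re-check after
              * the barrier so a wakeup is not missed if descriptors were
              * freed in the meantime.
              */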
1498         if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1499                 netif_tx_stop_queue(txq);
1500                 smp_rmb();
1501                 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
1502                         netif_tx_wake_queue(txq);
1503         }
1504
1505         (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
1506         rcu_read_unlock();
1507
1508         vnet_free_skbs(freeskbs);
1509
1510         return NETDEV_TX_OK;
1511
1512 out_dropped:
1513         if (pending)
1514                 (void)mod_timer(&port->clean_timer,
1515                                 jiffies + VNET_CLEAN_TIMEOUT);
1516         else if (port)
1517                 del_timer(&port->clean_timer);
1518         rcu_read_unlock();
1519         if (skb)
1520                 dev_kfree_skb(skb);
1521         vnet_free_skbs(freeskbs);
1522         dev->stats.tx_dropped++;
1523         return NETDEV_TX_OK;
1524 }
1525 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);
1526
1527 void sunvnet_tx_timeout_common(struct net_device *dev)
1528 {
1529         /* XXX Implement me XXX */
1530 }
1531 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);
1532
1533 int sunvnet_open_common(struct net_device *dev)
1534 {
1535         netif_carrier_on(dev);
1536         netif_tx_start_all_queues(dev);
1537
1538         return 0;
1539 }
1540 EXPORT_SYMBOL_GPL(sunvnet_open_common);
1541
1542 int sunvnet_close_common(struct net_device *dev)
1543 {
1544         netif_tx_stop_all_queues(dev);
1545         netif_carrier_off(dev);
1546
1547         return 0;
1548 }
1549 EXPORT_SYMBOL_GPL(sunvnet_close_common);
1550
1551 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
1552 {
1553         struct vnet_mcast_entry *m;
1554
1555         for (m = vp->mcast_list; m; m = m->next) {
1556                 if (ether_addr_equal(m->addr, addr))
1557                         return m;
1558         }
1559         return NULL;
1560 }
1561
1562 static void __update_mc_list(struct vnet *vp, struct net_device *dev)
1563 {
1564         struct netdev_hw_addr *ha;
1565
1566         netdev_for_each_mc_addr(ha, dev) {
1567                 struct vnet_mcast_entry *m;
1568
1569                 m = __vnet_mc_find(vp, ha->addr);
1570                 if (m) {
1571                         m->hit = 1;
1572                         continue;
1573                 }
1574
1575                 m = kzalloc(sizeof(*m), GFP_ATOMIC);
1576                 if (!m)
1577                         continue;
1578                 memcpy(m->addr, ha->addr, ETH_ALEN);
1579                 m->hit = 1;
1580
1581                 m->next = vp->mcast_list;
1582                 vp->mcast_list = m;
1585         }
1586 }
1587
1588 static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
1589 {
1590         struct vio_net_mcast_info info;
1591         struct vnet_mcast_entry *m, **pp;
1592         int n_addrs;
1593
1594         memset(&info, 0, sizeof(info));
1595
1596         info.tag.type = VIO_TYPE_CTRL;
1597         info.tag.stype = VIO_SUBTYPE_INFO;
1598         info.tag.stype_env = VNET_MCAST_INFO;
1599         info.tag.sid = vio_send_sid(&port->vio);
1600         info.set = 1;
1601
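             /* First pass: advertise (set = 1) addresses not yet sent,
              * batching up to VNET_NUM_MCAST entries per message.
              */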
1602         n_addrs = 0;
1603         for (m = vp->mcast_list; m; m = m->next) {
1604                 if (m->sent)
1605                         continue;
1606                 m->sent = 1;
1607                 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1608                        m->addr, ETH_ALEN);
1609                 if (++n_addrs == VNET_NUM_MCAST) {
1610                         info.count = n_addrs;
1611
1612                         (void)vio_ldc_send(&port->vio, &info,
1613                                            sizeof(info));
1614                         n_addrs = 0;
1615                 }
1616         }
1617         if (n_addrs) {
1618                 info.count = n_addrs;
1619                 (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1620         }
1621
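             /* Second pass: advertise removal (set = 0) of entries that were
              * not marked hit by __update_mc_list() and unlink them.
              */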
1622         info.set = 0;
1623
1624         n_addrs = 0;
1625         pp = &vp->mcast_list;
1626         while ((m = *pp) != NULL) {
1627                 if (m->hit) {
1628                         m->hit = 0;
1629                         pp = &m->next;
1630                         continue;
1631                 }
1632
1633                 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1634                        m->addr, ETH_ALEN);
1635                 if (++n_addrs == VNET_NUM_MCAST) {
1636                         info.count = n_addrs;
1637                         (void)vio_ldc_send(&port->vio, &info,
1638                                            sizeof(info));
1639                         n_addrs = 0;
1640                 }
1641
1642                 *pp = m->next;
1643                 kfree(m);
1644         }
1645         if (n_addrs) {
1646                 info.count = n_addrs;
1647                 (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1648         }
1649 }
1650
1651 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
1652 {
1653         struct vnet_port *port;
1654
1655         rcu_read_lock();
1656         list_for_each_entry_rcu(port, &vp->port_list, list) {
1657                 if (port->switch_port) {
1658                         __update_mc_list(vp, dev);
1659                         __send_mc_list(vp, port);
1660                         break;
1661                 }
1662         }
1663         rcu_read_unlock();
1664 }
1665 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);
1666
1667 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
1668 {
1669         return -EINVAL;
1670 }
1671 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);
1672
1673 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
1674 {
1675         struct vio_dring_state *dr;
1676         int i;
1677
1678         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1679
1680         if (!dr->base)
1681                 return;
1682
1683         for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1684                 struct vio_net_desc *d;
1685                 void *skb = port->tx_bufs[i].skb;
1686
1687                 if (!skb)
1688                         continue;
1689
1690                 d = vio_dring_entry(dr, i);
1691
1692                 ldc_unmap(port->vio.lp,
1693                           port->tx_bufs[i].cookies,
1694                           port->tx_bufs[i].ncookies);
1695                 dev_kfree_skb(skb);
1696                 port->tx_bufs[i].skb = NULL;
1697                 d->hdr.state = VIO_DESC_FREE;
1698         }
1699         ldc_free_exp_dring(port->vio.lp, dr->base,
1700                            (dr->entry_size * dr->num_entries),
1701                            dr->cookies, dr->ncookies);
1702         dr->base = NULL;
1703         dr->entry_size = 0;
1704         dr->num_entries = 0;
1705         dr->pending = 0;
1706         dr->ncookies = 0;
1707 }
1708 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);
1709
1710 void vnet_port_reset(struct vnet_port *port)
1711 {
1712         del_timer(&port->clean_timer);
1713         sunvnet_port_free_tx_bufs_common(port);
1714         port->rmtu = 0;
1715         port->tso = (port->vsw == 0);  /* no TSO in vsw; it misbehaves in a bridge */
1716         port->tsolen = 0;
1717 }
1718 EXPORT_SYMBOL_GPL(vnet_port_reset);
1719
1720 static int vnet_port_alloc_tx_ring(struct vnet_port *port)
1721 {
1722         struct vio_dring_state *dr;
1723         unsigned long len, elen;
1724         int i, err, ncookies;
1725         void *dring;
1726
1727         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1728
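             /* Each ring entry holds the descriptor, room for two transfer
              * cookies and, from v1.7 on, the descriptor extension.
              */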
1729         elen = sizeof(struct vio_net_desc) +
1730                sizeof(struct ldc_trans_cookie) * 2;
1731         if (vio_version_after_eq(&port->vio, 1, 7))
1732                 elen += sizeof(struct vio_net_dext);
1733         len = VNET_TX_RING_SIZE * elen;
1734
1735         ncookies = VIO_MAX_RING_COOKIES;
1736         dring = ldc_alloc_exp_dring(port->vio.lp, len,
1737                                     dr->cookies, &ncookies,
1738                                     (LDC_MAP_SHADOW |
1739                                      LDC_MAP_DIRECT |
1740                                      LDC_MAP_RW));
1741         if (IS_ERR(dring)) {
1742                 err = PTR_ERR(dring);
1743                 goto err_out;
1744         }
1745
1746         dr->base = dring;
1747         dr->entry_size = elen;
1748         dr->num_entries = VNET_TX_RING_SIZE;
1749         dr->prod = 0;
1750         dr->cons = 0;
1751         port->start_cons = true; /* need an initial trigger */
1752         dr->pending = VNET_TX_RING_SIZE;
1753         dr->ncookies = ncookies;
1754
1755         for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
1756                 struct vio_net_desc *d;
1757
1758                 d = vio_dring_entry(dr, i);
1759                 d->hdr.state = VIO_DESC_FREE;
1760         }
1761         return 0;
1762
1763 err_out:
1764         sunvnet_port_free_tx_bufs_common(port);
1765
1766         return err;
1767 }
1768
1769 #ifdef CONFIG_NET_POLL_CONTROLLER
1770 void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
1771 {
1772         struct vnet_port *port;
1773         unsigned long flags;
1774
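             /* Schedule NAPI on the first port in the list to poll for any
              * pending work.
              */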
1775         spin_lock_irqsave(&vp->lock, flags);
1776         if (!list_empty(&vp->port_list)) {
1777                 port = list_entry(vp->port_list.next, struct vnet_port, list);
1778                 napi_schedule(&port->napi);
1779         }
1780         spin_unlock_irqrestore(&vp->lock, flags);
1781 }
1782 EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
1783 #endif
1784
1785 void sunvnet_port_add_txq_common(struct vnet_port *port)
1786 {
1787         struct vnet *vp = port->vp;
1788         int smallest = 0;
1789         int i;
1790
1791         /* Find the first least-used queue.
1792          * When there are more LDOMs than queues, we start to
1793          * double up on ports per queue.
1794          */
1795         for (i = 0; i < VNET_MAX_TXQS; i++) {
1796                 if (vp->q_used[i] == 0) {
1797                         smallest = i;
1798                         break;
1799                 }
1800                 if (vp->q_used[i] < vp->q_used[smallest])
1801                         smallest = i;
1802         }
1803
1804         vp->nports++;
1805         vp->q_used[smallest]++;
1806         port->q_index = smallest;
1807 }
1808 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);
1809
1810 void sunvnet_port_rm_txq_common(struct vnet_port *port)
1811 {
1812         port->vp->nports--;
1813         port->vp->q_used[port->q_index]--;
1814         port->q_index = 0;
1815 }
1816 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);