1 /* sunvnet.c: Sun LDOM Virtual Network Driver.
2  *
3  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4  * Copyright (C) 2016-2017 Oracle. All rights reserved.
5  */
6
7 #include <linux/module.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/delay.h>
12 #include <linux/init.h>
13 #include <linux/netdevice.h>
14 #include <linux/ethtool.h>
15 #include <linux/etherdevice.h>
16 #include <linux/mutex.h>
17 #include <linux/highmem.h>
18 #include <linux/if_vlan.h>
19 #define CREATE_TRACE_POINTS
20 #include <trace/events/sunvnet.h>
21
22 #if IS_ENABLED(CONFIG_IPV6)
23 #include <linux/icmpv6.h>
24 #endif
25
26 #include <net/ip.h>
27 #include <net/icmp.h>
28 #include <net/route.h>
29
30 #include <asm/vio.h>
31 #include <asm/ldc.h>
32
33 #include "sunvnet_common.h"
34
35 /* Heuristic for the number of times to exponentially back off and
36  * retry sending an LDC trigger when EAGAIN is encountered.
37  */
38 #define VNET_MAX_RETRIES        10
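/* The retry loops below (vnet_send_ack() and __vnet_tx_trigger()) start with
 * a 1 usec delay, double it after each -EAGAIN up to a 128 usec cap, and give
 * up once the retry count exceeds VNET_MAX_RETRIES.
 */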
39
40 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
41 MODULE_DESCRIPTION("Sun LDOM virtual network support library");
42 MODULE_LICENSE("GPL");
43 MODULE_VERSION("1.1");
44
45 static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
46
47 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
48 {
49         return vio_dring_avail(dr, VNET_TX_RING_SIZE);
50 }
51
52 static int vnet_handle_unknown(struct vnet_port *port, void *arg)
53 {
54         struct vio_msg_tag *pkt = arg;
55
56         pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
57                pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
58         pr_err("Resetting connection\n");
59
60         ldc_disconnect(port->vio.lp);
61
62         return -ECONNRESET;
63 }
64
65 static int vnet_port_alloc_tx_ring(struct vnet_port *port);
66
67 int sunvnet_send_attr_common(struct vio_driver_state *vio)
68 {
69         struct vnet_port *port = to_vnet_port(vio);
70         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
71         struct vio_net_attr_info pkt;
72         int framelen = ETH_FRAME_LEN;
73         int i, err;
74
75         err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
76         if (err)
77                 return err;
78
79         memset(&pkt, 0, sizeof(pkt));
80         pkt.tag.type = VIO_TYPE_CTRL;
81         pkt.tag.stype = VIO_SUBTYPE_INFO;
82         pkt.tag.stype_env = VIO_ATTR_INFO;
83         pkt.tag.sid = vio_send_sid(vio);
84         if (vio_version_before(vio, 1, 2))
85                 pkt.xfer_mode = VIO_DRING_MODE;
86         else
87                 pkt.xfer_mode = VIO_NEW_DRING_MODE;
88         pkt.addr_type = VNET_ADDR_ETHERMAC;
89         pkt.ack_freq = 0;
90         for (i = 0; i < 6; i++)
91                 pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
92         if (vio_version_after(vio, 1, 3)) {
93                 if (port->rmtu) {
94                         port->rmtu = min(VNET_MAXPACKET, port->rmtu);
95                         pkt.mtu = port->rmtu;
96                 } else {
97                         port->rmtu = VNET_MAXPACKET;
98                         pkt.mtu = port->rmtu;
99                 }
100                 if (vio_version_after_eq(vio, 1, 6))
101                         pkt.options = VIO_TX_DRING;
102         } else if (vio_version_before(vio, 1, 3)) {
103                 pkt.mtu = framelen;
104         } else { /* v1.3 */
105                 pkt.mtu = framelen + VLAN_HLEN;
106         }
107
108         pkt.cflags = 0;
109         if (vio_version_after_eq(vio, 1, 7) && port->tso) {
110                 pkt.cflags |= VNET_LSO_IPV4_CAPAB;
111                 if (!port->tsolen)
112                         port->tsolen = VNET_MAXTSO;
113                 pkt.ipv4_lso_maxlen = port->tsolen;
114         }
115
116         pkt.plnk_updt = PHYSLINK_UPDATE_NONE;
117
118         viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
119                "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
120                "cflags[0x%04x] lso_max[%u]\n",
121                pkt.xfer_mode, pkt.addr_type,
122                (unsigned long long)pkt.addr,
123                pkt.ack_freq, pkt.plnk_updt, pkt.options,
124                (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);
125
126         return vio_ldc_send(vio, &pkt, sizeof(pkt));
127 }
128 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);
129
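/* Handle the peer's ATTR INFO: negotiate transfer mode, MTU and LSO
 * parameters, record the results in the port (rmtu, tso, tsolen), and reply
 * with an ACK, or with a NACK followed by -ECONNRESET when the peer's
 * settings cannot be accepted.
 */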
130 static int handle_attr_info(struct vio_driver_state *vio,
131                             struct vio_net_attr_info *pkt)
132 {
133         struct vnet_port *port = to_vnet_port(vio);
134         u64     localmtu;
135         u8      xfer_mode;
136
137         viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
138                "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
139                " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
140                pkt->xfer_mode, pkt->addr_type,
141                (unsigned long long)pkt->addr,
142                pkt->ack_freq, pkt->plnk_updt, pkt->options,
143                (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
144                pkt->ipv4_lso_maxlen);
145
146         pkt->tag.sid = vio_send_sid(vio);
147
148         xfer_mode = pkt->xfer_mode;
149         /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
150         if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
151                 xfer_mode = VIO_NEW_DRING_MODE;
152
153         /* MTU negotiation:
154          *      < v1.3 - ETH_FRAME_LEN exactly
155          *      > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
156          *                      pkt->mtu for ACK
157          *      = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
158          */
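        /* The negotiated value becomes port->rmtu below; it later bounds the
         * receive copy size in vnet_rx_one() and the non-GSO length check in
         * sunvnet_start_xmit_common().
         */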
159         if (vio_version_before(vio, 1, 3)) {
160                 localmtu = ETH_FRAME_LEN;
161         } else if (vio_version_after(vio, 1, 3)) {
162                 localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
163                 localmtu = min(pkt->mtu, localmtu);
164                 pkt->mtu = localmtu;
165         } else { /* v1.3 */
166                 localmtu = ETH_FRAME_LEN + VLAN_HLEN;
167         }
168         port->rmtu = localmtu;
169
170         /* LSO negotiation */
171         if (vio_version_after_eq(vio, 1, 7))
172                 port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
173         else
174                 port->tso = false;
175         if (port->tso) {
176                 if (!port->tsolen)
177                         port->tsolen = VNET_MAXTSO;
178                 port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
179                 if (port->tsolen < VNET_MINTSO) {
180                         port->tso = false;
181                         port->tsolen = 0;
182                         pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
183                 }
184                 pkt->ipv4_lso_maxlen = port->tsolen;
185         } else {
186                 pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
187                 pkt->ipv4_lso_maxlen = 0;
188                 port->tsolen = 0;
189         }
190
191         /* for version >= 1.6, ACK with the packet mode we support */
192         if (vio_version_after_eq(vio, 1, 6)) {
193                 pkt->xfer_mode = VIO_NEW_DRING_MODE;
194                 pkt->options = VIO_TX_DRING;
195         }
196
197         if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
198             pkt->addr_type != VNET_ADDR_ETHERMAC ||
199             pkt->mtu != localmtu) {
200                 viodbg(HS, "SEND NET ATTR NACK\n");
201
202                 pkt->tag.stype = VIO_SUBTYPE_NACK;
203
204                 (void)vio_ldc_send(vio, pkt, sizeof(*pkt));
205
206                 return -ECONNRESET;
207         }
208
209         viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
210                "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
211                "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
212                pkt->xfer_mode, pkt->addr_type,
213                (unsigned long long)pkt->addr,
214                pkt->ack_freq, pkt->plnk_updt, pkt->options,
215                (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
216                pkt->ipv4_lso_maxlen);
217
218         pkt->tag.stype = VIO_SUBTYPE_ACK;
219
220         return vio_ldc_send(vio, pkt, sizeof(*pkt));
221 }
222
223 static int handle_attr_ack(struct vio_driver_state *vio,
224                            struct vio_net_attr_info *pkt)
225 {
226         viodbg(HS, "GOT NET ATTR ACK\n");
227
228         return 0;
229 }
230
231 static int handle_attr_nack(struct vio_driver_state *vio,
232                             struct vio_net_attr_info *pkt)
233 {
234         viodbg(HS, "GOT NET ATTR NACK\n");
235
236         return -ECONNRESET;
237 }
238
239 int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
240 {
241         struct vio_net_attr_info *pkt = arg;
242
243         switch (pkt->tag.stype) {
244         case VIO_SUBTYPE_INFO:
245                 return handle_attr_info(vio, pkt);
246
247         case VIO_SUBTYPE_ACK:
248                 return handle_attr_ack(vio, pkt);
249
250         case VIO_SUBTYPE_NACK:
251                 return handle_attr_nack(vio, pkt);
252
253         default:
254                 return -ECONNRESET;
255         }
256 }
257 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);
258
259 void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
260 {
261         struct vio_dring_state *dr;
262
263         dr = &vio->drings[VIO_DRIVER_RX_RING];
264         dr->rcv_nxt = 1;
265         dr->snd_nxt = 1;
266
267         dr = &vio->drings[VIO_DRIVER_TX_RING];
268         dr->rcv_nxt = 1;
269         dr->snd_nxt = 1;
270 }
271 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);
272
273 /* The hypervisor interface that implements copying to/from imported
274  * memory from another domain requires that copies are done to 8-byte
275  * aligned buffers, and that the lengths of such copies are also 8-byte
276  * multiples.
277  *
278  * So we align skb->data to an 8-byte multiple and pad out the data
279  * area so we can round the copy length up to the next multiple of
280  * 8 for the copy.
281  *
282  * The transmitter puts the actual start of the packet 6 bytes into
283  * the buffer it sends over, so that the IP headers after the ethernet
284  * header are aligned properly.  These 6 bytes are not in the descriptor
285  * length, they are simply implied.  This offset is represented using
286  * the VNET_PACKET_SKIP macro.
287  */
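/* With the 6-byte skip described above, a 60-byte frame is copied in as
 *   copy_len = (60 + VNET_PACKET_SKIP + 7) & ~7 = 72 bytes
 * (see vnet_rx_one()), and alloc_and_align_skb() below reserves enough slack
 * (len + VNET_PACKET_SKIP + 8 + 8) to both align skb->data and round the
 * copy length up.
 */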
288 static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
289                                            unsigned int len)
290 {
291         struct sk_buff *skb;
292         unsigned long addr, off;
293
294         skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
295         if (unlikely(!skb))
296                 return NULL;
297
298         addr = (unsigned long)skb->data;
299         off = ((addr + 7UL) & ~7UL) - addr;
300         if (off)
301                 skb_reserve(skb, off);
302
303         return skb;
304 }
305
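/* Recompute the full TCP or UDP checksum in software for an IPv4 frame.
 * Used on transmit when the skb still carries CHECKSUM_PARTIAL, and on
 * receive when the peer requested a full checksum via the v1.8 descriptor
 * extension flags.
 */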
306 static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
307 {
308         struct iphdr *iph = ip_hdr(skb);
309         int offset = skb_transport_offset(skb);
310
311         if (skb->protocol != htons(ETH_P_IP))
312                 return;
313         if (iph->protocol != IPPROTO_TCP &&
314             iph->protocol != IPPROTO_UDP)
315                 return;
316         skb->ip_summed = CHECKSUM_NONE;
317         skb->csum_level = 1;
318         skb->csum = 0;
319         if (iph->protocol == IPPROTO_TCP) {
320                 struct tcphdr *ptcp = tcp_hdr(skb);
321
322                 ptcp->check = 0;
323                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
324                 ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
325                                                 skb->len - offset, IPPROTO_TCP,
326                                                 skb->csum);
327         } else if (iph->protocol == IPPROTO_UDP) {
328                 struct udphdr *pudp = udp_hdr(skb);
329
330                 pudp->check = 0;
331                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
332                 pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
333                                                 skb->len - offset, IPPROTO_UDP,
334                                                 skb->csum);
335         }
336 }
337
338 #if IS_ENABLED(CONFIG_IPV6)
339 static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
340 {
341         struct ipv6hdr *ip6h = ipv6_hdr(skb);
342         int offset = skb_transport_offset(skb);
343
344         if (skb->protocol != htons(ETH_P_IPV6))
345                 return;
346         if (ip6h->nexthdr != IPPROTO_TCP &&
347             ip6h->nexthdr != IPPROTO_UDP)
348                 return;
349         skb->ip_summed = CHECKSUM_NONE;
350         skb->csum_level = 1;
351         skb->csum = 0;
352         if (ip6h->nexthdr == IPPROTO_TCP) {
353                 struct tcphdr *ptcp = tcp_hdr(skb);
354
355                 ptcp->check = 0;
356                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
357                 ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
358                                               skb->len - offset, IPPROTO_TCP,
359                                               skb->csum);
360         } else if (ip6h->nexthdr == IPPROTO_UDP) {
361                 struct udphdr *pudp = udp_hdr(skb);
362
363                 pudp->check = 0;
364                 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
365                 pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
366                                               skb->len - offset, IPPROTO_UDP,
367                                               skb->csum);
368         }
369 }
370 #endif
371
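/* Receive one descriptor's worth of data: copy it in over the LDC channel
 * with an 8-byte-rounded length, strip the VNET_PACKET_SKIP bytes, apply any
 * v1.8 checksum-offload hints from the descriptor extension, and hand the
 * packet to GRO.
 */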
372 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
373 {
374         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
375         unsigned int len = desc->size;
376         unsigned int copy_len;
377         struct sk_buff *skb;
378         int maxlen;
379         int err;
380
381         err = -EMSGSIZE;
382         if (port->tso && port->tsolen > port->rmtu)
383                 maxlen = port->tsolen;
384         else
385                 maxlen = port->rmtu;
386         if (unlikely(len < ETH_ZLEN || len > maxlen)) {
387                 dev->stats.rx_length_errors++;
388                 goto out_dropped;
389         }
390
391         skb = alloc_and_align_skb(dev, len);
392         err = -ENOMEM;
393         if (unlikely(!skb)) {
394                 dev->stats.rx_missed_errors++;
395                 goto out_dropped;
396         }
397
398         copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
399         skb_put(skb, copy_len);
400         err = ldc_copy(port->vio.lp, LDC_COPY_IN,
401                        skb->data, copy_len, 0,
402                        desc->cookies, desc->ncookies);
403         if (unlikely(err < 0)) {
404                 dev->stats.rx_frame_errors++;
405                 goto out_free_skb;
406         }
407
408         skb_pull(skb, VNET_PACKET_SKIP);
409         skb_trim(skb, len);
410         skb->protocol = eth_type_trans(skb, dev);
411
412         if (vio_version_after_eq(&port->vio, 1, 8)) {
413                 struct vio_net_dext *dext = vio_net_ext(desc);
414
415                 skb_reset_network_header(skb);
416
417                 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
418                         if (skb->protocol == ETH_P_IP) {
419                                 struct iphdr *iph = ip_hdr(skb);
420
421                                 iph->check = 0;
422                                 ip_send_check(iph);
423                         }
424                 }
425                 if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
426                     skb->ip_summed == CHECKSUM_NONE) {
427                         if (skb->protocol == htons(ETH_P_IP)) {
428                                 struct iphdr *iph = ip_hdr(skb);
429                                 int ihl = iph->ihl * 4;
430
431                                 skb_set_transport_header(skb, ihl);
432                                 vnet_fullcsum_ipv4(skb);
433 #if IS_ENABLED(CONFIG_IPV6)
434                         } else if (skb->protocol == htons(ETH_P_IPV6)) {
435                                 skb_set_transport_header(skb,
436                                                          sizeof(struct ipv6hdr));
437                                 vnet_fullcsum_ipv6(skb);
438 #endif
439                         }
440                 }
441                 if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
442                         skb->ip_summed = CHECKSUM_PARTIAL;
443                         skb->csum_level = 0;
444                         if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
445                                 skb->csum_level = 1;
446                 }
447         }
448
449         skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;
450
451         if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
452                 dev->stats.multicast++;
453         dev->stats.rx_packets++;
454         dev->stats.rx_bytes += len;
455         port->stats.rx_packets++;
456         port->stats.rx_bytes += len;
457         napi_gro_receive(&port->napi, skb);
458         return 0;
459
460 out_free_skb:
461         kfree_skb(skb);
462
463 out_dropped:
464         dev->stats.rx_dropped++;
465         return err;
466 }
467
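/* Send a dring ACK (ACTIVE or STOPPED) for descriptors [start, end],
 * retrying with bounded exponential backoff on -EAGAIN.  If a STOPPED ack
 * cannot be delivered, remember it in port->stop_rx so the next TX trigger
 * (__vnet_tx_trigger()) resends it first.
 */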
468 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
469                          u32 start, u32 end, u8 vio_dring_state)
470 {
471         struct vio_dring_data hdr = {
472                 .tag = {
473                         .type           = VIO_TYPE_DATA,
474                         .stype          = VIO_SUBTYPE_ACK,
475                         .stype_env      = VIO_DRING_DATA,
476                         .sid            = vio_send_sid(&port->vio),
477                 },
478                 .dring_ident            = dr->ident,
479                 .start_idx              = start,
480                 .end_idx                = end,
481                 .state                  = vio_dring_state,
482         };
483         int err, delay;
484         int retries = 0;
485
486         hdr.seq = dr->snd_nxt;
487         delay = 1;
488         do {
489                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
490                 if (err > 0) {
491                         dr->snd_nxt++;
492                         break;
493                 }
494                 udelay(delay);
495                 if ((delay <<= 1) > 128)
496                         delay = 128;
497                 if (retries++ > VNET_MAX_RETRIES) {
498                         pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
499                                 port->raddr[0], port->raddr[1],
500                                 port->raddr[2], port->raddr[3],
501                                 port->raddr[4], port->raddr[5]);
502                         break;
503                 }
504         } while (err == -EAGAIN);
505
506         if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
507                 port->stop_rx_idx = end;
508                 port->stop_rx = true;
509         } else {
510                 port->stop_rx_idx = 0;
511                 port->stop_rx = false;
512         }
513
514         return err;
515 }
516
517 static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
518                                         struct vio_dring_state *dr,
519                                         u32 index)
520 {
521         struct vio_net_desc *desc = port->vio.desc_buf;
522         int err;
523
524         err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
525                                   (index * dr->entry_size),
526                                   dr->cookies, dr->ncookies);
527         if (err < 0)
528                 return ERR_PTR(err);
529
530         return desc;
531 }
532
533 static int put_rx_desc(struct vnet_port *port,
534                        struct vio_dring_state *dr,
535                        struct vio_net_desc *desc,
536                        u32 index)
537 {
538         int err;
539
540         err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
541                                   (index * dr->entry_size),
542                                   dr->cookies, dr->ncookies);
543         if (err < 0)
544                 return err;
545
546         return 0;
547 }
548
549 static int vnet_walk_rx_one(struct vnet_port *port,
550                             struct vio_dring_state *dr,
551                             u32 index, int *needs_ack)
552 {
553         struct vio_net_desc *desc = get_rx_desc(port, dr, index);
554         struct vio_driver_state *vio = &port->vio;
555         int err;
556
557         BUG_ON(!desc);
558         if (IS_ERR(desc))
559                 return PTR_ERR(desc);
560
561         if (desc->hdr.state != VIO_DESC_READY)
562                 return 1;
563
564         dma_rmb();
565
566         viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
567                desc->hdr.state, desc->hdr.ack,
568                desc->size, desc->ncookies,
569                desc->cookies[0].cookie_addr,
570                desc->cookies[0].cookie_size);
571
572         err = vnet_rx_one(port, desc);
573         if (err == -ECONNRESET)
574                 return err;
575         trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
576                           index, desc->hdr.ack);
577         desc->hdr.state = VIO_DESC_DONE;
578         err = put_rx_desc(port, dr, desc, index);
579         if (err < 0)
580                 return err;
581         *needs_ack = desc->hdr.ack;
582         return 0;
583 }
584
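/* Walk the receive dring from start to end, processing descriptors and
 * sending intermediate ACTIVE acks when a descriptor requests one.  If the
 * NAPI budget is exhausted, defer the final STOPPED ack and record where to
 * resume (port->napi_stop_idx / port->napi_resume); otherwise send the
 * STOPPED ack now.
 */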
585 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
586                         u32 start, u32 end, int *npkts, int budget)
587 {
588         struct vio_driver_state *vio = &port->vio;
589         int ack_start = -1, ack_end = -1;
590         bool send_ack = true;
591
592         end = (end == (u32)-1) ? vio_dring_prev(dr, start)
593                                : vio_dring_next(dr, end);
594
595         viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
596
597         while (start != end) {
598                 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
599
600                 if (err == -ECONNRESET)
601                         return err;
602                 if (err != 0)
603                         break;
604                 (*npkts)++;
605                 if (ack_start == -1)
606                         ack_start = start;
607                 ack_end = start;
608                 start = vio_dring_next(dr, start);
609                 if (ack && start != end) {
610                         err = vnet_send_ack(port, dr, ack_start, ack_end,
611                                             VIO_DRING_ACTIVE);
612                         if (err == -ECONNRESET)
613                                 return err;
614                         ack_start = -1;
615                 }
616                 if ((*npkts) >= budget) {
617                         send_ack = false;
618                         break;
619                 }
620         }
621         if (unlikely(ack_start == -1)) {
622                 ack_end = vio_dring_prev(dr, start);
623                 ack_start = ack_end;
624         }
625         if (send_ack) {
626                 port->napi_resume = false;
627                 trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
628                                                port->vio._peer_sid,
629                                                ack_end, *npkts);
630                 return vnet_send_ack(port, dr, ack_start, ack_end,
631                                      VIO_DRING_STOPPED);
632         } else  {
633                 trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
634                                                 port->vio._peer_sid,
635                                                 ack_end, *npkts);
636                 port->napi_resume = true;
637                 port->napi_stop_idx = ack_end;
638                 return 1;
639         }
640 }
641
642 static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
643                    int budget)
644 {
645         struct vio_dring_data *pkt = msgbuf;
646         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
647         struct vio_driver_state *vio = &port->vio;
648
649         viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
650                pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
651
652         if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
653                 return 0;
654         if (unlikely(pkt->seq != dr->rcv_nxt)) {
655                 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
656                        pkt->seq, dr->rcv_nxt);
657                 return 0;
658         }
659
660         if (!port->napi_resume)
661                 dr->rcv_nxt++;
662
663         /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
664
665         return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
666                             npkts, budget);
667 }
668
669 static int idx_is_pending(struct vio_dring_state *dr, u32 end)
670 {
671         u32 idx = dr->cons;
672         int found = 0;
673
674         while (idx != dr->prod) {
675                 if (idx == end) {
676                         found = 1;
677                         break;
678                 }
679                 idx = vio_dring_next(dr, idx);
680         }
681         return found;
682 }
683
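/* Handle a dring ACK from the peer for our transmit ring: advance dr->cons,
 * send a missed "start" trigger if vnet_start_xmit() left a READY descriptor
 * behind, and return 1 if the stopped TX queue now has enough room to be
 * woken.
 */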
684 static int vnet_ack(struct vnet_port *port, void *msgbuf)
685 {
686         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
687         struct vio_dring_data *pkt = msgbuf;
688         struct net_device *dev;
689         u32 end;
690         struct vio_net_desc *desc;
691         struct netdev_queue *txq;
692
693         if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
694                 return 0;
695
696         end = pkt->end_idx;
697         dev = VNET_PORT_TO_NET_DEVICE(port);
698         netif_tx_lock(dev);
699         if (unlikely(!idx_is_pending(dr, end))) {
700                 netif_tx_unlock(dev);
701                 return 0;
702         }
703
704         /* sync for race conditions with vnet_start_xmit() and tell xmit it
705          * is time to send a trigger.
706          */
707         trace_vnet_rx_stopped_ack(port->vio._local_sid,
708                                   port->vio._peer_sid, end);
709         dr->cons = vio_dring_next(dr, end);
710         desc = vio_dring_entry(dr, dr->cons);
711         if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
712                 /* vnet_start_xmit() just populated this dring but missed
713                  * sending the "start" LDC message to the consumer.
714                  * Send a "start" trigger on its behalf.
715                  */
716                 if (__vnet_tx_trigger(port, dr->cons) > 0)
717                         port->start_cons = false;
718                 else
719                         port->start_cons = true;
720         } else {
721                 port->start_cons = true;
722         }
723         netif_tx_unlock(dev);
724
725         txq = netdev_get_tx_queue(dev, port->q_index);
726         if (unlikely(netif_tx_queue_stopped(txq) &&
727                      vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
728                 return 1;
729
730         return 0;
731 }
732
733 static int vnet_nack(struct vnet_port *port, void *msgbuf)
734 {
735         /* XXX just reset or similar XXX */
736         return 0;
737 }
738
739 static int handle_mcast(struct vnet_port *port, void *msgbuf)
740 {
741         struct vio_net_mcast_info *pkt = msgbuf;
742         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
743
744         if (pkt->tag.stype != VIO_SUBTYPE_ACK)
745                 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
746                        dev->name,
747                        pkt->tag.type,
748                        pkt->tag.stype,
749                        pkt->tag.stype_env,
750                        pkt->tag.sid);
751
752         return 0;
753 }
754
755 /* If the queue is stopped, wake it up so that we'll
756  * send out another START message at the next TX.
757  */
758 static void maybe_tx_wakeup(struct vnet_port *port)
759 {
760         struct netdev_queue *txq;
761
762         txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
763                                   port->q_index);
764         __netif_tx_lock(txq, smp_processor_id());
765         if (likely(netif_tx_queue_stopped(txq)))
766                 netif_tx_wake_queue(txq);
767         __netif_tx_unlock(txq);
768 }
769
770 bool sunvnet_port_is_up_common(struct vnet_port *vnet)
771 {
772         struct vio_driver_state *vio = &vnet->vio;
773
774         return !!(vio->hs_state & VIO_HS_COMPLETE);
775 }
776 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);
777
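/* NAPI worker: handle link RESET/UP events first, then either resume a
 * partially processed dring batch (napi_resume) or read LDC messages and
 * dispatch them until the budget is exhausted or the channel is drained.
 */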
778 static int vnet_event_napi(struct vnet_port *port, int budget)
779 {
780         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
781         struct vio_driver_state *vio = &port->vio;
782         int tx_wakeup, err;
783         int npkts = 0;
784
785         /* we don't expect any other bits */
786         BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
787                                   LDC_EVENT_RESET |
788                                   LDC_EVENT_UP));
789
790         /* RESET takes precedence over any other event */
791         if (port->rx_event & LDC_EVENT_RESET) {
792                 /* a link went down */
793
794                 if (port->vsw == 1) {
795                         netif_tx_stop_all_queues(dev);
796                         netif_carrier_off(dev);
797                 }
798
799                 vio_link_state_change(vio, LDC_EVENT_RESET);
800                 vnet_port_reset(port);
801                 vio_port_up(vio);
802
803                 /* If the device is running but its tx queue was
804                  * stopped (due to flow control), restart it.
805                  * This is necessary since vnet_port_reset()
806                  * clears the tx drings and thus we may never get
807                  * back a VIO_TYPE_DATA ACK packet - which is
808                  * the normal mechanism to restart the tx queue.
809                  */
810                 if (netif_running(dev))
811                         maybe_tx_wakeup(port);
812
813                 port->rx_event = 0;
814                 port->stats.event_reset++;
815                 return 0;
816         }
817
818         if (port->rx_event & LDC_EVENT_UP) {
819                 /* a link came up */
820
821                 if (port->vsw == 1) {
822                         netif_carrier_on(port->dev);
823                         netif_tx_start_all_queues(port->dev);
824                 }
825
826                 vio_link_state_change(vio, LDC_EVENT_UP);
827                 port->rx_event = 0;
828                 port->stats.event_up++;
829                 return 0;
830         }
831
832         err = 0;
833         tx_wakeup = 0;
834         while (1) {
835                 union {
836                         struct vio_msg_tag tag;
837                         u64 raw[8];
838                 } msgbuf;
839
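                /* When resuming after an exhausted budget, synthesize the
                 * VIO_DRING_DATA packet we stopped on instead of reading from
                 * the LDC channel, starting just past napi_stop_idx.
                 */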
840                 if (port->napi_resume) {
841                         struct vio_dring_data *pkt =
842                                 (struct vio_dring_data *)&msgbuf;
843                         struct vio_dring_state *dr =
844                                 &port->vio.drings[VIO_DRIVER_RX_RING];
845
846                         pkt->tag.type = VIO_TYPE_DATA;
847                         pkt->tag.stype = VIO_SUBTYPE_INFO;
848                         pkt->tag.stype_env = VIO_DRING_DATA;
849                         pkt->seq = dr->rcv_nxt;
850                         pkt->start_idx = vio_dring_next(dr,
851                                                         port->napi_stop_idx);
852                         pkt->end_idx = -1;
853                 } else {
854                         err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
855                         if (unlikely(err < 0)) {
856                                 if (err == -ECONNRESET)
857                                         vio_conn_reset(vio);
858                                 break;
859                         }
860                         if (err == 0)
861                                 break;
862                         viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
863                                msgbuf.tag.type,
864                                msgbuf.tag.stype,
865                                msgbuf.tag.stype_env,
866                                msgbuf.tag.sid);
867                         err = vio_validate_sid(vio, &msgbuf.tag);
868                         if (err < 0)
869                                 break;
870                 }
871
872                 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
873                         if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
874                                 if (!sunvnet_port_is_up_common(port)) {
875                                         /* failures like handshake_failure()
876                                          * may have cleaned up dring, but
877                                          * NAPI polling may bring us here.
878                                          */
879                                         err = -ECONNRESET;
880                                         break;
881                                 }
882                                 err = vnet_rx(port, &msgbuf, &npkts, budget);
883                                 if (npkts >= budget)
884                                         break;
885                                 if (npkts == 0)
886                                         break;
887                         } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
888                                 err = vnet_ack(port, &msgbuf);
889                                 if (err > 0)
890                                         tx_wakeup |= err;
891                         } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
892                                 err = vnet_nack(port, &msgbuf);
893                         }
894                 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
895                         if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
896                                 err = handle_mcast(port, &msgbuf);
897                         else
898                                 err = vio_control_pkt_engine(vio, &msgbuf);
899                         if (err)
900                                 break;
901                 } else {
902                         err = vnet_handle_unknown(port, &msgbuf);
903                 }
904                 if (err == -ECONNRESET)
905                         break;
906         }
907         if (unlikely(tx_wakeup && err != -ECONNRESET))
908                 maybe_tx_wakeup(port);
909         return npkts;
910 }
911
912 int sunvnet_poll_common(struct napi_struct *napi, int budget)
913 {
914         struct vnet_port *port = container_of(napi, struct vnet_port, napi);
915         struct vio_driver_state *vio = &port->vio;
916         int processed = vnet_event_napi(port, budget);
917
918         if (processed < budget) {
919                 napi_complete_done(napi, processed);
920                 port->rx_event &= ~LDC_EVENT_DATA_READY;
921                 vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
922         }
923         return processed;
924 }
925 EXPORT_SYMBOL_GPL(sunvnet_poll_common);
926
927 void sunvnet_event_common(void *arg, int event)
928 {
929         struct vnet_port *port = arg;
930         struct vio_driver_state *vio = &port->vio;
931
932         port->rx_event |= event;
933         vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
934         napi_schedule(&port->napi);
935 }
936 EXPORT_SYMBOL_GPL(sunvnet_event_common);
937
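/* Send the "start" DRING_DATA trigger telling the consumer that descriptors
 * beginning at @start are READY.  Any STOPPED ack still owed on the RX ring
 * (port->stop_rx) is flushed first, and the send is retried with bounded
 * exponential backoff on -EAGAIN.
 */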
938 static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
939 {
940         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
941         struct vio_dring_data hdr = {
942                 .tag = {
943                         .type           = VIO_TYPE_DATA,
944                         .stype          = VIO_SUBTYPE_INFO,
945                         .stype_env      = VIO_DRING_DATA,
946                         .sid            = vio_send_sid(&port->vio),
947                 },
948                 .dring_ident            = dr->ident,
949                 .start_idx              = start,
950                 .end_idx                = (u32)-1,
951         };
952         int err, delay;
953         int retries = 0;
954
955         if (port->stop_rx) {
956                 trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
957                                                   port->vio._peer_sid,
958                                                   port->stop_rx_idx, -1);
959                 err = vnet_send_ack(port,
960                                     &port->vio.drings[VIO_DRIVER_RX_RING],
961                                     port->stop_rx_idx, -1,
962                                     VIO_DRING_STOPPED);
963                 if (err <= 0)
964                         return err;
965         }
966
967         hdr.seq = dr->snd_nxt;
968         delay = 1;
969         do {
970                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
971                 if (err > 0) {
972                         dr->snd_nxt++;
973                         break;
974                 }
975                 udelay(delay);
976                 if ((delay <<= 1) > 128)
977                         delay = 128;
978                 if (retries++ > VNET_MAX_RETRIES)
979                         break;
980         } while (err == -EAGAIN);
981         trace_vnet_tx_trigger(port->vio._local_sid,
982                               port->vio._peer_sid, start, err);
983
984         return err;
985 }
986
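/* Walk the TX ring backwards from dr->prod, unmap completed (DONE) buffers
 * and chain their skbs for freeing outside the tx lock, and count descriptors
 * that are still READY in *pending so the cleanup timer can be rearmed.
 */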
987 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
988                                           unsigned *pending)
989 {
990         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
991         struct sk_buff *skb = NULL;
992         int i, txi;
993
994         *pending = 0;
995
996         txi = dr->prod;
997         for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
998                 struct vio_net_desc *d;
999
1000                 --txi;
1001                 if (txi < 0)
1002                         txi = VNET_TX_RING_SIZE - 1;
1003
1004                 d = vio_dring_entry(dr, txi);
1005
1006                 if (d->hdr.state == VIO_DESC_READY) {
1007                         (*pending)++;
1008                         continue;
1009                 }
1010                 if (port->tx_bufs[txi].skb) {
1011                         if (d->hdr.state != VIO_DESC_DONE)
1012                                 pr_notice("invalid ring buffer state %d\n",
1013                                           d->hdr.state);
1014                         BUG_ON(port->tx_bufs[txi].skb->next);
1015
1016                         port->tx_bufs[txi].skb->next = skb;
1017                         skb = port->tx_bufs[txi].skb;
1018                         port->tx_bufs[txi].skb = NULL;
1019
1020                         ldc_unmap(port->vio.lp,
1021                                   port->tx_bufs[txi].cookies,
1022                                   port->tx_bufs[txi].ncookies);
1023                 } else if (d->hdr.state == VIO_DESC_FREE) {
1024                         break;
1025                 }
1026                 d->hdr.state = VIO_DESC_FREE;
1027         }
1028         return skb;
1029 }
1030
1031 static inline void vnet_free_skbs(struct sk_buff *skb)
1032 {
1033         struct sk_buff *next;
1034
1035         while (skb) {
1036                 next = skb->next;
1037                 skb->next = NULL;
1038                 dev_kfree_skb(skb);
1039                 skb = next;
1040         }
1041 }
1042
1043 void sunvnet_clean_timer_expire_common(unsigned long port0)
1044 {
1045         struct vnet_port *port = (struct vnet_port *)port0;
1046         struct sk_buff *freeskbs;
1047         unsigned pending;
1048
1049         netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
1050         freeskbs = vnet_clean_tx_ring(port, &pending);
1051         netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));
1052
1053         vnet_free_skbs(freeskbs);
1054
1055         if (pending)
1056                 (void)mod_timer(&port->clean_timer,
1057                                 jiffies + VNET_CLEAN_TIMEOUT);
1058         else
1059                 del_timer(&port->clean_timer);
1060 }
1061 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);
1062
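/* Map the linear header (padded to at least ETH_ZLEN, plus VNET_PACKET_SKIP,
 * rounded to 8 bytes) and each page fragment into LDC cookies.  Returns the
 * number of cookies used, or a negative error after unmapping on failure.
 */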
1063 static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
1064                                struct ldc_trans_cookie *cookies, int ncookies,
1065                                unsigned int map_perm)
1066 {
1067         int i, nc, err, blen;
1068
1069         /* header */
1070         blen = skb_headlen(skb);
1071         if (blen < ETH_ZLEN)
1072                 blen = ETH_ZLEN;
1073         blen += VNET_PACKET_SKIP;
1074         blen += 8 - (blen & 7);
1075
1076         err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
1077                              ncookies, map_perm);
1078         if (err < 0)
1079                 return err;
1080         nc = err;
1081
1082         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1083                 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1084                 u8 *vaddr;
1085
1086                 if (nc < ncookies) {
1087                         vaddr = kmap_atomic(skb_frag_page(f));
1088                         blen = skb_frag_size(f);
1089                         blen += 8 - (blen & 7);
1090                         err = ldc_map_single(lp, vaddr + f->page_offset,
1091                                              blen, cookies + nc, ncookies - nc,
1092                                              map_perm);
1093                         kunmap_atomic(vaddr);
1094                 } else {
1095                         err = -EMSGSIZE;
1096                 }
1097
1098                 if (err < 0) {
1099                         ldc_unmap(lp, cookies, nc);
1100                         return err;
1101                 }
1102                 nc += err;
1103         }
1104         return nc;
1105 }
1106
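/* Ensure the skb satisfies the LDC copy constraints: correctly aligned
 * skb->data, enough head/tail room, 8-byte-aligned fragments, and no more
 * fragments than available cookies.  Otherwise copy into a freshly aligned
 * skb, folding the checksum computation into the copy when the original was
 * CHECKSUM_PARTIAL.
 */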
1107 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
1108 {
1109         struct sk_buff *nskb;
1110         int i, len, pad, docopy;
1111
1112         len = skb->len;
1113         pad = 0;
1114         if (len < ETH_ZLEN) {
1115                 pad += ETH_ZLEN - skb->len;
1116                 len += pad;
1117         }
1118         len += VNET_PACKET_SKIP;
1119         pad += 8 - (len & 7);
1120
1121         /* make sure we have enough cookies and alignment in every frag */
1122         docopy = skb_shinfo(skb)->nr_frags >= ncookies;
1123         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1124                 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1125
1126                 docopy |= f->page_offset & 7;
1127         }
1128         if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
1129             skb_tailroom(skb) < pad ||
1130             skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
1131                 int start = 0, offset;
1132                 __wsum csum;
1133
1134                 len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
1135                 nskb = alloc_and_align_skb(skb->dev, len);
1136                 if (!nskb) {
1137                         dev_kfree_skb(skb);
1138                         return NULL;
1139                 }
1140                 skb_reserve(nskb, VNET_PACKET_SKIP);
1141
1142                 nskb->protocol = skb->protocol;
1143                 offset = skb_mac_header(skb) - skb->data;
1144                 skb_set_mac_header(nskb, offset);
1145                 offset = skb_network_header(skb) - skb->data;
1146                 skb_set_network_header(nskb, offset);
1147                 offset = skb_transport_header(skb) - skb->data;
1148                 skb_set_transport_header(nskb, offset);
1149
1150                 offset = 0;
1151                 nskb->csum_offset = skb->csum_offset;
1152                 nskb->ip_summed = skb->ip_summed;
1153
1154                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1155                         start = skb_checksum_start_offset(skb);
1156                 if (start) {
1157                         int offset = start + nskb->csum_offset;
1158
1159                         /* copy the headers, no csum here */
1160                         if (skb_copy_bits(skb, 0, nskb->data, start)) {
1161                                 dev_kfree_skb(nskb);
1162                                 dev_kfree_skb(skb);
1163                                 return NULL;
1164                         }
1165
1166                         /* copy the rest, with csum calculation */
1167                         *(__sum16 *)(skb->data + offset) = 0;
1168                         csum = skb_copy_and_csum_bits(skb, start,
1169                                                       nskb->data + start,
1170                                                       skb->len - start, 0);
1171
1172                         /* add in the header checksums */
1173                         if (skb->protocol == htons(ETH_P_IP)) {
1174                                 struct iphdr *iph = ip_hdr(nskb);
1175
1176                                 if (iph->protocol == IPPROTO_TCP ||
1177                                     iph->protocol == IPPROTO_UDP) {
1178                                         csum = csum_tcpudp_magic(iph->saddr,
1179                                                                  iph->daddr,
1180                                                                  skb->len - start,
1181                                                                  iph->protocol,
1182                                                                  csum);
1183                                 }
1184                         } else if (skb->protocol == htons(ETH_P_IPV6)) {
1185                                 struct ipv6hdr *ip6h = ipv6_hdr(nskb);
1186
1187                                 if (ip6h->nexthdr == IPPROTO_TCP ||
1188                                     ip6h->nexthdr == IPPROTO_UDP) {
1189                                         csum = csum_ipv6_magic(&ip6h->saddr,
1190                                                                &ip6h->daddr,
1191                                                                skb->len - start,
1192                                                                ip6h->nexthdr,
1193                                                                csum);
1194                                 }
1195                         }
1196
1197                         /* save the final result */
1198                         *(__sum16 *)(nskb->data + offset) = csum;
1199
1200                         nskb->ip_summed = CHECKSUM_NONE;
1201                 } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
1202                         dev_kfree_skb(nskb);
1203                         dev_kfree_skb(skb);
1204                         return NULL;
1205                 }
1206                 (void)skb_put(nskb, skb->len);
1207                 if (skb_is_gso(skb)) {
1208                         skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
1209                         skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
1210                 }
1211                 nskb->queue_mapping = skb->queue_mapping;
1212                 dev_kfree_skb(skb);
1213                 skb = nskb;
1214         }
1215         return skb;
1216 }
1217
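/* Software GSO path for packets larger than the negotiated TSO limit:
 * re-segment the skb into port->tsolen-sized pieces (minus headers) and
 * submit each segment through sunvnet_start_xmit_common().
 */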
1218 static netdev_tx_t
1219 vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
1220                      struct vnet_port *(*vnet_tx_port)
1221                      (struct sk_buff *, struct net_device *))
1222 {
1223         struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
1224         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1225         struct sk_buff *segs;
1226         int maclen, datalen;
1227         int status;
1228         int gso_size, gso_type, gso_segs;
1229         int hlen = skb_transport_header(skb) - skb_mac_header(skb);
1230         int proto = IPPROTO_IP;
1231
1232         if (skb->protocol == htons(ETH_P_IP))
1233                 proto = ip_hdr(skb)->protocol;
1234         else if (skb->protocol == htons(ETH_P_IPV6))
1235                 proto = ipv6_hdr(skb)->nexthdr;
1236
1237         if (proto == IPPROTO_TCP) {
1238                 hlen += tcp_hdr(skb)->doff * 4;
1239         } else if (proto == IPPROTO_UDP) {
1240                 hlen += sizeof(struct udphdr);
1241         } else {
1242                 pr_err("vnet_handle_offloads GSO with unknown transport "
1243                        "protocol %d tproto %d\n", skb->protocol, proto);
1244                 hlen = 128; /* XXX */
1245         }
1246         datalen = port->tsolen - hlen;
1247
1248         gso_size = skb_shinfo(skb)->gso_size;
1249         gso_type = skb_shinfo(skb)->gso_type;
1250         gso_segs = skb_shinfo(skb)->gso_segs;
1251
1252         if (port->tso && gso_size < datalen)
1253                 gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);
1254
1255         if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
1256                 struct netdev_queue *txq;
1257
1258                 txq  = netdev_get_tx_queue(dev, port->q_index);
1259                 netif_tx_stop_queue(txq);
1260                 if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
1261                         return NETDEV_TX_BUSY;
1262                 netif_tx_wake_queue(txq);
1263         }
1264
1265         maclen = skb_network_header(skb) - skb_mac_header(skb);
1266         skb_pull(skb, maclen);
1267
1268         if (port->tso && gso_size < datalen) {
1269                 if (skb_unclone(skb, GFP_ATOMIC))
1270                         goto out_dropped;
1271
1272                 /* segment to TSO size */
1273                 skb_shinfo(skb)->gso_size = datalen;
1274                 skb_shinfo(skb)->gso_segs = gso_segs;
1275         }
1276         segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
1277         if (IS_ERR(segs))
1278                 goto out_dropped;
1279
1280         skb_push(skb, maclen);
1281         skb_reset_mac_header(skb);
1282
1283         status = 0;
1284         while (segs) {
1285                 struct sk_buff *curr = segs;
1286
1287                 segs = segs->next;
1288                 curr->next = NULL;
1289                 if (port->tso && curr->len > dev->mtu) {
1290                         skb_shinfo(curr)->gso_size = gso_size;
1291                         skb_shinfo(curr)->gso_type = gso_type;
1292                         skb_shinfo(curr)->gso_segs =
1293                                 DIV_ROUND_UP(curr->len - hlen, gso_size);
1294                 } else {
1295                         skb_shinfo(curr)->gso_size = 0;
1296                 }
1297
1298                 skb_push(curr, maclen);
1299                 skb_reset_mac_header(curr);
1300                 memcpy(skb_mac_header(curr), skb_mac_header(skb),
1301                        maclen);
1302                 curr->csum_start = skb_transport_header(curr) - curr->head;
1303                 if (ip_hdr(curr)->protocol == IPPROTO_TCP)
1304                         curr->csum_offset = offsetof(struct tcphdr, check);
1305                 else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
1306                         curr->csum_offset = offsetof(struct udphdr, check);
1307
1308                 if (!(status & NETDEV_TX_MASK))
1309                         status = sunvnet_start_xmit_common(curr, dev,
1310                                                            vnet_tx_port);
1311                 if (status & NETDEV_TX_MASK)
1312                         dev_kfree_skb_any(curr);
1313         }
1314
1315         if (!(status & NETDEV_TX_MASK))
1316                 dev_kfree_skb_any(skb);
1317         return status;
1318 out_dropped:
1319         dev->stats.tx_dropped++;
1320         dev_kfree_skb_any(skb);
1321         return NETDEV_TX_OK;
1322 }
1323
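/* Common transmit entry point: pick the destination port under RCU, hand
 * oversized GSO packets to vnet_handle_offloads(), bounce oversized non-GSO
 * packets with an ICMP/ICMPv6 "packet too big" notification, shape and
 * checksum the skb, map it into the next TX descriptor, and mark it READY
 * behind a dma_wmb() before (possibly) sending the "start" trigger.
 */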
1324 netdev_tx_t
1325 sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
1326                           struct vnet_port *(*vnet_tx_port)
1327                           (struct sk_buff *, struct net_device *))
1328 {
1329         struct vnet_port *port = NULL;
1330         struct vio_dring_state *dr;
1331         struct vio_net_desc *d;
1332         unsigned int len;
1333         struct sk_buff *freeskbs = NULL;
1334         int i, err, txi;
1335         unsigned pending = 0;
1336         struct netdev_queue *txq;
1337
1338         rcu_read_lock();
1339         port = vnet_tx_port(skb, dev);
1340         if (unlikely(!port))
1341                 goto out_dropped;
1342
1343         if (skb_is_gso(skb) && skb->len > port->tsolen) {
1344                 err = vnet_handle_offloads(port, skb, vnet_tx_port);
1345                 rcu_read_unlock();
1346                 return err;
1347         }
1348
1349         if (!skb_is_gso(skb) && skb->len > port->rmtu) {
1350                 unsigned long localmtu = port->rmtu - ETH_HLEN;
1351
1352                 if (vio_version_after_eq(&port->vio, 1, 3))
1353                         localmtu -= VLAN_HLEN;
1354
1355                 if (skb->protocol == htons(ETH_P_IP))
1356                         icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
1357                                       htonl(localmtu));
1358 #if IS_ENABLED(CONFIG_IPV6)
1359                 else if (skb->protocol == htons(ETH_P_IPV6))
1360                         icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
1361 #endif
1362                 goto out_dropped;
1363         }
1364
1365         skb = vnet_skb_shape(skb, 2);
1366
1367         if (unlikely(!skb))
1368                 goto out_dropped;
1369
1370         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1371                 if (skb->protocol == htons(ETH_P_IP))
1372                         vnet_fullcsum_ipv4(skb);
1373 #if IS_ENABLED(CONFIG_IPV6)
1374                 else if (skb->protocol == htons(ETH_P_IPV6))
1375                         vnet_fullcsum_ipv6(skb);
1376 #endif
1377         }
1378
1379         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1380         i = skb_get_queue_mapping(skb);
1381         txq = netdev_get_tx_queue(dev, i);
1382         if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1383                 if (!netif_tx_queue_stopped(txq)) {
1384                         netif_tx_stop_queue(txq);
1385
1386                         /* This is a hard error, log it. */
1387                         netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
1388                         dev->stats.tx_errors++;
1389                 }
1390                 rcu_read_unlock();
1391                 return NETDEV_TX_BUSY;
1392         }
1393
1394         d = vio_dring_cur(dr);
1395
1396         txi = dr->prod;
1397
1398         freeskbs = vnet_clean_tx_ring(port, &pending);
1399
1400         BUG_ON(port->tx_bufs[txi].skb);
1401
1402         len = skb->len;
1403         if (len < ETH_ZLEN)
1404                 len = ETH_ZLEN;
1405
1406         err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
1407                            (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
1408         if (err < 0) {
1409                 netdev_info(dev, "tx buffer map error %d\n", err);
1410                 goto out_dropped;
1411         }
1412
1413         port->tx_bufs[txi].skb = skb;
1414         skb = NULL;
1415         port->tx_bufs[txi].ncookies = err;
1416
1417         /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1418          * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1419          * the protocol itself does not require it as long as the peer
1420          * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1421          *
1422          * An ACK for every packet in the ring is expensive as the
1423          * sending of LDC messages is slow and affects performance.
1424          */
1425         d->hdr.ack = VIO_ACK_DISABLE;
1426         d->size = len;
1427         d->ncookies = port->tx_bufs[txi].ncookies;
1428         for (i = 0; i < d->ncookies; i++)
1429                 d->cookies[i] = port->tx_bufs[txi].cookies[i];
1430         if (vio_version_after_eq(&port->vio, 1, 7)) {
1431                 struct vio_net_dext *dext = vio_net_ext(d);
1432
1433                 memset(dext, 0, sizeof(*dext));
1434                 if (skb_is_gso(port->tx_bufs[txi].skb)) {
1435                         dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
1436                                              ->gso_size;
1437                         dext->flags |= VNET_PKT_IPV4_LSO;
1438                 }
1439                 if (vio_version_after_eq(&port->vio, 1, 8) &&
1440                     !port->switch_port) {
1441                         dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
1442                         dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
1443                 }
1444         }
1445
1446         /* This has to be a non-SMP write barrier because we are writing
1447          * to memory which is shared with the peer LDOM.
1448          */
1449         dma_wmb();
1450
1451         d->hdr.state = VIO_DESC_READY;
1452
1453         /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1454          * to notify the consumer that some descriptors are READY.
1455          * After that "start" trigger, no additional triggers are needed until
1456          * a DRING_STOPPED is received from the consumer. The dr->cons field
1457          * (set up by vnet_ack()) has the value of the next dring index
1458          * that has not yet been ack-ed. We send a "start" trigger here
1459          * if, and only if, start_cons is true (reset it afterward). Conversely,
1460          * vnet_ack() should check if the dring corresponding to cons
1461          * is marked READY, but start_cons was false.
1462          * If so, vnet_ack() should send out the missed "start" trigger.
1463          *
1464          * Note that the dma_wmb() above makes sure the cookies et al. are
1465          * not globally visible before the VIO_DESC_READY, and that the
1466          * stores are ordered correctly by the compiler. The consumer will
1467          * not proceed until the VIO_DESC_READY is visible, assuring that
1468          * the consumer does not observe anything related to descriptors
1469          * out of order. The HV trap from the LDC start trigger is the
1470          * producer-to-consumer announcement that work is available to
1471          * the consumer.
1472          */
1473         if (!port->start_cons) { /* previous trigger suffices */
1474                 trace_vnet_skip_tx_trigger(port->vio._local_sid,
1475                                            port->vio._peer_sid, dr->cons);
1476                 goto ldc_start_done;
1477         }
1478
1479         err = __vnet_tx_trigger(port, dr->cons);
1480         if (unlikely(err < 0)) {
1481                 netdev_info(dev, "TX trigger error %d\n", err);
1482                 d->hdr.state = VIO_DESC_FREE;
1483                 skb = port->tx_bufs[txi].skb;
1484                 port->tx_bufs[txi].skb = NULL;
1485                 dev->stats.tx_carrier_errors++;
1486                 goto out_dropped;
1487         }
1488
1489 ldc_start_done:
1490         port->start_cons = false;
1491
1492         dev->stats.tx_packets++;
1493         dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1494         port->stats.tx_packets++;
1495         port->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1496
1497         dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
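             /* If that filled the ring, stop the queue, then re-check after
              * the barrier so a wakeup is not missed if descriptors were
              * freed in the meantime.
              */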
1498         if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1499                 netif_tx_stop_queue(txq);
1500                 smp_rmb();
1501                 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
1502                         netif_tx_wake_queue(txq);
1503         }
1504
1505         (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
1506         rcu_read_unlock();
1507
1508         vnet_free_skbs(freeskbs);
1509
1510         return NETDEV_TX_OK;
1511
1512 out_dropped:
1513         if (pending)
1514                 (void)mod_timer(&port->clean_timer,
1515                                 jiffies + VNET_CLEAN_TIMEOUT);
1516         else if (port)
1517                 del_timer(&port->clean_timer);
1518         rcu_read_unlock();
1519         if (skb)
1520                 dev_kfree_skb(skb);
1521         vnet_free_skbs(freeskbs);
1522         dev->stats.tx_dropped++;
1523         return NETDEV_TX_OK;
1524 }
1525 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);
1526
1527 void sunvnet_tx_timeout_common(struct net_device *dev)
1528 {
1529         /* XXX Implement me XXX */
1530 }
1531 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);
1532
1533 int sunvnet_open_common(struct net_device *dev)
1534 {
1535         netif_carrier_on(dev);
1536         netif_tx_start_all_queues(dev);
1537
1538         return 0;
1539 }
1540 EXPORT_SYMBOL_GPL(sunvnet_open_common);
1541
1542 int sunvnet_close_common(struct net_device *dev)
1543 {
1544         netif_tx_stop_all_queues(dev);
1545         netif_carrier_off(dev);
1546
1547         return 0;
1548 }
1549 EXPORT_SYMBOL_GPL(sunvnet_close_common);
1550
1551 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
1552 {
1553         struct vnet_mcast_entry *m;
1554
1555         for (m = vp->mcast_list; m; m = m->next) {
1556                 if (ether_addr_equal(m->addr, addr))
1557                         return m;
1558         }
1559         return NULL;
1560 }
1561
1562 static void __update_mc_list(struct vnet *vp, struct net_device *dev)
1563 {
1564         struct netdev_hw_addr *ha;
1565
1566         netdev_for_each_mc_addr(ha, dev) {
1567                 struct vnet_mcast_entry *m;
1568
1569                 m = __vnet_mc_find(vp, ha->addr);
1570                 if (m) {
1571                         m->hit = 1;
1572                         continue;
1573                 }
1574
1575                 m = kzalloc(sizeof(*m), GFP_ATOMIC);
1576                 if (!m)
1577                         continue;
1578                 memcpy(m->addr, ha->addr, ETH_ALEN);
1579                 m->hit = 1;
1580
1581                 m->next = vp->mcast_list;
1582                 vp->mcast_list = m;
1585         }
1586 }
1587
1588 static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
1589 {
1590         struct vio_net_mcast_info info;
1591         struct vnet_mcast_entry *m, **pp;
1592         int n_addrs;
1593
1594         memset(&info, 0, sizeof(info));
1595
1596         info.tag.type = VIO_TYPE_CTRL;
1597         info.tag.stype = VIO_SUBTYPE_INFO;
1598         info.tag.stype_env = VNET_MCAST_INFO;
1599         info.tag.sid = vio_send_sid(&port->vio);
1600         info.set = 1;
1601
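             /* First pass: advertise (set = 1) addresses not yet sent,
              * batching up to VNET_NUM_MCAST entries per message.
              */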
1602         n_addrs = 0;
1603         for (m = vp->mcast_list; m; m = m->next) {
1604                 if (m->sent)
1605                         continue;
1606                 m->sent = 1;
1607                 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1608                        m->addr, ETH_ALEN);
1609                 if (++n_addrs == VNET_NUM_MCAST) {
1610                         info.count = n_addrs;
1611
1612                         (void)vio_ldc_send(&port->vio, &info,
1613                                            sizeof(info));
1614                         n_addrs = 0;
1615                 }
1616         }
1617         if (n_addrs) {
1618                 info.count = n_addrs;
1619                 (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1620         }
1621
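             /* Second pass: advertise removal (set = 0) of entries that were
              * not marked hit by __update_mc_list() and unlink them.
              */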
1622         info.set = 0;
1623
1624         n_addrs = 0;
1625         pp = &vp->mcast_list;
1626         while ((m = *pp) != NULL) {
1627                 if (m->hit) {
1628                         m->hit = 0;
1629                         pp = &m->next;
1630                         continue;
1631                 }
1632
1633                 memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1634                        m->addr, ETH_ALEN);
1635                 if (++n_addrs == VNET_NUM_MCAST) {
1636                         info.count = n_addrs;
1637                         (void)vio_ldc_send(&port->vio, &info,
1638                                            sizeof(info));
1639                         n_addrs = 0;
1640                 }
1641
1642                 *pp = m->next;
1643                 kfree(m);
1644         }
1645         if (n_addrs) {
1646                 info.count = n_addrs;
1647                 (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1648         }
1649 }
1650
1651 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
1652 {
1653         struct vnet_port *port;
1654
1655         rcu_read_lock();
1656         list_for_each_entry_rcu(port, &vp->port_list, list) {
1657                 if (port->switch_port) {
1658                         __update_mc_list(vp, dev);
1659                         __send_mc_list(vp, port);
1660                         break;
1661                 }
1662         }
1663         rcu_read_unlock();
1664 }
1665 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);
1666
1667 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
1668 {
1669         return -EINVAL;
1670 }
1671 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);
1672
1673 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
1674 {
1675         struct vio_dring_state *dr;
1676         int i;
1677
1678         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1679
1680         if (!dr->base)
1681                 return;
1682
1683         for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1684                 struct vio_net_desc *d;
1685                 void *skb = port->tx_bufs[i].skb;
1686
1687                 if (!skb)
1688                         continue;
1689
1690                 d = vio_dring_entry(dr, i);
1691
1692                 ldc_unmap(port->vio.lp,
1693                           port->tx_bufs[i].cookies,
1694                           port->tx_bufs[i].ncookies);
1695                 dev_kfree_skb(skb);
1696                 port->tx_bufs[i].skb = NULL;
1697                 d->hdr.state = VIO_DESC_FREE;
1698         }
1699         ldc_free_exp_dring(port->vio.lp, dr->base,
1700                            (dr->entry_size * dr->num_entries),
1701                            dr->cookies, dr->ncookies);
1702         dr->base = NULL;
1703         dr->entry_size = 0;
1704         dr->num_entries = 0;
1705         dr->pending = 0;
1706         dr->ncookies = 0;
1707 }
1708 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);
1709
1710 void vnet_port_reset(struct vnet_port *port)
1711 {
1712         del_timer(&port->clean_timer);
1713         sunvnet_port_free_tx_bufs_common(port);
1714         port->rmtu = 0;
1715         port->tso = (port->vsw == 0);  /* no TSO in vsw; it misbehaves in a bridge */
1716         port->tsolen = 0;
1717 }
1718 EXPORT_SYMBOL_GPL(vnet_port_reset);
1719
1720 static int vnet_port_alloc_tx_ring(struct vnet_port *port)
1721 {
1722         struct vio_dring_state *dr;
1723         unsigned long len, elen;
1724         int i, err, ncookies;
1725         void *dring;
1726
1727         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1728
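             /* Each ring entry holds the descriptor, room for two transfer
              * cookies and, from v1.7 on, the descriptor extension.
              */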
1729         elen = sizeof(struct vio_net_desc) +
1730                sizeof(struct ldc_trans_cookie) * 2;
1731         if (vio_version_after_eq(&port->vio, 1, 7))
1732                 elen += sizeof(struct vio_net_dext);
1733         len = VNET_TX_RING_SIZE * elen;
1734
1735         ncookies = VIO_MAX_RING_COOKIES;
1736         dring = ldc_alloc_exp_dring(port->vio.lp, len,
1737                                     dr->cookies, &ncookies,
1738                                     (LDC_MAP_SHADOW |
1739                                      LDC_MAP_DIRECT |
1740                                      LDC_MAP_RW));
1741         if (IS_ERR(dring)) {
1742                 err = PTR_ERR(dring);
1743                 goto err_out;
1744         }
1745
1746         dr->base = dring;
1747         dr->entry_size = elen;
1748         dr->num_entries = VNET_TX_RING_SIZE;
1749         dr->prod = 0;
1750         dr->cons = 0;
1751         port->start_cons = true; /* need an initial trigger */
1752         dr->pending = VNET_TX_RING_SIZE;
1753         dr->ncookies = ncookies;
1754
1755         for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
1756                 struct vio_net_desc *d;
1757
1758                 d = vio_dring_entry(dr, i);
1759                 d->hdr.state = VIO_DESC_FREE;
1760         }
1761         return 0;
1762
1763 err_out:
1764         sunvnet_port_free_tx_bufs_common(port);
1765
1766         return err;
1767 }
1768
1769 #ifdef CONFIG_NET_POLL_CONTROLLER
1770 void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
1771 {
1772         struct vnet_port *port;
1773         unsigned long flags;
1774
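             /* Schedule NAPI on the first port in the list to poll for any
              * pending work.
              */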
1775         spin_lock_irqsave(&vp->lock, flags);
1776         if (!list_empty(&vp->port_list)) {
1777                 port = list_entry(vp->port_list.next, struct vnet_port, list);
1778                 napi_schedule(&port->napi);
1779         }
1780         spin_unlock_irqrestore(&vp->lock, flags);
1781 }
1782 EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
1783 #endif
1784
1785 void sunvnet_port_add_txq_common(struct vnet_port *port)
1786 {
1787         struct vnet *vp = port->vp;
1788         int smallest = 0;
1789         int i;
1790
1791         /* Find the first least-used queue.
1792          * When there are more LDOMs than queues, we start to
1793          * double up on ports per queue.
1794          */
1795         for (i = 0; i < VNET_MAX_TXQS; i++) {
1796                 if (vp->q_used[i] == 0) {
1797                         smallest = i;
1798                         break;
1799                 }
1800                 if (vp->q_used[i] < vp->q_used[smallest])
1801                         smallest = i;
1802         }
1803
1804         vp->nports++;
1805         vp->q_used[smallest]++;
1806         port->q_index = smallest;
1807 }
1808 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);
1809
1810 void sunvnet_port_rm_txq_common(struct vnet_port *port)
1811 {
1812         port->vp->nports--;
1813         port->vp->q_used[port->q_index]--;
1814         port->q_index = 0;
1815 }
1816 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);