GNU Linux-libre 4.14.290-gnu1
[releases.git] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 #include <net/dst_metadata.h>
51 #include <net/erspan.h>
52
53 /*
54    Problems & solutions
55    --------------------
56
57    1. The most important issue is detecting local dead loops.
58    They would cause a complete host lockup in transmit, which
59    would be "resolved" by a stack overflow or, if queueing is enabled,
60    by infinite looping in net_bh.
61
62    We cannot track such dead loops during route installation;
63    it is an infeasible task. The most general solution would be
64    to keep an skb->encapsulation counter (a sort of local ttl)
65    and silently drop the packet when it expires. It is a good
66    solution, but it presupposes maintaining a new variable in ALL
67    skbs, even when no tunneling is used.
68
69    Current solution: xmit_recursion breaks dead loops. This is a percpu
70    counter, since when we enter the first ndo_xmit(), cpu migration is
71    forbidden. We force an exit if this counter reaches RECURSION_LIMIT.
72
73    2. Networking dead loops would not kill routers, but they would really
74    kill the network. The IP hop limit plays the role of "t->recursion" here,
75    if we copy it from the packet being encapsulated to the upper header.
76    It is a very good solution, but it introduces two problems:
77
78    - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
79      do not work over tunnels.
80    - traceroute does not work. I planned to relay ICMP from the tunnel,
81      so that this problem would be solved and traceroute output
82      would be even more informative. This idea appeared to be wrong:
83      only Linux complies with rfc1812 now (yes, guys, Linux is the only
84      true router now :-)); all routers (at least, in my neighbourhood)
85      return only 8 bytes of payload. It is the end.
86
87    Hence, if we want OSPF to work or traceroute to say something
88    reasonable, we should search for another solution.
89
90    One of them is to parse the packet, trying to detect an inner
91    encapsulation made by our node. That is difficult or even impossible,
92    especially taking fragmentation into account. In short, ttl is no solution at all.
93
94    Current solution: the solution was UNEXPECTEDLY SIMPLE.
95    We force the DF flag on tunnels with a preconfigured hop limit,
96    and that is ALL. :-) Well, it does not remove the problem completely,
97    but the exponential growth of network traffic is changed to linear
98    (branches that exceed the pmtu are pruned) and the tunnel mtu
99    rapidly degrades to a value < 68, where the looping stops.
100    Yes, it is not good if there exists a router in the loop
101    which does not force DF even when the packets being encapsulated have DF set.
102    But it is not our problem! Nobody could accuse us; we did
103    all that we could do. Even if it was your gated that injected
104    the fatal route into the network, even if it was you who configured
105    the fatal static route: you are innocent. :-)
106
107    Alexey Kuznetsov.
108  */
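
/*
 * For reference, a minimal point-to-point GRE setup with iproute2 looks
 * roughly like this (illustrative sketch only; addresses are RFC 5737
 * placeholders). Note that giving the tunnel a ttl (a "preconfigured hop
 * limit" in the text above) makes it force DF on the outer header:
 *
 *     ip tunnel add gre1 mode gre local 192.0.2.1 remote 192.0.2.2 ttl 64
 *     ip link set gre1 up
 *     ip addr add 10.1.1.1/30 dev gre1
 */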
109
110 static bool log_ecn_error = true;
111 module_param(log_ecn_error, bool, 0644);
112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
113
114 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
115 static int ipgre_tunnel_init(struct net_device *dev);
116 static void erspan_build_header(struct sk_buff *skb,
117                                 __be32 id, u32 index, bool truncate);
118
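/* One per-netns tunnel table for each device type: plain layer-3 gre
 * devices, gretap (Ethernet over GRE) devices and erspan devices are
 * kept and looked up separately.
 */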
119 static unsigned int ipgre_net_id __read_mostly;
120 static unsigned int gre_tap_net_id __read_mostly;
121 static unsigned int erspan_net_id __read_mostly;
122
123 static void ipgre_err(struct sk_buff *skb, u32 info,
124                       const struct tnl_ptk_info *tpi)
125 {
126
127         /* All the routers (except for Linux) return only
128            8 bytes of packet payload. It means that precise relaying of
129            ICMP in the real Internet is absolutely infeasible.
130
131            Moreover, Cisco "wise men" put the GRE key in the third word
132            of the GRE header. It makes it impossible to maintain even
133            soft state for keyed GRE tunnels with checksums enabled.
134            Tell them "thank you".
135
136            Well, I wonder: rfc1812 was written by a Cisco employee,
137            so why the hell do these idiots break standards established
138            by themselves???
139            */
140         struct net *net = dev_net(skb->dev);
141         struct ip_tunnel_net *itn;
142         const struct iphdr *iph;
143         const int type = icmp_hdr(skb)->type;
144         const int code = icmp_hdr(skb)->code;
145         unsigned int data_len = 0;
146         struct ip_tunnel *t;
147
148         switch (type) {
149         default:
150         case ICMP_PARAMETERPROB:
151                 return;
152
153         case ICMP_DEST_UNREACH:
154                 switch (code) {
155                 case ICMP_SR_FAILED:
156                 case ICMP_PORT_UNREACH:
157                         /* Impossible event. */
158                         return;
159                 default:
160                 /* All others are translated to HOST_UNREACH.
161                    rfc2003 contains "deep thoughts" about NET_UNREACH;
162                    I believe they are just ether pollution. --ANK
163                  */
164                         break;
165                 }
166                 break;
167
168         case ICMP_TIME_EXCEEDED:
169                 if (code != ICMP_EXC_TTL)
170                         return;
171                 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
172                 break;
173
174         case ICMP_REDIRECT:
175                 break;
176         }
177
178         if (tpi->proto == htons(ETH_P_TEB))
179                 itn = net_generic(net, gre_tap_net_id);
180         else if (tpi->proto == htons(ETH_P_ERSPAN))
181                 itn = net_generic(net, erspan_net_id);
182         else
183                 itn = net_generic(net, ipgre_net_id);
184
185         iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
186         t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
187                              iph->daddr, iph->saddr, tpi->key);
188
189         if (!t)
190                 return;
191
192 #if IS_ENABLED(CONFIG_IPV6)
193        if (tpi->proto == htons(ETH_P_IPV6) &&
194            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
195                                        type, data_len))
196                return;
197 #endif
198
199         if (t->parms.iph.daddr == 0 ||
200             ipv4_is_multicast(t->parms.iph.daddr))
201                 return;
202
203         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
204                 return;
205
206         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
207                 t->err_count++;
208         else
209                 t->err_count = 1;
210         t->err_time = jiffies;
211 }
212
213 static void gre_err(struct sk_buff *skb, u32 info)
214 {
215         /* All the routers (except for Linux) return only
216          * 8 bytes of packet payload. It means that precise relaying of
217          * ICMP in the real Internet is absolutely infeasible.
218          *
219          * Moreover, Cisco "wise men" put the GRE key in the third word
220          * of the GRE header. It makes it impossible to maintain even
221          * soft state for keyed GRE tunnels with checksums enabled.
222          * Tell them "thank you".
223          *
224          * Well, I wonder: rfc1812 was written by a Cisco employee,
225          * so why the hell do these idiots break standards established
226          * by themselves???
227          */
228
229         const struct iphdr *iph = (struct iphdr *)skb->data;
230         const int type = icmp_hdr(skb)->type;
231         const int code = icmp_hdr(skb)->code;
232         struct tnl_ptk_info tpi;
233
234         if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
235                              iph->ihl * 4) < 0)
236                 return;
237
238         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
239                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
240                                  skb->dev->ifindex, 0, IPPROTO_GRE, 0);
241                 return;
242         }
243         if (type == ICMP_REDIRECT) {
244                 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
245                               IPPROTO_GRE, 0);
246                 return;
247         }
248
249         ipgre_err(skb, info, &tpi);
250 }
251
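/* ERSPAN (type II) packets arrive as GRE protocol ETH_P_ERSPAN with a
 * sequence number present (hence the fixed 8-byte GRE header), followed
 * by the 8-byte ERSPAN header parsed below. Roughly (see struct
 * erspanhdr and the ID_MASK/INDEX_MASK definitions):
 *
 *   | Ver(4) |   VLAN(12)   | COS(3) | En(2) | T(1) | Session ID(10) |
 *   |       Reserved(12)    |              Index(20)                 |
 */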
252 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
253                       int gre_hdr_len)
254 {
255         struct net *net = dev_net(skb->dev);
256         struct metadata_dst *tun_dst = NULL;
257         struct ip_tunnel_net *itn;
258         struct ip_tunnel *tunnel;
259         struct erspanhdr *ershdr;
260         const struct iphdr *iph;
261         __be32 index;
262         int len;
263
264         itn = net_generic(net, erspan_net_id);
265         len = gre_hdr_len + sizeof(*ershdr);
266
267         if (unlikely(!pskb_may_pull(skb, len)))
268                 return -ENOMEM;
269
270         iph = ip_hdr(skb);
271         ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
272
273         /* The original GRE header does not have a key field,
274          * so use the ERSPAN 10-bit session ID as the key.
275          */
276         tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
277         index = ershdr->md.index;
278         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
279                                   tpi->flags | TUNNEL_KEY,
280                                   iph->saddr, iph->daddr, tpi->key);
281
282         if (tunnel) {
283                 if (__iptunnel_pull_header(skb,
284                                            gre_hdr_len + sizeof(*ershdr),
285                                            htons(ETH_P_TEB),
286                                            false, false) < 0)
287                         goto drop;
288
289                 if (tunnel->collect_md) {
290                         struct ip_tunnel_info *info;
291                         struct erspan_metadata *md;
292                         __be64 tun_id;
293                         __be16 flags;
294
295                         tpi->flags |= TUNNEL_KEY;
296                         flags = tpi->flags;
297                         tun_id = key32_to_tunnel_id(tpi->key);
298
299                         tun_dst = ip_tun_rx_dst(skb, flags,
300                                                 tun_id, sizeof(*md));
301                         if (!tun_dst)
302                                 return PACKET_REJECT;
303
304                         md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
305                         if (!md) {
306                                 dst_release((struct dst_entry *)tun_dst);
307                                 return PACKET_REJECT;
308                         }
309
310                         md->index = index;
311                         info = &tun_dst->u.tun_info;
312                         info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
313                         info->options_len = sizeof(*md);
314                 } else {
315                         tunnel->index = ntohl(index);
316                 }
317
318                 skb_reset_mac_header(skb);
319                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
320                 return PACKET_RCVD;
321         }
322         return PACKET_REJECT;
323
324 drop:
325         kfree_skb(skb);
326         return PACKET_RCVD;
327 }
328
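/* Try to match the parsed packet against a tunnel in @itn. Returns
 * PACKET_RCVD when the skb was consumed (delivered or dropped),
 * PACKET_REJECT when a metadata dst could not be allocated, and
 * PACKET_NEXT when no tunnel matched, so the caller may try another
 * tunnel table.
 */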
329 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
330                        struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
331 {
332         struct metadata_dst *tun_dst = NULL;
333         const struct iphdr *iph;
334         struct ip_tunnel *tunnel;
335
336         iph = ip_hdr(skb);
337         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
338                                   iph->saddr, iph->daddr, tpi->key);
339
340         if (tunnel) {
341                 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
342                                            raw_proto, false) < 0)
343                         goto drop;
344
345                 if (tunnel->dev->type != ARPHRD_NONE)
346                         skb_pop_mac_header(skb);
347                 else
348                         skb_reset_mac_header(skb);
349                 if (tunnel->collect_md) {
350                         __be16 flags;
351                         __be64 tun_id;
352
353                         flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
354                         tun_id = key32_to_tunnel_id(tpi->key);
355                         tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
356                         if (!tun_dst)
357                                 return PACKET_REJECT;
358                 }
359
360                 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
361                 return PACKET_RCVD;
362         }
363         return PACKET_NEXT;
364
365 drop:
366         kfree_skb(skb);
367         return PACKET_RCVD;
368 }
369
370 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
371                      int hdr_len)
372 {
373         struct net *net = dev_net(skb->dev);
374         struct ip_tunnel_net *itn;
375         int res;
376
377         if (tpi->proto == htons(ETH_P_TEB))
378                 itn = net_generic(net, gre_tap_net_id);
379         else
380                 itn = net_generic(net, ipgre_net_id);
381
382         res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
383         if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
384                 /* ipgre tunnels in collect-metadata mode should also
385                  * receive ETH_P_TEB traffic.
386                  */
387                 itn = net_generic(net, ipgre_net_id);
388                 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
389         }
390         return res;
391 }
392
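/* Main GRE receive handler, registered with the GRE demux for version 0
 * (GREPROTO_CISCO) in ipgre_init(). Parses the base GRE header, then
 * dispatches to the ERSPAN or ipgre/gretap paths; packets that match no
 * tunnel are answered with an ICMP port-unreachable and dropped.
 */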
393 static int gre_rcv(struct sk_buff *skb)
394 {
395         struct tnl_ptk_info tpi;
396         bool csum_err = false;
397         int hdr_len;
398
399 #ifdef CONFIG_NET_IPGRE_BROADCAST
400         if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
401                 /* Looped back packet, drop it! */
402                 if (rt_is_output_route(skb_rtable(skb)))
403                         goto drop;
404         }
405 #endif
406
407         hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
408         if (hdr_len < 0)
409                 goto drop;
410
411         if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
412                 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
413                         return 0;
414                 goto out;
415         }
416
417         if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
418                 return 0;
419
420 out:
421         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
422 drop:
423         kfree_skb(skb);
424         return 0;
425 }
426
427 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
428                        const struct iphdr *tnl_params,
429                        __be16 proto)
430 {
431         struct ip_tunnel *tunnel = netdev_priv(dev);
432         __be16 flags = tunnel->parms.o_flags;
433
434         /* Push GRE header. */
435         gre_build_header(skb, tunnel->tun_hlen,
436                          flags, proto, tunnel->parms.o_key,
437                          (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
438
439         ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
440 }
441
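/* Prepare the skb for GRE transmit offloads. An outer GRE checksum is
 * only attempted when the inner checksum start does not lie before
 * skb->data; otherwise -EINVAL is returned and the caller drops the
 * packet.
 */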
442 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
443 {
444         if (csum && skb_checksum_start(skb) < skb->data)
445                 return -EINVAL;
446         return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
447 }
448
449 static struct rtable *gre_get_rt(struct sk_buff *skb,
450                                  struct net_device *dev,
451                                  struct flowi4 *fl,
452                                  const struct ip_tunnel_key *key)
453 {
454         struct net *net = dev_net(dev);
455
456         memset(fl, 0, sizeof(*fl));
457         fl->daddr = key->u.ipv4.dst;
458         fl->saddr = key->u.ipv4.src;
459         fl->flowi4_tos = RT_TOS(key->tos);
460         fl->flowi4_mark = skb->mark;
461         fl->flowi4_proto = IPPROTO_GRE;
462
463         return ip_route_output_key(net, fl);
464 }
465
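/* Resolve (and, when usable, cache) the route for a collect-metadata
 * transmit and ensure the skb has headroom for the outer IP and GRE
 * headers. On failure the skb is freed, tx_dropped is bumped and NULL
 * is returned.
 */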
466 static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
467                                       struct net_device *dev,
468                                       struct flowi4 *fl,
469                                       int tunnel_hlen)
470 {
471         struct ip_tunnel_info *tun_info;
472         const struct ip_tunnel_key *key;
473         struct rtable *rt = NULL;
474         int min_headroom;
475         bool use_cache;
476         int err;
477
478         tun_info = skb_tunnel_info(skb);
479         key = &tun_info->key;
480         use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
481
482         if (use_cache)
483                 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
484         if (!rt) {
485                 rt = gre_get_rt(skb, dev, fl, key);
486                 if (IS_ERR(rt))
487                         goto err_free_skb;
488                 if (use_cache)
489                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
490                                           fl->saddr);
491         }
492
493         min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
494                         + tunnel_hlen + sizeof(struct iphdr);
495         if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
496                 int head_delta = SKB_DATA_ALIGN(min_headroom -
497                                                 skb_headroom(skb) +
498                                                 16);
499                 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
500                                        0, GFP_ATOMIC);
501                 if (unlikely(err))
502                         goto err_free_rt;
503         }
504         return rt;
505
506 err_free_rt:
507         ip_rt_put(rt);
508 err_free_skb:
509         kfree_skb(skb);
510         dev->stats.tx_dropped++;
511         return NULL;
512 }
513
514 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
515                         __be16 proto)
516 {
517         struct ip_tunnel_info *tun_info;
518         const struct ip_tunnel_key *key;
519         struct rtable *rt = NULL;
520         struct flowi4 fl;
521         int tunnel_hlen;
522         __be16 df, flags;
523
524         tun_info = skb_tunnel_info(skb);
525         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
526                      ip_tunnel_info_af(tun_info) != AF_INET))
527                 goto err_free_skb;
528
529         key = &tun_info->key;
530         tunnel_hlen = gre_calc_hlen(key->tun_flags);
531
532         rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
533         if (!rt)
534                 return;
535
536         /* Push Tunnel header. */
537         if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
538                 goto err_free_rt;
539
540         flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
541         gre_build_header(skb, tunnel_hlen, flags, proto,
542                          tunnel_id_to_key32(tun_info->key.tun_id), 0);
543
544         df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
545
546         iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
547                       key->tos, key->ttl, df, false);
548         return;
549
550 err_free_rt:
551         ip_rt_put(rt);
552 err_free_skb:
553         kfree_skb(skb);
554         dev->stats.tx_dropped++;
555 }
556
557 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
558                            __be16 proto)
559 {
560         struct ip_tunnel *tunnel = netdev_priv(dev);
561         struct ip_tunnel_info *tun_info;
562         const struct ip_tunnel_key *key;
563         struct erspan_metadata *md;
564         struct rtable *rt = NULL;
565         bool truncate = false;
566         struct flowi4 fl;
567         int tunnel_hlen;
568         __be16 df;
569
570         tun_info = skb_tunnel_info(skb);
571         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
572                      ip_tunnel_info_af(tun_info) != AF_INET))
573                 goto err_free_skb;
574
575         key = &tun_info->key;
576
577         /* ERSPAN has a fixed 8-byte GRE header */
578         tunnel_hlen = 8 + sizeof(struct erspanhdr);
579
580         rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
581         if (!rt)
582                 return;
583
584         if (gre_handle_offloads(skb, false))
585                 goto err_free_rt;
586
587         if (skb->len > dev->mtu + dev->hard_header_len) {
588                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
589                 truncate = true;
590         }
591
592         if (tun_info->options_len < sizeof(*md))
593                 goto err_free_rt;
594
595         md = ip_tunnel_info_opts(tun_info);
596         if (!md)
597                 goto err_free_rt;
598
599         erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
600                             ntohl(md->index), truncate);
601
602         gre_build_header(skb, 8, TUNNEL_SEQ,
603                          htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
604
605         df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
606
607         iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
608                       key->tos, key->ttl, df, false);
609         return;
610
611 err_free_rt:
612         ip_rt_put(rt);
613 err_free_skb:
614         kfree_skb(skb);
615         dev->stats.tx_dropped++;
616 }
617
618 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
619 {
620         struct ip_tunnel_info *info = skb_tunnel_info(skb);
621         struct rtable *rt;
622         struct flowi4 fl4;
623
624         if (ip_tunnel_info_af(info) != AF_INET)
625                 return -EINVAL;
626
627         rt = gre_get_rt(skb, dev, &fl4, &info->key);
628         if (IS_ERR(rt))
629                 return PTR_ERR(rt);
630
631         ip_rt_put(rt);
632         info->key.u.ipv4.src = fl4.saddr;
633         return 0;
634 }
635
636 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
637                               struct net_device *dev)
638 {
639         struct ip_tunnel *tunnel = netdev_priv(dev);
640         const struct iphdr *tnl_params;
641
642         if (tunnel->collect_md) {
643                 gre_fb_xmit(skb, dev, skb->protocol);
644                 return NETDEV_TX_OK;
645         }
646
647         if (dev->header_ops) {
648                 /* Need space for new headers */
649                 if (skb_cow_head(skb, dev->needed_headroom -
650                                       (tunnel->hlen + sizeof(struct iphdr))))
651                         goto free_skb;
652
653                 tnl_params = (const struct iphdr *)skb->data;
654
655                 /* Pull the skb since ip_tunnel_xmit() needs skb->data
656                  * pointing at the gre header.
657                  */
658                 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
659                 skb_reset_mac_header(skb);
660         } else {
661                 if (skb_cow_head(skb, dev->needed_headroom))
662                         goto free_skb;
663
664                 tnl_params = &tunnel->parms.iph;
665         }
666
667         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
668                 goto free_skb;
669
670         __gre_xmit(skb, dev, tnl_params, skb->protocol);
671         return NETDEV_TX_OK;
672
673 free_skb:
674         kfree_skb(skb);
675         dev->stats.tx_dropped++;
676         return NETDEV_TX_OK;
677 }
678
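/* Derive a 3-bit CoS value from the IP TOS byte: shift out the two ECN
 * bits to get the DSCP, then keep its top three (precedence) bits.
 */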
679 static inline u8 tos_to_cos(u8 tos)
680 {
681         u8 dscp, cos;
682
683         dscp = tos >> 2;
684         cos = dscp >> 3;
685         return cos;
686 }
687
688 static void erspan_build_header(struct sk_buff *skb,
689                                 __be32 id, u32 index, bool truncate)
690 {
691         struct iphdr *iphdr = ip_hdr(skb);
692         struct ethhdr *eth = (struct ethhdr *)skb->data;
693         enum erspan_encap_type enc_type;
694         struct erspanhdr *ershdr;
695         struct qtag_prefix {
696                 __be16 eth_type;
697                 __be16 tci;
698         } *qp;
699         u16 vlan_tci = 0;
700
701         enc_type = ERSPAN_ENCAP_NOVLAN;
702
703         /* If the mirrored packet has a vlan tag, extract the tci and
704          * preserve the vlan header in the mirrored frame.
705          */
706         if (eth->h_proto == htons(ETH_P_8021Q)) {
707                 qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
708                 vlan_tci = ntohs(qp->tci);
709                 enc_type = ERSPAN_ENCAP_INFRAME;
710         }
711
712         skb_push(skb, sizeof(*ershdr));
713         ershdr = (struct erspanhdr *)skb->data;
714         memset(ershdr, 0, sizeof(*ershdr));
715
716         ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
717                                  (ERSPAN_VERSION << VER_OFFSET));
718         ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
719                            ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
720                            (enc_type << EN_OFFSET & EN_MASK) |
721                            ((truncate << T_OFFSET) & T_MASK));
722         ershdr->md.index = htonl(index & INDEX_MASK);
723 }
724
725 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
726                                struct net_device *dev)
727 {
728         struct ip_tunnel *tunnel = netdev_priv(dev);
729         bool truncate = false;
730
731         if (tunnel->collect_md) {
732                 erspan_fb_xmit(skb, dev, skb->protocol);
733                 return NETDEV_TX_OK;
734         }
735
736         if (gre_handle_offloads(skb, false))
737                 goto free_skb;
738
739         if (skb_cow_head(skb, dev->needed_headroom))
740                 goto free_skb;
741
742         if (skb->len > dev->mtu + dev->hard_header_len) {
743                 pskb_trim(skb, dev->mtu + dev->hard_header_len);
744                 truncate = true;
745         }
746
747         /* Push ERSPAN header */
748         erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
749         tunnel->parms.o_flags &= ~TUNNEL_KEY;
750         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
751         return NETDEV_TX_OK;
752
753 free_skb:
754         kfree_skb(skb);
755         dev->stats.tx_dropped++;
756         return NETDEV_TX_OK;
757 }
758
759 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
760                                 struct net_device *dev)
761 {
762         struct ip_tunnel *tunnel = netdev_priv(dev);
763
764         if (tunnel->collect_md) {
765                 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
766                 return NETDEV_TX_OK;
767         }
768
769         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
770                 goto free_skb;
771
772         if (skb_cow_head(skb, dev->needed_headroom))
773                 goto free_skb;
774
775         __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
776         return NETDEV_TX_OK;
777
778 free_skb:
779         kfree_skb(skb);
780         dev->stats.tx_dropped++;
781         return NETDEV_TX_OK;
782 }
783
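/* Legacy ioctl (SIOC{ADD,CHG,DEL,GET}TUNNEL) interface. Userspace passes
 * wire-format GRE_* flags, which are translated to internal TUNNEL_*
 * flags and back; for ADD/CHG the header is sanity-checked and
 * GRE_VERSION/GRE_ROUTING are rejected.
 */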
784 static int ipgre_tunnel_ioctl(struct net_device *dev,
785                               struct ifreq *ifr, int cmd)
786 {
787         int err;
788         struct ip_tunnel_parm p;
789
790         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
791                 return -EFAULT;
792         if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
793                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
794                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
795                     ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
796                         return -EINVAL;
797         }
798         p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
799         p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
800
801         err = ip_tunnel_ioctl(dev, &p, cmd);
802         if (err)
803                 return err;
804
805         p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
806         p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
807
808         if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
809                 return -EFAULT;
810         return 0;
811 }
812
813 /* Nice toy. Unfortunately, useless in real life :-)
814    It allows one to construct a virtual multiprotocol broadcast "LAN"
815    over the Internet, provided multicast routing is tuned.
816
817
818    I have no idea whether this bicycle was invented before me,
819    so I had to set ARPHRD_IPGRE to a random value.
820    I have the impression that Cisco could have made something similar,
821    but this feature is apparently missing in IOS<=11.2(8).
822
823    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
824    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
825
826    ping -t 255 224.66.66.66
827
828    If nobody answers, mbone does not work.
829
830    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
831    ip addr add 10.66.66.<somewhat>/24 dev Universe
832    ifconfig Universe up
833    ifconfig Universe add fe80::<Your_real_addr>/10
834    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
835    ftp 10.66.66.66
836    ...
837    ftp fec0:6666:6666::193.233.7.65
838    ...
839  */
840 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
841                         unsigned short type,
842                         const void *daddr, const void *saddr, unsigned int len)
843 {
844         struct ip_tunnel *t = netdev_priv(dev);
845         struct iphdr *iph;
846         struct gre_base_hdr *greh;
847
848         iph = skb_push(skb, t->hlen + sizeof(*iph));
849         greh = (struct gre_base_hdr *)(iph+1);
850         greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
851         greh->protocol = htons(type);
852
853         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
854
855         /* Set the source hardware address. */
856         if (saddr)
857                 memcpy(&iph->saddr, saddr, 4);
858         if (daddr)
859                 memcpy(&iph->daddr, daddr, 4);
860         if (iph->daddr)
861                 return t->hlen + sizeof(*iph);
862
863         return -(t->hlen + sizeof(*iph));
864 }
865
866 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
867 {
868         const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
869         memcpy(haddr, &iph->saddr, 4);
870         return 4;
871 }
872
873 static const struct header_ops ipgre_header_ops = {
874         .create = ipgre_header,
875         .parse  = ipgre_header_parse,
876 };
877
878 #ifdef CONFIG_NET_IPGRE_BROADCAST
879 static int ipgre_open(struct net_device *dev)
880 {
881         struct ip_tunnel *t = netdev_priv(dev);
882
883         if (ipv4_is_multicast(t->parms.iph.daddr)) {
884                 struct flowi4 fl4;
885                 struct rtable *rt;
886
887                 rt = ip_route_output_gre(t->net, &fl4,
888                                          t->parms.iph.daddr,
889                                          t->parms.iph.saddr,
890                                          t->parms.o_key,
891                                          RT_TOS(t->parms.iph.tos),
892                                          t->parms.link);
893                 if (IS_ERR(rt))
894                         return -EADDRNOTAVAIL;
895                 dev = rt->dst.dev;
896                 ip_rt_put(rt);
897                 if (!__in_dev_get_rtnl(dev))
898                         return -EADDRNOTAVAIL;
899                 t->mlink = dev->ifindex;
900                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
901         }
902         return 0;
903 }
904
905 static int ipgre_close(struct net_device *dev)
906 {
907         struct ip_tunnel *t = netdev_priv(dev);
908
909         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
910                 struct in_device *in_dev;
911                 in_dev = inetdev_by_index(t->net, t->mlink);
912                 if (in_dev)
913                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
914         }
915         return 0;
916 }
917 #endif
918
919 static const struct net_device_ops ipgre_netdev_ops = {
920         .ndo_init               = ipgre_tunnel_init,
921         .ndo_uninit             = ip_tunnel_uninit,
922 #ifdef CONFIG_NET_IPGRE_BROADCAST
923         .ndo_open               = ipgre_open,
924         .ndo_stop               = ipgre_close,
925 #endif
926         .ndo_start_xmit         = ipgre_xmit,
927         .ndo_do_ioctl           = ipgre_tunnel_ioctl,
928         .ndo_change_mtu         = ip_tunnel_change_mtu,
929         .ndo_get_stats64        = ip_tunnel_get_stats64,
930         .ndo_get_iflink         = ip_tunnel_get_iflink,
931 };
932
933 #define GRE_FEATURES (NETIF_F_SG |              \
934                       NETIF_F_FRAGLIST |        \
935                       NETIF_F_HIGHDMA |         \
936                       NETIF_F_HW_CSUM)
937
938 static void ipgre_tunnel_setup(struct net_device *dev)
939 {
940         dev->netdev_ops         = &ipgre_netdev_ops;
941         dev->type               = ARPHRD_IPGRE;
942         ip_tunnel_setup(dev, ipgre_net_id);
943 }
944
945 static void __gre_tunnel_init(struct net_device *dev)
946 {
947         struct ip_tunnel *tunnel;
948         int t_hlen;
949
950         tunnel = netdev_priv(dev);
951         tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
952         tunnel->parms.iph.protocol = IPPROTO_GRE;
953
954         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
955
956         t_hlen = tunnel->hlen + sizeof(struct iphdr);
957
958         dev->features           |= GRE_FEATURES;
959         dev->hw_features        |= GRE_FEATURES;
960
961         if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
962                 /* TCP offload with GRE SEQ is not supported, nor
963                  * can we support 2 levels of outer headers requiring
964                  * an update.
965                  */
966                 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
967                     (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
968                         dev->features    |= NETIF_F_GSO_SOFTWARE;
969                         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
970                 }
971
972                 /* Can use lockless transmit, unless we generate
973                  * output sequences.
974                  */
975                 dev->features |= NETIF_F_LLTX;
976         }
977 }
978
979 static int ipgre_tunnel_init(struct net_device *dev)
980 {
981         struct ip_tunnel *tunnel = netdev_priv(dev);
982         struct iphdr *iph = &tunnel->parms.iph;
983
984         __gre_tunnel_init(dev);
985
986         memcpy(dev->dev_addr, &iph->saddr, 4);
987         memcpy(dev->broadcast, &iph->daddr, 4);
988
989         dev->flags              = IFF_NOARP;
990         netif_keep_dst(dev);
991         dev->addr_len           = 4;
992
993         if (iph->daddr && !tunnel->collect_md) {
994 #ifdef CONFIG_NET_IPGRE_BROADCAST
995                 if (ipv4_is_multicast(iph->daddr)) {
996                         if (!iph->saddr)
997                                 return -EINVAL;
998                         dev->flags = IFF_BROADCAST;
999                         dev->header_ops = &ipgre_header_ops;
1000                 }
1001 #endif
1002         } else if (!tunnel->collect_md) {
1003                 dev->header_ops = &ipgre_header_ops;
1004         }
1005
1006         return ip_tunnel_init(dev);
1007 }
1008
1009 static const struct gre_protocol ipgre_protocol = {
1010         .handler     = gre_rcv,
1011         .err_handler = gre_err,
1012 };
1013
1014 static int __net_init ipgre_init_net(struct net *net)
1015 {
1016         return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1017 }
1018
1019 static void __net_exit ipgre_exit_net(struct net *net)
1020 {
1021         struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
1022         ip_tunnel_delete_net(itn, &ipgre_link_ops);
1023 }
1024
1025 static struct pernet_operations ipgre_net_ops = {
1026         .init = ipgre_init_net,
1027         .exit = ipgre_exit_net,
1028         .id   = &ipgre_net_id,
1029         .size = sizeof(struct ip_tunnel_net),
1030 };
1031
1032 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1033                                  struct netlink_ext_ack *extack)
1034 {
1035         __be16 flags;
1036
1037         if (!data)
1038                 return 0;
1039
1040         flags = 0;
1041         if (data[IFLA_GRE_IFLAGS])
1042                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1043         if (data[IFLA_GRE_OFLAGS])
1044                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1045         if (flags & (GRE_VERSION|GRE_ROUTING))
1046                 return -EINVAL;
1047
1048         if (data[IFLA_GRE_COLLECT_METADATA] &&
1049             data[IFLA_GRE_ENCAP_TYPE] &&
1050             nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1051                 return -EINVAL;
1052
1053         return 0;
1054 }
1055
1056 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1057                               struct netlink_ext_ack *extack)
1058 {
1059         __be32 daddr;
1060
1061         if (tb[IFLA_ADDRESS]) {
1062                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1063                         return -EINVAL;
1064                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1065                         return -EADDRNOTAVAIL;
1066         }
1067
1068         if (!data)
1069                 goto out;
1070
1071         if (data[IFLA_GRE_REMOTE]) {
1072                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1073                 if (!daddr)
1074                         return -EINVAL;
1075         }
1076
1077 out:
1078         return ipgre_tunnel_validate(tb, data, extack);
1079 }
1080
1081 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1082                            struct netlink_ext_ack *extack)
1083 {
1084         __be16 flags = 0;
1085         int ret;
1086
1087         if (!data)
1088                 return 0;
1089
1090         ret = ipgre_tap_validate(tb, data, extack);
1091         if (ret)
1092                 return ret;
1093
1094         /* ERSPAN should only have the GRE sequence and key flags */
1095         if (data[IFLA_GRE_OFLAGS])
1096                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1097         if (data[IFLA_GRE_IFLAGS])
1098                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1099         if (!data[IFLA_GRE_COLLECT_METADATA] &&
1100             flags != (GRE_SEQ | GRE_KEY))
1101                 return -EINVAL;
1102
1103         /* The ERSPAN session ID is only 10 bits. Since we reuse the
1104          * 32-bit key field as the ID, check its range.
1105          */
1106         if (data[IFLA_GRE_IKEY] &&
1107             (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1108                 return -EINVAL;
1109
1110         if (data[IFLA_GRE_OKEY] &&
1111             (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1112                 return -EINVAL;
1113
1114         return 0;
1115 }
1116
1117 static int ipgre_netlink_parms(struct net_device *dev,
1118                                 struct nlattr *data[],
1119                                 struct nlattr *tb[],
1120                                 struct ip_tunnel_parm *parms,
1121                                 __u32 *fwmark)
1122 {
1123         struct ip_tunnel *t = netdev_priv(dev);
1124
1125         memset(parms, 0, sizeof(*parms));
1126
1127         parms->iph.protocol = IPPROTO_GRE;
1128
1129         if (!data)
1130                 return 0;
1131
1132         if (data[IFLA_GRE_LINK])
1133                 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1134
1135         if (data[IFLA_GRE_IFLAGS])
1136                 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1137
1138         if (data[IFLA_GRE_OFLAGS])
1139                 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1140
1141         if (data[IFLA_GRE_IKEY])
1142                 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1143
1144         if (data[IFLA_GRE_OKEY])
1145                 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1146
1147         if (data[IFLA_GRE_LOCAL])
1148                 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1149
1150         if (data[IFLA_GRE_REMOTE])
1151                 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1152
1153         if (data[IFLA_GRE_TTL])
1154                 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1155
1156         if (data[IFLA_GRE_TOS])
1157                 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1158
1159         if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1160                 if (t->ignore_df)
1161                         return -EINVAL;
1162                 parms->iph.frag_off = htons(IP_DF);
1163         }
1164
1165         if (data[IFLA_GRE_COLLECT_METADATA]) {
1166                 t->collect_md = true;
1167                 if (dev->type == ARPHRD_IPGRE)
1168                         dev->type = ARPHRD_NONE;
1169         }
1170
1171         if (data[IFLA_GRE_IGNORE_DF]) {
1172                 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1173                   && (parms->iph.frag_off & htons(IP_DF)))
1174                         return -EINVAL;
1175                 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1176         }
1177
1178         if (data[IFLA_GRE_FWMARK])
1179                 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1180
1181         if (data[IFLA_GRE_ERSPAN_INDEX]) {
1182                 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1183
1184                 if (t->index & ~INDEX_MASK)
1185                         return -EINVAL;
1186         }
1187
1188         return 0;
1189 }
1190
1191 /* This function returns true when ENCAP attributes are present in the nl msg */
1192 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1193                                       struct ip_tunnel_encap *ipencap)
1194 {
1195         bool ret = false;
1196
1197         memset(ipencap, 0, sizeof(*ipencap));
1198
1199         if (!data)
1200                 return ret;
1201
1202         if (data[IFLA_GRE_ENCAP_TYPE]) {
1203                 ret = true;
1204                 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1205         }
1206
1207         if (data[IFLA_GRE_ENCAP_FLAGS]) {
1208                 ret = true;
1209                 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1210         }
1211
1212         if (data[IFLA_GRE_ENCAP_SPORT]) {
1213                 ret = true;
1214                 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1215         }
1216
1217         if (data[IFLA_GRE_ENCAP_DPORT]) {
1218                 ret = true;
1219                 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1220         }
1221
1222         return ret;
1223 }
1224
1225 static int gre_tap_init(struct net_device *dev)
1226 {
1227         __gre_tunnel_init(dev);
1228         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1229         netif_keep_dst(dev);
1230
1231         return ip_tunnel_init(dev);
1232 }
1233
1234 static const struct net_device_ops gre_tap_netdev_ops = {
1235         .ndo_init               = gre_tap_init,
1236         .ndo_uninit             = ip_tunnel_uninit,
1237         .ndo_start_xmit         = gre_tap_xmit,
1238         .ndo_set_mac_address    = eth_mac_addr,
1239         .ndo_validate_addr      = eth_validate_addr,
1240         .ndo_change_mtu         = ip_tunnel_change_mtu,
1241         .ndo_get_stats64        = ip_tunnel_get_stats64,
1242         .ndo_get_iflink         = ip_tunnel_get_iflink,
1243         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1244 };
1245
1246 static int erspan_tunnel_init(struct net_device *dev)
1247 {
1248         struct ip_tunnel *tunnel = netdev_priv(dev);
1249         int t_hlen;
1250
1251         tunnel->tun_hlen = 8;
1252         tunnel->parms.iph.protocol = IPPROTO_GRE;
1253         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1254                        sizeof(struct erspanhdr);
1255         t_hlen = tunnel->hlen + sizeof(struct iphdr);
1256
1257         dev->features           |= GRE_FEATURES;
1258         dev->hw_features        |= GRE_FEATURES;
1259         dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
1260         netif_keep_dst(dev);
1261
1262         return ip_tunnel_init(dev);
1263 }
1264
1265 static const struct net_device_ops erspan_netdev_ops = {
1266         .ndo_init               = erspan_tunnel_init,
1267         .ndo_uninit             = ip_tunnel_uninit,
1268         .ndo_start_xmit         = erspan_xmit,
1269         .ndo_set_mac_address    = eth_mac_addr,
1270         .ndo_validate_addr      = eth_validate_addr,
1271         .ndo_change_mtu         = ip_tunnel_change_mtu,
1272         .ndo_get_stats64        = ip_tunnel_get_stats64,
1273         .ndo_get_iflink         = ip_tunnel_get_iflink,
1274         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1275 };
1276
1277 static void ipgre_tap_setup(struct net_device *dev)
1278 {
1279         ether_setup(dev);
1280         dev->max_mtu = 0;
1281         dev->netdev_ops = &gre_tap_netdev_ops;
1282         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1283         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1284         ip_tunnel_setup(dev, gre_tap_net_id);
1285 }
1286
1287 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1288                          struct nlattr *tb[], struct nlattr *data[],
1289                          struct netlink_ext_ack *extack)
1290 {
1291         struct ip_tunnel_parm p;
1292         struct ip_tunnel_encap ipencap;
1293         __u32 fwmark = 0;
1294         int err;
1295
1296         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1297                 struct ip_tunnel *t = netdev_priv(dev);
1298                 err = ip_tunnel_encap_setup(t, &ipencap);
1299
1300                 if (err < 0)
1301                         return err;
1302         }
1303
1304         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1305         if (err < 0)
1306                 return err;
1307         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1308 }
1309
1310 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1311                             struct nlattr *data[],
1312                             struct netlink_ext_ack *extack)
1313 {
1314         struct ip_tunnel *t = netdev_priv(dev);
1315         struct ip_tunnel_parm p;
1316         struct ip_tunnel_encap ipencap;
1317         __u32 fwmark = t->fwmark;
1318         int err;
1319
1320         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1321                 err = ip_tunnel_encap_setup(t, &ipencap);
1322
1323                 if (err < 0)
1324                         return err;
1325         }
1326
1327         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1328         if (err < 0)
1329                 return err;
1330         return ip_tunnel_changelink(dev, tb, &p, fwmark);
1331 }
1332
1333 static size_t ipgre_get_size(const struct net_device *dev)
1334 {
1335         return
1336                 /* IFLA_GRE_LINK */
1337                 nla_total_size(4) +
1338                 /* IFLA_GRE_IFLAGS */
1339                 nla_total_size(2) +
1340                 /* IFLA_GRE_OFLAGS */
1341                 nla_total_size(2) +
1342                 /* IFLA_GRE_IKEY */
1343                 nla_total_size(4) +
1344                 /* IFLA_GRE_OKEY */
1345                 nla_total_size(4) +
1346                 /* IFLA_GRE_LOCAL */
1347                 nla_total_size(4) +
1348                 /* IFLA_GRE_REMOTE */
1349                 nla_total_size(4) +
1350                 /* IFLA_GRE_TTL */
1351                 nla_total_size(1) +
1352                 /* IFLA_GRE_TOS */
1353                 nla_total_size(1) +
1354                 /* IFLA_GRE_PMTUDISC */
1355                 nla_total_size(1) +
1356                 /* IFLA_GRE_ENCAP_TYPE */
1357                 nla_total_size(2) +
1358                 /* IFLA_GRE_ENCAP_FLAGS */
1359                 nla_total_size(2) +
1360                 /* IFLA_GRE_ENCAP_SPORT */
1361                 nla_total_size(2) +
1362                 /* IFLA_GRE_ENCAP_DPORT */
1363                 nla_total_size(2) +
1364                 /* IFLA_GRE_COLLECT_METADATA */
1365                 nla_total_size(0) +
1366                 /* IFLA_GRE_IGNORE_DF */
1367                 nla_total_size(1) +
1368                 /* IFLA_GRE_FWMARK */
1369                 nla_total_size(4) +
1370                 /* IFLA_GRE_ERSPAN_INDEX */
1371                 nla_total_size(4) +
1372                 0;
1373 }
1374
1375 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1376 {
1377         struct ip_tunnel *t = netdev_priv(dev);
1378         struct ip_tunnel_parm *p = &t->parms;
1379
1380         if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1381             nla_put_be16(skb, IFLA_GRE_IFLAGS,
1382                          gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1383             nla_put_be16(skb, IFLA_GRE_OFLAGS,
1384                          gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1385             nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1386             nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1387             nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1388             nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1389             nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1390             nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1391             nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1392                        !!(p->iph.frag_off & htons(IP_DF))) ||
1393             nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1394                 goto nla_put_failure;
1395
1396         if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1397                         t->encap.type) ||
1398             nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1399                          t->encap.sport) ||
1400             nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1401                          t->encap.dport) ||
1402             nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1403                         t->encap.flags))
1404                 goto nla_put_failure;
1405
1406         if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1407                 goto nla_put_failure;
1408
1409         if (t->collect_md) {
1410                 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1411                         goto nla_put_failure;
1412         }
1413
1414         if (t->index)
1415                 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1416                         goto nla_put_failure;
1417
1418         return 0;
1419
1420 nla_put_failure:
1421         return -EMSGSIZE;
1422 }
1423
1424 static void erspan_setup(struct net_device *dev)
1425 {
1426         ether_setup(dev);
1427         dev->max_mtu = 0;
1428         dev->netdev_ops = &erspan_netdev_ops;
1429         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1430         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1431         ip_tunnel_setup(dev, erspan_net_id);
1432 }
1433
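/* For illustration, a classical (non-metadata) ERSPAN device could be
 * created roughly as follows, assuming an iproute2 recent enough to know
 * the erspan link type (the option spelling is an assumption; check
 * ip-link(8)):
 *
 *     ip link add dev erspan1 type erspan seq key 101 \
 *             local 192.0.2.1 remote 192.0.2.2 erspan 123
 *     ip link set erspan1 up
 *
 * This matches erspan_validate() above: GRE_SEQ | GRE_KEY set, and a key
 * within the 10-bit session ID range.
 */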
1434 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1435         [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1436         [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1437         [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1438         [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1439         [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1440         [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1441         [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1442         [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1443         [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1444         [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1445         [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
1446         [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
1447         [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
1448         [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
1449         [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
1450         [IFLA_GRE_IGNORE_DF]    = { .type = NLA_U8 },
1451         [IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
1452         [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1453 };
1454
1455 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1456         .kind           = "gre",
1457         .maxtype        = IFLA_GRE_MAX,
1458         .policy         = ipgre_policy,
1459         .priv_size      = sizeof(struct ip_tunnel),
1460         .setup          = ipgre_tunnel_setup,
1461         .validate       = ipgre_tunnel_validate,
1462         .newlink        = ipgre_newlink,
1463         .changelink     = ipgre_changelink,
1464         .dellink        = ip_tunnel_dellink,
1465         .get_size       = ipgre_get_size,
1466         .fill_info      = ipgre_fill_info,
1467         .get_link_net   = ip_tunnel_get_link_net,
1468 };
1469
1470 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1471         .kind           = "gretap",
1472         .maxtype        = IFLA_GRE_MAX,
1473         .policy         = ipgre_policy,
1474         .priv_size      = sizeof(struct ip_tunnel),
1475         .setup          = ipgre_tap_setup,
1476         .validate       = ipgre_tap_validate,
1477         .newlink        = ipgre_newlink,
1478         .changelink     = ipgre_changelink,
1479         .dellink        = ip_tunnel_dellink,
1480         .get_size       = ipgre_get_size,
1481         .fill_info      = ipgre_fill_info,
1482         .get_link_net   = ip_tunnel_get_link_net,
1483 };
1484
1485 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1486         .kind           = "erspan",
1487         .maxtype        = IFLA_GRE_MAX,
1488         .policy         = ipgre_policy,
1489         .priv_size      = sizeof(struct ip_tunnel),
1490         .setup          = erspan_setup,
1491         .validate       = erspan_validate,
1492         .newlink        = ipgre_newlink,
1493         .changelink     = ipgre_changelink,
1494         .dellink        = ip_tunnel_dellink,
1495         .get_size       = ipgre_get_size,
1496         .fill_info      = ipgre_fill_info,
1497         .get_link_net   = ip_tunnel_get_link_net,
1498 };
1499
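/* Create a flow-based ("collect_md") gretap device: every tunnel
 * parameter is taken per packet from the skb's tunnel metadata instead
 * of from the device. Used by callers such as openvswitch.
 */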
1500 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1501                                         u8 name_assign_type)
1502 {
1503         struct nlattr *tb[IFLA_MAX + 1];
1504         struct net_device *dev;
1505         LIST_HEAD(list_kill);
1506         struct ip_tunnel *t;
1507         int err;
1508
1509         memset(&tb, 0, sizeof(tb));
1510
1511         dev = rtnl_create_link(net, name, name_assign_type,
1512                                &ipgre_tap_ops, tb);
1513         if (IS_ERR(dev))
1514                 return dev;
1515
1516         /* Configure the flow-based GRE device. */
1517         t = netdev_priv(dev);
1518         t->collect_md = true;
1519
1520         err = ipgre_newlink(net, dev, tb, NULL, NULL);
1521         if (err < 0) {
1522                 free_netdev(dev);
1523                 return ERR_PTR(err);
1524         }
1525
1526         /* openvswitch users expect packet sizes to be unrestricted,
1527          * so set the largest MTU we can.
1528          */
1529         err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1530         if (err)
1531                 goto out;
1532
1533         err = rtnl_configure_link(dev, NULL);
1534         if (err < 0)
1535                 goto out;
1536
1537         return dev;
1538 out:
1539         ip_tunnel_dellink(dev, &list_kill);
1540         unregister_netdevice_many(&list_kill);
1541         return ERR_PTR(err);
1542 }
1543 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1544
1545 static int __net_init ipgre_tap_init_net(struct net *net)
1546 {
1547         return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1548 }
1549
1550 static void __net_exit ipgre_tap_exit_net(struct net *net)
1551 {
1552         struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1553         ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1554 }
1555
1556 static struct pernet_operations ipgre_tap_net_ops = {
1557         .init = ipgre_tap_init_net,
1558         .exit = ipgre_tap_exit_net,
1559         .id   = &gre_tap_net_id,
1560         .size = sizeof(struct ip_tunnel_net),
1561 };
1562
1563 static int __net_init erspan_init_net(struct net *net)
1564 {
1565         return ip_tunnel_init_net(net, erspan_net_id,
1566                                   &erspan_link_ops, "erspan0");
1567 }
1568
1569 static void __net_exit erspan_exit_net(struct net *net)
1570 {
1571         struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
1572
1573         ip_tunnel_delete_net(itn, &erspan_link_ops);
1574 }
1575
1576 static struct pernet_operations erspan_net_ops = {
1577         .init = erspan_init_net,
1578         .exit = erspan_exit_net,
1579         .id   = &erspan_net_id,
1580         .size = sizeof(struct ip_tunnel_net),
1581 };
1582
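/* Register the three per-netns tables, hook into the GRE demux for
 * version 0 (GREPROTO_CISCO) and expose the "gre", "gretap" and
 * "erspan" rtnl link types, unwinding in reverse order on any failure.
 */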
1583 static int __init ipgre_init(void)
1584 {
1585         int err;
1586
1587         pr_info("GRE over IPv4 tunneling driver\n");
1588
1589         err = register_pernet_device(&ipgre_net_ops);
1590         if (err < 0)
1591                 return err;
1592
1593         err = register_pernet_device(&ipgre_tap_net_ops);
1594         if (err < 0)
1595                 goto pnet_tap_failed;
1596
1597         err = register_pernet_device(&erspan_net_ops);
1598         if (err < 0)
1599                 goto pnet_erspan_failed;
1600
1601         err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1602         if (err < 0) {
1603                 pr_info("%s: can't add protocol\n", __func__);
1604                 goto add_proto_failed;
1605         }
1606
1607         err = rtnl_link_register(&ipgre_link_ops);
1608         if (err < 0)
1609                 goto rtnl_link_failed;
1610
1611         err = rtnl_link_register(&ipgre_tap_ops);
1612         if (err < 0)
1613                 goto tap_ops_failed;
1614
1615         err = rtnl_link_register(&erspan_link_ops);
1616         if (err < 0)
1617                 goto erspan_link_failed;
1618
1619         return 0;
1620
1621 erspan_link_failed:
1622         rtnl_link_unregister(&ipgre_tap_ops);
1623 tap_ops_failed:
1624         rtnl_link_unregister(&ipgre_link_ops);
1625 rtnl_link_failed:
1626         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1627 add_proto_failed:
1628         unregister_pernet_device(&erspan_net_ops);
1629 pnet_erspan_failed:
1630         unregister_pernet_device(&ipgre_tap_net_ops);
1631 pnet_tap_failed:
1632         unregister_pernet_device(&ipgre_net_ops);
1633         return err;
1634 }
1635
1636 static void __exit ipgre_fini(void)
1637 {
1638         rtnl_link_unregister(&ipgre_tap_ops);
1639         rtnl_link_unregister(&ipgre_link_ops);
1640         rtnl_link_unregister(&erspan_link_ops);
1641         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1642         unregister_pernet_device(&ipgre_tap_net_ops);
1643         unregister_pernet_device(&ipgre_net_ops);
1644         unregister_pernet_device(&erspan_net_ops);
1645 }
1646
1647 module_init(ipgre_init);
1648 module_exit(ipgre_fini);
1649 MODULE_LICENSE("GPL");
1650 MODULE_ALIAS_RTNL_LINK("gre");
1651 MODULE_ALIAS_RTNL_LINK("gretap");
1652 MODULE_ALIAS_RTNL_LINK("erspan");
1653 MODULE_ALIAS_NETDEV("gre0");
1654 MODULE_ALIAS_NETDEV("gretap0");
1655 MODULE_ALIAS_NETDEV("erspan0");