2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
50 #include <net/dst_metadata.h>
51 #include <net/erspan.h>
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is a good
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: xmit_recursion breaks dead loops. This is a percpu
70 counter, since when we enter the first ndo_xmit(), cpu migration is
71 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would be even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, ttl is not a solution at all.
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 rapidly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
/* Module parameter: when true, log packets whose ECN bits arrived corrupted. */
110 static bool log_ecn_error = true;
111 module_param(log_ecn_error, bool, 0644);
112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* Forward declarations for the rtnl link ops and helpers defined below. */
114 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
115 static int ipgre_tunnel_init(struct net_device *dev);
116 static void erspan_build_header(struct sk_buff *skb,
118 bool truncate, bool is_ipv4);
/* Per-network-namespace IDs for the three tunnel flavours (gre, gretap,
 * erspan), registered via pernet_operations further down.
 */
120 static unsigned int ipgre_net_id __read_mostly;
121 static unsigned int gre_tap_net_id __read_mostly;
122 static unsigned int erspan_net_id __read_mostly;
/* ICMP error handler for an already-parsed GRE packet: find the tunnel the
 * offending inner packet belonged to and record the error on it.
 * NOTE(review): this chunk is an extraction with lines missing (the switch
 * statement head, several returns and the closing braces are not visible);
 * comments below describe only what the visible lines establish.
 */
124 static void ipgre_err(struct sk_buff *skb, u32 info,
125 const struct tnl_ptk_info *tpi)
128 /* All the routers (except for Linux) return only
129 8 bytes of packet payload. It means, that precise relaying of
130 ICMP in the real Internet is absolutely infeasible.
132 Moreover, Cisco "wise men" put GRE key to the third word
133 in GRE header. It makes impossible maintaining even soft
134 state for keyed GRE tunnels with enabled checksum. Tell
137 Well, I wonder, rfc1812 was written by Cisco employee,
138 what the hell these idiots break standards established
141 struct net *net = dev_net(skb->dev);
142 struct ip_tunnel_net *itn;
143 const struct iphdr *iph;
/* ICMP type/code of the error that was delivered to us. */
144 const int type = icmp_hdr(skb)->type;
145 const int code = icmp_hdr(skb)->code;
146 unsigned int data_len = 0;
151 case ICMP_PARAMETERPROB:
154 case ICMP_DEST_UNREACH:
157 case ICMP_PORT_UNREACH:
158 /* Impossible event. */
161 /* All others are translated to HOST_UNREACH.
162 rfc2003 contains "deep thoughts" about NET_UNREACH,
163 I believe they are just ether pollution. --ANK
169 case ICMP_TIME_EXCEEDED:
170 if (code != ICMP_EXC_TTL)
/* RFC 4884 4.1: reserved[1] carries the original-datagram length in words. */
172 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
/* Pick the per-netns tunnel table matching the inner protocol:
 * TEB -> gretap, ERSPAN/ERSPAN2 -> erspan, otherwise plain ipgre.
 */
179 if (tpi->proto == htons(ETH_P_TEB))
180 itn = net_generic(net, gre_tap_net_id);
181 else if (tpi->proto == htons(ETH_P_ERSPAN) ||
182 tpi->proto == htons(ETH_P_ERSPAN2))
183 itn = net_generic(net, erspan_net_id);
185 itn = net_generic(net, ipgre_net_id);
/* The quoted outer IP header follows the ICMP header; note the lookup is
 * done with daddr/saddr swapped because this is the echoed packet.
 */
187 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
188 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
189 iph->daddr, iph->saddr, tpi->key);
194 #if IS_ENABLED(CONFIG_IPV6)
/* For IPv6-in-GRE, try to regenerate an ICMPv6 unreachable toward the
 * inner sender instead of handling it here.
 */
195 if (tpi->proto == htons(ETH_P_IPV6) &&
196 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
/* Ignore errors for unbound (daddr == 0) or multicast tunnels. */
201 if (t->parms.iph.daddr == 0 ||
202 ipv4_is_multicast(t->parms.iph.daddr))
/* TTL inherited from the inner packet: TIME_EXCEEDED is expected, skip. */
205 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
/* Rate-limit: only record one error per IPTUNNEL_ERR_TIMEO window. */
208 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
212 t->err_time = jiffies;
/* Entry point for ICMP errors on IPPROTO_GRE: parse the quoted GRE header,
 * handle PMTU / redirect updates locally, and hand everything else to
 * ipgre_err().  NOTE(review): extraction gaps — the error-return paths after
 * gre_parse_header() and the returns inside the two if-blocks are not visible.
 */
215 static void gre_err(struct sk_buff *skb, u32 info)
217 /* All the routers (except for Linux) return only
218 * 8 bytes of packet payload. It means, that precise relaying of
219 * ICMP in the real Internet is absolutely infeasible.
221 * Moreover, Cisco "wise men" put GRE key to the third word
222 * in GRE header. It makes impossible maintaining even soft
224 * GRE tunnels with enabled checksum. Tell them "thank you".
226 * Well, I wonder, rfc1812 was written by Cisco employee,
227 * what the hell these idiots break standards established
/* skb->data points at the quoted outer IP header inside the ICMP payload. */
231 const struct iphdr *iph = (struct iphdr *)skb->data;
232 const int type = icmp_hdr(skb)->type;
233 const int code = icmp_hdr(skb)->code;
234 struct tnl_ptk_info tpi;
236 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
/* Fragmentation-needed: update the cached path MTU for this flow. */
240 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
241 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
242 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
/* ICMP redirect: update the route cache for this flow. */
245 if (type == ICMP_REDIRECT) {
246 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
/* Everything else is per-tunnel error bookkeeping. */
251 ipgre_err(skb, info, &tpi);
/* Receive path for ERSPAN-in-GRE packets.  Looks up the erspan tunnel,
 * pulls the ERSPAN header, and — for collect_md tunnels — builds a
 * metadata dst carrying the ERSPAN options before delivering the skb.
 * Returns PACKET_RCVD on success or PACKET_REJECT so the caller can fall
 * through to the generic GRE handling.
 * NOTE(review): extraction gaps — declarations of iph/ver/len/flags/tun_id,
 * several error branches and the PACKET_RCVD return are not visible here.
 */
254 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
257 struct net *net = dev_net(skb->dev);
258 struct metadata_dst *tun_dst = NULL;
259 struct erspan_base_hdr *ershdr;
260 struct ip_tunnel_net *itn;
261 struct ip_tunnel *tunnel;
262 const struct iphdr *iph;
263 struct erspan_md2 *md2;
267 itn = net_generic(net, erspan_net_id);
/* The ERSPAN base header sits right after the GRE header. */
270 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
/* ERSPAN always carries a GRE key, hence TUNNEL_KEY is forced here. */
273 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
274 tpi->flags | TUNNEL_KEY,
275 iph->saddr, iph->daddr, tpi->key);
/* Make sure the full GRE + version-dependent ERSPAN header is linear. */
278 len = gre_hdr_len + erspan_hdr_len(ver);
279 if (unlikely(!pskb_may_pull(skb, len)))
280 return PACKET_REJECT;
282 if (__iptunnel_pull_header(skb,
288 if (tunnel->collect_md) {
289 struct erspan_metadata *pkt_md, *md;
290 struct ip_tunnel_info *info;
295 tpi->flags |= TUNNEL_KEY;
297 tun_id = key32_to_tunnel_id(tpi->key);
/* Allocate a rx metadata dst with room for the erspan options. */
299 tun_dst = ip_tun_rx_dst(skb, flags,
300 tun_id, sizeof(*md));
302 return PACKET_REJECT;
304 /* skb can be uncloned in __iptunnel_pull_header, so
305 * old pkt_md is no longer valid and we need to reset
/* Re-derive the in-packet metadata pointer from the (possibly new) head. */
308 gh = skb_network_header(skb) +
309 skb_network_header_len(skb);
310 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
/* Copy version-sized metadata from the packet into the tunnel info opts. */
312 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
315 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
318 info = &tun_dst->u.tun_info;
319 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
320 info->options_len = sizeof(*md);
323 skb_reset_mac_header(skb);
324 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
327 return PACKET_REJECT;
/* Common GRE receive: look up a tunnel in @itn, strip the tunnel header and
 * deliver the skb.  @raw_proto selects raw-protocol pulling in
 * __iptunnel_pull_header().  Returns PACKET_RCVD / PACKET_REJECT /
 * PACKET_NEXT (visible returns are partial — extraction gaps).
 */
334 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
335 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
337 struct metadata_dst *tun_dst = NULL;
338 const struct iphdr *iph;
339 struct ip_tunnel *tunnel;
342 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
343 iph->saddr, iph->daddr, tpi->key);
346 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
347 raw_proto, false) < 0)
/* ARPHRD_NONE devices (collect_md) keep the mac header reset instead of
 * popped — presumably because there is no link-layer header to skip.
 */
350 if (tunnel->dev->type != ARPHRD_NONE)
351 skb_pop_mac_header(skb);
353 skb_reset_mac_header(skb);
354 if (tunnel->collect_md) {
/* Build a metadata dst carrying csum/key flags and the tunnel id. */
358 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
359 tun_id = key32_to_tunnel_id(tpi->key);
360 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
362 return PACKET_REJECT;
365 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
/* Dispatch a received GRE packet to the right per-netns table: gretap for
 * ETH_P_TEB, plain ipgre otherwise.  If the gretap lookup says PACKET_NEXT,
 * retry against the ipgre table in raw mode so collect-metadata ipgre
 * devices also see ETH_P_TEB traffic.
 */
375 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
378 struct net *net = dev_net(skb->dev);
379 struct ip_tunnel_net *itn;
382 if (tpi->proto == htons(ETH_P_TEB))
383 itn = net_generic(net, gre_tap_net_id);
385 itn = net_generic(net, ipgre_net_id);
387 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
388 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
389 /* ipgre tunnels in collect metadata mode should receive
390 * also ETH_P_TEB traffic.
392 itn = net_generic(net, ipgre_net_id);
393 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
/* Top-level GRE protocol receive handler (registered with the GRE demux).
 * Parses the GRE header, rejects looped-back multicast, routes ERSPAN
 * payloads to erspan_rcv() and everything else to ipgre_rcv(); unclaimed
 * packets get an ICMP port-unreachable.  NOTE(review): the drop paths and
 * final return are in extraction gaps.
 */
398 static int gre_rcv(struct sk_buff *skb)
400 struct tnl_ptk_info tpi;
401 bool csum_err = false;
404 #ifdef CONFIG_NET_IPGRE_BROADCAST
405 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
406 /* Looped back packet, drop it! */
407 if (rt_is_output_route(skb_rtable(skb)))
412 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
416 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
417 tpi.proto == htons(ETH_P_ERSPAN2))) {
418 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
423 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
/* Nobody claimed the packet: tell the sender the port is unreachable. */
427 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* Shared transmit tail: push the GRE header (with optional sequence number
 * when TUNNEL_SEQ is set) and hand the skb to the generic IP tunnel xmit.
 */
433 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
434 const struct iphdr *tnl_params,
437 struct ip_tunnel *tunnel = netdev_priv(dev);
438 __be16 flags = tunnel->parms.o_flags;
440 /* Push GRE header. */
441 gre_build_header(skb, tunnel->tun_hlen,
442 flags, proto, tunnel->parms.o_key,
443 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
445 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
/* Prepare skb GSO/checksum offload state for GRE encapsulation; @csum
 * selects the checksummed GRE GSO type.
 */
448 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
450 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
/* Build a flowi4 from the tunnel key (dst/src/tos/mark, proto GRE) and do a
 * route lookup in the device's namespace.
 */
453 static struct rtable *gre_get_rt(struct sk_buff *skb,
454 struct net_device *dev,
456 const struct ip_tunnel_key *key)
458 struct net *net = dev_net(dev);
460 memset(fl, 0, sizeof(*fl));
461 fl->daddr = key->u.ipv4.dst;
462 fl->saddr = key->u.ipv4.src;
463 fl->flowi4_tos = RT_TOS(key->tos);
464 fl->flowi4_mark = skb->mark;
465 fl->flowi4_proto = IPPROTO_GRE;
467 return ip_route_output_key(net, fl);
/* Common setup for flow-based (collect_md) transmit: resolve the route
 * (using the per-tun_info dst cache when usable) and make sure the skb has
 * enough headroom for link layer + tunnel + IP headers.  On failure it
 * drops the skb and bumps tx_dropped (error paths are in extraction gaps).
 */
470 static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
471 struct net_device *dev,
475 struct ip_tunnel_info *tun_info;
476 const struct ip_tunnel_key *key;
477 struct rtable *rt = NULL;
482 tun_info = skb_tunnel_info(skb);
483 key = &tun_info->key;
484 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
/* Fast path: reuse a cached route; otherwise do a fresh lookup and cache it. */
487 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
489 rt = gre_get_rt(skb, dev, fl, key);
493 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
/* Headroom needed: link layer + dst header + tunnel + outer IP header. */
497 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
498 + tunnel_hlen + sizeof(struct iphdr);
499 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
500 int head_delta = SKB_DATA_ALIGN(min_headroom -
503 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
514 dev->stats.tx_dropped++;
/* Flow-based (collect_md) GRE transmit: validate the attached tun_info,
 * prepare the route/headroom, push the GRE header with key/seq from the
 * metadata, then emit the outer IPv4 packet.  Error/drop paths sit in
 * extraction gaps.
 */
518 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
521 struct ip_tunnel *tunnel = netdev_priv(dev);
522 struct ip_tunnel_info *tun_info;
523 const struct ip_tunnel_key *key;
524 struct rtable *rt = NULL;
/* Only IPv4 TX metadata is valid here. */
529 tun_info = skb_tunnel_info(skb);
530 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
531 ip_tunnel_info_af(tun_info) != AF_INET))
534 key = &tun_info->key;
535 tunnel_hlen = gre_calc_hlen(key->tun_flags);
537 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
541 /* Push Tunnel header. */
542 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
545 flags = tun_info->key.tun_flags &
546 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
547 gre_build_header(skb, tunnel_hlen, flags, proto,
548 tunnel_id_to_key32(tun_info->key.tun_id),
549 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
/* Honour the metadata's don't-fragment request on the outer header. */
551 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
553 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
554 key->tos, key->ttl, df, false);
561 dev->stats.tx_dropped++;
/* Flow-based ERSPAN transmit: validate tun_info and its ERSPAN options,
 * compute the header length for the metadata's ERSPAN version, truncate
 * oversized frames (setting the truncate flag), build the version-specific
 * ERSPAN header plus an 8-byte sequenced GRE header, and emit the outer
 * IPv4 packet.  Several declarations and error paths are in extraction gaps.
 */
564 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
566 struct ip_tunnel *tunnel = netdev_priv(dev);
567 struct ip_tunnel_info *tun_info;
568 const struct ip_tunnel_key *key;
569 struct erspan_metadata *md;
570 struct rtable *rt = NULL;
571 bool truncate = false;
/* Require IPv4 TX metadata carrying ERSPAN options of sufficient size. */
578 tun_info = skb_tunnel_info(skb);
579 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
580 ip_tunnel_info_af(tun_info) != AF_INET))
583 key = &tun_info->key;
584 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
586 if (tun_info->options_len < sizeof(*md))
588 md = ip_tunnel_info_opts(tun_info);
590 /* ERSPAN has fixed 8 byte GRE header */
591 version = md->version;
592 tunnel_hlen = 8 + erspan_hdr_len(version);
594 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
598 if (gre_handle_offloads(skb, false))
/* Mirrored frames larger than the device MTU are trimmed and flagged. */
601 if (skb->len > dev->mtu + dev->hard_header_len) {
602 pskb_trim(skb, dev->mtu + dev->hard_header_len);
/* Sanity-check inner L3 lengths against what the skb actually carries. */
606 nhoff = skb_network_header(skb) - skb_mac_header(skb);
607 if (skb->protocol == htons(ETH_P_IP) &&
608 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
611 if (skb->protocol == htons(ETH_P_IPV6)) {
614 if (skb_transport_header_was_set(skb))
615 thoff = skb_transport_header(skb) - skb_mac_header(skb);
617 thoff = nhoff + sizeof(struct ipv6hdr);
618 if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
/* v1 header carries the index field; v2 carries dir/hwid. */
623 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
624 ntohl(md->u.index), truncate, true);
625 proto = htons(ETH_P_ERSPAN);
626 } else if (version == 2) {
627 erspan_build_header_v2(skb,
628 ntohl(tunnel_id_to_key32(key->tun_id)),
630 get_hwid(&md->u.md2),
632 proto = htons(ETH_P_ERSPAN2);
/* Fixed 8-byte GRE header, always sequenced for ERSPAN. */
637 gre_build_header(skb, 8, TUNNEL_SEQ,
638 proto, 0, htonl(tunnel->o_seqno++));
640 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
642 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
643 key->tos, key->ttl, df, false);
650 dev->stats.tx_dropped++;
/* ndo_fill_metadata_dst: resolve the route for the skb's tunnel key and
 * fill in the outer source address (IPv4 only).
 */
653 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
655 struct ip_tunnel_info *info = skb_tunnel_info(skb);
659 if (ip_tunnel_info_af(info) != AF_INET)
662 rt = gre_get_rt(skb, dev, &fl4, &info->key);
/* Record the chosen source address back into the metadata. */
667 info->key.u.ipv4.src = fl4.saddr;
/* ndo_start_xmit for plain GRE devices.  collect_md devices go through the
 * flow-based path; devices with header_ops already have the outer header
 * pre-built by ipgre_header() and just need it pulled into place.
 * Free/drop paths sit in extraction gaps.
 */
671 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
672 struct net_device *dev)
674 struct ip_tunnel *tunnel = netdev_priv(dev);
675 const struct iphdr *tnl_params;
677 if (!pskb_inet_may_pull(skb))
680 if (tunnel->collect_md) {
681 gre_fb_xmit(skb, dev, skb->protocol);
685 if (dev->header_ops) {
686 if (skb_cow_head(skb, 0))
/* The pre-built outer IP header is at the front of the skb. */
689 tnl_params = (const struct iphdr *)skb->data;
691 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
694 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
695 skb_reset_mac_header(skb);
/* Don't let a partial-checksum start point land before skb->data. */
697 if (skb->ip_summed == CHECKSUM_PARTIAL &&
698 skb_checksum_start(skb) < skb->data)
701 if (skb_cow_head(skb, dev->needed_headroom))
704 tnl_params = &tunnel->parms.iph;
707 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
710 __gre_xmit(skb, dev, tnl_params, skb->protocol);
715 dev->stats.tx_dropped++;
/* ndo_start_xmit for configured (non-metadata) ERSPAN devices: trim
 * oversized frames, push the version-specific ERSPAN header using the
 * tunnel's o_key as session ID, then transmit via __gre_xmit with
 * TUNNEL_KEY cleared (the key lives in the ERSPAN header, not GRE).
 */
719 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
720 struct net_device *dev)
722 struct ip_tunnel *tunnel = netdev_priv(dev);
723 bool truncate = false;
726 if (!pskb_inet_may_pull(skb))
729 if (tunnel->collect_md) {
730 erspan_fb_xmit(skb, dev);
734 if (gre_handle_offloads(skb, false))
737 if (skb_cow_head(skb, dev->needed_headroom))
740 if (skb->len > dev->mtu + dev->hard_header_len) {
741 pskb_trim(skb, dev->mtu + dev->hard_header_len);
745 /* Push ERSPAN header */
746 if (tunnel->erspan_ver == 1) {
747 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
750 proto = htons(ETH_P_ERSPAN);
751 } else if (tunnel->erspan_ver == 2) {
752 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
753 tunnel->dir, tunnel->hwid,
755 proto = htons(ETH_P_ERSPAN2);
/* Session ID already encoded in the ERSPAN header; no GRE key. */
760 tunnel->parms.o_flags &= ~TUNNEL_KEY;
761 __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
766 dev->stats.tx_dropped++;
/* ndo_start_xmit for gretap (Ethernet-over-GRE) devices: payload protocol
 * is always ETH_P_TEB; collect_md devices use the flow-based path.
 */
770 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
771 struct net_device *dev)
773 struct ip_tunnel *tunnel = netdev_priv(dev);
775 if (!pskb_inet_may_pull(skb))
778 if (tunnel->collect_md) {
779 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
783 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
786 if (skb_cow_head(skb, dev->needed_headroom))
789 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
794 dev->stats.tx_dropped++;
/* Recompute header lengths after o_flags changed: adjust hlen, headroom,
 * hard_header_len and (when @set_mtu) the MTU by the delta, then re-derive
 * the GSO/LLTX feature flags.  68 is the minimum IPv4 MTU floor.
 */
798 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
800 struct ip_tunnel *tunnel = netdev_priv(dev);
/* len becomes the delta between the new and old GRE header length. */
803 len = tunnel->tun_hlen;
804 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
805 len = tunnel->tun_hlen - len;
806 tunnel->hlen = tunnel->hlen + len;
809 dev->hard_header_len += len;
811 dev->needed_headroom += len;
814 dev->mtu = max_t(int, dev->mtu - len, 68);
/* GSO is only possible without SEQ, and without CSUM unless there is no
 * extra encap layer; SEQ also forces serialized (non-LLTX) transmit.
 */
816 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
817 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
818 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
819 dev->features |= NETIF_F_GSO_SOFTWARE;
820 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
822 dev->features &= ~NETIF_F_GSO_SOFTWARE;
823 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
825 dev->features |= NETIF_F_LLTX;
827 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
828 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
/* Legacy SIOC{ADD,CHG,DEL,GET}TUNNEL ioctl handler: validate the userspace
 * parameters, translate wire-format GRE flags to internal tunnel flags,
 * run the generic ip_tunnel_ioctl(), and copy the (re-translated) result
 * back to userspace.
 */
832 static int ipgre_tunnel_ioctl(struct net_device *dev,
833 struct ifreq *ifr, int cmd)
835 struct ip_tunnel_parm p;
838 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
/* Only sane IPv4/GRE headers, DF-only frag bits, and no GRE version or
 * routing flags are accepted for add/change.
 */
841 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
842 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
843 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
844 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
848 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
849 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
851 err = ip_tunnel_ioctl(dev, &p, cmd);
855 if (cmd == SIOCCHGTUNNEL) {
856 struct ip_tunnel *t = netdev_priv(dev);
858 t->parms.i_flags = p.i_flags;
859 t->parms.o_flags = p.o_flags;
/* ERSPAN devices manage their own header length; skip the update. */
861 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
862 ipgre_link_update(dev, true);
/* Translate back to wire format before returning to userspace. */
865 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
866 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
868 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
874 /* Nice toy. Unfortunately, useless in real life :-)
875 It allows to construct virtual multiprotocol broadcast "LAN"
876 over the Internet, provided multicast routing is tuned.
879 I have no idea whether this bicycle was invented before me,
880 so that I had to set ARPHRD_IPGRE to a random value.
881 I have an impression, that Cisco could make something similar,
882 but this feature is apparently missing in IOS<=11.2(8).
884 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
885 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
887 ping -t 255 224.66.66.66
889 If nobody answers, mbone does not work.
891 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
892 ip addr add 10.66.66.<somewhat>/24 dev Universe
894 ifconfig Universe add fe80::<Your_real_addr>/10
895 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
898 ftp fec0:6666:6666::193.233.7.65
/* header_ops->create for broadcast-capable GRE devices: pre-build the outer
 * IP + GRE headers at the front of the skb (consumed later by ipgre_xmit).
 * Returns the pushed length, or its negative when the destination address
 * was not supplied (per header_ops convention).
 */
901 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
903 const void *daddr, const void *saddr, unsigned int len)
905 struct ip_tunnel *t = netdev_priv(dev);
907 struct gre_base_hdr *greh;
909 iph = skb_push(skb, t->hlen + sizeof(*iph));
910 greh = (struct gre_base_hdr *)(iph+1);
911 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
912 greh->protocol = htons(type);
/* Start from the tunnel's template header, then override addresses. */
914 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
916 /* Set the source hardware address. */
918 memcpy(&iph->saddr, saddr, 4);
920 memcpy(&iph->daddr, daddr, 4);
922 return t->hlen + sizeof(*iph);
924 return -(t->hlen + sizeof(*iph));
/* header_ops->parse: report the outer IPv4 source address as the device's
 * "hardware" address.
 */
927 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
929 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
930 memcpy(haddr, &iph->saddr, 4);
/* Link-layer header ops for broadcast GRE devices. */
934 static const struct header_ops ipgre_header_ops = {
935 .create = ipgre_header,
936 .parse = ipgre_header_parse,
939 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* ndo_open for multicast GRE tunnels: verify the multicast destination is
 * routable and join the group on the underlying device, remembering its
 * ifindex in t->mlink so ipgre_close() can leave the group.
 */
940 static int ipgre_open(struct net_device *dev)
942 struct ip_tunnel *t = netdev_priv(dev);
944 if (ipv4_is_multicast(t->parms.iph.daddr)) {
948 rt = ip_route_output_gre(t->net, &fl4,
952 RT_TOS(t->parms.iph.tos),
955 return -EADDRNOTAVAIL;
958 if (!__in_dev_get_rtnl(dev))
959 return -EADDRNOTAVAIL;
960 t->mlink = dev->ifindex;
961 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ndo_stop: leave the multicast group joined in ipgre_open(). */
966 static int ipgre_close(struct net_device *dev)
968 struct ip_tunnel *t = netdev_priv(dev);
970 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
971 struct in_device *in_dev;
972 in_dev = inetdev_by_index(t->net, t->mlink);
974 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* Net-device operations for plain (layer-3) GRE tunnels. */
980 static const struct net_device_ops ipgre_netdev_ops = {
981 .ndo_init = ipgre_tunnel_init,
982 .ndo_uninit = ip_tunnel_uninit,
983 #ifdef CONFIG_NET_IPGRE_BROADCAST
984 .ndo_open = ipgre_open,
985 .ndo_stop = ipgre_close,
987 .ndo_start_xmit = ipgre_xmit,
988 .ndo_do_ioctl = ipgre_tunnel_ioctl,
989 .ndo_change_mtu = ip_tunnel_change_mtu,
990 .ndo_get_stats64 = ip_tunnel_get_stats64,
991 .ndo_get_iflink = ip_tunnel_get_iflink,
/* Baseline feature set advertised by all GRE variants (list continues in
 * lines missing from this extraction).
 */
994 #define GRE_FEATURES (NETIF_F_SG | \
/* rtnl setup callback for plain GRE devices. */
999 static void ipgre_tunnel_setup(struct net_device *dev)
1001 dev->netdev_ops = &ipgre_netdev_ops;
1002 dev->type = ARPHRD_IPGRE;
1003 ip_tunnel_setup(dev, ipgre_net_id);
/* Shared init for gre/gretap devices: compute header lengths from o_flags
 * and set feature flags (GSO only when SEQ is off and CSUM does not stack
 * on another encap; LLTX when transmit needs no serialization).
 */
1006 static void __gre_tunnel_init(struct net_device *dev)
1008 struct ip_tunnel *tunnel;
1010 tunnel = netdev_priv(dev);
1011 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
1012 tunnel->parms.iph.protocol = IPPROTO_GRE;
1014 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
1015 dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
1017 dev->features |= GRE_FEATURES;
1018 dev->hw_features |= GRE_FEATURES;
1020 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
1021 /* TCP offload with GRE SEQ is not supported, nor
1022 * can we support 2 levels of outer headers requiring
1025 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
1026 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
1027 dev->features |= NETIF_F_GSO_SOFTWARE;
1028 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1031 /* Can use a lockless transmit, unless we generate
1034 dev->features |= NETIF_F_LLTX;
/* ndo_init for plain GRE devices: derive dev/broadcast addresses from the
 * tunnel endpoints; point-to-point/multicast tunnels (with a remote set and
 * not collect_md) install header_ops so the outer header is pre-built.
 */
1038 static int ipgre_tunnel_init(struct net_device *dev)
1040 struct ip_tunnel *tunnel = netdev_priv(dev);
1041 struct iphdr *iph = &tunnel->parms.iph;
1043 __gre_tunnel_init(dev);
/* The tunnel's IPv4 endpoints double as the link-layer addresses. */
1045 memcpy(dev->dev_addr, &iph->saddr, 4);
1046 memcpy(dev->broadcast, &iph->daddr, 4);
1048 dev->flags = IFF_NOARP;
1049 netif_keep_dst(dev);
1052 if (iph->daddr && !tunnel->collect_md) {
1053 #ifdef CONFIG_NET_IPGRE_BROADCAST
1054 if (ipv4_is_multicast(iph->daddr)) {
/* Multicast remote: behave as a broadcast "LAN" device. */
1057 dev->flags = IFF_BROADCAST;
1058 dev->header_ops = &ipgre_header_ops;
1059 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1060 dev->needed_headroom = 0;
1063 } else if (!tunnel->collect_md) {
1064 dev->header_ops = &ipgre_header_ops;
1065 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1066 dev->needed_headroom = 0;
1069 return ip_tunnel_init(dev);
/* GRE demux registration: error handler (the .handler entry is in a line
 * missing from this extraction).
 */
1072 static const struct gre_protocol ipgre_protocol = {
1074 .err_handler = gre_err,
/* Per-namespace lifecycle for the plain GRE tunnel table. */
1077 static int __net_init ipgre_init_net(struct net *net)
1079 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1082 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1084 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1087 static struct pernet_operations ipgre_net_ops = {
1088 .init = ipgre_init_net,
1089 .exit_batch = ipgre_exit_batch_net,
1090 .id = &ipgre_net_id,
1091 .size = sizeof(struct ip_tunnel_net),
/* Netlink validation for plain GRE: reject GRE version/routing flags and
 * the combination of collect-metadata with a non-NONE encap type.
 */
1094 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1095 struct netlink_ext_ack *extack)
1103 if (data[IFLA_GRE_IFLAGS])
1104 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1105 if (data[IFLA_GRE_OFLAGS])
1106 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1107 if (flags & (GRE_VERSION|GRE_ROUTING))
1110 if (data[IFLA_GRE_COLLECT_METADATA] &&
1111 data[IFLA_GRE_ENCAP_TYPE] &&
1112 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
/* Netlink validation for gretap: additionally require a well-formed
 * Ethernet address and (per the visible remote check) a usable remote,
 * then fall through to the common GRE validation.
 */
1118 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1119 struct netlink_ext_ack *extack)
1123 if (tb[IFLA_ADDRESS]) {
1124 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1126 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1127 return -EADDRNOTAVAIL;
1133 if (data[IFLA_GRE_REMOTE]) {
1134 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1140 return ipgre_tunnel_validate(tb, data, extack);
/* Netlink validation for erspan devices: run the gretap checks, then
 * require exactly GRE_SEQ|GRE_KEY flags (unless collect-metadata) and
 * session IDs that fit in the 10-bit ERSPAN ID field.
 */
1143 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1144 struct netlink_ext_ack *extack)
1152 ret = ipgre_tap_validate(tb, data, extack);
1156 /* ERSPAN should only have GRE sequence and key flag */
1157 if (data[IFLA_GRE_OFLAGS])
1158 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1159 if (data[IFLA_GRE_IFLAGS])
1160 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1161 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1162 flags != (GRE_SEQ | GRE_KEY))
1165 /* ERSPAN Session ID only has 10-bit. Since we reuse
1166 * 32-bit key field as ID, check its range.
1168 if (data[IFLA_GRE_IKEY] &&
1169 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1172 if (data[IFLA_GRE_OKEY] &&
1173 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
/* Translate IFLA_GRE_* netlink attributes into ip_tunnel_parm fields and
 * per-tunnel state (collect_md, ignore_df, fwmark).  Absent attributes
 * keep their zeroed defaults; PMTUDISC defaults to on (DF set).
 */
1179 static int ipgre_netlink_parms(struct net_device *dev,
1180 struct nlattr *data[],
1181 struct nlattr *tb[],
1182 struct ip_tunnel_parm *parms,
1185 struct ip_tunnel *t = netdev_priv(dev);
1187 memset(parms, 0, sizeof(*parms));
1189 parms->iph.protocol = IPPROTO_GRE;
1194 if (data[IFLA_GRE_LINK])
1195 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
/* i/o flags arrive in GRE wire format and are converted to tunnel flags. */
1197 if (data[IFLA_GRE_IFLAGS])
1198 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1200 if (data[IFLA_GRE_OFLAGS])
1201 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1203 if (data[IFLA_GRE_IKEY])
1204 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1206 if (data[IFLA_GRE_OKEY])
1207 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1209 if (data[IFLA_GRE_LOCAL])
1210 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1212 if (data[IFLA_GRE_REMOTE])
1213 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1215 if (data[IFLA_GRE_TTL])
1216 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1218 if (data[IFLA_GRE_TOS])
1219 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
/* Path MTU discovery on by default: set DF on the outer header. */
1221 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1224 parms->iph.frag_off = htons(IP_DF);
/* collect_md devices have no fixed link-layer identity. */
1227 if (data[IFLA_GRE_COLLECT_METADATA]) {
1228 t->collect_md = true;
1229 if (dev->type == ARPHRD_IPGRE)
1230 dev->type = ARPHRD_NONE;
/* ignore_df conflicts with an explicit DF request (visible check). */
1233 if (data[IFLA_GRE_IGNORE_DF]) {
1234 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1235 && (parms->iph.frag_off & htons(IP_DF)))
1237 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1240 if (data[IFLA_GRE_FWMARK])
1241 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
/* ERSPAN-specific netlink parsing on top of ipgre_netlink_parms():
 * validate erspan_ver (1 or 2) and the version-specific fields —
 * index for v1, dir/hwid for v2 — against their bit masks.
 */
1246 static int erspan_netlink_parms(struct net_device *dev,
1247 struct nlattr *data[],
1248 struct nlattr *tb[],
1249 struct ip_tunnel_parm *parms,
1252 struct ip_tunnel *t = netdev_priv(dev);
1255 err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1261 if (data[IFLA_GRE_ERSPAN_VER]) {
1262 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1264 if (t->erspan_ver != 1 && t->erspan_ver != 2)
1268 if (t->erspan_ver == 1) {
1269 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1270 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1271 if (t->index & ~INDEX_MASK)
1274 } else if (t->erspan_ver == 2) {
1275 if (data[IFLA_GRE_ERSPAN_DIR]) {
1276 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1277 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1280 if (data[IFLA_GRE_ERSPAN_HWID]) {
1281 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1282 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1290 /* This function returns true when ENCAP attributes are present in the nl msg */
1291 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1292 struct ip_tunnel_encap *ipencap)
1296 memset(ipencap, 0, sizeof(*ipencap));
/* Each present attribute fills its field (and, per the original source,
 * flips a "present" flag whose lines are missing from this extraction).
 */
1301 if (data[IFLA_GRE_ENCAP_TYPE]) {
1303 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1306 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1308 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1311 if (data[IFLA_GRE_ENCAP_SPORT]) {
1313 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1316 if (data[IFLA_GRE_ENCAP_DPORT]) {
1318 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
/* ndo_init for gretap devices: common GRE init plus live-address-change
 * support (Ethernet MAC can be changed while up).
 */
1324 static int gre_tap_init(struct net_device *dev)
1326 __gre_tunnel_init(dev);
1327 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1328 netif_keep_dst(dev);
1330 return ip_tunnel_init(dev);
/* Net-device operations for gretap (Ethernet-over-GRE) devices. */
1333 static const struct net_device_ops gre_tap_netdev_ops = {
1334 .ndo_init = gre_tap_init,
1335 .ndo_uninit = ip_tunnel_uninit,
1336 .ndo_start_xmit = gre_tap_xmit,
1337 .ndo_set_mac_address = eth_mac_addr,
1338 .ndo_validate_addr = eth_validate_addr,
1339 .ndo_change_mtu = ip_tunnel_change_mtu,
1340 .ndo_get_stats64 = ip_tunnel_get_stats64,
1341 .ndo_get_iflink = ip_tunnel_get_iflink,
1342 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* ndo_init for erspan devices: fixed 8-byte GRE header plus the
 * version-dependent ERSPAN header length.
 */
1345 static int erspan_tunnel_init(struct net_device *dev)
1347 struct ip_tunnel *tunnel = netdev_priv(dev);
1349 tunnel->tun_hlen = 8;
1350 tunnel->parms.iph.protocol = IPPROTO_GRE;
1351 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1352 erspan_hdr_len(tunnel->erspan_ver);
1354 dev->features |= GRE_FEATURES;
1355 dev->hw_features |= GRE_FEATURES;
1356 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1357 netif_keep_dst(dev);
1359 return ip_tunnel_init(dev);
/* Net-device operations for erspan devices. */
1362 static const struct net_device_ops erspan_netdev_ops = {
1363 .ndo_init = erspan_tunnel_init,
1364 .ndo_uninit = ip_tunnel_uninit,
1365 .ndo_start_xmit = erspan_xmit,
1366 .ndo_set_mac_address = eth_mac_addr,
1367 .ndo_validate_addr = eth_validate_addr,
1368 .ndo_change_mtu = ip_tunnel_change_mtu,
1369 .ndo_get_stats64 = ip_tunnel_get_stats64,
1370 .ndo_get_iflink = ip_tunnel_get_iflink,
1371 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* rtnl setup for gretap: Ethernet device semantics, no skb sharing,
 * live address change, registered in the gretap per-netns table.
 */
1374 static void ipgre_tap_setup(struct net_device *dev)
1378 dev->netdev_ops = &gre_tap_netdev_ops;
1379 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1380 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1381 ip_tunnel_setup(dev, gre_tap_net_id);
/* Exported helper: identify a gretap device by its ops table. */
1384 bool is_gretap_dev(const struct net_device *dev)
1386 return dev->netdev_ops == &gre_tap_netdev_ops;
1388 EXPORT_SYMBOL_GPL(is_gretap_dev);
1391 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1393 struct ip_tunnel_encap ipencap;
1395 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1396 struct ip_tunnel *t = netdev_priv(dev);
1397 int err = ip_tunnel_encap_setup(t, &ipencap);
1406 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1407 struct nlattr *tb[], struct nlattr *data[],
1408 struct netlink_ext_ack *extack)
1410 struct ip_tunnel_parm p;
1414 err = ipgre_newlink_encap_setup(dev, data);
1418 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1421 return ip_tunnel_newlink(dev, tb, &p, fwmark);
1424 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1425 struct nlattr *tb[], struct nlattr *data[],
1426 struct netlink_ext_ack *extack)
1428 struct ip_tunnel_parm p;
1432 err = ipgre_newlink_encap_setup(dev, data);
1436 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1439 return ip_tunnel_newlink(dev, tb, &p, fwmark);
1442 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1443 struct nlattr *data[],
1444 struct netlink_ext_ack *extack)
1446 struct ip_tunnel *t = netdev_priv(dev);
1447 __u32 fwmark = t->fwmark;
1448 struct ip_tunnel_parm p;
1451 err = ipgre_newlink_encap_setup(dev, data);
1455 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1459 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1463 t->parms.i_flags = p.i_flags;
1464 t->parms.o_flags = p.o_flags;
1466 ipgre_link_update(dev, !tb[IFLA_MTU]);
1471 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1472 struct nlattr *data[],
1473 struct netlink_ext_ack *extack)
1475 struct ip_tunnel *t = netdev_priv(dev);
1476 __u32 fwmark = t->fwmark;
1477 struct ip_tunnel_parm p;
1480 err = ipgre_newlink_encap_setup(dev, data);
1484 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1488 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1492 t->parms.i_flags = p.i_flags;
1493 t->parms.o_flags = p.o_flags;
1498 static size_t ipgre_get_size(const struct net_device *dev)
1503 /* IFLA_GRE_IFLAGS */
1505 /* IFLA_GRE_OFLAGS */
1511 /* IFLA_GRE_LOCAL */
1513 /* IFLA_GRE_REMOTE */
1519 /* IFLA_GRE_PMTUDISC */
1521 /* IFLA_GRE_ENCAP_TYPE */
1523 /* IFLA_GRE_ENCAP_FLAGS */
1525 /* IFLA_GRE_ENCAP_SPORT */
1527 /* IFLA_GRE_ENCAP_DPORT */
1529 /* IFLA_GRE_COLLECT_METADATA */
1531 /* IFLA_GRE_IGNORE_DF */
1533 /* IFLA_GRE_FWMARK */
1535 /* IFLA_GRE_ERSPAN_INDEX */
1537 /* IFLA_GRE_ERSPAN_VER */
1539 /* IFLA_GRE_ERSPAN_DIR */
1541 /* IFLA_GRE_ERSPAN_HWID */
1546 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1548 struct ip_tunnel *t = netdev_priv(dev);
1549 struct ip_tunnel_parm *p = &t->parms;
1550 __be16 o_flags = p->o_flags;
1552 if (t->erspan_ver == 1 || t->erspan_ver == 2) {
1554 o_flags |= TUNNEL_KEY;
1556 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1557 goto nla_put_failure;
1559 if (t->erspan_ver == 1) {
1560 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1561 goto nla_put_failure;
1563 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1564 goto nla_put_failure;
1565 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1566 goto nla_put_failure;
1570 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1571 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1572 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1573 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1574 gre_tnl_flags_to_gre_flags(o_flags)) ||
1575 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1576 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1577 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1578 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1579 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1580 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1581 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1582 !!(p->iph.frag_off & htons(IP_DF))) ||
1583 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1584 goto nla_put_failure;
1586 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1588 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1590 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1592 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1594 goto nla_put_failure;
1596 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1597 goto nla_put_failure;
1599 if (t->collect_md) {
1600 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1601 goto nla_put_failure;
1610 static void erspan_setup(struct net_device *dev)
1612 struct ip_tunnel *t = netdev_priv(dev);
1616 dev->netdev_ops = &erspan_netdev_ops;
1617 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1618 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1619 ip_tunnel_setup(dev, erspan_net_id);
1623 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1624 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1625 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1626 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1627 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1628 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1629 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1630 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1631 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1632 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1633 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1634 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1635 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1636 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1637 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
1638 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1639 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1640 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
1641 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1642 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1643 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1644 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
1647 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1649 .maxtype = IFLA_GRE_MAX,
1650 .policy = ipgre_policy,
1651 .priv_size = sizeof(struct ip_tunnel),
1652 .setup = ipgre_tunnel_setup,
1653 .validate = ipgre_tunnel_validate,
1654 .newlink = ipgre_newlink,
1655 .changelink = ipgre_changelink,
1656 .dellink = ip_tunnel_dellink,
1657 .get_size = ipgre_get_size,
1658 .fill_info = ipgre_fill_info,
1659 .get_link_net = ip_tunnel_get_link_net,
1662 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1664 .maxtype = IFLA_GRE_MAX,
1665 .policy = ipgre_policy,
1666 .priv_size = sizeof(struct ip_tunnel),
1667 .setup = ipgre_tap_setup,
1668 .validate = ipgre_tap_validate,
1669 .newlink = ipgre_newlink,
1670 .changelink = ipgre_changelink,
1671 .dellink = ip_tunnel_dellink,
1672 .get_size = ipgre_get_size,
1673 .fill_info = ipgre_fill_info,
1674 .get_link_net = ip_tunnel_get_link_net,
1677 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1679 .maxtype = IFLA_GRE_MAX,
1680 .policy = ipgre_policy,
1681 .priv_size = sizeof(struct ip_tunnel),
1682 .setup = erspan_setup,
1683 .validate = erspan_validate,
1684 .newlink = erspan_newlink,
1685 .changelink = erspan_changelink,
1686 .dellink = ip_tunnel_dellink,
1687 .get_size = ipgre_get_size,
1688 .fill_info = ipgre_fill_info,
1689 .get_link_net = ip_tunnel_get_link_net,
1692 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1693 u8 name_assign_type)
1695 struct nlattr *tb[IFLA_MAX + 1];
1696 struct net_device *dev;
1697 LIST_HEAD(list_kill);
1698 struct ip_tunnel *t;
1701 memset(&tb, 0, sizeof(tb));
1703 dev = rtnl_create_link(net, name, name_assign_type,
1704 &ipgre_tap_ops, tb);
1708 /* Configure flow based GRE device. */
1709 t = netdev_priv(dev);
1710 t->collect_md = true;
1712 err = ipgre_newlink(net, dev, tb, NULL, NULL);
1715 return ERR_PTR(err);
1718 /* openvswitch users expect packet sizes to be unrestricted,
1719 * so set the largest MTU we can.
1721 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1725 err = rtnl_configure_link(dev, NULL);
1731 ip_tunnel_dellink(dev, &list_kill);
1732 unregister_netdevice_many(&list_kill);
1733 return ERR_PTR(err);
1735 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1737 static int __net_init ipgre_tap_init_net(struct net *net)
1739 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1742 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1744 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1747 static struct pernet_operations ipgre_tap_net_ops = {
1748 .init = ipgre_tap_init_net,
1749 .exit_batch = ipgre_tap_exit_batch_net,
1750 .id = &gre_tap_net_id,
1751 .size = sizeof(struct ip_tunnel_net),
1754 static int __net_init erspan_init_net(struct net *net)
1756 return ip_tunnel_init_net(net, erspan_net_id,
1757 &erspan_link_ops, "erspan0");
1760 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1762 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1765 static struct pernet_operations erspan_net_ops = {
1766 .init = erspan_init_net,
1767 .exit_batch = erspan_exit_batch_net,
1768 .id = &erspan_net_id,
1769 .size = sizeof(struct ip_tunnel_net),
1772 static int __init ipgre_init(void)
1776 pr_info("GRE over IPv4 tunneling driver\n");
1778 err = register_pernet_device(&ipgre_net_ops);
1782 err = register_pernet_device(&ipgre_tap_net_ops);
1784 goto pnet_tap_failed;
1786 err = register_pernet_device(&erspan_net_ops);
1788 goto pnet_erspan_failed;
1790 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1792 pr_info("%s: can't add protocol\n", __func__);
1793 goto add_proto_failed;
1796 err = rtnl_link_register(&ipgre_link_ops);
1798 goto rtnl_link_failed;
1800 err = rtnl_link_register(&ipgre_tap_ops);
1802 goto tap_ops_failed;
1804 err = rtnl_link_register(&erspan_link_ops);
1806 goto erspan_link_failed;
1811 rtnl_link_unregister(&ipgre_tap_ops);
1813 rtnl_link_unregister(&ipgre_link_ops);
1815 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1817 unregister_pernet_device(&erspan_net_ops);
1819 unregister_pernet_device(&ipgre_tap_net_ops);
1821 unregister_pernet_device(&ipgre_net_ops);
1825 static void __exit ipgre_fini(void)
1827 rtnl_link_unregister(&ipgre_tap_ops);
1828 rtnl_link_unregister(&ipgre_link_ops);
1829 rtnl_link_unregister(&erspan_link_ops);
1830 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1831 unregister_pernet_device(&ipgre_tap_net_ops);
1832 unregister_pernet_device(&ipgre_net_ops);
1833 unregister_pernet_device(&erspan_net_ops);
/* Module entry/exit points and aliases so "ip link add type gre|gretap|
 * erspan" and the legacy gre0/gretap0/erspan0 device names autoload
 * this module.
 */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");