GNU Linux-libre 4.19.264-gnu1
[releases.git] / drivers / net / ipvlan / ipvlan_core.c
1 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU General Public License as
5  * published by the Free Software Foundation; either version 2 of
6  * the License, or (at your option) any later version.
7  *
8  */
9
10 #include "ipvlan.h"
11
12 static u32 ipvlan_jhash_secret __read_mostly;
13
14 void ipvlan_init_secret(void)
15 {
16         net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
17 }
18
19 void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
20                             unsigned int len, bool success, bool mcast)
21 {
22         if (likely(success)) {
23                 struct ipvl_pcpu_stats *pcptr;
24
25                 pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
26                 u64_stats_update_begin(&pcptr->syncp);
27                 pcptr->rx_pkts++;
28                 pcptr->rx_bytes += len;
29                 if (mcast)
30                         pcptr->rx_mcast++;
31                 u64_stats_update_end(&pcptr->syncp);
32         } else {
33                 this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
34         }
35 }
36 EXPORT_SYMBOL_GPL(ipvlan_count_rx);
37
38 #if IS_ENABLED(CONFIG_IPV6)
39 static u8 ipvlan_get_v6_hash(const void *iaddr)
40 {
41         const struct in6_addr *ip6_addr = iaddr;
42
43         return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
44                IPVLAN_HASH_MASK;
45 }
46 #else
47 static u8 ipvlan_get_v6_hash(const void *iaddr)
48 {
49         return 0;
50 }
51 #endif
52
53 static u8 ipvlan_get_v4_hash(const void *iaddr)
54 {
55         const struct in_addr *ip4_addr = iaddr;
56
57         return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
58                IPVLAN_HASH_MASK;
59 }
60
61 static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
62 {
63         if (!is_v6 && addr->atype == IPVL_IPV4) {
64                 struct in_addr *i4addr = (struct in_addr *)iaddr;
65
66                 return addr->ip4addr.s_addr == i4addr->s_addr;
67 #if IS_ENABLED(CONFIG_IPV6)
68         } else if (is_v6 && addr->atype == IPVL_IPV6) {
69                 struct in6_addr *i6addr = (struct in6_addr *)iaddr;
70
71                 return ipv6_addr_equal(&addr->ip6addr, i6addr);
72 #endif
73         }
74
75         return false;
76 }
77
78 static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
79                                                const void *iaddr, bool is_v6)
80 {
81         struct ipvl_addr *addr;
82         u8 hash;
83
84         hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
85                ipvlan_get_v4_hash(iaddr);
86         hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
87                 if (addr_equal(is_v6, addr, iaddr))
88                         return addr;
89         return NULL;
90 }
91
92 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
93 {
94         struct ipvl_port *port = ipvlan->port;
95         u8 hash;
96
97         hash = (addr->atype == IPVL_IPV6) ?
98                ipvlan_get_v6_hash(&addr->ip6addr) :
99                ipvlan_get_v4_hash(&addr->ip4addr);
100         if (hlist_unhashed(&addr->hlnode))
101                 hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
102 }
103
104 void ipvlan_ht_addr_del(struct ipvl_addr *addr)
105 {
106         hlist_del_init_rcu(&addr->hlnode);
107 }
108
109 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
110                                    const void *iaddr, bool is_v6)
111 {
112         struct ipvl_addr *addr, *ret = NULL;
113
114         rcu_read_lock();
115         list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
116                 if (addr_equal(is_v6, addr, iaddr)) {
117                         ret = addr;
118                         break;
119                 }
120         }
121         rcu_read_unlock();
122         return ret;
123 }
124
125 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
126 {
127         struct ipvl_dev *ipvlan;
128         bool ret = false;
129
130         rcu_read_lock();
131         list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
132                 if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
133                         ret = true;
134                         break;
135                 }
136         }
137         rcu_read_unlock();
138         return ret;
139 }
140
141 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
142 {
143         void *lyr3h = NULL;
144
145         switch (skb->protocol) {
146         case htons(ETH_P_ARP): {
147                 struct arphdr *arph;
148
149                 if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
150                         return NULL;
151
152                 arph = arp_hdr(skb);
153                 *type = IPVL_ARP;
154                 lyr3h = arph;
155                 break;
156         }
157         case htons(ETH_P_IP): {
158                 u32 pktlen;
159                 struct iphdr *ip4h;
160
161                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
162                         return NULL;
163
164                 ip4h = ip_hdr(skb);
165                 pktlen = ntohs(ip4h->tot_len);
166                 if (ip4h->ihl < 5 || ip4h->version != 4)
167                         return NULL;
168                 if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
169                         return NULL;
170
171                 *type = IPVL_IPV4;
172                 lyr3h = ip4h;
173                 break;
174         }
175 #if IS_ENABLED(CONFIG_IPV6)
176         case htons(ETH_P_IPV6): {
177                 struct ipv6hdr *ip6h;
178
179                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
180                         return NULL;
181
182                 ip6h = ipv6_hdr(skb);
183                 if (ip6h->version != 6)
184                         return NULL;
185
186                 *type = IPVL_IPV6;
187                 lyr3h = ip6h;
188                 /* Only Neighbour Solicitation pkts need different treatment */
189                 if (ipv6_addr_any(&ip6h->saddr) &&
190                     ip6h->nexthdr == NEXTHDR_ICMP) {
191                         struct icmp6hdr *icmph;
192
193                         if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
194                                 return NULL;
195
196                         ip6h = ipv6_hdr(skb);
197                         icmph = (struct icmp6hdr *)(ip6h + 1);
198
199                         if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
200                                 /* Need to access the ipv6 address in body */
201                                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
202                                                 + sizeof(struct in6_addr))))
203                                         return NULL;
204
205                                 ip6h = ipv6_hdr(skb);
206                                 icmph = (struct icmp6hdr *)(ip6h + 1);
207                         }
208
209                         *type = IPVL_ICMPV6;
210                         lyr3h = icmph;
211                 }
212                 break;
213         }
214 #endif
215         default:
216                 return NULL;
217         }
218
219         return lyr3h;
220 }
221
222 unsigned int ipvlan_mac_hash(const unsigned char *addr)
223 {
224         u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
225                                ipvlan_jhash_secret);
226
227         return hash & IPVLAN_MAC_FILTER_MASK;
228 }
229
230 void ipvlan_process_multicast(struct work_struct *work)
231 {
232         struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
233         struct ethhdr *ethh;
234         struct ipvl_dev *ipvlan;
235         struct sk_buff *skb, *nskb;
236         struct sk_buff_head list;
237         unsigned int len;
238         unsigned int mac_hash;
239         int ret;
240         u8 pkt_type;
241         bool tx_pkt;
242
243         __skb_queue_head_init(&list);
244
245         spin_lock_bh(&port->backlog.lock);
246         skb_queue_splice_tail_init(&port->backlog, &list);
247         spin_unlock_bh(&port->backlog.lock);
248
249         while ((skb = __skb_dequeue(&list)) != NULL) {
250                 struct net_device *dev = skb->dev;
251                 bool consumed = false;
252
253                 ethh = eth_hdr(skb);
254                 tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
255                 mac_hash = ipvlan_mac_hash(ethh->h_dest);
256
257                 if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
258                         pkt_type = PACKET_BROADCAST;
259                 else
260                         pkt_type = PACKET_MULTICAST;
261
262                 rcu_read_lock();
263                 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
264                         if (tx_pkt && (ipvlan->dev == skb->dev))
265                                 continue;
266                         if (!test_bit(mac_hash, ipvlan->mac_filters))
267                                 continue;
268                         if (!(ipvlan->dev->flags & IFF_UP))
269                                 continue;
270                         ret = NET_RX_DROP;
271                         len = skb->len + ETH_HLEN;
272                         nskb = skb_clone(skb, GFP_ATOMIC);
273                         local_bh_disable();
274                         if (nskb) {
275                                 consumed = true;
276                                 nskb->pkt_type = pkt_type;
277                                 nskb->dev = ipvlan->dev;
278                                 if (tx_pkt)
279                                         ret = dev_forward_skb(ipvlan->dev, nskb);
280                                 else
281                                         ret = netif_rx(nskb);
282                         }
283                         ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
284                         local_bh_enable();
285                 }
286                 rcu_read_unlock();
287
288                 if (tx_pkt) {
289                         /* If the packet originated here, send it out. */
290                         skb->dev = port->dev;
291                         skb->pkt_type = pkt_type;
292                         dev_queue_xmit(skb);
293                 } else {
294                         if (consumed)
295                                 consume_skb(skb);
296                         else
297                                 kfree_skb(skb);
298                 }
299                 if (dev)
300                         dev_put(dev);
301                 cond_resched();
302         }
303 }
304
305 static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
306 {
307         bool xnet = true;
308
309         if (dev)
310                 xnet = !net_eq(dev_net(skb->dev), dev_net(dev));
311
312         skb_scrub_packet(skb, xnet);
313         if (dev)
314                 skb->dev = dev;
315 }
316
317 static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
318                             bool local)
319 {
320         struct ipvl_dev *ipvlan = addr->master;
321         struct net_device *dev = ipvlan->dev;
322         unsigned int len;
323         rx_handler_result_t ret = RX_HANDLER_CONSUMED;
324         bool success = false;
325         struct sk_buff *skb = *pskb;
326
327         len = skb->len + ETH_HLEN;
328         /* Only packets exchanged between two local slaves need to have
329          * device-up check as well as skb-share check.
330          */
331         if (local) {
332                 if (unlikely(!(dev->flags & IFF_UP))) {
333                         kfree_skb(skb);
334                         goto out;
335                 }
336
337                 skb = skb_share_check(skb, GFP_ATOMIC);
338                 if (!skb)
339                         goto out;
340
341                 *pskb = skb;
342         }
343
344         if (local) {
345                 skb->pkt_type = PACKET_HOST;
346                 if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
347                         success = true;
348         } else {
349                 skb->dev = dev;
350                 ret = RX_HANDLER_ANOTHER;
351                 success = true;
352         }
353
354 out:
355         ipvlan_count_rx(ipvlan, len, success, false);
356         return ret;
357 }
358
359 static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
360                                             void *lyr3h, int addr_type,
361                                             bool use_dest)
362 {
363         struct ipvl_addr *addr = NULL;
364
365         switch (addr_type) {
366 #if IS_ENABLED(CONFIG_IPV6)
367         case IPVL_IPV6: {
368                 struct ipv6hdr *ip6h;
369                 struct in6_addr *i6addr;
370
371                 ip6h = (struct ipv6hdr *)lyr3h;
372                 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
373                 addr = ipvlan_ht_addr_lookup(port, i6addr, true);
374                 break;
375         }
376         case IPVL_ICMPV6: {
377                 struct nd_msg *ndmh;
378                 struct in6_addr *i6addr;
379
380                 /* Make sure that the NeighborSolicitation ICMPv6 packets
381                  * are handled to avoid DAD issue.
382                  */
383                 ndmh = (struct nd_msg *)lyr3h;
384                 if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
385                         i6addr = &ndmh->target;
386                         addr = ipvlan_ht_addr_lookup(port, i6addr, true);
387                 }
388                 break;
389         }
390 #endif
391         case IPVL_IPV4: {
392                 struct iphdr *ip4h;
393                 __be32 *i4addr;
394
395                 ip4h = (struct iphdr *)lyr3h;
396                 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
397                 addr = ipvlan_ht_addr_lookup(port, i4addr, false);
398                 break;
399         }
400         case IPVL_ARP: {
401                 struct arphdr *arph;
402                 unsigned char *arp_ptr;
403                 __be32 dip;
404
405                 arph = (struct arphdr *)lyr3h;
406                 arp_ptr = (unsigned char *)(arph + 1);
407                 if (use_dest)
408                         arp_ptr += (2 * port->dev->addr_len) + 4;
409                 else
410                         arp_ptr += port->dev->addr_len;
411
412                 memcpy(&dip, arp_ptr, 4);
413                 addr = ipvlan_ht_addr_lookup(port, &dip, false);
414                 break;
415         }
416         }
417
418         return addr;
419 }
420
421 static int ipvlan_process_v4_outbound(struct sk_buff *skb)
422 {
423         const struct iphdr *ip4h = ip_hdr(skb);
424         struct net_device *dev = skb->dev;
425         struct net *net = dev_net(dev);
426         struct rtable *rt;
427         int err, ret = NET_XMIT_DROP;
428         struct flowi4 fl4 = {
429                 .flowi4_oif = dev->ifindex,
430                 .flowi4_tos = RT_TOS(ip4h->tos),
431                 .flowi4_flags = FLOWI_FLAG_ANYSRC,
432                 .flowi4_mark = skb->mark,
433                 .daddr = ip4h->daddr,
434                 .saddr = ip4h->saddr,
435         };
436
437         rt = ip_route_output_flow(net, &fl4, NULL);
438         if (IS_ERR(rt))
439                 goto err;
440
441         if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
442                 ip_rt_put(rt);
443                 goto err;
444         }
445         skb_dst_set(skb, &rt->dst);
446         err = ip_local_out(net, skb->sk, skb);
447         if (unlikely(net_xmit_eval(err)))
448                 dev->stats.tx_errors++;
449         else
450                 ret = NET_XMIT_SUCCESS;
451         goto out;
452 err:
453         dev->stats.tx_errors++;
454         kfree_skb(skb);
455 out:
456         return ret;
457 }
458
459 #if IS_ENABLED(CONFIG_IPV6)
460 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
461 {
462         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
463         struct net_device *dev = skb->dev;
464         struct net *net = dev_net(dev);
465         struct dst_entry *dst;
466         int err, ret = NET_XMIT_DROP;
467         struct flowi6 fl6 = {
468                 .flowi6_oif = dev->ifindex,
469                 .daddr = ip6h->daddr,
470                 .saddr = ip6h->saddr,
471                 .flowi6_flags = FLOWI_FLAG_ANYSRC,
472                 .flowlabel = ip6_flowinfo(ip6h),
473                 .flowi6_mark = skb->mark,
474                 .flowi6_proto = ip6h->nexthdr,
475         };
476
477         dst = ip6_route_output(net, NULL, &fl6);
478         if (dst->error) {
479                 ret = dst->error;
480                 dst_release(dst);
481                 goto err;
482         }
483         skb_dst_set(skb, dst);
484         err = ip6_local_out(net, skb->sk, skb);
485         if (unlikely(net_xmit_eval(err)))
486                 dev->stats.tx_errors++;
487         else
488                 ret = NET_XMIT_SUCCESS;
489         goto out;
490 err:
491         dev->stats.tx_errors++;
492         kfree_skb(skb);
493 out:
494         return ret;
495 }
496 #else
497 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
498 {
499         return NET_XMIT_DROP;
500 }
501 #endif
502
503 static int ipvlan_process_outbound(struct sk_buff *skb)
504 {
505         int ret = NET_XMIT_DROP;
506
507         /* The ipvlan is a pseudo-L2 device, so the packets that we receive
508          * will have L2; which need to discarded and processed further
509          * in the net-ns of the main-device.
510          */
511         if (skb_mac_header_was_set(skb)) {
512                 /* In this mode we dont care about
513                  * multicast and broadcast traffic */
514                 struct ethhdr *ethh = eth_hdr(skb);
515
516                 if (is_multicast_ether_addr(ethh->h_dest)) {
517                         pr_debug_ratelimited(
518                                 "Dropped {multi|broad}cast of type=[%x]\n",
519                                 ntohs(skb->protocol));
520                         kfree_skb(skb);
521                         goto out;
522                 }
523
524                 skb_pull(skb, sizeof(*ethh));
525                 skb->mac_header = (typeof(skb->mac_header))~0U;
526                 skb_reset_network_header(skb);
527         }
528
529         if (skb->protocol == htons(ETH_P_IPV6))
530                 ret = ipvlan_process_v6_outbound(skb);
531         else if (skb->protocol == htons(ETH_P_IP))
532                 ret = ipvlan_process_v4_outbound(skb);
533         else {
534                 pr_warn_ratelimited("Dropped outbound packet type=%x\n",
535                                     ntohs(skb->protocol));
536                 kfree_skb(skb);
537         }
538 out:
539         return ret;
540 }
541
542 static void ipvlan_multicast_enqueue(struct ipvl_port *port,
543                                      struct sk_buff *skb, bool tx_pkt)
544 {
545         if (skb->protocol == htons(ETH_P_PAUSE)) {
546                 kfree_skb(skb);
547                 return;
548         }
549
550         /* Record that the deferred packet is from TX or RX path. By
551          * looking at mac-addresses on packet will lead to erronus decisions.
552          * (This would be true for a loopback-mode on master device or a
553          * hair-pin mode of the switch.)
554          */
555         IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
556
557         spin_lock(&port->backlog.lock);
558         if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
559                 if (skb->dev)
560                         dev_hold(skb->dev);
561                 __skb_queue_tail(&port->backlog, skb);
562                 spin_unlock(&port->backlog.lock);
563                 schedule_work(&port->wq);
564         } else {
565                 spin_unlock(&port->backlog.lock);
566                 atomic_long_inc(&skb->dev->rx_dropped);
567                 kfree_skb(skb);
568         }
569 }
570
571 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
572 {
573         const struct ipvl_dev *ipvlan = netdev_priv(dev);
574         void *lyr3h;
575         struct ipvl_addr *addr;
576         int addr_type;
577
578         lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
579         if (!lyr3h)
580                 goto out;
581
582         if (!ipvlan_is_vepa(ipvlan->port)) {
583                 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
584                 if (addr) {
585                         if (ipvlan_is_private(ipvlan->port)) {
586                                 consume_skb(skb);
587                                 return NET_XMIT_DROP;
588                         }
589                         return ipvlan_rcv_frame(addr, &skb, true);
590                 }
591         }
592 out:
593         ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
594         return ipvlan_process_outbound(skb);
595 }
596
597 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
598 {
599         const struct ipvl_dev *ipvlan = netdev_priv(dev);
600         struct ethhdr *eth = skb_eth_hdr(skb);
601         struct ipvl_addr *addr;
602         void *lyr3h;
603         int addr_type;
604
605         if (!ipvlan_is_vepa(ipvlan->port) &&
606             ether_addr_equal(eth->h_dest, eth->h_source)) {
607                 lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
608                 if (lyr3h) {
609                         addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
610                         if (addr) {
611                                 if (ipvlan_is_private(ipvlan->port)) {
612                                         consume_skb(skb);
613                                         return NET_XMIT_DROP;
614                                 }
615                                 return ipvlan_rcv_frame(addr, &skb, true);
616                         }
617                 }
618                 skb = skb_share_check(skb, GFP_ATOMIC);
619                 if (!skb)
620                         return NET_XMIT_DROP;
621
622                 /* Packet definitely does not belong to any of the
623                  * virtual devices, but the dest is local. So forward
624                  * the skb for the main-dev. At the RX side we just return
625                  * RX_PASS for it to be processed further on the stack.
626                  */
627                 return dev_forward_skb(ipvlan->phy_dev, skb);
628
629         } else if (is_multicast_ether_addr(eth->h_dest)) {
630                 skb_reset_mac_header(skb);
631                 ipvlan_skb_crossing_ns(skb, NULL);
632                 ipvlan_multicast_enqueue(ipvlan->port, skb, true);
633                 return NET_XMIT_SUCCESS;
634         }
635
636         skb->dev = ipvlan->phy_dev;
637         return dev_queue_xmit(skb);
638 }
639
640 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
641 {
642         struct ipvl_dev *ipvlan = netdev_priv(dev);
643         struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);
644
645         if (!port)
646                 goto out;
647
648         if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
649                 goto out;
650
651         switch(port->mode) {
652         case IPVLAN_MODE_L2:
653                 return ipvlan_xmit_mode_l2(skb, dev);
654         case IPVLAN_MODE_L3:
655         case IPVLAN_MODE_L3S:
656                 return ipvlan_xmit_mode_l3(skb, dev);
657         }
658
659         /* Should not reach here */
660         WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
661                           port->mode);
662 out:
663         kfree_skb(skb);
664         return NET_XMIT_DROP;
665 }
666
667 static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
668 {
669         struct ethhdr *eth = eth_hdr(skb);
670         struct ipvl_addr *addr;
671         void *lyr3h;
672         int addr_type;
673
674         if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
675                 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
676                 if (!lyr3h)
677                         return true;
678
679                 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
680                 if (addr)
681                         return false;
682         }
683
684         return true;
685 }
686
687 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
688                                                  struct ipvl_port *port)
689 {
690         void *lyr3h;
691         int addr_type;
692         struct ipvl_addr *addr;
693         struct sk_buff *skb = *pskb;
694         rx_handler_result_t ret = RX_HANDLER_PASS;
695
696         lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
697         if (!lyr3h)
698                 goto out;
699
700         addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
701         if (addr)
702                 ret = ipvlan_rcv_frame(addr, pskb, false);
703
704 out:
705         return ret;
706 }
707
708 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
709                                                  struct ipvl_port *port)
710 {
711         struct sk_buff *skb = *pskb;
712         struct ethhdr *eth = eth_hdr(skb);
713         rx_handler_result_t ret = RX_HANDLER_PASS;
714
715         if (is_multicast_ether_addr(eth->h_dest)) {
716                 if (ipvlan_external_frame(skb, port)) {
717                         struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
718
719                         /* External frames are queued for device local
720                          * distribution, but a copy is given to master
721                          * straight away to avoid sending duplicates later
722                          * when work-queue processes this frame. This is
723                          * achieved by returning RX_HANDLER_PASS.
724                          */
725                         if (nskb) {
726                                 ipvlan_skb_crossing_ns(nskb, NULL);
727                                 ipvlan_multicast_enqueue(port, nskb, false);
728                         }
729                 }
730         } else {
731                 /* Perform like l3 mode for non-multicast packet */
732                 ret = ipvlan_handle_mode_l3(pskb, port);
733         }
734
735         return ret;
736 }
737
738 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
739 {
740         struct sk_buff *skb = *pskb;
741         struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);
742
743         if (!port)
744                 return RX_HANDLER_PASS;
745
746         switch (port->mode) {
747         case IPVLAN_MODE_L2:
748                 return ipvlan_handle_mode_l2(pskb, port);
749         case IPVLAN_MODE_L3:
750                 return ipvlan_handle_mode_l3(pskb, port);
751         case IPVLAN_MODE_L3S:
752                 return RX_HANDLER_PASS;
753         }
754
755         /* Should not reach here */
756         WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
757                           port->mode);
758         kfree_skb(skb);
759         return RX_HANDLER_CONSUMED;
760 }
761
762 static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
763                                             struct net_device *dev)
764 {
765         struct ipvl_addr *addr = NULL;
766         struct ipvl_port *port;
767         void *lyr3h;
768         int addr_type;
769
770         if (!dev || !netif_is_ipvlan_port(dev))
771                 goto out;
772
773         port = ipvlan_port_get_rcu(dev);
774         if (!port || port->mode != IPVLAN_MODE_L3S)
775                 goto out;
776
777         lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
778         if (!lyr3h)
779                 goto out;
780
781         addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
782 out:
783         return addr;
784 }
785
786 struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
787                               u16 proto)
788 {
789         struct ipvl_addr *addr;
790         struct net_device *sdev;
791
792         addr = ipvlan_skb_to_addr(skb, dev);
793         if (!addr)
794                 goto out;
795
796         sdev = addr->master->dev;
797         switch (proto) {
798         case AF_INET:
799         {
800                 int err;
801                 struct iphdr *ip4h = ip_hdr(skb);
802
803                 err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
804                                            ip4h->tos, sdev);
805                 if (unlikely(err))
806                         goto out;
807                 break;
808         }
809 #if IS_ENABLED(CONFIG_IPV6)
810         case AF_INET6:
811         {
812                 struct dst_entry *dst;
813                 struct ipv6hdr *ip6h = ipv6_hdr(skb);
814                 int flags = RT6_LOOKUP_F_HAS_SADDR;
815                 struct flowi6 fl6 = {
816                         .flowi6_iif   = sdev->ifindex,
817                         .daddr        = ip6h->daddr,
818                         .saddr        = ip6h->saddr,
819                         .flowlabel    = ip6_flowinfo(ip6h),
820                         .flowi6_mark  = skb->mark,
821                         .flowi6_proto = ip6h->nexthdr,
822                 };
823
824                 skb_dst_drop(skb);
825                 dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
826                                              skb, flags);
827                 skb_dst_set(skb, dst);
828                 break;
829         }
830 #endif
831         default:
832                 break;
833         }
834
835 out:
836         return skb;
837 }
838
839 unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
840                              const struct nf_hook_state *state)
841 {
842         struct ipvl_addr *addr;
843         unsigned int len;
844
845         addr = ipvlan_skb_to_addr(skb, skb->dev);
846         if (!addr)
847                 goto out;
848
849         skb->dev = addr->master->dev;
850         len = skb->len + ETH_HLEN;
851         ipvlan_count_rx(addr->master, len, true, false);
852 out:
853         return NF_ACCEPT;
854 }