GNU Linux-libre 4.14.290-gnu1
[releases.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
/*
 * Result of scoring a route's next hop.  Every failure value is negative,
 * which is what rt6_score_route() relies on when it tests "n < 0".
 */
enum rt6_nud_state {
        /* find_match() rejects the route outright */
        RT6_NUD_FAIL_HARD = -3,
        /* rt6_check_neigh(): neighbour is NUD_FAILED (ROUTER_PREF builds) */
        RT6_NUD_FAIL_PROBE = -2,
        /* find_match() scores the route 0 and asks for round-robin */
        RT6_NUD_FAIL_DO_RR = -1,
        RT6_NUD_SUCCEED = 1
};
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int      ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void             ip6_dst_destroy(struct dst_entry *);
86 static void             ip6_dst_ifdown(struct dst_entry *,
87                                        struct net_device *dev, int how);
88 static int               ip6_dst_gc(struct dst_ops *ops);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int              ip6_pkt_prohibit(struct sk_buff *skb);
93 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96                                            struct sk_buff *skb, u32 mtu,
97                                            bool confirm_neigh);
98 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99                                         struct sk_buff *skb);
100 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
101 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
102 static size_t rt6_nlmsg_size(struct rt6_info *rt);
103 static int rt6_fill_node(struct net *net,
104                          struct sk_buff *skb, struct rt6_info *rt,
105                          struct in6_addr *dst, struct in6_addr *src,
106                          int iif, int type, u32 portid, u32 seq,
107                          unsigned int flags);
108
109 #ifdef CONFIG_IPV6_ROUTE_INFO
110 static struct rt6_info *rt6_add_route_info(struct net *net,
111                                            const struct in6_addr *prefix, int prefixlen,
112                                            const struct in6_addr *gwaddr,
113                                            struct net_device *dev,
114                                            unsigned int pref);
115 static struct rt6_info *rt6_get_route_info(struct net *net,
116                                            const struct in6_addr *prefix, int prefixlen,
117                                            const struct in6_addr *gwaddr,
118                                            struct net_device *dev);
119 #endif
120
121 struct uncached_list {
122         spinlock_t              lock;
123         struct list_head        head;
124 };
125
126 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
127
128 static void rt6_uncached_list_add(struct rt6_info *rt)
129 {
130         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
131
132         rt->rt6i_uncached_list = ul;
133
134         spin_lock_bh(&ul->lock);
135         list_add_tail(&rt->rt6i_uncached, &ul->head);
136         spin_unlock_bh(&ul->lock);
137 }
138
139 static void rt6_uncached_list_del(struct rt6_info *rt)
140 {
141         if (!list_empty(&rt->rt6i_uncached)) {
142                 struct uncached_list *ul = rt->rt6i_uncached_list;
143
144                 spin_lock_bh(&ul->lock);
145                 list_del(&rt->rt6i_uncached);
146                 spin_unlock_bh(&ul->lock);
147         }
148 }
149
150 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
151 {
152         struct net_device *loopback_dev = net->loopback_dev;
153         int cpu;
154
155         if (dev == loopback_dev)
156                 return;
157
158         for_each_possible_cpu(cpu) {
159                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
160                 struct rt6_info *rt;
161
162                 spin_lock_bh(&ul->lock);
163                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
164                         struct inet6_dev *rt_idev = rt->rt6i_idev;
165                         struct net_device *rt_dev = rt->dst.dev;
166
167                         if (rt_idev->dev == dev) {
168                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
169                                 in6_dev_put(rt_idev);
170                         }
171
172                         if (rt_dev == dev) {
173                                 rt->dst.dev = loopback_dev;
174                                 dev_hold(rt->dst.dev);
175                                 dev_put(rt_dev);
176                         }
177                 }
178                 spin_unlock_bh(&ul->lock);
179         }
180 }
181
182 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
183 {
184         return dst_metrics_write_ptr(rt->dst.from);
185 }
186
187 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
188 {
189         struct rt6_info *rt = (struct rt6_info *)dst;
190
191         if (rt->rt6i_flags & RTF_PCPU)
192                 return rt6_pcpu_cow_metrics(rt);
193         else if (rt->rt6i_flags & RTF_CACHE)
194                 return NULL;
195         else
196                 return dst_cow_metrics_generic(dst, old);
197 }
198
199 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
200                                              struct sk_buff *skb,
201                                              const void *daddr)
202 {
203         struct in6_addr *p = &rt->rt6i_gateway;
204
205         if (!ipv6_addr_any(p))
206                 return (const void *) p;
207         else if (skb)
208                 return &ipv6_hdr(skb)->daddr;
209         return daddr;
210 }
211
212 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
213                                           struct sk_buff *skb,
214                                           const void *daddr)
215 {
216         struct rt6_info *rt = (struct rt6_info *) dst;
217         struct neighbour *n;
218
219         daddr = choose_neigh_daddr(rt, skb, daddr);
220         n = __ipv6_neigh_lookup(dst->dev, daddr);
221         if (n)
222                 return n;
223         return neigh_create(&nd_tbl, daddr, dst->dev);
224 }
225
226 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
227 {
228         struct net_device *dev = dst->dev;
229         struct rt6_info *rt = (struct rt6_info *)dst;
230
231         daddr = choose_neigh_daddr(rt, NULL, daddr);
232         if (!daddr)
233                 return;
234         if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
235                 return;
236         if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
237                 return;
238         __ipv6_confirm_neigh(dev, daddr);
239 }
240
241 static struct dst_ops ip6_dst_ops_template = {
242         .family                 =       AF_INET6,
243         .gc                     =       ip6_dst_gc,
244         .gc_thresh              =       1024,
245         .check                  =       ip6_dst_check,
246         .default_advmss         =       ip6_default_advmss,
247         .mtu                    =       ip6_mtu,
248         .cow_metrics            =       ipv6_cow_metrics,
249         .destroy                =       ip6_dst_destroy,
250         .ifdown                 =       ip6_dst_ifdown,
251         .negative_advice        =       ip6_negative_advice,
252         .link_failure           =       ip6_link_failure,
253         .update_pmtu            =       ip6_rt_update_pmtu,
254         .redirect               =       rt6_do_redirect,
255         .local_out              =       __ip6_local_out,
256         .neigh_lookup           =       ip6_neigh_lookup,
257         .confirm_neigh          =       ip6_confirm_neigh,
258 };
259
260 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
261 {
262         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
263
264         return mtu ? : dst->dev->mtu;
265 }
266
267 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
268                                          struct sk_buff *skb, u32 mtu,
269                                          bool confirm_neigh)
270 {
271 }
272
/* Redirects are deliberately ignored on blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
                                      struct sock *sk,
                                      struct sk_buff *skb)
{
        /* nothing to do */
}
277
278 static struct dst_ops ip6_dst_blackhole_ops = {
279         .family                 =       AF_INET6,
280         .destroy                =       ip6_dst_destroy,
281         .check                  =       ip6_dst_check,
282         .mtu                    =       ip6_blackhole_mtu,
283         .default_advmss         =       ip6_default_advmss,
284         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
285         .redirect               =       ip6_rt_blackhole_redirect,
286         .cow_metrics            =       dst_cow_metrics_generic,
287         .neigh_lookup           =       ip6_neigh_lookup,
288 };
289
290 static const u32 ip6_template_metrics[RTAX_MAX] = {
291         [RTAX_HOPLIMIT - 1] = 0,
292 };
293
294 static const struct rt6_info ip6_null_entry_template = {
295         .dst = {
296                 .__refcnt       = ATOMIC_INIT(1),
297                 .__use          = 1,
298                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
299                 .error          = -ENETUNREACH,
300                 .input          = ip6_pkt_discard,
301                 .output         = ip6_pkt_discard_out,
302         },
303         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
304         .rt6i_protocol  = RTPROT_KERNEL,
305         .rt6i_metric    = ~(u32) 0,
306         .rt6i_ref       = ATOMIC_INIT(1),
307 };
308
309 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
310
311 static const struct rt6_info ip6_prohibit_entry_template = {
312         .dst = {
313                 .__refcnt       = ATOMIC_INIT(1),
314                 .__use          = 1,
315                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
316                 .error          = -EACCES,
317                 .input          = ip6_pkt_prohibit,
318                 .output         = ip6_pkt_prohibit_out,
319         },
320         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
321         .rt6i_protocol  = RTPROT_KERNEL,
322         .rt6i_metric    = ~(u32) 0,
323         .rt6i_ref       = ATOMIC_INIT(1),
324 };
325
326 static const struct rt6_info ip6_blk_hole_entry_template = {
327         .dst = {
328                 .__refcnt       = ATOMIC_INIT(1),
329                 .__use          = 1,
330                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
331                 .error          = -EINVAL,
332                 .input          = dst_discard,
333                 .output         = dst_discard_out,
334         },
335         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
336         .rt6i_protocol  = RTPROT_KERNEL,
337         .rt6i_metric    = ~(u32) 0,
338         .rt6i_ref       = ATOMIC_INIT(1),
339 };
340
341 #endif
342
343 static void rt6_info_init(struct rt6_info *rt)
344 {
345         struct dst_entry *dst = &rt->dst;
346
347         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
348         INIT_LIST_HEAD(&rt->rt6i_siblings);
349         INIT_LIST_HEAD(&rt->rt6i_uncached);
350 }
351
352 /* allocate dst with ip6_dst_ops */
353 static struct rt6_info *__ip6_dst_alloc(struct net *net,
354                                         struct net_device *dev,
355                                         int flags)
356 {
357         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
358                                         1, DST_OBSOLETE_FORCE_CHK, flags);
359
360         if (rt)
361                 rt6_info_init(rt);
362
363         return rt;
364 }
365
366 struct rt6_info *ip6_dst_alloc(struct net *net,
367                                struct net_device *dev,
368                                int flags)
369 {
370         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
371
372         if (rt) {
373                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
374                 if (rt->rt6i_pcpu) {
375                         int cpu;
376
377                         for_each_possible_cpu(cpu) {
378                                 struct rt6_info **p;
379
380                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
381                                 /* no one shares rt */
382                                 *p =  NULL;
383                         }
384                 } else {
385                         dst_release_immediate(&rt->dst);
386                         return NULL;
387                 }
388         }
389
390         return rt;
391 }
392 EXPORT_SYMBOL(ip6_dst_alloc);
393
394 static void ip6_dst_destroy(struct dst_entry *dst)
395 {
396         struct rt6_info *rt = (struct rt6_info *)dst;
397         struct dst_entry *from = dst->from;
398         struct inet6_dev *idev;
399
400         dst_destroy_metrics_generic(dst);
401         free_percpu(rt->rt6i_pcpu);
402         rt6_uncached_list_del(rt);
403
404         idev = rt->rt6i_idev;
405         if (idev) {
406                 rt->rt6i_idev = NULL;
407                 in6_dev_put(idev);
408         }
409
410         dst->from = NULL;
411         dst_release(from);
412 }
413
414 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
415                            int how)
416 {
417         struct rt6_info *rt = (struct rt6_info *)dst;
418         struct inet6_dev *idev = rt->rt6i_idev;
419         struct net_device *loopback_dev =
420                 dev_net(dev)->loopback_dev;
421
422         if (idev && idev->dev != loopback_dev) {
423                 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
424                 if (loopback_idev) {
425                         rt->rt6i_idev = loopback_idev;
426                         in6_dev_put(idev);
427                 }
428         }
429 }
430
431 static bool __rt6_check_expired(const struct rt6_info *rt)
432 {
433         if (rt->rt6i_flags & RTF_EXPIRES)
434                 return time_after(jiffies, rt->dst.expires);
435         else
436                 return false;
437 }
438
439 static bool rt6_check_expired(const struct rt6_info *rt)
440 {
441         if (rt->rt6i_flags & RTF_EXPIRES) {
442                 if (time_after(jiffies, rt->dst.expires))
443                         return true;
444         } else if (rt->dst.from) {
445                 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
446                        rt6_check_expired((struct rt6_info *)rt->dst.from);
447         }
448         return false;
449 }
450
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452                                              struct flowi6 *fl6, int oif,
453                                              int strict)
454 {
455         struct rt6_info *sibling, *next_sibling;
456         int route_choosen;
457
458         /* We might have already computed the hash for ICMPv6 errors. In such
459          * case it will always be non-zero. Otherwise now is the time to do it.
460          */
461         if (!fl6->mp_hash)
462                 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
463
464         route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
465         /* Don't change the route, if route_choosen == 0
466          * (siblings does not include ourself)
467          */
468         if (route_choosen)
469                 list_for_each_entry_safe(sibling, next_sibling,
470                                 &match->rt6i_siblings, rt6i_siblings) {
471                         route_choosen--;
472                         if (route_choosen == 0) {
473                                 if (rt6_score_route(sibling, oif, strict) < 0)
474                                         break;
475                                 match = sibling;
476                                 break;
477                         }
478                 }
479         return match;
480 }
481
482 /*
483  *      Route lookup. Any table->tb6_lock is implied.
484  */
485
486 static inline struct rt6_info *rt6_device_match(struct net *net,
487                                                     struct rt6_info *rt,
488                                                     const struct in6_addr *saddr,
489                                                     int oif,
490                                                     int flags)
491 {
492         struct rt6_info *local = NULL;
493         struct rt6_info *sprt;
494
495         if (!oif && ipv6_addr_any(saddr))
496                 goto out;
497
498         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
499                 struct net_device *dev = sprt->dst.dev;
500
501                 if (oif) {
502                         if (dev->ifindex == oif)
503                                 return sprt;
504                         if (dev->flags & IFF_LOOPBACK) {
505                                 if (!sprt->rt6i_idev ||
506                                     sprt->rt6i_idev->dev->ifindex != oif) {
507                                         if (flags & RT6_LOOKUP_F_IFACE)
508                                                 continue;
509                                         if (local &&
510                                             local->rt6i_idev->dev->ifindex == oif)
511                                                 continue;
512                                 }
513                                 local = sprt;
514                         }
515                 } else {
516                         if (ipv6_chk_addr(net, saddr, dev,
517                                           flags & RT6_LOOKUP_F_IFACE))
518                                 return sprt;
519                 }
520         }
521
522         if (oif) {
523                 if (local)
524                         return local;
525
526                 if (flags & RT6_LOOKUP_F_IFACE)
527                         return net->ipv6.ip6_null_entry;
528         }
529 out:
530         return rt;
531 }
532
533 #ifdef CONFIG_IPV6_ROUTER_PREF
534 struct __rt6_probe_work {
535         struct work_struct work;
536         struct in6_addr target;
537         struct net_device *dev;
538 };
539
540 static void rt6_probe_deferred(struct work_struct *w)
541 {
542         struct in6_addr mcaddr;
543         struct __rt6_probe_work *work =
544                 container_of(w, struct __rt6_probe_work, work);
545
546         addrconf_addr_solict_mult(&work->target, &mcaddr);
547         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
548         dev_put(work->dev);
549         kfree(work);
550 }
551
552 static void rt6_probe(struct rt6_info *rt)
553 {
554         struct __rt6_probe_work *work;
555         struct neighbour *neigh;
556         /*
557          * Okay, this does not seem to be appropriate
558          * for now, however, we need to check if it
559          * is really so; aka Router Reachability Probing.
560          *
561          * Router Reachability Probe MUST be rate-limited
562          * to no more than one per minute.
563          */
564         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
565                 return;
566         rcu_read_lock_bh();
567         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
568         if (neigh) {
569                 if (neigh->nud_state & NUD_VALID)
570                         goto out;
571
572                 work = NULL;
573                 write_lock(&neigh->lock);
574                 if (!(neigh->nud_state & NUD_VALID) &&
575                     time_after(jiffies,
576                                neigh->updated +
577                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
578                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
579                         if (work)
580                                 __neigh_set_probe_once(neigh);
581                 }
582                 write_unlock(&neigh->lock);
583         } else {
584                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
585         }
586
587         if (work) {
588                 INIT_WORK(&work->work, rt6_probe_deferred);
589                 work->target = rt->rt6i_gateway;
590                 dev_hold(rt->dst.dev);
591                 work->dev = rt->dst.dev;
592                 schedule_work(&work->work);
593         }
594
595 out:
596         rcu_read_unlock_bh();
597 }
598 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
        /* nothing to do */
}
602 #endif
603
604 /*
605  * Default Router Selection (RFC 2461 6.3.6)
606  */
607 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
608 {
609         struct net_device *dev = rt->dst.dev;
610         if (!oif || dev->ifindex == oif)
611                 return 2;
612         if ((dev->flags & IFF_LOOPBACK) &&
613             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
614                 return 1;
615         return 0;
616 }
617
618 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
619 {
620         struct neighbour *neigh;
621         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
622
623         if (rt->rt6i_flags & RTF_NONEXTHOP ||
624             !(rt->rt6i_flags & RTF_GATEWAY))
625                 return RT6_NUD_SUCCEED;
626
627         rcu_read_lock_bh();
628         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
629         if (neigh) {
630                 read_lock(&neigh->lock);
631                 if (neigh->nud_state & NUD_VALID)
632                         ret = RT6_NUD_SUCCEED;
633 #ifdef CONFIG_IPV6_ROUTER_PREF
634                 else if (!(neigh->nud_state & NUD_FAILED))
635                         ret = RT6_NUD_SUCCEED;
636                 else
637                         ret = RT6_NUD_FAIL_PROBE;
638 #endif
639                 read_unlock(&neigh->lock);
640         } else {
641                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
642                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
643         }
644         rcu_read_unlock_bh();
645
646         return ret;
647 }
648
649 static int rt6_score_route(struct rt6_info *rt, int oif,
650                            int strict)
651 {
652         int m;
653
654         m = rt6_check_dev(rt, oif);
655         if (!m && (strict & RT6_LOOKUP_F_IFACE))
656                 return RT6_NUD_FAIL_HARD;
657 #ifdef CONFIG_IPV6_ROUTER_PREF
658         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
659 #endif
660         if (strict & RT6_LOOKUP_F_REACHABLE) {
661                 int n = rt6_check_neigh(rt);
662                 if (n < 0)
663                         return n;
664         }
665         return m;
666 }
667
668 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
669                                    int *mpri, struct rt6_info *match,
670                                    bool *do_rr)
671 {
672         int m;
673         bool match_do_rr = false;
674         struct inet6_dev *idev = rt->rt6i_idev;
675         struct net_device *dev = rt->dst.dev;
676
677         if (dev && !netif_carrier_ok(dev) &&
678             idev->cnf.ignore_routes_with_linkdown &&
679             !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
680                 goto out;
681
682         if (rt6_check_expired(rt))
683                 goto out;
684
685         m = rt6_score_route(rt, oif, strict);
686         if (m == RT6_NUD_FAIL_DO_RR) {
687                 match_do_rr = true;
688                 m = 0; /* lowest valid score */
689         } else if (m == RT6_NUD_FAIL_HARD) {
690                 goto out;
691         }
692
693         if (strict & RT6_LOOKUP_F_REACHABLE)
694                 rt6_probe(rt);
695
696         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
697         if (m > *mpri) {
698                 *do_rr = match_do_rr;
699                 *mpri = m;
700                 match = rt;
701         }
702 out:
703         return match;
704 }
705
706 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
707                                      struct rt6_info *rr_head,
708                                      u32 metric, int oif, int strict,
709                                      bool *do_rr)
710 {
711         struct rt6_info *rt, *match, *cont;
712         int mpri = -1;
713
714         match = NULL;
715         cont = NULL;
716         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
717                 if (rt->rt6i_metric != metric) {
718                         cont = rt;
719                         break;
720                 }
721
722                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
723         }
724
725         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
726                 if (rt->rt6i_metric != metric) {
727                         cont = rt;
728                         break;
729                 }
730
731                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
732         }
733
734         if (match || !cont)
735                 return match;
736
737         for (rt = cont; rt; rt = rt->dst.rt6_next)
738                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
739
740         return match;
741 }
742
743 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
744 {
745         struct rt6_info *match, *rt0;
746         struct net *net;
747         bool do_rr = false;
748
749         rt0 = fn->rr_ptr;
750         if (!rt0)
751                 fn->rr_ptr = rt0 = fn->leaf;
752
753         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
754                              &do_rr);
755
756         if (do_rr) {
757                 struct rt6_info *next = rt0->dst.rt6_next;
758
759                 /* no entries matched; do round-robin */
760                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
761                         next = fn->leaf;
762
763                 if (next != rt0)
764                         fn->rr_ptr = next;
765         }
766
767         net = dev_net(rt0->dst.dev);
768         return match ? match : net->ipv6.ip6_null_entry;
769 }
770
771 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
772 {
773         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
774 }
775
776 #ifdef CONFIG_IPV6_ROUTE_INFO
777 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
778                   const struct in6_addr *gwaddr)
779 {
780         struct net *net = dev_net(dev);
781         struct route_info *rinfo = (struct route_info *) opt;
782         struct in6_addr prefix_buf, *prefix;
783         unsigned int pref;
784         unsigned long lifetime;
785         struct rt6_info *rt;
786
787         if (len < sizeof(struct route_info)) {
788                 return -EINVAL;
789         }
790
791         /* Sanity check for prefix_len and length */
792         if (rinfo->length > 3) {
793                 return -EINVAL;
794         } else if (rinfo->prefix_len > 128) {
795                 return -EINVAL;
796         } else if (rinfo->prefix_len > 64) {
797                 if (rinfo->length < 2) {
798                         return -EINVAL;
799                 }
800         } else if (rinfo->prefix_len > 0) {
801                 if (rinfo->length < 1) {
802                         return -EINVAL;
803                 }
804         }
805
806         pref = rinfo->route_pref;
807         if (pref == ICMPV6_ROUTER_PREF_INVALID)
808                 return -EINVAL;
809
810         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
811
812         if (rinfo->length == 3)
813                 prefix = (struct in6_addr *)rinfo->prefix;
814         else {
815                 /* this function is safe */
816                 ipv6_addr_prefix(&prefix_buf,
817                                  (struct in6_addr *)rinfo->prefix,
818                                  rinfo->prefix_len);
819                 prefix = &prefix_buf;
820         }
821
822         if (rinfo->prefix_len == 0)
823                 rt = rt6_get_dflt_router(gwaddr, dev);
824         else
825                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
826                                         gwaddr, dev);
827
828         if (rt && !lifetime) {
829                 ip6_del_rt(rt);
830                 rt = NULL;
831         }
832
833         if (!rt && lifetime)
834                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
835                                         dev, pref);
836         else if (rt)
837                 rt->rt6i_flags = RTF_ROUTEINFO |
838                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
839
840         if (rt) {
841                 if (!addrconf_finite_timeout(lifetime))
842                         rt6_clean_expires(rt);
843                 else
844                         rt6_set_expires(rt, jiffies + HZ * lifetime);
845
846                 ip6_rt_put(rt);
847         }
848         return 0;
849 }
850 #endif
851
852 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
853                                         struct in6_addr *saddr)
854 {
855         struct fib6_node *pn;
856         while (1) {
857                 if (fn->fn_flags & RTN_TL_ROOT)
858                         return NULL;
859                 pn = fn->parent;
860                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
861                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
862                 else
863                         fn = pn;
864                 if (fn->fn_flags & RTN_RTINFO)
865                         return fn;
866         }
867 }
868
869 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
870                                              struct fib6_table *table,
871                                              struct flowi6 *fl6, int flags)
872 {
873         struct fib6_node *fn;
874         struct rt6_info *rt;
875
876         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
877                 flags &= ~RT6_LOOKUP_F_IFACE;
878
879         read_lock_bh(&table->tb6_lock);
880         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
881 restart:
882         rt = fn->leaf;
883         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
884         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
885                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
886         if (rt == net->ipv6.ip6_null_entry) {
887                 fn = fib6_backtrack(fn, &fl6->saddr);
888                 if (fn)
889                         goto restart;
890         }
891         dst_use(&rt->dst, jiffies);
892         read_unlock_bh(&table->tb6_lock);
893
894         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
895
896         return rt;
897
898 }
899
900 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
901                                     int flags)
902 {
903         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
904 }
905 EXPORT_SYMBOL_GPL(ip6_route_lookup);
906
907 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
908                             const struct in6_addr *saddr, int oif, int strict)
909 {
910         struct flowi6 fl6 = {
911                 .flowi6_oif = oif,
912                 .daddr = *daddr,
913         };
914         struct dst_entry *dst;
915         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
916
917         if (saddr) {
918                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
919                 flags |= RT6_LOOKUP_F_HAS_SADDR;
920         }
921
922         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
923         if (dst->error == 0)
924                 return (struct rt6_info *) dst;
925
926         dst_release(dst);
927
928         return NULL;
929 }
930 EXPORT_SYMBOL(rt6_lookup);
931
932 /* ip6_ins_rt is called with FREE table->tb6_lock.
933  * It takes new route entry, the addition fails by any reason the
934  * route is released.
935  * Caller must hold dst before calling it.
936  */
937
938 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
939                         struct mx6_config *mxc,
940                         struct netlink_ext_ack *extack)
941 {
942         int err;
943         struct fib6_table *table;
944
945         table = rt->rt6i_table;
946         write_lock_bh(&table->tb6_lock);
947         err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
948         write_unlock_bh(&table->tb6_lock);
949
950         return err;
951 }
952
953 int ip6_ins_rt(struct rt6_info *rt)
954 {
955         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
956         struct mx6_config mxc = { .mx = NULL, };
957
958         /* Hold dst to account for the reference from the fib6 tree */
959         dst_hold(&rt->dst);
960         return __ip6_ins_rt(rt, &info, &mxc, NULL);
961 }
962
963 /* called with rcu_lock held */
964 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
965 {
966         struct net_device *dev = rt->dst.dev;
967
968         if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
969                 /* for copies of local routes, dst->dev needs to be the
970                  * device if it is a master device, the master device if
971                  * device is enslaved, and the loopback as the default
972                  */
973                 if (netif_is_l3_slave(dev) &&
974                     !rt6_need_strict(&rt->rt6i_dst.addr))
975                         dev = l3mdev_master_dev_rcu(dev);
976                 else if (!netif_is_l3_master(dev))
977                         dev = dev_net(dev)->loopback_dev;
978                 /* last case is netif_is_l3_master(dev) is true in which
979                  * case we want dev returned to be dev
980                  */
981         }
982
983         return dev;
984 }
985
986 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
987                                            const struct in6_addr *daddr,
988                                            const struct in6_addr *saddr)
989 {
990         struct net_device *dev;
991         struct rt6_info *rt;
992
993         /*
994          *      Clone the route.
995          */
996
997         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
998                 ort = (struct rt6_info *)ort->dst.from;
999
1000         rcu_read_lock();
1001         dev = ip6_rt_get_dev_rcu(ort);
1002         rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1003         rcu_read_unlock();
1004         if (!rt)
1005                 return NULL;
1006
1007         ip6_rt_copy_init(rt, ort);
1008         rt->rt6i_flags |= RTF_CACHE;
1009         rt->rt6i_metric = 0;
1010         rt->dst.flags |= DST_HOST;
1011         rt->rt6i_dst.addr = *daddr;
1012         rt->rt6i_dst.plen = 128;
1013
1014         if (!rt6_is_gw_or_nonexthop(ort)) {
1015                 if (ort->rt6i_dst.plen != 128 &&
1016                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1017                         rt->rt6i_flags |= RTF_ANYCAST;
1018 #ifdef CONFIG_IPV6_SUBTREES
1019                 if (rt->rt6i_src.plen && saddr) {
1020                         rt->rt6i_src.addr = *saddr;
1021                         rt->rt6i_src.plen = 128;
1022                 }
1023 #endif
1024         }
1025
1026         return rt;
1027 }
1028
1029 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1030 {
1031         struct net_device *dev;
1032         struct rt6_info *pcpu_rt;
1033
1034         rcu_read_lock();
1035         dev = ip6_rt_get_dev_rcu(rt);
1036         pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1037         rcu_read_unlock();
1038         if (!pcpu_rt)
1039                 return NULL;
1040         ip6_rt_copy_init(pcpu_rt, rt);
1041         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1042         pcpu_rt->rt6i_flags |= RTF_PCPU;
1043         return pcpu_rt;
1044 }
1045
1046 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1047 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1048 {
1049         struct rt6_info *pcpu_rt, **p;
1050
1051         p = this_cpu_ptr(rt->rt6i_pcpu);
1052         pcpu_rt = *p;
1053
1054         if (pcpu_rt) {
1055                 dst_hold(&pcpu_rt->dst);
1056                 rt6_dst_from_metrics_check(pcpu_rt);
1057         }
1058         return pcpu_rt;
1059 }
1060
1061 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1062 {
1063         struct fib6_table *table = rt->rt6i_table;
1064         struct rt6_info *pcpu_rt, *prev, **p;
1065
1066         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1067         if (!pcpu_rt) {
1068                 struct net *net = dev_net(rt->dst.dev);
1069
1070                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1071                 return net->ipv6.ip6_null_entry;
1072         }
1073
1074         read_lock_bh(&table->tb6_lock);
1075         if (rt->rt6i_pcpu) {
1076                 p = this_cpu_ptr(rt->rt6i_pcpu);
1077                 prev = cmpxchg(p, NULL, pcpu_rt);
1078                 if (prev) {
1079                         /* If someone did it before us, return prev instead */
1080                         dst_release_immediate(&pcpu_rt->dst);
1081                         pcpu_rt = prev;
1082                 }
1083         } else {
1084                 /* rt has been removed from the fib6 tree
1085                  * before we have a chance to acquire the read_lock.
1086                  * In this case, don't brother to create a pcpu rt
1087                  * since rt is going away anyway.  The next
1088                  * dst_check() will trigger a re-lookup.
1089                  */
1090                 dst_release_immediate(&pcpu_rt->dst);
1091                 pcpu_rt = rt;
1092         }
1093         dst_hold(&pcpu_rt->dst);
1094         rt6_dst_from_metrics_check(pcpu_rt);
1095         read_unlock_bh(&table->tb6_lock);
1096         return pcpu_rt;
1097 }
1098
1099 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1100                                int oif, struct flowi6 *fl6, int flags)
1101 {
1102         struct fib6_node *fn, *saved_fn;
1103         struct rt6_info *rt;
1104         int strict = 0;
1105
1106         strict |= flags & RT6_LOOKUP_F_IFACE;
1107         strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1108         if (net->ipv6.devconf_all->forwarding == 0)
1109                 strict |= RT6_LOOKUP_F_REACHABLE;
1110
1111         read_lock_bh(&table->tb6_lock);
1112
1113         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1114         saved_fn = fn;
1115
1116         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1117                 oif = 0;
1118
1119 redo_rt6_select:
1120         rt = rt6_select(fn, oif, strict);
1121         if (rt->rt6i_nsiblings)
1122                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1123         if (rt == net->ipv6.ip6_null_entry) {
1124                 fn = fib6_backtrack(fn, &fl6->saddr);
1125                 if (fn)
1126                         goto redo_rt6_select;
1127                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1128                         /* also consider unreachable route */
1129                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1130                         fn = saved_fn;
1131                         goto redo_rt6_select;
1132                 }
1133         }
1134
1135
1136         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1137                 dst_use(&rt->dst, jiffies);
1138                 read_unlock_bh(&table->tb6_lock);
1139
1140                 rt6_dst_from_metrics_check(rt);
1141
1142                 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1143                 return rt;
1144         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1145                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1146                 /* Create a RTF_CACHE clone which will not be
1147                  * owned by the fib6 tree.  It is for the special case where
1148                  * the daddr in the skb during the neighbor look-up is different
1149                  * from the fl6->daddr used to look-up route here.
1150                  */
1151
1152                 struct rt6_info *uncached_rt;
1153
1154                 dst_use(&rt->dst, jiffies);
1155                 read_unlock_bh(&table->tb6_lock);
1156
1157                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1158                 dst_release(&rt->dst);
1159
1160                 if (uncached_rt) {
1161                         /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1162                          * No need for another dst_hold()
1163                          */
1164                         rt6_uncached_list_add(uncached_rt);
1165                 } else {
1166                         uncached_rt = net->ipv6.ip6_null_entry;
1167                         dst_hold(&uncached_rt->dst);
1168                 }
1169
1170                 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1171                 return uncached_rt;
1172
1173         } else {
1174                 /* Get a percpu copy */
1175
1176                 struct rt6_info *pcpu_rt;
1177
1178                 rt->dst.lastuse = jiffies;
1179                 rt->dst.__use++;
1180                 pcpu_rt = rt6_get_pcpu_route(rt);
1181
1182                 if (pcpu_rt) {
1183                         read_unlock_bh(&table->tb6_lock);
1184                 } else {
1185                         /* We have to do the read_unlock first
1186                          * because rt6_make_pcpu_route() may trigger
1187                          * ip6_dst_gc() which will take the write_lock.
1188                          */
1189                         dst_hold(&rt->dst);
1190                         read_unlock_bh(&table->tb6_lock);
1191                         pcpu_rt = rt6_make_pcpu_route(rt);
1192                         dst_release(&rt->dst);
1193                 }
1194
1195                 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1196                 return pcpu_rt;
1197
1198         }
1199 }
1200 EXPORT_SYMBOL_GPL(ip6_pol_route);
1201
1202 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1203                                             struct flowi6 *fl6, int flags)
1204 {
1205         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1206 }
1207
1208 struct dst_entry *ip6_route_input_lookup(struct net *net,
1209                                          struct net_device *dev,
1210                                          struct flowi6 *fl6, int flags)
1211 {
1212         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1213                 flags |= RT6_LOOKUP_F_IFACE;
1214
1215         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1216 }
1217 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1218
1219 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1220                                   struct flow_keys *keys)
1221 {
1222         const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1223         const struct ipv6hdr *key_iph = outer_iph;
1224         const struct ipv6hdr *inner_iph;
1225         const struct icmp6hdr *icmph;
1226         struct ipv6hdr _inner_iph;
1227         struct icmp6hdr _icmph;
1228
1229         if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1230                 goto out;
1231
1232         icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1233                                    sizeof(_icmph), &_icmph);
1234         if (!icmph)
1235                 goto out;
1236
1237         if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1238             icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1239             icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1240             icmph->icmp6_type != ICMPV6_PARAMPROB)
1241                 goto out;
1242
1243         inner_iph = skb_header_pointer(skb,
1244                                        skb_transport_offset(skb) + sizeof(*icmph),
1245                                        sizeof(_inner_iph), &_inner_iph);
1246         if (!inner_iph)
1247                 goto out;
1248
1249         key_iph = inner_iph;
1250 out:
1251         memset(keys, 0, sizeof(*keys));
1252         keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1253         keys->addrs.v6addrs.src = key_iph->saddr;
1254         keys->addrs.v6addrs.dst = key_iph->daddr;
1255         keys->tags.flow_label = ip6_flowlabel(key_iph);
1256         keys->basic.ip_proto = key_iph->nexthdr;
1257 }
1258
1259 /* if skb is set it will be used and fl6 can be NULL */
1260 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1261 {
1262         struct flow_keys hash_keys;
1263
1264         if (skb) {
1265                 ip6_multipath_l3_keys(skb, &hash_keys);
1266                 return flow_hash_from_keys(&hash_keys);
1267         }
1268
1269         return get_hash_from_flowi6(fl6);
1270 }
1271
1272 void ip6_route_input(struct sk_buff *skb)
1273 {
1274         const struct ipv6hdr *iph = ipv6_hdr(skb);
1275         struct net *net = dev_net(skb->dev);
1276         int flags = RT6_LOOKUP_F_HAS_SADDR;
1277         struct ip_tunnel_info *tun_info;
1278         struct flowi6 fl6 = {
1279                 .flowi6_iif = skb->dev->ifindex,
1280                 .daddr = iph->daddr,
1281                 .saddr = iph->saddr,
1282                 .flowlabel = ip6_flowinfo(iph),
1283                 .flowi6_mark = skb->mark,
1284                 .flowi6_proto = iph->nexthdr,
1285         };
1286
1287         tun_info = skb_tunnel_info(skb);
1288         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1289                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1290         if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1291                 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
1292         skb_dst_drop(skb);
1293         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1294 }
1295
1296 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1297                                              struct flowi6 *fl6, int flags)
1298 {
1299         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1300 }
1301
1302 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1303                                          struct flowi6 *fl6, int flags)
1304 {
1305         bool any_src;
1306
1307         if (rt6_need_strict(&fl6->daddr)) {
1308                 struct dst_entry *dst;
1309
1310                 dst = l3mdev_link_scope_lookup(net, fl6);
1311                 if (dst)
1312                         return dst;
1313         }
1314
1315         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1316
1317         any_src = ipv6_addr_any(&fl6->saddr);
1318         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1319             (fl6->flowi6_oif && any_src))
1320                 flags |= RT6_LOOKUP_F_IFACE;
1321
1322         if (!any_src)
1323                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1324         else if (sk)
1325                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1326
1327         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1328 }
1329 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1330
1331 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1332 {
1333         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1334         struct net_device *loopback_dev = net->loopback_dev;
1335         struct dst_entry *new = NULL;
1336
1337         rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1338                        DST_OBSOLETE_DEAD, 0);
1339         if (rt) {
1340                 rt6_info_init(rt);
1341
1342                 new = &rt->dst;
1343                 new->__use = 1;
1344                 new->input = dst_discard;
1345                 new->output = dst_discard_out;
1346
1347                 dst_copy_metrics(new, &ort->dst);
1348
1349                 rt->rt6i_idev = in6_dev_get(loopback_dev);
1350                 rt->rt6i_gateway = ort->rt6i_gateway;
1351                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1352                 rt->rt6i_metric = 0;
1353
1354                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1355 #ifdef CONFIG_IPV6_SUBTREES
1356                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1357 #endif
1358         }
1359
1360         dst_release(dst_orig);
1361         return new ? new : ERR_PTR(-ENOMEM);
1362 }
1363
1364 /*
1365  *      Destination cache support functions
1366  */
1367
1368 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1369 {
1370         if (rt->dst.from &&
1371             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1372                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1373 }
1374
1375 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1376 {
1377         u32 rt_cookie = 0;
1378
1379         if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1380                 return NULL;
1381
1382         if (rt6_check_expired(rt))
1383                 return NULL;
1384
1385         return &rt->dst;
1386 }
1387
1388 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1389 {
1390         if (!__rt6_check_expired(rt) &&
1391             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1392             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1393                 return &rt->dst;
1394         else
1395                 return NULL;
1396 }
1397
1398 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1399 {
1400         struct rt6_info *rt;
1401
1402         rt = (struct rt6_info *) dst;
1403
1404         /* All IPV6 dsts are created with ->obsolete set to the value
1405          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1406          * into this function always.
1407          */
1408
1409         rt6_dst_from_metrics_check(rt);
1410
1411         if (rt->rt6i_flags & RTF_PCPU ||
1412             (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1413                 return rt6_dst_from_check(rt, cookie);
1414         else
1415                 return rt6_check(rt, cookie);
1416 }
1417
1418 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1419 {
1420         struct rt6_info *rt = (struct rt6_info *) dst;
1421
1422         if (rt) {
1423                 if (rt->rt6i_flags & RTF_CACHE) {
1424                         if (rt6_check_expired(rt)) {
1425                                 ip6_del_rt(rt);
1426                                 dst = NULL;
1427                         }
1428                 } else {
1429                         dst_release(dst);
1430                         dst = NULL;
1431                 }
1432         }
1433         return dst;
1434 }
1435
1436 static void ip6_link_failure(struct sk_buff *skb)
1437 {
1438         struct rt6_info *rt;
1439
1440         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1441
1442         rt = (struct rt6_info *) skb_dst(skb);
1443         if (rt) {
1444                 if (rt->rt6i_flags & RTF_CACHE) {
1445                         if (dst_hold_safe(&rt->dst))
1446                                 ip6_del_rt(rt);
1447                 } else {
1448                         struct fib6_node *fn;
1449
1450                         rcu_read_lock();
1451                         fn = rcu_dereference(rt->rt6i_node);
1452                         if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1453                                 fn->fn_sernum = -1;
1454                         rcu_read_unlock();
1455                 }
1456         }
1457 }
1458
1459 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1460 {
1461         struct net *net = dev_net(rt->dst.dev);
1462
1463         rt->rt6i_flags |= RTF_MODIFIED;
1464         rt->rt6i_pmtu = mtu;
1465         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1466 }
1467
1468 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1469 {
1470         return !(rt->rt6i_flags & RTF_CACHE) &&
1471                 (rt->rt6i_flags & RTF_PCPU ||
1472                  rcu_access_pointer(rt->rt6i_node));
1473 }
1474
1475 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1476                                  const struct ipv6hdr *iph, u32 mtu,
1477                                  bool confirm_neigh)
1478 {
1479         const struct in6_addr *daddr, *saddr;
1480         struct rt6_info *rt6 = (struct rt6_info *)dst;
1481
1482         /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
1483          * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
1484          * [see also comment in rt6_mtu_change_route()]
1485          */
1486
1487         if (iph) {
1488                 daddr = &iph->daddr;
1489                 saddr = &iph->saddr;
1490         } else if (sk) {
1491                 daddr = &sk->sk_v6_daddr;
1492                 saddr = &inet6_sk(sk)->saddr;
1493         } else {
1494                 daddr = NULL;
1495                 saddr = NULL;
1496         }
1497
1498         if (confirm_neigh)
1499                 dst_confirm_neigh(dst, daddr);
1500
1501         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1502         if (mtu >= dst_mtu(dst))
1503                 return;
1504
1505         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1506                 rt6_do_update_pmtu(rt6, mtu);
1507         } else if (daddr) {
1508                 struct rt6_info *nrt6;
1509
1510                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1511                 if (nrt6) {
1512                         rt6_do_update_pmtu(nrt6, mtu);
1513
1514                         /* ip6_ins_rt(nrt6) will bump the
1515                          * rt6->rt6i_node->fn_sernum
1516                          * which will fail the next rt6_check() and
1517                          * invalidate the sk->sk_dst_cache.
1518                          */
1519                         ip6_ins_rt(nrt6);
1520                         /* Release the reference taken in
1521                          * ip6_rt_cache_alloc()
1522                          */
1523                         dst_release(&nrt6->dst);
1524                 }
1525         }
1526 }
1527
1528 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1529                                struct sk_buff *skb, u32 mtu,
1530                                bool confirm_neigh)
1531 {
1532         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
1533                              confirm_neigh);
1534 }
1535
1536 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1537                      int oif, u32 mark, kuid_t uid)
1538 {
1539         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1540         struct dst_entry *dst;
1541         struct flowi6 fl6;
1542
1543         memset(&fl6, 0, sizeof(fl6));
1544         fl6.flowi6_oif = oif;
1545         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1546         fl6.daddr = iph->daddr;
1547         fl6.saddr = iph->saddr;
1548         fl6.flowlabel = ip6_flowinfo(iph);
1549         fl6.flowi6_uid = uid;
1550
1551         dst = ip6_route_output(net, NULL, &fl6);
1552         if (!dst->error)
1553                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
1554         dst_release(dst);
1555 }
1556 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1557
1558 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1559 {
1560         int oif = sk->sk_bound_dev_if;
1561         struct dst_entry *dst;
1562
1563         if (!oif && skb->dev)
1564                 oif = l3mdev_master_ifindex(skb->dev);
1565
1566         ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
1567
1568         dst = __sk_dst_get(sk);
1569         if (!dst || !dst->obsolete ||
1570             dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1571                 return;
1572
1573         bh_lock_sock(sk);
1574         if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1575                 ip6_datagram_dst_update(sk, false);
1576         bh_unlock_sock(sk);
1577 }
1578 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1579
1580 /* Handle redirects */
/*
 * Flow key used for redirect lookups: a regular flowi6 followed by the
 * address of the gateway the redirect is checked against.
 *
 * fl6 must stay the first member: __ip6_route_redirect() receives a
 * pointer to fl6 and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* flow handed to fib6_rule_lookup() */
	struct in6_addr gateway;	/* compared with rt6i_gateway */
};
1585
1586 static struct rt6_info *__ip6_route_redirect(struct net *net,
1587                                              struct fib6_table *table,
1588                                              struct flowi6 *fl6,
1589                                              int flags)
1590 {
1591         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1592         struct rt6_info *rt;
1593         struct fib6_node *fn;
1594
1595         /* Get the "current" route for this destination and
1596          * check if the redirect has come from appropriate router.
1597          *
1598          * RFC 4861 specifies that redirects should only be
1599          * accepted if they come from the nexthop to the target.
1600          * Due to the way the routes are chosen, this notion
1601          * is a bit fuzzy and one might need to check all possible
1602          * routes.
1603          */
1604
1605         read_lock_bh(&table->tb6_lock);
1606         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1607 restart:
1608         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1609                 if (rt6_check_expired(rt))
1610                         continue;
1611                 if (rt->dst.error)
1612                         break;
1613                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1614                         continue;
1615                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1616                         continue;
1617                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1618                         continue;
1619                 break;
1620         }
1621
1622         if (!rt)
1623                 rt = net->ipv6.ip6_null_entry;
1624         else if (rt->dst.error) {
1625                 rt = net->ipv6.ip6_null_entry;
1626                 goto out;
1627         }
1628
1629         if (rt == net->ipv6.ip6_null_entry) {
1630                 fn = fib6_backtrack(fn, &fl6->saddr);
1631                 if (fn)
1632                         goto restart;
1633         }
1634
1635 out:
1636         dst_hold(&rt->dst);
1637
1638         read_unlock_bh(&table->tb6_lock);
1639
1640         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1641         return rt;
1642 };
1643
1644 static struct dst_entry *ip6_route_redirect(struct net *net,
1645                                         const struct flowi6 *fl6,
1646                                         const struct in6_addr *gateway)
1647 {
1648         int flags = RT6_LOOKUP_F_HAS_SADDR;
1649         struct ip6rd_flowi rdfl;
1650
1651         rdfl.fl6 = *fl6;
1652         rdfl.gateway = *gateway;
1653
1654         return fib6_rule_lookup(net, &rdfl.fl6,
1655                                 flags, __ip6_route_redirect);
1656 }
1657
1658 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1659                   kuid_t uid)
1660 {
1661         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1662         struct dst_entry *dst;
1663         struct flowi6 fl6;
1664
1665         memset(&fl6, 0, sizeof(fl6));
1666         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1667         fl6.flowi6_oif = oif;
1668         fl6.flowi6_mark = mark;
1669         fl6.daddr = iph->daddr;
1670         fl6.saddr = iph->saddr;
1671         fl6.flowlabel = ip6_flowinfo(iph);
1672         fl6.flowi6_uid = uid;
1673
1674         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1675         rt6_do_redirect(dst, NULL, skb);
1676         dst_release(dst);
1677 }
1678 EXPORT_SYMBOL_GPL(ip6_redirect);
1679
1680 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1681                             u32 mark)
1682 {
1683         const struct ipv6hdr *iph = ipv6_hdr(skb);
1684         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1685         struct dst_entry *dst;
1686         struct flowi6 fl6;
1687
1688         memset(&fl6, 0, sizeof(fl6));
1689         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1690         fl6.flowi6_oif = oif;
1691         fl6.flowi6_mark = mark;
1692         fl6.daddr = msg->dest;
1693         fl6.saddr = iph->daddr;
1694         fl6.flowi6_uid = sock_net_uid(net, NULL);
1695
1696         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1697         rt6_do_redirect(dst, NULL, skb);
1698         dst_release(dst);
1699 }
1700
1701 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1702 {
1703         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1704                      sk->sk_uid);
1705 }
1706 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1707
1708 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1709 {
1710         struct net_device *dev = dst->dev;
1711         unsigned int mtu = dst_mtu(dst);
1712         struct net *net = dev_net(dev);
1713
1714         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1715
1716         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1717                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1718
1719         /*
1720          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1721          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1722          * IPV6_MAXPLEN is also valid and means: "any MSS,
1723          * rely only on pmtu discovery"
1724          */
1725         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1726                 mtu = IPV6_MAXPLEN;
1727         return mtu;
1728 }
1729
1730 static unsigned int ip6_mtu(const struct dst_entry *dst)
1731 {
1732         const struct rt6_info *rt = (const struct rt6_info *)dst;
1733         unsigned int mtu = rt->rt6i_pmtu;
1734         struct inet6_dev *idev;
1735
1736         if (mtu)
1737                 goto out;
1738
1739         mtu = dst_metric_raw(dst, RTAX_MTU);
1740         if (mtu)
1741                 goto out;
1742
1743         mtu = IPV6_MIN_MTU;
1744
1745         rcu_read_lock();
1746         idev = __in6_dev_get(dst->dev);
1747         if (idev)
1748                 mtu = idev->cnf.mtu6;
1749         rcu_read_unlock();
1750
1751 out:
1752         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1753
1754         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1755 }
1756
1757 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1758                                   struct flowi6 *fl6)
1759 {
1760         struct dst_entry *dst;
1761         struct rt6_info *rt;
1762         struct inet6_dev *idev = in6_dev_get(dev);
1763         struct net *net = dev_net(dev);
1764
1765         if (unlikely(!idev))
1766                 return ERR_PTR(-ENODEV);
1767
1768         rt = ip6_dst_alloc(net, dev, 0);
1769         if (unlikely(!rt)) {
1770                 in6_dev_put(idev);
1771                 dst = ERR_PTR(-ENOMEM);
1772                 goto out;
1773         }
1774
1775         rt->dst.flags |= DST_HOST;
1776         rt->dst.input = ip6_input;
1777         rt->dst.output  = ip6_output;
1778         rt->rt6i_gateway  = fl6->daddr;
1779         rt->rt6i_dst.addr = fl6->daddr;
1780         rt->rt6i_dst.plen = 128;
1781         rt->rt6i_idev     = idev;
1782         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1783
1784         /* Add this dst into uncached_list so that rt6_ifdown() can
1785          * do proper release of the net_device
1786          */
1787         rt6_uncached_list_add(rt);
1788
1789         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1790
1791 out:
1792         return dst;
1793 }
1794
1795 static int ip6_dst_gc(struct dst_ops *ops)
1796 {
1797         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1798         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1799         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1800         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1801         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1802         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1803         int entries;
1804
1805         entries = dst_entries_get_fast(ops);
1806         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1807             entries <= rt_max_size)
1808                 goto out;
1809
1810         net->ipv6.ip6_rt_gc_expire++;
1811         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1812         entries = dst_entries_get_slow(ops);
1813         if (entries < ops->gc_thresh)
1814                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1815 out:
1816         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1817         return entries > rt_max_size;
1818 }
1819
1820 static int ip6_convert_metrics(struct mx6_config *mxc,
1821                                const struct fib6_config *cfg)
1822 {
1823         bool ecn_ca = false;
1824         struct nlattr *nla;
1825         int remaining;
1826         u32 *mp;
1827
1828         if (!cfg->fc_mx)
1829                 return 0;
1830
1831         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1832         if (unlikely(!mp))
1833                 return -ENOMEM;
1834
1835         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1836                 int type = nla_type(nla);
1837                 u32 val;
1838
1839                 if (!type)
1840                         continue;
1841                 if (unlikely(type > RTAX_MAX))
1842                         goto err;
1843
1844                 if (type == RTAX_CC_ALGO) {
1845                         char tmp[TCP_CA_NAME_MAX];
1846
1847                         nla_strlcpy(tmp, nla, sizeof(tmp));
1848                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1849                         if (val == TCP_CA_UNSPEC)
1850                                 goto err;
1851                 } else {
1852                         val = nla_get_u32(nla);
1853                 }
1854                 if (type == RTAX_HOPLIMIT && val > 255)
1855                         val = 255;
1856                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1857                         goto err;
1858
1859                 mp[type - 1] = val;
1860                 __set_bit(type - 1, mxc->mx_valid);
1861         }
1862
1863         if (ecn_ca) {
1864                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1865                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1866         }
1867
1868         mxc->mx = mp;
1869         return 0;
1870  err:
1871         kfree(mp);
1872         return -EINVAL;
1873 }
1874
1875 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1876                                             struct fib6_config *cfg,
1877                                             const struct in6_addr *gw_addr)
1878 {
1879         struct flowi6 fl6 = {
1880                 .flowi6_oif = cfg->fc_ifindex,
1881                 .daddr = *gw_addr,
1882                 .saddr = cfg->fc_prefsrc,
1883         };
1884         struct fib6_table *table;
1885         struct rt6_info *rt;
1886         int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1887
1888         table = fib6_get_table(net, cfg->fc_table);
1889         if (!table)
1890                 return NULL;
1891
1892         if (!ipv6_addr_any(&cfg->fc_prefsrc))
1893                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1894
1895         rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1896
1897         /* if table lookup failed, fall back to full lookup */
1898         if (rt == net->ipv6.ip6_null_entry) {
1899                 ip6_rt_put(rt);
1900                 rt = NULL;
1901         }
1902
1903         return rt;
1904 }
1905
/*
 * ip6_route_info_create - validate a route configuration and build the
 * corresponding rt6_info without inserting it into a FIB table.
 * @cfg:    route configuration to convert
 * @extack: extended ack used to report the reason for most -EINVAL failures
 *
 * Returns the new route, or ERR_PTR() with one of:
 *   -EINVAL       RTF_PCPU set, prefix/source length > 128, source prefix
 *                 without CONFIG_IPV6_SUBTREES, bad gateway or prefsrc
 *                 address, missing or loopback egress device for a gateway
 *   -ENODEV       device or its inet6_dev not found / no device resolved
 *   -ENOBUFS      no FIB table could be obtained
 *   -ENOMEM       route allocation failed
 *   -EHOSTUNREACH gateway not reachable directly on the expected device
 *   or the error returned by lwtunnel_build_state().
 *
 * Side effects on @cfg: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and on success
 * fc_nlinfo.nl_net is set to the namespace of the chosen device.
 *
 * References held on dev and idev are transferred to the returned route on
 * success and dropped at the "out" label on failure.
 */
1906 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1907                                               struct netlink_ext_ack *extack)
1908 {
1909         struct net *net = cfg->fc_nlinfo.nl_net;
1910         struct rt6_info *rt = NULL;
1911         struct net_device *dev = NULL;
1912         struct inet6_dev *idev = NULL;
1913         struct fib6_table *table;
1914         int addr_type;
1915         int err = -EINVAL;
1916
1917         /* RTF_PCPU is an internal flag; can not be set by userspace */
1918         if (cfg->fc_flags & RTF_PCPU) {
1919                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
1920                 goto out;
1921         }
1922
1923         if (cfg->fc_dst_len > 128) {
1924                 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1925                 goto out;
1926         }
1927         if (cfg->fc_src_len > 128) {
1928                 NL_SET_ERR_MSG(extack, "Invalid source address length");
1929                 goto out;
1930         }
1931 #ifndef CONFIG_IPV6_SUBTREES
1932         if (cfg->fc_src_len) {
1933                 NL_SET_ERR_MSG(extack,
1934                                "Specifying source address requires IPV6_SUBTREES to be enabled");
1935                 goto out;
1936         }
1937 #endif
1938         if (cfg->fc_ifindex) {
1939                 err = -ENODEV;
1940                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1941                 if (!dev)
1942                         goto out;
1943                 idev = in6_dev_get(dev);
1944                 if (!idev)
1945                         goto out;
1946         }
1947
1948         if (cfg->fc_metric == 0)
1949                 cfg->fc_metric = IP6_RT_PRIO_USER;
1950
             /* When the request carries a netlink header without NLM_F_CREATE,
              * an existing table is looked up first; if there is none a warning
              * is logged and the table is created anyway.  A NULL table leaves
              * err at -ENOBUFS.
              */
1951         err = -ENOBUFS;
1952         if (cfg->fc_nlinfo.nlh &&
1953             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1954                 table = fib6_get_table(net, cfg->fc_table);
1955                 if (!table) {
1956                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1957                         table = fib6_new_table(net, cfg->fc_table);
1958                 }
1959         } else {
1960                 table = fib6_new_table(net, cfg->fc_table);
1961         }
1962
1963         if (!table)
1964                 goto out;
1965
1966         rt = ip6_dst_alloc(net, NULL,
1967                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1968
1969         if (!rt) {
1970                 err = -ENOMEM;
1971                 goto out;
1972         }
1973
1974         if (cfg->fc_flags & RTF_EXPIRES)
1975                 rt6_set_expires(rt, jiffies +
1976                                 clock_t_to_jiffies(cfg->fc_expires));
1977         else
1978                 rt6_clean_expires(rt);
1979
1980         if (cfg->fc_protocol == RTPROT_UNSPEC)
1981                 cfg->fc_protocol = RTPROT_BOOT;
1982         rt->rt6i_protocol = cfg->fc_protocol;
1983
1984         addr_type = ipv6_addr_type(&cfg->fc_dst);
1985
             /* Input handler depends on the kind of destination. */
1986         if (addr_type & IPV6_ADDR_MULTICAST)
1987                 rt->dst.input = ip6_mc_input;
1988         else if (cfg->fc_flags & RTF_LOCAL)
1989                 rt->dst.input = ip6_input;
1990         else
1991                 rt->dst.input = ip6_forward;
1992
1993         rt->dst.output = ip6_output;
1994
1995         if (cfg->fc_encap) {
1996                 struct lwtunnel_state *lwtstate;
1997
1998                 err = lwtunnel_build_state(cfg->fc_encap_type,
1999                                            cfg->fc_encap, AF_INET6, cfg,
2000                                            &lwtstate, extack);
2001                 if (err)
2002                         goto out;
2003                 rt->dst.lwtstate = lwtstate_get(lwtstate);
2004                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2005                         rt->dst.lwtstate->orig_output = rt->dst.output;
2006                         rt->dst.output = lwtunnel_output;
2007                 }
2008                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2009                         rt->dst.lwtstate->orig_input = rt->dst.input;
2010                         rt->dst.input = lwtunnel_input;
2011                 }
2012         }
2013
2014         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2015         rt->rt6i_dst.plen = cfg->fc_dst_len;
2016         if (rt->rt6i_dst.plen == 128)
2017                 rt->dst.flags |= DST_HOST;
2018
2019 #ifdef CONFIG_IPV6_SUBTREES
2020         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2021         rt->rt6i_src.plen = cfg->fc_src_len;
2022 #endif
2023
2024         rt->rt6i_metric = cfg->fc_metric;
2025
2026         /* We cannot add true routes via loopback here,
2027            they would result in kernel looping; promote them to reject routes
2028          */
2029         if ((cfg->fc_flags & RTF_REJECT) ||
2030             (dev && (dev->flags & IFF_LOOPBACK) &&
2031              !(addr_type & IPV6_ADDR_LOOPBACK) &&
2032              !(cfg->fc_flags & RTF_LOCAL))) {
2033                 /* hold loopback dev/idev if we haven't done so. */
2034                 if (dev != net->loopback_dev) {
2035                         if (dev) {
2036                                 dev_put(dev);
2037                                 in6_dev_put(idev);
2038                         }
2039                         dev = net->loopback_dev;
2040                         dev_hold(dev);
2041                         idev = in6_dev_get(dev);
2042                         if (!idev) {
2043                                 err = -ENODEV;
2044                                 goto out;
2045                         }
2046                 }
2047                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error code and the
                      * input/output handlers of the reject route.
                      */
2048                 switch (cfg->fc_type) {
2049                 case RTN_BLACKHOLE:
2050                         rt->dst.error = -EINVAL;
2051                         rt->dst.output = dst_discard_out;
2052                         rt->dst.input = dst_discard;
2053                         break;
2054                 case RTN_PROHIBIT:
2055                         rt->dst.error = -EACCES;
2056                         rt->dst.output = ip6_pkt_prohibit_out;
2057                         rt->dst.input = ip6_pkt_prohibit;
2058                         break;
2059                 case RTN_THROW:
2060                 case RTN_UNREACHABLE:
2061                 default:
2062                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
2063                                         : (cfg->fc_type == RTN_UNREACHABLE)
2064                                         ? -EHOSTUNREACH : -ENETUNREACH;
2065                         rt->dst.output = ip6_pkt_discard_out;
2066                         rt->dst.input = ip6_pkt_discard;
2067                         break;
2068                 }
2069                 goto install_route;
2070         }
2071
2072         if (cfg->fc_flags & RTF_GATEWAY) {
2073                 const struct in6_addr *gw_addr;
2074                 int gwa_type;
2075
2076                 gw_addr = &cfg->fc_gateway;
2077                 gwa_type = ipv6_addr_type(gw_addr);
2078
2079                 /* if gw_addr is local we will fail to detect this in case
2080                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
2081                  * will return already-added prefix route via interface that
2082                  * prefix route was assigned to, which might be non-loopback.
2083                  */
2084                 err = -EINVAL;
2085                 if (ipv6_chk_addr_and_flags(net, gw_addr,
2086                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
2087                                             dev : NULL, 0, 0)) {
2088                         NL_SET_ERR_MSG(extack, "Invalid gateway address");
2089                         goto out;
2090                 }
2091                 rt->rt6i_gateway = *gw_addr;
2092
2093                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2094                         struct rt6_info *grt = NULL;
2095
2096                         /* IPv6 strictly inhibits using not link-local
2097                            addresses as nexthop address.
2098                            Otherwise, router will not able to send redirects.
2099                            It is very good, but in some (rare!) circumstances
2100                            (SIT, PtP, NBMA NOARP links) it is handy to allow
2101                            some exceptions. --ANK
2102                            We allow IPv4-mapped nexthops to support RFC4798-type
2103                            addressing
2104                          */
2105                         if (!(gwa_type & (IPV6_ADDR_UNICAST |
2106                                           IPV6_ADDR_MAPPED))) {
2107                                 NL_SET_ERR_MSG(extack,
2108                                                "Invalid gateway address");
2109                                 goto out;
2110                         }
2111
                             /* Try the configured table first; a result that is
                              * itself a gateway route or uses another device is
                              * discarded so that rt6_lookup() below is used.
                              */
2112                         if (cfg->fc_table) {
2113                                 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2114
2115                                 if (grt) {
2116                                         if (grt->rt6i_flags & RTF_GATEWAY ||
2117                                             (dev && dev != grt->dst.dev)) {
2118                                                 ip6_rt_put(grt);
2119                                                 grt = NULL;
2120                                         }
2121                                 }
2122                         }
2123
2124                         if (!grt)
2125                                 grt = rt6_lookup(net, gw_addr, NULL,
2126                                                  cfg->fc_ifindex, 1);
2127
2128                         err = -EHOSTUNREACH;
2129                         if (!grt)
2130                                 goto out;
2131                         if (dev) {
2132                                 if (dev != grt->dst.dev) {
2133                                         ip6_rt_put(grt);
2134                                         goto out;
2135                                 }
2136                         } else {
                                     /* No device given: adopt the one of the
                                      * route to the gateway and take our own
                                      * references on it.
                                      */
2137                                 dev = grt->dst.dev;
2138                                 idev = grt->rt6i_idev;
2139                                 dev_hold(dev);
2140                                 in6_dev_hold(grt->rt6i_idev);
2141                         }
                             /* err stays -EHOSTUNREACH when the route to the
                              * gateway itself goes through another gateway.
                              */
2142                         if (!(grt->rt6i_flags & RTF_GATEWAY))
2143                                 err = 0;
2144                         ip6_rt_put(grt);
2145
2146                         if (err)
2147                                 goto out;
2148                 }
2149                 err = -EINVAL;
2150                 if (!dev) {
2151                         NL_SET_ERR_MSG(extack, "Egress device not specified");
2152                         goto out;
2153                 } else if (dev->flags & IFF_LOOPBACK) {
2154                         NL_SET_ERR_MSG(extack,
2155                                        "Egress device can not be loopback device for this route");
2156                         goto out;
2157                 }
2158         }
2159
2160         err = -ENODEV;
2161         if (!dev)
2162                 goto out;
2163
2164         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2165                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2166                         NL_SET_ERR_MSG(extack, "Invalid source address");
2167                         err = -EINVAL;
2168                         goto out;
2169                 }
2170                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2171                 rt->rt6i_prefsrc.plen = 128;
2172         } else
2173                 rt->rt6i_prefsrc.plen = 0;
2174
2175         rt->rt6i_flags = cfg->fc_flags;
2176
     /* Reached by fall-through and by the reject-route path above, which has
      * already set rt6i_flags and skipped the gateway and prefsrc handling.
      */
2177 install_route:
2178         rt->dst.dev = dev;
2179         rt->rt6i_idev = idev;
2180         rt->rt6i_table = table;
2181
2182         cfg->fc_nlinfo.nl_net = dev_net(dev);
2183
2184         return rt;
2185 out:
2186         if (dev)
2187                 dev_put(dev);
2188         if (idev)
2189                 in6_dev_put(idev);
2190         if (rt)
2191                 dst_release_immediate(&rt->dst);
2192
2193         return ERR_PTR(err);
2194 }
2195
2196 int ip6_route_add(struct fib6_config *cfg,
2197                   struct netlink_ext_ack *extack)
2198 {
2199         struct mx6_config mxc = { .mx = NULL, };
2200         struct rt6_info *rt;
2201         int err;
2202
2203         rt = ip6_route_info_create(cfg, extack);
2204         if (IS_ERR(rt)) {
2205                 err = PTR_ERR(rt);
2206                 rt = NULL;
2207                 goto out;
2208         }
2209
2210         err = ip6_convert_metrics(&mxc, cfg);
2211         if (err)
2212                 goto out;
2213
2214         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2215
2216         kfree(mxc.mx);
2217
2218         return err;
2219 out:
2220         if (rt)
2221                 dst_release_immediate(&rt->dst);
2222
2223         return err;
2224 }
2225
2226 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2227 {
2228         int err;
2229         struct fib6_table *table;
2230         struct net *net = dev_net(rt->dst.dev);
2231
2232         if (rt == net->ipv6.ip6_null_entry) {
2233                 err = -ENOENT;
2234                 goto out;
2235         }
2236
2237         table = rt->rt6i_table;
2238         write_lock_bh(&table->tb6_lock);
2239         err = fib6_del(rt, info);
2240         write_unlock_bh(&table->tb6_lock);
2241
2242 out:
2243         ip6_rt_put(rt);
2244         return err;
2245 }
2246
2247 int ip6_del_rt(struct rt6_info *rt)
2248 {
2249         struct nl_info info = {
2250                 .nl_net = dev_net(rt->dst.dev),
2251         };
2252         return __ip6_del_rt(rt, &info);
2253 }
2254
2255 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2256 {
2257         struct nl_info *info = &cfg->fc_nlinfo;
2258         struct net *net = info->nl_net;
2259         struct sk_buff *skb = NULL;
2260         struct fib6_table *table;
2261         int err = -ENOENT;
2262
2263         if (rt == net->ipv6.ip6_null_entry)
2264                 goto out_put;
2265         table = rt->rt6i_table;
2266         write_lock_bh(&table->tb6_lock);
2267
2268         if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2269                 struct rt6_info *sibling, *next_sibling;
2270
2271                 /* prefer to send a single notification with all hops */
2272                 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2273                 if (skb) {
2274                         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2275
2276                         if (rt6_fill_node(net, skb, rt,
2277                                           NULL, NULL, 0, RTM_DELROUTE,
2278                                           info->portid, seq, 0) < 0) {
2279                                 kfree_skb(skb);
2280                                 skb = NULL;
2281                         } else
2282                                 info->skip_notify = 1;
2283                 }
2284
2285                 list_for_each_entry_safe(sibling, next_sibling,
2286                                          &rt->rt6i_siblings,
2287                                          rt6i_siblings) {
2288                         err = fib6_del(sibling, info);
2289                         if (err)
2290                                 goto out_unlock;
2291                 }
2292         }
2293
2294         err = fib6_del(rt, info);
2295 out_unlock:
2296         write_unlock_bh(&table->tb6_lock);
2297 out_put:
2298         ip6_rt_put(rt);
2299
2300         if (skb) {
2301                 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2302                             info->nlh, gfp_any());
2303         }
2304         return err;
2305 }
2306
2307 static int ip6_route_del(struct fib6_config *cfg,
2308                          struct netlink_ext_ack *extack)
2309 {
2310         struct fib6_table *table;
2311         struct fib6_node *fn;
2312         struct rt6_info *rt;
2313         int err = -ESRCH;
2314
2315         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2316         if (!table) {
2317                 NL_SET_ERR_MSG(extack, "FIB table does not exist");
2318                 return err;
2319         }
2320
2321         read_lock_bh(&table->tb6_lock);
2322
2323         fn = fib6_locate(&table->tb6_root,
2324                          &cfg->fc_dst, cfg->fc_dst_len,
2325                          &cfg->fc_src, cfg->fc_src_len);
2326
2327         if (fn) {
2328                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2329                         if ((rt->rt6i_flags & RTF_CACHE) &&
2330                             !(cfg->fc_flags & RTF_CACHE))
2331                                 continue;
2332                         if (cfg->fc_ifindex &&
2333                             (!rt->dst.dev ||
2334                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2335                                 continue;
2336                         if (cfg->fc_flags & RTF_GATEWAY &&
2337                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2338                                 continue;
2339                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2340                                 continue;
2341                         if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2342                                 continue;
2343                         dst_hold(&rt->dst);
2344                         read_unlock_bh(&table->tb6_lock);
2345
2346                         /* if gateway was specified only delete the one hop */
2347                         if (cfg->fc_flags & RTF_GATEWAY)
2348                                 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2349
2350                         return __ip6_del_rt_siblings(rt, cfg);
2351                 }
2352         }
2353         read_unlock_bh(&table->tb6_lock);
2354
2355         return err;
2356 }
2357
/*
 * rt6_do_redirect - validate a received redirect message and apply it.
 * @dst: route the redirect refers to (treated as an rt6_info)
 * @sk:  not referenced by this function
 * @skb: buffer holding the redirect; the message is read at icmp6_hdr(skb)
 *
 * The message is dropped (plain return) when it is shorter than a
 * struct rd_msg, when its destination is multicast, when the target
 * differs from the destination and is not a link-local unicast address,
 * when the receiving device has no inet6_dev, is forwarding or does not
 * accept redirects, when the ND options or the target link-layer address
 * option are invalid, or when @dst is an RTF_REJECT route.
 *
 * Otherwise the neighbour entry of the target is updated and a new
 * RTF_CACHE route for the message destination is created from @dst and
 * inserted.  After a successful insertion a NETEVENT_REDIRECT notification
 * is raised and, if @dst was itself a cache route, it is deleted.
 */
2358 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2359 {
2360         struct netevent_redirect netevent;
2361         struct rt6_info *rt, *nrt = NULL;
2362         struct ndisc_options ndopts;
2363         struct inet6_dev *in6_dev;
2364         struct neighbour *neigh;
2365         struct rd_msg *msg;
2366         int optlen, on_link;
2367         u8 *lladdr;
2368
             /* Bytes following the fixed redirect header, i.e. the options. */
2369         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2370         optlen -= sizeof(*msg);
2371
2372         if (optlen < 0) {
2373                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2374                 return;
2375         }
2376
2377         msg = (struct rd_msg *)icmp6_hdr(skb);
2378
2379         if (ipv6_addr_is_multicast(&msg->dest)) {
2380                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2381                 return;
2382         }
2383
             /* target == dest means the destination is reached directly;
              * any other target has to be a link-local unicast address.
              */
2384         on_link = 0;
2385         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2386                 on_link = 1;
2387         } else if (ipv6_addr_type(&msg->target) !=
2388                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2389                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2390                 return;
2391         }
2392
2393         in6_dev = __in6_dev_get(skb->dev);
2394         if (!in6_dev)
2395                 return;
2396         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2397                 return;
2398
2399         /* RFC2461 8.1:
2400          *      The IP source address of the Redirect MUST be the same as the current
2401          *      first-hop router for the specified ICMP Destination Address.
2402          */
2403
2404         if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2405                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2406                 return;
2407         }
2408
             /* The target link-layer address option is optional; lladdr stays
              * NULL without it.
              */
2409         lladdr = NULL;
2410         if (ndopts.nd_opts_tgt_lladdr) {
2411                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2412                                              skb->dev);
2413                 if (!lladdr) {
2414                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2415                         return;
2416                 }
2417         }
2418
2419         rt = (struct rt6_info *) dst;
2420         if (rt->rt6i_flags & RTF_REJECT) {
2421                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2422                 return;
2423         }
2424
2425         /* Redirect received -> path was valid.
2426          * Look, redirects are sent only in response to data packets,
2427          * so that this nexthop apparently is reachable. --ANK
2428          */
2429         dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2430
             /* NOTE(review): the final argument 1 presumably asks for the
              * entry to be created when missing - confirm against
              * __neigh_lookup().
              */
2431         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2432         if (!neigh)
2433                 return;
2434
2435         /*
2436          *      We have finally decided to accept it.
2437          */
2438
2439         ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2440                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2441                      NEIGH_UPDATE_F_OVERRIDE|
2442                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2443                                      NEIGH_UPDATE_F_ISROUTER)),
2444                      NDISC_REDIRECT, &ndopts);
2445
2446         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2447         if (!nrt)
2448                 goto out;
2449
             /* An on-link destination gets no RTF_GATEWAY flag. */
2450         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2451         if (on_link)
2452                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2453
2454         nrt->rt6i_protocol = RTPROT_REDIRECT;
2455         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2456
2457         if (ip6_ins_rt(nrt))
2458                 goto out_release;
2459
2460         netevent.old = &rt->dst;
2461         netevent.new = &nrt->dst;
2462         netevent.daddr = &msg->dest;
2463         netevent.neigh = neigh;
2464         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2465
             /* The redirected route is itself a cache entry: remove it.
              * ip6_del_rt() drops one reference on the route it is given;
              * dst_clone() presumably supplies that reference.
              */
2466         if (rt->rt6i_flags & RTF_CACHE) {
2467                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2468                 ip6_del_rt(rt);
2469         }
2470
2471 out_release:
2472         /* Release the reference taken in
2473          * ip6_rt_cache_alloc()
2474          */
2475         dst_release(&nrt->dst);
2476
2477 out:
2478         neigh_release(neigh);
2479 }
2480
2481 /*
2482  *      Misc support functions
2483  */
2484
2485 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2486 {
2487         BUG_ON(from->dst.from);
2488
2489         rt->rt6i_flags &= ~RTF_EXPIRES;
2490         dst_hold(&from->dst);
2491         rt->dst.from = &from->dst;
2492         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2493 }
2494
2495 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2496 {
2497         rt->dst.input = ort->dst.input;
2498         rt->dst.output = ort->dst.output;
2499         rt->rt6i_dst = ort->rt6i_dst;
2500         rt->dst.error = ort->dst.error;
2501         rt->rt6i_idev = ort->rt6i_idev;
2502         if (rt->rt6i_idev)
2503                 in6_dev_hold(rt->rt6i_idev);
2504         rt->dst.lastuse = jiffies;
2505         rt->rt6i_gateway = ort->rt6i_gateway;
2506         rt->rt6i_flags = ort->rt6i_flags;
2507         rt6_set_from(rt, ort);
2508         rt->rt6i_metric = ort->rt6i_metric;
2509 #ifdef CONFIG_IPV6_SUBTREES
2510         rt->rt6i_src = ort->rt6i_src;
2511 #endif
2512         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2513         rt->rt6i_table = ort->rt6i_table;
2514         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2515 }
2516
2517 #ifdef CONFIG_IPV6_ROUTE_INFO
2518 static struct rt6_info *rt6_get_route_info(struct net *net,
2519                                            const struct in6_addr *prefix, int prefixlen,
2520                                            const struct in6_addr *gwaddr,
2521                                            struct net_device *dev)
2522 {
2523         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2524         int ifindex = dev->ifindex;
2525         struct fib6_node *fn;
2526         struct rt6_info *rt = NULL;
2527         struct fib6_table *table;
2528
2529         table = fib6_get_table(net, tb_id);
2530         if (!table)
2531                 return NULL;
2532
2533         read_lock_bh(&table->tb6_lock);
2534         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2535         if (!fn)
2536                 goto out;
2537
2538         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2539                 if (rt->dst.dev->ifindex != ifindex)
2540                         continue;
2541                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2542                         continue;
2543                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2544                         continue;
2545                 dst_hold(&rt->dst);
2546                 break;
2547         }
2548 out:
2549         read_unlock_bh(&table->tb6_lock);
2550         return rt;
2551 }
2552
2553 static struct rt6_info *rt6_add_route_info(struct net *net,
2554                                            const struct in6_addr *prefix, int prefixlen,
2555                                            const struct in6_addr *gwaddr,
2556                                            struct net_device *dev,
2557                                            unsigned int pref)
2558 {
2559         struct fib6_config cfg = {
2560                 .fc_metric      = IP6_RT_PRIO_USER,
2561                 .fc_ifindex     = dev->ifindex,
2562                 .fc_dst_len     = prefixlen,
2563                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2564                                   RTF_UP | RTF_PREF(pref),
2565                 .fc_protocol = RTPROT_RA,
2566                 .fc_nlinfo.portid = 0,
2567                 .fc_nlinfo.nlh = NULL,
2568                 .fc_nlinfo.nl_net = net,
2569         };
2570
2571         cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2572         cfg.fc_dst = *prefix;
2573         cfg.fc_gateway = *gwaddr;
2574
2575         /* We should treat it as a default route if prefix length is 0. */
2576         if (!prefixlen)
2577                 cfg.fc_flags |= RTF_DEFAULT;
2578
2579         ip6_route_add(&cfg, NULL);
2580
2581         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2582 }
2583 #endif
2584
2585 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2586 {
2587         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2588         struct rt6_info *rt;
2589         struct fib6_table *table;
2590
2591         table = fib6_get_table(dev_net(dev), tb_id);
2592         if (!table)
2593                 return NULL;
2594
2595         read_lock_bh(&table->tb6_lock);
2596         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2597                 if (dev == rt->dst.dev &&
2598                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2599                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2600                         break;
2601         }
2602         if (rt)
2603                 dst_hold(&rt->dst);
2604         read_unlock_bh(&table->tb6_lock);
2605         return rt;
2606 }
2607
2608 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2609                                      struct net_device *dev,
2610                                      unsigned int pref)
2611 {
2612         struct fib6_config cfg = {
2613                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2614                 .fc_metric      = IP6_RT_PRIO_USER,
2615                 .fc_ifindex     = dev->ifindex,
2616                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2617                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2618                 .fc_protocol = RTPROT_RA,
2619                 .fc_nlinfo.portid = 0,
2620                 .fc_nlinfo.nlh = NULL,
2621                 .fc_nlinfo.nl_net = dev_net(dev),
2622         };
2623
2624         cfg.fc_gateway = *gwaddr;
2625
2626         if (!ip6_route_add(&cfg, NULL)) {
2627                 struct fib6_table *table;
2628
2629                 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2630                 if (table)
2631                         table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2632         }
2633
2634         return rt6_get_dflt_router(gwaddr, dev);
2635 }
2636
2637 static void __rt6_purge_dflt_routers(struct fib6_table *table)
2638 {
2639         struct rt6_info *rt;
2640
2641 restart:
2642         read_lock_bh(&table->tb6_lock);
2643         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2644                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2645                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2646                         dst_hold(&rt->dst);
2647                         read_unlock_bh(&table->tb6_lock);
2648                         ip6_del_rt(rt);
2649                         goto restart;
2650                 }
2651         }
2652         read_unlock_bh(&table->tb6_lock);
2653
2654         table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2655 }
2656
2657 void rt6_purge_dflt_routers(struct net *net)
2658 {
2659         struct fib6_table *table;
2660         struct hlist_head *head;
2661         unsigned int h;
2662
2663         rcu_read_lock();
2664
2665         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2666                 head = &net->ipv6.fib_table_hash[h];
2667                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2668                         if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2669                                 __rt6_purge_dflt_routers(table);
2670                 }
2671         }
2672
2673         rcu_read_unlock();
2674 }
2675
2676 static void rtmsg_to_fib6_config(struct net *net,
2677                                  struct in6_rtmsg *rtmsg,
2678                                  struct fib6_config *cfg)
2679 {
2680         memset(cfg, 0, sizeof(*cfg));
2681
2682         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2683                          : RT6_TABLE_MAIN;
2684         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2685         cfg->fc_metric = rtmsg->rtmsg_metric;
2686         cfg->fc_expires = rtmsg->rtmsg_info;
2687         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2688         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2689         cfg->fc_flags = rtmsg->rtmsg_flags;
2690
2691         cfg->fc_nlinfo.nl_net = net;
2692
2693         cfg->fc_dst = rtmsg->rtmsg_dst;
2694         cfg->fc_src = rtmsg->rtmsg_src;
2695         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2696 }
2697
2698 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2699 {
2700         struct fib6_config cfg;
2701         struct in6_rtmsg rtmsg;
2702         int err;
2703
2704         switch (cmd) {
2705         case SIOCADDRT:         /* Add a route */
2706         case SIOCDELRT:         /* Delete a route */
2707                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2708                         return -EPERM;
2709                 err = copy_from_user(&rtmsg, arg,
2710                                      sizeof(struct in6_rtmsg));
2711                 if (err)
2712                         return -EFAULT;
2713
2714                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2715
2716                 rtnl_lock();
2717                 switch (cmd) {
2718                 case SIOCADDRT:
2719                         err = ip6_route_add(&cfg, NULL);
2720                         break;
2721                 case SIOCDELRT:
2722                         err = ip6_route_del(&cfg, NULL);
2723                         break;
2724                 default:
2725                         err = -EINVAL;
2726                 }
2727                 rtnl_unlock();
2728
2729                 return err;
2730         }
2731
2732         return -EINVAL;
2733 }
2734
2735 /*
2736  *      Drop the packet on the floor
2737  */
2738
2739 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2740 {
2741         int type;
2742         struct dst_entry *dst = skb_dst(skb);
2743         switch (ipstats_mib_noroutes) {
2744         case IPSTATS_MIB_INNOROUTES:
2745                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2746                 if (type == IPV6_ADDR_ANY) {
2747                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2748                                       IPSTATS_MIB_INADDRERRORS);
2749                         break;
2750                 }
2751                 /* FALLTHROUGH */
2752         case IPSTATS_MIB_OUTNOROUTES:
2753                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2754                               ipstats_mib_noroutes);
2755                 break;
2756         }
2757         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2758         kfree_skb(skb);
2759         return 0;
2760 }
2761
2762 static int ip6_pkt_discard(struct sk_buff *skb)
2763 {
2764         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2765 }
2766
2767 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2768 {
2769         skb->dev = skb_dst(skb)->dev;
2770         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2771 }
2772
2773 static int ip6_pkt_prohibit(struct sk_buff *skb)
2774 {
2775         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2776 }
2777
2778 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2779 {
2780         skb->dev = skb_dst(skb)->dev;
2781         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2782 }
2783
2784 /*
2785  *      Allocate a dst for local (unicast / anycast) address.
2786  */
2787
2788 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2789                                     const struct in6_addr *addr,
2790                                     bool anycast)
2791 {
2792         u32 tb_id;
2793         struct net *net = dev_net(idev->dev);
2794         struct net_device *dev = idev->dev;
2795         struct rt6_info *rt;
2796
2797         rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2798         if (!rt)
2799                 return ERR_PTR(-ENOMEM);
2800
2801         in6_dev_hold(idev);
2802
2803         rt->dst.flags |= DST_HOST;
2804         rt->dst.input = ip6_input;
2805         rt->dst.output = ip6_output;
2806         rt->rt6i_idev = idev;
2807
2808         rt->rt6i_protocol = RTPROT_KERNEL;
2809         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2810         if (anycast)
2811                 rt->rt6i_flags |= RTF_ANYCAST;
2812         else
2813                 rt->rt6i_flags |= RTF_LOCAL;
2814
2815         rt->rt6i_gateway  = *addr;
2816         rt->rt6i_dst.addr = *addr;
2817         rt->rt6i_dst.plen = 128;
2818         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2819         rt->rt6i_table = fib6_get_table(net, tb_id);
2820
2821         return rt;
2822 }
2823
2824 /* remove deleted ip from prefsrc entries */
2825 struct arg_dev_net_ip {
2826         struct net_device *dev;
2827         struct net *net;
2828         struct in6_addr *addr;
2829 };
2830
2831 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2832 {
2833         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2834         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2835         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2836
2837         if (((void *)rt->dst.dev == dev || !dev) &&
2838             rt != net->ipv6.ip6_null_entry &&
2839             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2840                 /* remove prefsrc entry */
2841                 rt->rt6i_prefsrc.plen = 0;
2842         }
2843         return 0;
2844 }
2845
2846 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2847 {
2848         struct net *net = dev_net(ifp->idev->dev);
2849         struct arg_dev_net_ip adni = {
2850                 .dev = ifp->idev->dev,
2851                 .net = net,
2852                 .addr = &ifp->addr,
2853         };
2854         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2855 }
2856
2857 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2858 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2859
2860 /* Remove routers and update dst entries when gateway turn into host. */
2861 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2862 {
2863         struct in6_addr *gateway = (struct in6_addr *)arg;
2864
2865         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2866              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2867              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2868                 return -1;
2869         }
2870         return 0;
2871 }
2872
2873 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2874 {
2875         fib6_clean_all(net, fib6_clean_tohost, gateway);
2876 }
2877
2878 struct arg_dev_net {
2879         struct net_device *dev;
2880         struct net *net;
2881 };
2882
2883 /* called with write lock held for table with rt */
2884 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2885 {
2886         const struct arg_dev_net *adn = arg;
2887         const struct net_device *dev = adn->dev;
2888
2889         if ((rt->dst.dev == dev || !dev) &&
2890             rt != adn->net->ipv6.ip6_null_entry &&
2891             (rt->rt6i_nsiblings == 0 ||
2892              (dev && netdev_unregistering(dev)) ||
2893              !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2894                 return -1;
2895
2896         return 0;
2897 }
2898
2899 void rt6_ifdown(struct net *net, struct net_device *dev)
2900 {
2901         struct arg_dev_net adn = {
2902                 .dev = dev,
2903                 .net = net,
2904         };
2905
2906         fib6_clean_all(net, fib6_ifdown, &adn);
2907         if (dev)
2908                 rt6_uncached_list_flush_dev(net, dev);
2909 }
2910
2911 struct rt6_mtu_change_arg {
2912         struct net_device *dev;
2913         unsigned int mtu;
2914 };
2915
2916 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2917 {
2918         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2919         struct inet6_dev *idev;
2920
2921         /* In IPv6 pmtu discovery is not optional,
2922            so that RTAX_MTU lock cannot disable it.
2923            We still use this lock to block changes
2924            caused by addrconf/ndisc.
2925         */
2926
2927         idev = __in6_dev_get(arg->dev);
2928         if (!idev)
2929                 return 0;
2930
2931         /* For administrative MTU increase, there is no way to discover
2932            IPv6 PMTU increase, so PMTU increase should be updated here.
2933            Since RFC 1981 doesn't include administrative MTU increase
2934            update PMTU increase is a MUST. (i.e. jumbo frame)
2935          */
2936         /*
2937            If new MTU is less than route PMTU, this new MTU will be the
2938            lowest MTU in the path, update the route PMTU to reflect PMTU
2939            decreases; if new MTU is greater than route PMTU, and the
2940            old MTU is the lowest MTU in the path, update the route PMTU
2941            to reflect the increase. In this case if the other nodes' MTU
2942            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2943            PMTU discovery.
2944          */
2945         if (rt->dst.dev == arg->dev &&
2946             dst_metric_raw(&rt->dst, RTAX_MTU) &&
2947             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2948                 if (rt->rt6i_flags & RTF_CACHE) {
2949                         /* For RTF_CACHE with rt6i_pmtu == 0
2950                          * (i.e. a redirected route),
2951                          * the metrics of its rt->dst.from has already
2952                          * been updated.
2953                          */
2954                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2955                                 rt->rt6i_pmtu = arg->mtu;
2956                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2957                            (dst_mtu(&rt->dst) < arg->mtu &&
2958                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2959                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2960                 }
2961         }
2962         return 0;
2963 }
2964
2965 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2966 {
2967         struct rt6_mtu_change_arg arg = {
2968                 .dev = dev,
2969                 .mtu = mtu,
2970         };
2971
2972         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2973 }
2974
2975 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2976         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2977         [RTA_PREFSRC]           = { .len = sizeof(struct in6_addr) },
2978         [RTA_OIF]               = { .type = NLA_U32 },
2979         [RTA_IIF]               = { .type = NLA_U32 },
2980         [RTA_PRIORITY]          = { .type = NLA_U32 },
2981         [RTA_METRICS]           = { .type = NLA_NESTED },
2982         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2983         [RTA_PREF]              = { .type = NLA_U8 },
2984         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2985         [RTA_ENCAP]             = { .type = NLA_NESTED },
2986         [RTA_EXPIRES]           = { .type = NLA_U32 },
2987         [RTA_UID]               = { .type = NLA_U32 },
2988         [RTA_MARK]              = { .type = NLA_U32 },
2989         [RTA_TABLE]             = { .type = NLA_U32 },
2990 };
2991
2992 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2993                               struct fib6_config *cfg,
2994                               struct netlink_ext_ack *extack)
2995 {
2996         struct rtmsg *rtm;
2997         struct nlattr *tb[RTA_MAX+1];
2998         unsigned int pref;
2999         int err;
3000
3001         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3002                           NULL);
3003         if (err < 0)
3004                 goto errout;
3005
3006         err = -EINVAL;
3007         rtm = nlmsg_data(nlh);
3008         memset(cfg, 0, sizeof(*cfg));
3009
3010         cfg->fc_table = rtm->rtm_table;
3011         cfg->fc_dst_len = rtm->rtm_dst_len;
3012         cfg->fc_src_len = rtm->rtm_src_len;
3013         cfg->fc_flags = RTF_UP;
3014         cfg->fc_protocol = rtm->rtm_protocol;
3015         cfg->fc_type = rtm->rtm_type;
3016
3017         if (rtm->rtm_type == RTN_UNREACHABLE ||
3018             rtm->rtm_type == RTN_BLACKHOLE ||
3019             rtm->rtm_type == RTN_PROHIBIT ||
3020             rtm->rtm_type == RTN_THROW)
3021                 cfg->fc_flags |= RTF_REJECT;
3022
3023         if (rtm->rtm_type == RTN_LOCAL)
3024                 cfg->fc_flags |= RTF_LOCAL;
3025
3026         if (rtm->rtm_flags & RTM_F_CLONED)
3027                 cfg->fc_flags |= RTF_CACHE;
3028
3029         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
3030         cfg->fc_nlinfo.nlh = nlh;
3031         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
3032
3033         if (tb[RTA_GATEWAY]) {
3034                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
3035                 cfg->fc_flags |= RTF_GATEWAY;
3036         }
3037         if (tb[RTA_VIA]) {
3038                 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
3039                 goto errout;
3040         }
3041
3042         if (tb[RTA_DST]) {
3043                 int plen = (rtm->rtm_dst_len + 7) >> 3;
3044
3045                 if (nla_len(tb[RTA_DST]) < plen)
3046                         goto errout;
3047
3048                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
3049         }
3050
3051         if (tb[RTA_SRC]) {
3052                 int plen = (rtm->rtm_src_len + 7) >> 3;
3053
3054                 if (nla_len(tb[RTA_SRC]) < plen)
3055                         goto errout;
3056
3057                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
3058         }
3059
3060         if (tb[RTA_PREFSRC])
3061                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
3062
3063         if (tb[RTA_OIF])
3064                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3065
3066         if (tb[RTA_PRIORITY])
3067                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3068
3069         if (tb[RTA_METRICS]) {
3070                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3071                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
3072         }
3073
3074         if (tb[RTA_TABLE])
3075                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3076
3077         if (tb[RTA_MULTIPATH]) {
3078                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3079                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
3080
3081                 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
3082                                                      cfg->fc_mp_len, extack);
3083                 if (err < 0)
3084                         goto errout;
3085         }
3086
3087         if (tb[RTA_PREF]) {
3088                 pref = nla_get_u8(tb[RTA_PREF]);
3089                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3090                     pref != ICMPV6_ROUTER_PREF_HIGH)
3091                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
3092                 cfg->fc_flags |= RTF_PREF(pref);
3093         }
3094
3095         if (tb[RTA_ENCAP])
3096                 cfg->fc_encap = tb[RTA_ENCAP];
3097
3098         if (tb[RTA_ENCAP_TYPE]) {
3099                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3100
3101                 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
3102                 if (err < 0)
3103                         goto errout;
3104         }
3105
3106         if (tb[RTA_EXPIRES]) {
3107                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3108
3109                 if (addrconf_finite_timeout(timeout)) {
3110                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3111                         cfg->fc_flags |= RTF_EXPIRES;
3112                 }
3113         }
3114
3115         err = 0;
3116 errout:
3117         return err;
3118 }
3119
3120 struct rt6_nh {
3121         struct rt6_info *rt6_info;
3122         struct fib6_config r_cfg;
3123         struct mx6_config mxc;
3124         struct list_head next;
3125 };
3126
3127 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3128 {
3129         struct rt6_nh *nh;
3130
3131         list_for_each_entry(nh, rt6_nh_list, next) {
3132                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3133                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3134                         nh->r_cfg.fc_ifindex);
3135         }
3136 }
3137
3138 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3139                                  struct rt6_info *rt, struct fib6_config *r_cfg)
3140 {
3141         struct rt6_nh *nh;
3142         int err = -EEXIST;
3143
3144         list_for_each_entry(nh, rt6_nh_list, next) {
3145                 /* check if rt6_info already exists */
3146                 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3147                         return err;
3148         }
3149
3150         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3151         if (!nh)
3152                 return -ENOMEM;
3153         nh->rt6_info = rt;
3154         err = ip6_convert_metrics(&nh->mxc, r_cfg);
3155         if (err) {
3156                 kfree(nh);
3157                 return err;
3158         }
3159         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3160         list_add_tail(&nh->next, rt6_nh_list);
3161
3162         return 0;
3163 }
3164
3165 static void ip6_route_mpath_notify(struct rt6_info *rt,
3166                                    struct rt6_info *rt_last,
3167                                    struct nl_info *info,
3168                                    __u16 nlflags)
3169 {
3170         /* if this is an APPEND route, then rt points to the first route
3171          * inserted and rt_last points to last route inserted. Userspace
3172          * wants a consistent dump of the route which starts at the first
3173          * nexthop. Since sibling routes are always added at the end of
3174          * the list, find the first sibling of the last route appended
3175          */
3176         if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3177                 rt = list_first_entry(&rt_last->rt6i_siblings,
3178                                       struct rt6_info,
3179                                       rt6i_siblings);
3180         }
3181
3182         if (rt)
3183                 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3184 }
3185
3186 static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
3187                              struct netlink_ext_ack *extack)
3188 {
3189         if (nla_len(nla) < sizeof(*gw)) {
3190                 NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
3191                 return -EINVAL;
3192         }
3193
3194         *gw = nla_get_in6_addr(nla);
3195
3196         return 0;
3197 }
3198
3199 static int ip6_route_multipath_add(struct fib6_config *cfg,
3200                                    struct netlink_ext_ack *extack)
3201 {
3202         struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3203         struct nl_info *info = &cfg->fc_nlinfo;
3204         struct fib6_config r_cfg;
3205         struct rtnexthop *rtnh;
3206         struct rt6_info *rt;
3207         struct rt6_nh *err_nh;
3208         struct rt6_nh *nh, *nh_safe;
3209         __u16 nlflags;
3210         int remaining;
3211         int attrlen;
3212         int err = 1;
3213         int nhn = 0;
3214         int replace = (cfg->fc_nlinfo.nlh &&
3215                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3216         LIST_HEAD(rt6_nh_list);
3217
3218         nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3219         if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3220                 nlflags |= NLM_F_APPEND;
3221
3222         remaining = cfg->fc_mp_len;
3223         rtnh = (struct rtnexthop *)cfg->fc_mp;
3224
3225         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3226          * rt6_info structs per nexthop
3227          */
3228         while (rtnh_ok(rtnh, remaining)) {
3229                 memcpy(&r_cfg, cfg, sizeof(*cfg));
3230                 if (rtnh->rtnh_ifindex)
3231                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3232
3233                 attrlen = rtnh_attrlen(rtnh);
3234                 if (attrlen > 0) {
3235                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3236
3237                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3238                         if (nla) {
3239                                 err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
3240                                                         extack);
3241                                 if (err)
3242                                         goto cleanup;
3243
3244                                 r_cfg.fc_flags |= RTF_GATEWAY;
3245                         }
3246                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3247                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3248                         if (nla)
3249                                 r_cfg.fc_encap_type = nla_get_u16(nla);
3250                 }
3251
3252                 rt = ip6_route_info_create(&r_cfg, extack);
3253                 if (IS_ERR(rt)) {
3254                         err = PTR_ERR(rt);
3255                         rt = NULL;
3256                         goto cleanup;
3257                 }
3258
3259                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3260                 if (err) {
3261                         dst_release_immediate(&rt->dst);
3262                         goto cleanup;
3263                 }
3264
3265                 rtnh = rtnh_next(rtnh, &remaining);
3266         }
3267
3268         /* for add and replace send one notification with all nexthops.
3269          * Skip the notification in fib6_add_rt2node and send one with
3270          * the full route when done
3271          */
3272         info->skip_notify = 1;
3273
3274         err_nh = NULL;
3275         list_for_each_entry(nh, &rt6_nh_list, next) {
3276                 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3277
3278                 if (!err) {
3279                         /* save reference to last route successfully inserted */
3280                         rt_last = nh->rt6_info;
3281
3282                         /* save reference to first route for notification */
3283                         if (!rt_notif)
3284                                 rt_notif = nh->rt6_info;
3285                 }
3286
3287                 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3288                 nh->rt6_info = NULL;
3289                 if (err) {
3290                         if (replace && nhn)
3291                                 ip6_print_replace_route_err(&rt6_nh_list);
3292                         err_nh = nh;
3293                         goto add_errout;
3294                 }
3295
3296                 /* Because each route is added like a single route we remove
3297                  * these flags after the first nexthop: if there is a collision,
3298                  * we have already failed to add the first nexthop:
3299                  * fib6_add_rt2node() has rejected it; when replacing, old
3300                  * nexthops have been replaced by first new, the rest should
3301                  * be added to it.
3302                  */
3303                 if (cfg->fc_nlinfo.nlh) {
3304                         cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3305                                                              NLM_F_REPLACE);
3306                         cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
3307                 }
3308                 nhn++;
3309         }
3310
3311         /* success ... tell user about new route */
3312         ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3313         goto cleanup;
3314
3315 add_errout:
3316         /* send notification for routes that were added so that
3317          * the delete notifications sent by ip6_route_del are
3318          * coherent
3319          */
3320         if (rt_notif)
3321                 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3322
3323         /* Delete routes that were already added */
3324         list_for_each_entry(nh, &rt6_nh_list, next) {
3325                 if (err_nh == nh)
3326                         break;
3327                 ip6_route_del(&nh->r_cfg, extack);
3328         }
3329
3330 cleanup:
3331         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3332                 if (nh->rt6_info)
3333                         dst_release_immediate(&nh->rt6_info->dst);
3334                 kfree(nh->mxc.mx);
3335                 list_del(&nh->next);
3336                 kfree(nh);
3337         }
3338
3339         return err;
3340 }
3341
3342 static int ip6_route_multipath_del(struct fib6_config *cfg,
3343                                    struct netlink_ext_ack *extack)
3344 {
3345         struct fib6_config r_cfg;
3346         struct rtnexthop *rtnh;
3347         int remaining;
3348         int attrlen;
3349         int err = 1, last_err = 0;
3350
3351         remaining = cfg->fc_mp_len;
3352         rtnh = (struct rtnexthop *)cfg->fc_mp;
3353
3354         /* Parse a Multipath Entry */
3355         while (rtnh_ok(rtnh, remaining)) {
3356                 memcpy(&r_cfg, cfg, sizeof(*cfg));
3357                 if (rtnh->rtnh_ifindex)
3358                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3359
3360                 attrlen = rtnh_attrlen(rtnh);
3361                 if (attrlen > 0) {
3362                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3363
3364                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3365                         if (nla) {
3366                                 err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
3367                                                         extack);
3368                                 if (err) {
3369                                         last_err = err;
3370                                         goto next_rtnh;
3371                                 }
3372
3373                                 r_cfg.fc_flags |= RTF_GATEWAY;
3374                         }
3375                 }
3376                 err = ip6_route_del(&r_cfg, extack);
3377                 if (err)
3378                         last_err = err;
3379
3380 next_rtnh:
3381                 rtnh = rtnh_next(rtnh, &remaining);
3382         }
3383
3384         return last_err;
3385 }
3386
3387 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3388                               struct netlink_ext_ack *extack)
3389 {
3390         struct fib6_config cfg;
3391         int err;
3392
3393         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3394         if (err < 0)
3395                 return err;
3396
3397         if (cfg.fc_mp)
3398                 return ip6_route_multipath_del(&cfg, extack);
3399         else {
3400                 cfg.fc_delete_all_nh = 1;
3401                 return ip6_route_del(&cfg, extack);
3402         }
3403 }
3404
3405 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3406                               struct netlink_ext_ack *extack)
3407 {
3408         struct fib6_config cfg;
3409         int err;
3410
3411         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3412         if (err < 0)
3413                 return err;
3414
3415         if (cfg.fc_mp)
3416                 return ip6_route_multipath_add(&cfg, extack);
3417         else
3418                 return ip6_route_add(&cfg, extack);
3419 }
3420
3421 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3422 {
3423         int nexthop_len = 0;
3424
3425         if (rt->rt6i_nsiblings) {
3426                 nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
3427                             + NLA_ALIGN(sizeof(struct rtnexthop))
3428                             + nla_total_size(16) /* RTA_GATEWAY */
3429                             + lwtunnel_get_encap_size(rt->dst.lwtstate);
3430
3431                 nexthop_len *= rt->rt6i_nsiblings;
3432         }
3433
3434         return NLMSG_ALIGN(sizeof(struct rtmsg))
3435                + nla_total_size(16) /* RTA_SRC */
3436                + nla_total_size(16) /* RTA_DST */
3437                + nla_total_size(16) /* RTA_GATEWAY */
3438                + nla_total_size(16) /* RTA_PREFSRC */
3439                + nla_total_size(4) /* RTA_TABLE */
3440                + nla_total_size(4) /* RTA_IIF */
3441                + nla_total_size(4) /* RTA_OIF */
3442                + nla_total_size(4) /* RTA_PRIORITY */
3443                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3444                + nla_total_size(sizeof(struct rta_cacheinfo))
3445                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3446                + nla_total_size(1) /* RTA_PREF */
3447                + lwtunnel_get_encap_size(rt->dst.lwtstate)
3448                + nexthop_len;
3449 }
3450
3451 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3452                             unsigned int *flags, bool skip_oif)
3453 {
3454         if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3455                 *flags |= RTNH_F_LINKDOWN;
3456                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3457                         *flags |= RTNH_F_DEAD;
3458         }
3459
3460         if (rt->rt6i_flags & RTF_GATEWAY) {
3461                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3462                         goto nla_put_failure;
3463         }
3464
3465         if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
3466                 *flags |= RTNH_F_OFFLOAD;
3467
3468         /* not needed for multipath encoding b/c it has a rtnexthop struct */
3469         if (!skip_oif && rt->dst.dev &&
3470             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3471                 goto nla_put_failure;
3472
3473         if (rt->dst.lwtstate &&
3474             lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3475                 goto nla_put_failure;
3476
3477         return 0;
3478
3479 nla_put_failure:
3480         return -EMSGSIZE;
3481 }
3482
3483 /* add multipath next hop */
3484 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3485 {
3486         struct rtnexthop *rtnh;
3487         unsigned int flags = 0;
3488
3489         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3490         if (!rtnh)
3491                 goto nla_put_failure;
3492
3493         rtnh->rtnh_hops = 0;
3494         rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3495
3496         if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
3497                 goto nla_put_failure;
3498
3499         rtnh->rtnh_flags = flags;
3500
3501         /* length of rtnetlink header + attributes */
3502         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3503
3504         return 0;
3505
3506 nla_put_failure:
3507         return -EMSGSIZE;
3508 }
3509
3510 static int rt6_fill_node(struct net *net,
3511                          struct sk_buff *skb, struct rt6_info *rt,
3512                          struct in6_addr *dst, struct in6_addr *src,
3513                          int iif, int type, u32 portid, u32 seq,
3514                          unsigned int flags)
3515 {
3516         u32 metrics[RTAX_MAX];
3517         struct rtmsg *rtm;
3518         struct nlmsghdr *nlh;
3519         long expires;
3520         u32 table;
3521
3522         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3523         if (!nlh)
3524                 return -EMSGSIZE;
3525
3526         rtm = nlmsg_data(nlh);
3527         rtm->rtm_family = AF_INET6;
3528         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3529         rtm->rtm_src_len = rt->rt6i_src.plen;
3530         rtm->rtm_tos = 0;
3531         if (rt->rt6i_table)
3532                 table = rt->rt6i_table->tb6_id;
3533         else
3534                 table = RT6_TABLE_UNSPEC;
3535         rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
3536         if (nla_put_u32(skb, RTA_TABLE, table))
3537                 goto nla_put_failure;
3538         if (rt->rt6i_flags & RTF_REJECT) {
3539                 switch (rt->dst.error) {
3540                 case -EINVAL:
3541                         rtm->rtm_type = RTN_BLACKHOLE;
3542                         break;
3543                 case -EACCES:
3544                         rtm->rtm_type = RTN_PROHIBIT;
3545                         break;
3546                 case -EAGAIN:
3547                         rtm->rtm_type = RTN_THROW;
3548                         break;
3549                 default:
3550                         rtm->rtm_type = RTN_UNREACHABLE;
3551                         break;
3552                 }
3553         }
3554         else if (rt->rt6i_flags & RTF_LOCAL)
3555                 rtm->rtm_type = RTN_LOCAL;
3556         else if (rt->rt6i_flags & RTF_ANYCAST)
3557                 rtm->rtm_type = RTN_ANYCAST;
3558         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3559                 rtm->rtm_type = RTN_LOCAL;
3560         else
3561                 rtm->rtm_type = RTN_UNICAST;
3562         rtm->rtm_flags = 0;
3563         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3564         rtm->rtm_protocol = rt->rt6i_protocol;
3565
3566         if (rt->rt6i_flags & RTF_CACHE)
3567                 rtm->rtm_flags |= RTM_F_CLONED;
3568
3569         if (dst) {
3570                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3571                         goto nla_put_failure;
3572                 rtm->rtm_dst_len = 128;
3573         } else if (rtm->rtm_dst_len)
3574                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3575                         goto nla_put_failure;
3576 #ifdef CONFIG_IPV6_SUBTREES
3577         if (src) {
3578                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3579                         goto nla_put_failure;
3580                 rtm->rtm_src_len = 128;
3581         } else if (rtm->rtm_src_len &&
3582                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3583                 goto nla_put_failure;
3584 #endif
3585         if (iif) {
3586 #ifdef CONFIG_IPV6_MROUTE
3587                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3588                         int err = ip6mr_get_route(net, skb, rtm, portid);
3589
3590                         if (err == 0)
3591                                 return 0;
3592                         if (err < 0)
3593                                 goto nla_put_failure;
3594                 } else
3595 #endif
3596                         if (nla_put_u32(skb, RTA_IIF, iif))
3597                                 goto nla_put_failure;
3598         } else if (dst) {
3599                 struct in6_addr saddr_buf;
3600                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3601                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3602                         goto nla_put_failure;
3603         }
3604
3605         if (rt->rt6i_prefsrc.plen) {
3606                 struct in6_addr saddr_buf;
3607                 saddr_buf = rt->rt6i_prefsrc.addr;
3608                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3609                         goto nla_put_failure;
3610         }
3611
3612         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3613         if (rt->rt6i_pmtu)
3614                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3615         if (rtnetlink_put_metrics(skb, metrics) < 0)
3616                 goto nla_put_failure;
3617
3618         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3619                 goto nla_put_failure;
3620
3621         /* For multipath routes, walk the siblings list and add
3622          * each as a nexthop within RTA_MULTIPATH.
3623          */
3624         if (rt->rt6i_nsiblings) {
3625                 struct rt6_info *sibling, *next_sibling;
3626                 struct nlattr *mp;
3627
3628                 mp = nla_nest_start(skb, RTA_MULTIPATH);
3629                 if (!mp)
3630                         goto nla_put_failure;
3631
3632                 if (rt6_add_nexthop(skb, rt) < 0)
3633                         goto nla_put_failure;
3634
3635                 list_for_each_entry_safe(sibling, next_sibling,
3636                                          &rt->rt6i_siblings, rt6i_siblings) {
3637                         if (rt6_add_nexthop(skb, sibling) < 0)
3638                                 goto nla_put_failure;
3639                 }
3640
3641                 nla_nest_end(skb, mp);
3642         } else {
3643                 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
3644                         goto nla_put_failure;
3645         }
3646
3647         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3648
3649         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3650                 goto nla_put_failure;
3651
3652         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3653                 goto nla_put_failure;
3654
3655
3656         nlmsg_end(skb, nlh);
3657         return 0;
3658
3659 nla_put_failure:
3660         nlmsg_cancel(skb, nlh);
3661         return -EMSGSIZE;
3662 }
3663
3664 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3665 {
3666         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3667         struct net *net = arg->net;
3668
3669         if (rt == net->ipv6.ip6_null_entry)
3670                 return 0;
3671
3672         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3673                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3674
3675                 /* user wants prefix routes only */
3676                 if (rtm->rtm_flags & RTM_F_PREFIX &&
3677                     !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3678                         /* success since this is not a prefix route */
3679                         return 1;
3680                 }
3681         }
3682
3683         return rt6_fill_node(net,
3684                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3685                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3686                      NLM_F_MULTI);
3687 }
3688
3689 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3690                               struct netlink_ext_ack *extack)
3691 {
3692         struct net *net = sock_net(in_skb->sk);
3693         struct nlattr *tb[RTA_MAX+1];
3694         int err, iif = 0, oif = 0;
3695         struct dst_entry *dst;
3696         struct rt6_info *rt;
3697         struct sk_buff *skb;
3698         struct rtmsg *rtm;
3699         struct flowi6 fl6;
3700         bool fibmatch;
3701
3702         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3703                           extack);
3704         if (err < 0)
3705                 goto errout;
3706
3707         err = -EINVAL;
3708         memset(&fl6, 0, sizeof(fl6));
3709         rtm = nlmsg_data(nlh);
3710         fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3711         fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
3712
3713         if (tb[RTA_SRC]) {
3714                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3715                         goto errout;
3716
3717                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3718         }
3719
3720         if (tb[RTA_DST]) {
3721                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3722                         goto errout;
3723
3724                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3725         }
3726
3727         if (tb[RTA_IIF])
3728                 iif = nla_get_u32(tb[RTA_IIF]);
3729
3730         if (tb[RTA_OIF])
3731                 oif = nla_get_u32(tb[RTA_OIF]);
3732
3733         if (tb[RTA_MARK])
3734                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3735
3736         if (tb[RTA_UID])
3737                 fl6.flowi6_uid = make_kuid(current_user_ns(),
3738                                            nla_get_u32(tb[RTA_UID]));
3739         else
3740                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3741
3742         if (iif) {
3743                 struct net_device *dev;
3744                 int flags = 0;
3745
3746                 rcu_read_lock();
3747
3748                 dev = dev_get_by_index_rcu(net, iif);
3749                 if (!dev) {
3750                         rcu_read_unlock();
3751                         err = -ENODEV;
3752                         goto errout;
3753                 }
3754
3755                 fl6.flowi6_iif = iif;
3756
3757                 if (!ipv6_addr_any(&fl6.saddr))
3758                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3759
3760                 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
3761
3762                 rcu_read_unlock();
3763         } else {
3764                 fl6.flowi6_oif = oif;
3765
3766                 dst = ip6_route_output(net, NULL, &fl6);
3767         }
3768
3769
3770         rt = container_of(dst, struct rt6_info, dst);
3771         if (rt->dst.error) {
3772                 err = rt->dst.error;
3773                 ip6_rt_put(rt);
3774                 goto errout;
3775         }
3776
3777         if (rt == net->ipv6.ip6_null_entry) {
3778                 err = rt->dst.error;
3779                 ip6_rt_put(rt);
3780                 goto errout;
3781         }
3782
3783         if (fibmatch && rt->dst.from) {
3784                 struct rt6_info *ort = container_of(rt->dst.from,
3785                                                     struct rt6_info, dst);
3786
3787                 dst_hold(&ort->dst);
3788                 ip6_rt_put(rt);
3789                 rt = ort;
3790         }
3791
3792         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3793         if (!skb) {
3794                 ip6_rt_put(rt);
3795                 err = -ENOBUFS;
3796                 goto errout;
3797         }
3798
3799         skb_dst_set(skb, &rt->dst);
3800         if (fibmatch)
3801                 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3802                                     RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3803                                     nlh->nlmsg_seq, 0);
3804         else
3805                 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3806                                     RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3807                                     nlh->nlmsg_seq, 0);
3808         if (err < 0) {
3809                 kfree_skb(skb);
3810                 goto errout;
3811         }
3812
3813         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3814 errout:
3815         return err;
3816 }
3817
3818 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3819                      unsigned int nlm_flags)
3820 {
3821         struct sk_buff *skb;
3822         struct net *net = info->nl_net;
3823         u32 seq;
3824         int err;
3825
3826         err = -ENOBUFS;
3827         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3828
3829         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3830         if (!skb)
3831                 goto errout;
3832
3833         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3834                                 event, info->portid, seq, nlm_flags);
3835         if (err < 0) {
3836                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3837                 WARN_ON(err == -EMSGSIZE);
3838                 kfree_skb(skb);
3839                 goto errout;
3840         }
3841         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3842                     info->nlh, gfp_any());
3843         return;
3844 errout:
3845         if (err < 0)
3846                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3847 }
3848
3849 static int ip6_route_dev_notify(struct notifier_block *this,
3850                                 unsigned long event, void *ptr)
3851 {
3852         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3853         struct net *net = dev_net(dev);
3854
3855         if (!(dev->flags & IFF_LOOPBACK))
3856                 return NOTIFY_OK;
3857
3858         if (event == NETDEV_REGISTER) {
3859                 net->ipv6.ip6_null_entry->dst.dev = dev;
3860                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3861 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3862                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3863                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3864                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3865                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3866 #endif
3867          } else if (event == NETDEV_UNREGISTER &&
3868                     dev->reg_state != NETREG_UNREGISTERED) {
3869                 /* NETDEV_UNREGISTER could be fired for multiple times by
3870                  * netdev_wait_allrefs(). Make sure we only call this once.
3871                  */
3872                 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
3873 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3874                 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
3875                 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3876 #endif
3877         }
3878
3879         return NOTIFY_OK;
3880 }
3881
3882 /*
3883  *      /proc
3884  */
3885
3886 #ifdef CONFIG_PROC_FS
3887
3888 static const struct file_operations ipv6_route_proc_fops = {
3889         .owner          = THIS_MODULE,
3890         .open           = ipv6_route_open,
3891         .read           = seq_read,
3892         .llseek         = seq_lseek,
3893         .release        = seq_release_net,
3894 };
3895
3896 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3897 {
3898         struct net *net = (struct net *)seq->private;
3899         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3900                    net->ipv6.rt6_stats->fib_nodes,
3901                    net->ipv6.rt6_stats->fib_route_nodes,
3902                    net->ipv6.rt6_stats->fib_rt_alloc,
3903                    net->ipv6.rt6_stats->fib_rt_entries,
3904                    net->ipv6.rt6_stats->fib_rt_cache,
3905                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3906                    net->ipv6.rt6_stats->fib_discarded_routes);
3907
3908         return 0;
3909 }
3910
3911 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3912 {
3913         return single_open_net(inode, file, rt6_stats_seq_show);
3914 }
3915
3916 static const struct file_operations rt6_stats_seq_fops = {
3917         .owner   = THIS_MODULE,
3918         .open    = rt6_stats_seq_open,
3919         .read    = seq_read,
3920         .llseek  = seq_lseek,
3921         .release = single_release_net,
3922 };
3923 #endif  /* CONFIG_PROC_FS */
3924
3925 #ifdef CONFIG_SYSCTL
3926
3927 static
3928 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3929                               void __user *buffer, size_t *lenp, loff_t *ppos)
3930 {
3931         struct net *net;
3932         int delay;
3933         if (!write)
3934                 return -EINVAL;
3935
3936         net = (struct net *)ctl->extra1;
3937         delay = net->ipv6.sysctl.flush_delay;
3938         proc_dointvec(ctl, write, buffer, lenp, ppos);
3939         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3940         return 0;
3941 }
3942
3943 struct ctl_table ipv6_route_table_template[] = {
3944         {
3945                 .procname       =       "flush",
3946                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3947                 .maxlen         =       sizeof(int),
3948                 .mode           =       0200,
3949                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3950         },
3951         {
3952                 .procname       =       "gc_thresh",
3953                 .data           =       &ip6_dst_ops_template.gc_thresh,
3954                 .maxlen         =       sizeof(int),
3955                 .mode           =       0644,
3956                 .proc_handler   =       proc_dointvec,
3957         },
3958         {
3959                 .procname       =       "max_size",
3960                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3961                 .maxlen         =       sizeof(int),
3962                 .mode           =       0644,
3963                 .proc_handler   =       proc_dointvec,
3964         },
3965         {
3966                 .procname       =       "gc_min_interval",
3967                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3968                 .maxlen         =       sizeof(int),
3969                 .mode           =       0644,
3970                 .proc_handler   =       proc_dointvec_jiffies,
3971         },
3972         {
3973                 .procname       =       "gc_timeout",
3974                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3975                 .maxlen         =       sizeof(int),
3976                 .mode           =       0644,
3977                 .proc_handler   =       proc_dointvec_jiffies,
3978         },
3979         {
3980                 .procname       =       "gc_interval",
3981                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3982                 .maxlen         =       sizeof(int),
3983                 .mode           =       0644,
3984                 .proc_handler   =       proc_dointvec_jiffies,
3985         },
3986         {
3987                 .procname       =       "gc_elasticity",
3988                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3989                 .maxlen         =       sizeof(int),
3990                 .mode           =       0644,
3991                 .proc_handler   =       proc_dointvec,
3992         },
3993         {
3994                 .procname       =       "mtu_expires",
3995                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3996                 .maxlen         =       sizeof(int),
3997                 .mode           =       0644,
3998                 .proc_handler   =       proc_dointvec_jiffies,
3999         },
4000         {
4001                 .procname       =       "min_adv_mss",
4002                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
4003                 .maxlen         =       sizeof(int),
4004                 .mode           =       0644,
4005                 .proc_handler   =       proc_dointvec,
4006         },
4007         {
4008                 .procname       =       "gc_min_interval_ms",
4009                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
4010                 .maxlen         =       sizeof(int),
4011                 .mode           =       0644,
4012                 .proc_handler   =       proc_dointvec_ms_jiffies,
4013         },
4014         { }
4015 };
4016
4017 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
4018 {
4019         struct ctl_table *table;
4020
4021         table = kmemdup(ipv6_route_table_template,
4022                         sizeof(ipv6_route_table_template),
4023                         GFP_KERNEL);
4024
4025         if (table) {
4026                 table[0].data = &net->ipv6.sysctl.flush_delay;
4027                 table[0].extra1 = net;
4028                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
4029                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4030                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4031                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4032                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4033                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4034                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4035                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
4036                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4037
4038                 /* Don't export sysctls to unprivileged users */
4039                 if (net->user_ns != &init_user_ns)
4040                         table[0].procname = NULL;
4041         }
4042
4043         return table;
4044 }
4045 #endif
4046
4047 static int __net_init ip6_route_net_init(struct net *net)
4048 {
4049         int ret = -ENOMEM;
4050
4051         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4052                sizeof(net->ipv6.ip6_dst_ops));
4053
4054         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4055                 goto out_ip6_dst_ops;
4056
4057         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4058                                            sizeof(*net->ipv6.ip6_null_entry),
4059                                            GFP_KERNEL);
4060         if (!net->ipv6.ip6_null_entry)
4061                 goto out_ip6_dst_entries;
4062         net->ipv6.ip6_null_entry->dst.path =
4063                 (struct dst_entry *)net->ipv6.ip6_null_entry;
4064         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4065         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4066                          ip6_template_metrics, true);
4067
4068 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4069         net->ipv6.fib6_has_custom_rules = false;
4070         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4071                                                sizeof(*net->ipv6.ip6_prohibit_entry),
4072                                                GFP_KERNEL);
4073         if (!net->ipv6.ip6_prohibit_entry)
4074                 goto out_ip6_null_entry;
4075         net->ipv6.ip6_prohibit_entry->dst.path =
4076                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
4077         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4078         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4079                          ip6_template_metrics, true);
4080
4081         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4082                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
4083                                                GFP_KERNEL);
4084         if (!net->ipv6.ip6_blk_hole_entry)
4085                 goto out_ip6_prohibit_entry;
4086         net->ipv6.ip6_blk_hole_entry->dst.path =
4087                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
4088         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4089         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4090                          ip6_template_metrics, true);
4091 #endif
4092
4093         net->ipv6.sysctl.flush_delay = 0;
4094         net->ipv6.sysctl.ip6_rt_max_size = 4096;
4095         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4096         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4097         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4098         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4099         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4100         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4101
4102         net->ipv6.ip6_rt_gc_expire = 30*HZ;
4103
4104         ret = 0;
4105 out:
4106         return ret;
4107
4108 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4109 out_ip6_prohibit_entry:
4110         kfree(net->ipv6.ip6_prohibit_entry);
4111 out_ip6_null_entry:
4112         kfree(net->ipv6.ip6_null_entry);
4113 #endif
4114 out_ip6_dst_entries:
4115         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4116 out_ip6_dst_ops:
4117         goto out;
4118 }
4119
4120 static void __net_exit ip6_route_net_exit(struct net *net)
4121 {
4122         kfree(net->ipv6.ip6_null_entry);
4123 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4124         kfree(net->ipv6.ip6_prohibit_entry);
4125         kfree(net->ipv6.ip6_blk_hole_entry);
4126 #endif
4127         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4128 }
4129
4130 static int __net_init ip6_route_net_init_late(struct net *net)
4131 {
4132 #ifdef CONFIG_PROC_FS
4133         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4134         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4135 #endif
4136         return 0;
4137 }
4138
4139 static void __net_exit ip6_route_net_exit_late(struct net *net)
4140 {
4141 #ifdef CONFIG_PROC_FS
4142         remove_proc_entry("ipv6_route", net->proc_net);
4143         remove_proc_entry("rt6_stats", net->proc_net);
4144 #endif
4145 }
4146
4147 static struct pernet_operations ip6_route_net_ops = {
4148         .init = ip6_route_net_init,
4149         .exit = ip6_route_net_exit,
4150 };
4151
4152 static int __net_init ipv6_inetpeer_init(struct net *net)
4153 {
4154         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4155
4156         if (!bp)
4157                 return -ENOMEM;
4158         inet_peer_base_init(bp);
4159         net->ipv6.peers = bp;
4160         return 0;
4161 }
4162
4163 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4164 {
4165         struct inet_peer_base *bp = net->ipv6.peers;
4166
4167         net->ipv6.peers = NULL;
4168         inetpeer_invalidate_tree(bp);
4169         kfree(bp);
4170 }
4171
4172 static struct pernet_operations ipv6_inetpeer_ops = {
4173         .init   =       ipv6_inetpeer_init,
4174         .exit   =       ipv6_inetpeer_exit,
4175 };
4176
4177 static struct pernet_operations ip6_route_net_late_ops = {
4178         .init = ip6_route_net_init_late,
4179         .exit = ip6_route_net_exit_late,
4180 };
4181
4182 static struct notifier_block ip6_route_dev_notifier = {
4183         .notifier_call = ip6_route_dev_notify,
4184         .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4185 };
4186
4187 void __init ip6_route_init_special_entries(void)
4188 {
4189         /* Registering of the loopback is done before this portion of code,
4190          * the loopback reference in rt6_info will not be taken, do it
4191          * manually for init_net */
4192         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4193         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4194   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4195         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4196         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4197         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4198         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4199   #endif
4200 }
4201
4202 int __init ip6_route_init(void)
4203 {
4204         int ret;
4205         int cpu;
4206
4207         ret = -ENOMEM;
4208         ip6_dst_ops_template.kmem_cachep =
4209                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4210                                   SLAB_HWCACHE_ALIGN, NULL);
4211         if (!ip6_dst_ops_template.kmem_cachep)
4212                 goto out;
4213
4214         ret = dst_entries_init(&ip6_dst_blackhole_ops);
4215         if (ret)
4216                 goto out_kmem_cache;
4217
4218         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4219         if (ret)
4220                 goto out_dst_entries;
4221
4222         ret = register_pernet_subsys(&ip6_route_net_ops);
4223         if (ret)
4224                 goto out_register_inetpeer;
4225
4226         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4227
4228         ret = fib6_init();
4229         if (ret)
4230                 goto out_register_subsys;
4231
4232         ret = xfrm6_init();
4233         if (ret)
4234                 goto out_fib6_init;
4235
4236         ret = fib6_rules_init();
4237         if (ret)
4238                 goto xfrm6_init;
4239
4240         ret = register_pernet_subsys(&ip6_route_net_late_ops);
4241         if (ret)
4242                 goto fib6_rules_init;
4243
4244         ret = -ENOBUFS;
4245         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4246             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
4247             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4248                             RTNL_FLAG_DOIT_UNLOCKED))
4249                 goto out_register_late_subsys;
4250
4251         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4252         if (ret)
4253                 goto out_register_late_subsys;
4254
4255         for_each_possible_cpu(cpu) {
4256                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4257
4258                 INIT_LIST_HEAD(&ul->head);
4259                 spin_lock_init(&ul->lock);
4260         }
4261
4262 out:
4263         return ret;
4264
4265 out_register_late_subsys:
4266         unregister_pernet_subsys(&ip6_route_net_late_ops);
4267 fib6_rules_init:
4268         fib6_rules_cleanup();
4269 xfrm6_init:
4270         xfrm6_fini();
4271 out_fib6_init:
4272         fib6_gc_cleanup();
4273 out_register_subsys:
4274         unregister_pernet_subsys(&ip6_route_net_ops);
4275 out_register_inetpeer:
4276         unregister_pernet_subsys(&ipv6_inetpeer_ops);
4277 out_dst_entries:
4278         dst_entries_destroy(&ip6_dst_blackhole_ops);
4279 out_kmem_cache:
4280         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4281         goto out;
4282 }
4283
4284 void ip6_route_cleanup(void)
4285 {
4286         unregister_netdevice_notifier(&ip6_route_dev_notifier);
4287         unregister_pernet_subsys(&ip6_route_net_late_ops);
4288         fib6_rules_cleanup();
4289         xfrm6_fini();
4290         fib6_gc_cleanup();
4291         unregister_pernet_subsys(&ipv6_inetpeer_ops);
4292         unregister_pernet_subsys(&ip6_route_net_ops);
4293         dst_entries_destroy(&ip6_dst_blackhole_ops);
4294         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4295 }