2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <linux/rhashtable.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
39 #include <linux/notifier.h>
40 #include <linux/if_arp.h>
41 #include <net/checksum.h>
42 #include <net/netlink.h>
43 #include <net/fib_rules.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 #include <linux/export.h>
52 #include <net/ip6_checksum.h>
53 #include <linux/netconf.h>
54 #include <net/ip_tunnels.h>
56 #include <linux/nospec.h>
59 struct fib_rule common;
66 /* Big lock, protecting vif table, mrt cache and mroute socket state.
67 Note that the changes are semaphored via rtnl_lock.
70 static DEFINE_RWLOCK(mrt_lock);
72 /* Multicast router control variables */
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
77 /* We return to original Alan's scheme. Hash table of resolved
78 entries is changed only in process context and protected
79 with weak lock mrt_lock. Queue of unresolved entries is protected
80 with strong spinlock mfc_unres_lock.
82 In this case data path is free of exclusive locks at all.
/* Slab cache for mfc6_cache entries; created in ip6_mr_init() below
 * ("ip6_mrt_cache", sizeof(struct mfc6_cache)).
 */
85 static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for routines defined later in this file.
 * NOTE(review): this extraction is missing interior lines throughout;
 * some prototypes below are visibly truncated (e.g. mr6_netlink_event).
 */
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 struct sk_buff *skb, struct mfc6_cache *cache);
92 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
93 mifi_t mifi, int assert);
94 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
97 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
98 struct netlink_callback *cb);
99 static void mroute_clean_tables(struct mr_table *mrt, bool all);
100 static void ipmr_expire_process(struct timer_list *t);
/* --- Multiple-table support (CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) ---
 * Tables live on the RCU-protected net->ipv6.mr6_tables list and are
 * selected via the fib_rules framework.
 * NOTE(review): interior lines (braces, returns, error paths) are
 * missing from this extraction; compare with a pristine tree before
 * modifying any code here.
 */
102 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
103 #define ip6mr_for_each_table(mrt, net) \
104 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
/* Walk mr6_tables one entry at a time: NULL mrt restarts from the head,
 * otherwise advance past @mrt; returns NULL when the list wraps.
 */
106 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
107 struct mr_table *mrt)
109 struct mr_table *ret;
112 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
113 struct mr_table, list);
115 ret = list_entry_rcu(mrt->list.next,
116 struct mr_table, list);
118 if (&ret->list == &net->ipv6.mr6_tables)
/* Find the mr_table with the given id by scanning all tables. */
123 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
125 struct mr_table *mrt;
127 ip6mr_for_each_table(mrt, net) {
/* Resolve a flow to its multicast routing table via fib rules. */
134 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
135 struct mr_table **mrt)
138 struct ip6mr_result res;
139 struct fib_lookup_arg arg = {
141 .flags = FIB_LOOKUP_NOREF,
144 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
145 flowi6_to_flowi(flp6), 0, &arg);
/* fib_rules .action callback: maps the rule action to a table lookup;
 * UNREACHABLE/PROHIBIT/BLACKHOLE presumably return error codes here
 * (those lines are not in this extraction).
 */
152 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
153 int flags, struct fib_lookup_arg *arg)
155 struct ip6mr_result *res = arg->result;
156 struct mr_table *mrt;
158 switch (rule->action) {
161 case FR_ACT_UNREACHABLE:
163 case FR_ACT_PROHIBIT:
165 case FR_ACT_BLACKHOLE:
170 mrt = ip6mr_get_table(rule->fr_net, rule->table);
/* Remaining fib_rules callbacks.  match/configure/compare/fill have no
 * per-family state to handle, so their (missing) bodies are trivial.
 */
177 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
182 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
186 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
187 struct fib_rule_hdr *frh, struct nlattr **tb,
188 struct netlink_ext_ack *extack)
193 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
199 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
200 struct fib_rule_hdr *frh)
/* fib_rules ops template registered per-netns in ip6mr_rules_init(). */
208 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
209 .family = RTNL_FAMILY_IP6MR,
210 .rule_size = sizeof(struct ip6mr_rule),
211 .addr_size = sizeof(struct in6_addr),
212 .action = ip6mr_rule_action,
213 .match = ip6mr_rule_match,
214 .configure = ip6mr_rule_configure,
215 .compare = ip6mr_rule_compare,
216 .fill = ip6mr_rule_fill,
217 .nlgroup = RTNLGRP_IPV6_RULE,
218 .policy = ip6mr_rule_policy,
219 .owner = THIS_MODULE,
/* Per-netns setup: register the rules ops, create the default table
 * (RT6_TABLE_DFLT) and add the catch-all 0x7fff rule pointing at it.
 * Error unwinding (table free, rules unregister) is visible at the
 * bottom, though the goto labels are missing from this extraction.
 */
222 static int __net_init ip6mr_rules_init(struct net *net)
224 struct fib_rules_ops *ops;
225 struct mr_table *mrt;
228 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
232 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
234 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
240 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
244 net->ipv6.mr6_rules_ops = ops;
249 ip6mr_free_table(mrt);
252 fib_rules_unregister(ops);
/* Per-netns teardown: free every table, then drop the rules ops. */
256 static void __net_exit ip6mr_rules_exit(struct net *net)
258 struct mr_table *mrt, *next;
261 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
262 list_del(&mrt->list);
263 ip6mr_free_table(mrt);
265 fib_rules_unregister(net->ipv6.mr6_rules_ops);
269 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
271 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
274 static unsigned int ip6mr_rules_seq_read(struct net *net)
276 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
/* True iff @rule is the default catch-all rule for the default table. */
279 bool ip6mr_rule_default(const struct fib_rule *rule)
281 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
282 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
284 EXPORT_SYMBOL(ip6mr_rule_default);
/* --- Single-table fallback (multiple tables disabled) ---
 * Exactly one table, net->ipv6.mrt6; all lookups return it directly.
 * NOTE(review): the #else that starts this section is missing from the
 * extraction, but these definitions mirror the multi-table API above.
 */
286 #define ip6mr_for_each_table(mrt, net) \
287 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
289 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
290 struct mr_table *mrt)
293 return net->ipv6.mrt6;
297 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
299 return net->ipv6.mrt6;
302 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
303 struct mr_table **mrt)
305 *mrt = net->ipv6.mrt6;
/* Create the single default table and remember it in the netns. */
309 static int __net_init ip6mr_rules_init(struct net *net)
311 struct mr_table *mrt;
313 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
316 net->ipv6.mrt6 = mrt;
320 static void __net_exit ip6mr_rules_exit(struct net *net)
323 ip6mr_free_table(net->ipv6.mrt6);
324 net->ipv6.mrt6 = NULL;
/* No fib rules in this configuration: dump/seq_read are no-ops. */
328 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
333 static unsigned int ip6mr_rules_seq_read(struct net *net)
/* rhashtable compare: entries match on (origin, mcastgrp) pair.
 * Returns 0 on match (rhashtable convention), non-zero otherwise.
 */
339 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
342 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
343 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
345 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
346 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
/* Hash-table layout for the resolved-route cache (mrt->mfc_hash). */
349 static const struct rhashtable_params ip6mr_rht_params = {
350 .head_offset = offsetof(struct mr_mfc, mnode),
351 .key_offset = offsetof(struct mfc6_cache, cmparg),
352 .key_len = sizeof(struct mfc6_cache_cmp_arg),
355 .obj_cmpfn = ip6mr_hash_cmp,
356 .automatic_shrinking = true,
/* Hook called by mr_table_alloc(): link the new table into the netns
 * list when multiple tables are compiled in, otherwise nothing.
 */
359 static void ip6mr_new_table_set(struct mr_table *mrt,
362 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
363 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables)
367 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
368 .mf6c_origin = IN6ADDR_ANY_INIT,
369 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
372 static struct mr_table_ops ip6mr_mr_table_ops = {
373 .rht_params = &ip6mr_rht_params,
374 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
/* Return the existing table with @id, or allocate a fresh one with the
 * shared mr_table_alloc() helper (expire timer + netns-list hook).
 */
377 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
379 struct mr_table *mrt;
381 mrt = ip6mr_get_table(net, id);
385 return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
386 ipmr_expire_process, ip6mr_new_table_set);
/* Tear down one table: stop its timer, flush vifs and cache entries,
 * then destroy the rhashtable.  The final kfree is missing from this
 * extraction.
 */
389 static void ip6mr_free_table(struct mr_table *mrt)
391 del_timer_sync(&mrt->ipmr_expire_timer);
392 mroute_clean_tables(mrt, true);
393 rhltable_destroy(&mrt->mfc_hash);
397 #ifdef CONFIG_PROC_FS
398 /* The /proc interfaces to multicast routing
399 * /proc/ip6_mr_cache /proc/ip6_mr_vif
/* seq_file start for /proc/net/ip6_mr_vif: always shows the default
 * table; takes mrt_lock for reading across the whole dump.
 */
402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
405 struct mr_vif_iter *iter = seq->private;
406 struct net *net = seq_file_net(seq);
407 struct mr_table *mrt;
409 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
411 return ERR_PTR(-ENOENT);
415 read_lock(&mrt_lock);
416 return mr_vif_seq_start(seq, pos);
/* Paired with the read_lock taken in ..._seq_start above. */
419 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
422 read_unlock(&mrt_lock);
/* Print one vif row (or the header for SEQ_START_TOKEN). */
425 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
427 struct mr_vif_iter *iter = seq->private;
428 struct mr_table *mrt = iter->mrt;
430 if (v == SEQ_START_TOKEN) {
432 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
434 const struct vif_device *vif = v;
435 const char *name = vif->dev ? vif->dev->name : "none";
438 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
439 vif - mrt->vif_table,
440 name, vif->bytes_in, vif->pkt_in,
441 vif->bytes_out, vif->pkt_out,
447 static const struct seq_operations ip6mr_vif_seq_ops = {
448 .start = ip6mr_vif_seq_start,
449 .next = mr_vif_seq_next,
450 .stop = ip6mr_vif_seq_stop,
451 .show = ip6mr_vif_seq_show,
/* seq_file start for /proc/net/ip6_mr_cache: dumps the default table,
 * walking both resolved and unresolved entries (mfc_unres_lock is
 * passed down so the shared iterator can cover the unresolved queue).
 */
454 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
456 struct net *net = seq_file_net(seq);
457 struct mr_table *mrt;
459 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
461 return ERR_PTR(-ENOENT);
463 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
/* Print one cache row: (group, origin, iif) plus, for resolved
 * entries, packet/byte/wrong-if counters and the oif:ttl list.
 */
466 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
470 if (v == SEQ_START_TOKEN) {
474 "Iif Pkts Bytes Wrong Oifs\n");
476 const struct mfc6_cache *mfc = v;
477 const struct mr_mfc_iter *it = seq->private;
478 struct mr_table *mrt = it->mrt;
480 seq_printf(seq, "%pI6 %pI6 %-3hd",
481 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
484 if (it->cache != &mrt->mfc_unres_queue) {
485 seq_printf(seq, " %8lu %8lu %8lu",
486 mfc->_c.mfc_un.res.pkt,
487 mfc->_c.mfc_un.res.bytes,
488 mfc->_c.mfc_un.res.wrong_if);
489 for (n = mfc->_c.mfc_un.res.minvif;
490 n < mfc->_c.mfc_un.res.maxvif; n++) {
491 if (VIF_EXISTS(mrt, n) &&
492 mfc->_c.mfc_un.res.ttls[n] < 255)
495 mfc->_c.mfc_un.res.ttls[n]);
498 /* unresolved mfc_caches don't contain
499 * pkt, bytes and wrong_if values
501 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
508 static const struct seq_operations ipmr_mfc_seq_ops = {
509 .start = ipmr_mfc_seq_start,
510 .next = mr_mfc_seq_next,
511 .stop = mr_mfc_seq_stop,
512 .show = ipmr_mfc_seq_show,
516 #ifdef CONFIG_IPV6_PIMSM_V2
/* Receive handler for IPPROTO_PIM: validate a PIM REGISTER message,
 * verify its checksum (header-only or whole-packet), sanity-check the
 * encapsulated IPv6 multicast packet, then re-inject it as if it
 * arrived on the table's register vif (skb_tunnel_rx).
 * NOTE(review): several error-path lines are missing from this
 * extraction (the drop labels and kfree_skb calls).
 */
518 static int pim6_rcv(struct sk_buff *skb)
520 struct pimreghdr *pim;
521 struct ipv6hdr *encap;
522 struct net_device *reg_dev = NULL;
523 struct net *net = dev_net(skb->dev);
524 struct mr_table *mrt;
525 struct flowi6 fl6 = {
526 .flowi6_iif = skb->dev->ifindex,
527 .flowi6_mark = skb->mark,
531 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
534 pim = (struct pimreghdr *)skb_transport_header(skb);
535 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
536 (pim->flags & PIM_NULL_REGISTER) ||
537 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
538 sizeof(*pim), IPPROTO_PIM,
539 csum_partial((void *)pim, sizeof(*pim), 0)) &&
540 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
543 /* check if the inner packet is destined to mcast group */
544 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
547 if (!ipv6_addr_is_multicast(&encap->daddr) ||
548 encap->payload_len == 0 ||
549 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
552 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
554 reg_vif_num = mrt->mroute_reg_vif_num;
/* Look up the register device under mrt_lock; -1 means no register
 * vif is configured for this table.
 */
556 read_lock(&mrt_lock);
557 if (reg_vif_num >= 0)
558 reg_dev = mrt->vif_table[reg_vif_num].dev;
561 read_unlock(&mrt_lock);
566 skb->mac_header = skb->network_header;
567 skb_pull(skb, (u8 *)encap - skb->data);
568 skb_reset_network_header(skb);
569 skb->protocol = htons(ETH_P_IPV6);
570 skb->ip_summed = CHECKSUM_NONE;
572 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
/* inet6 protocol hook registered for IPPROTO_PIM in ip6_mr_init(). */
583 static const struct inet6_protocol pim6_protocol = {
587 /* Service routines creating virtual interfaces: PIMREG */
/* Transmit on the pim6reg pseudo device: every packet written here is
 * bounced to user space as an MRT6MSG_WHOLEPKT cache report rather
 * than being sent on the wire.
 */
589 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
590 struct net_device *dev)
592 struct net *net = dev_net(dev);
593 struct mr_table *mrt;
594 struct flowi6 fl6 = {
595 .flowi6_oif = dev->ifindex,
596 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
597 .flowi6_mark = skb->mark,
600 if (!pskb_inet_may_pull(skb))
603 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
606 read_lock(&mrt_lock)
607 dev->stats.tx_bytes += skb->len;
608 dev->stats.tx_packets++;
609 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
610 read_unlock(&mrt_lock);
/* Error path: count a tx error (the kfree_skb/return lines are
 * missing from this extraction).
 */
615 dev->stats.tx_errors++;
620 static int reg_vif_get_iflink(const struct net_device *dev)
625 static const struct net_device_ops reg_vif_netdev_ops = {
626 .ndo_start_xmit = reg_vif_xmit,
627 .ndo_get_iflink = reg_vif_get_iflink,
630 static void reg_vif_setup(struct net_device *dev)
632 dev->type = ARPHRD_PIMREG;
633 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
634 dev->flags = IFF_NOARP;
635 dev->netdev_ops = ®_vif_netdev_ops;
636 dev->needs_free_netdev = true;
637 dev->features |= NETIF_F_NETNS_LOCAL;
/* Allocate and register the per-table register device: "pim6reg" for
 * the default table, "pim6reg<id>" otherwise.  On registration failure
 * the device is unregistered again (free_netdev line missing here).
 */
640 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
642 struct net_device *dev;
645 if (mrt->id == RT6_TABLE_DFLT)
646 sprintf(name, "pim6reg");
648 sprintf(name, "pim6reg%u", mrt->id);
650 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
654 dev_net_set(dev, net);
656 if (register_netdevice(dev)) {
668 unregister_netdevice(dev);
/* Thin wrappers publishing vif/mfc add/del events on the IP6MR fib
 * notifier chain, bumping the per-netns ipmr_seq counter.
 */
673 static int call_ip6mr_vif_entry_notifiers(struct net *net,
674 enum fib_event_type event_type,
675 struct vif_device *vif,
676 mifi_t vif_index, u32 tb_id)
678 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
679 vif, vif_index, tb_id,
680 &net->ipv6.ipmr_seq);
683 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
684 enum fib_event_type event_type,
685 struct mfc6_cache *mfc, u32 tb_id)
687 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
688 &mfc->_c, tb_id, &net->ipv6.ipmr_seq)
691 /* Delete a VIF entry */
/* Remove vif @vifi from @mrt: notify listeners, clear the slot under
 * mrt_lock (slot-clearing lines missing from this extraction), drop
 * allmulti, update netconf mc_forwarding accounting, and queue a
 * register device for unregistration when appropriate.
 * @notify non-zero means the device itself is going away.
 */
692 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
693 struct list_head *head)
695 struct vif_device *v;
696 struct net_device *dev;
697 struct inet6_dev *in6_dev;
699 if (vifi < 0 || vifi >= mrt->maxvif)
700 return -EADDRNOTAVAIL;
702 v = &mrt->vif_table[vifi];
704 if (VIF_EXISTS(mrt, vifi))
705 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
706 FIB_EVENT_VIF_DEL, v, vifi,
709 write_lock_bh(&mrt_lock);
714 write_unlock_bh(&mrt_lock);
715 return -EADDRNOTAVAIL;
718 #ifdef CONFIG_IPV6_PIMSM_V2
719 if (vifi == mrt->mroute_reg_vif_num)
720 mrt->mroute_reg_vif_num = -1;
/* Shrink maxvif when the highest-numbered vif was removed. */
723 if (vifi + 1 == mrt->maxvif) {
725 for (tmp = vifi - 1; tmp >= 0; tmp--) {
726 if (VIF_EXISTS(mrt, tmp))
729 mrt->maxvif = tmp + 1;
732 write_unlock_bh(&mrt_lock);
734 dev_set_allmulti(dev, -1);
736 in6_dev = __in6_dev_get(dev);
738 in6_dev->cnf.mc_forwarding--;
739 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
740 NETCONFA_MC_FORWARDING,
741 dev->ifindex, &in6_dev->cnf);
744 if ((v->flags & MIFF_REGISTER) && !notify)
745 unregister_netdevice_queue(dev, head);
/* RCU callback: return a cache entry to the slab cache. */
751 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
753 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
755 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
/* Free an entry after the current RCU grace period. */
758 static inline void ip6mr_cache_free(struct mfc6_cache *c)
760 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
763 /* Destroy an unresolved cache entry, killing queued skbs
764 and reporting error to netlink readers.
/* Queued skbs with ipv6 version 0 are netlink queries parked on the
 * entry: rewrite them into NLMSG_ERROR(-ETIMEDOUT) replies; real
 * packets are simply dropped (kfree_skb line missing here).
 */
767 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
769 struct net *net = read_pnet(&mrt->net);
772 atomic_dec(&mrt->cache_resolve_queue_len);
774 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
775 if (ipv6_hdr(skb)->version == 0) {
776 struct nlmsghdr *nlh = skb_pull(skb,
777 sizeof(struct ipv6hdr));
778 nlh->nlmsg_type = NLMSG_ERROR;
779 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
780 skb_trim(skb, nlh->nlmsg_len);
781 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
782 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
791 /* Timer process for all the unresolved queue. */
/* Walk the unresolved queue (mfc_unres_lock held by caller): expire
 * stale entries, and compute the shortest remaining interval so the
 * timer is re-armed exactly when the next entry is due.
 */
793 static void ipmr_do_expire_process(struct mr_table *mrt)
795 unsigned long now = jiffies;
796 unsigned long expires = 10 * HZ;
797 struct mr_mfc *c, *next;
799 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
800 if (time_after(c->mfc_un.unres.expires, now)) {
802 unsigned long interval = c->mfc_un.unres.expires - now;
803 if (interval < expires)
809 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
810 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
813 if (!list_empty(&mrt->mfc_unres_queue))
814 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
/* Timer callback: retries in one jiffy if the lock is contended
 * (trylock, since this runs in timer/softirq context).
 */
817 static void ipmr_expire_process(struct timer_list *t)
819 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
821 if (!spin_trylock(&mfc_unres_lock)) {
822 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
826 if (!list_empty(&mrt->mfc_unres_queue))
827 ipmr_do_expire_process(mrt);
829 spin_unlock(&mfc_unres_lock);
832 /* Fill oifs list. It is called under write locked mrt_lock. */
/* Rebuild the per-entry TTL threshold array from @ttls and recompute
 * the [minvif, maxvif) forwarding window; 255 marks "do not forward".
 */
834 static void ip6mr_update_thresholds(struct mr_table *mrt,
835 struct mr_mfc *cache,
840 cache->mfc_un.res.minvif = MAXMIFS;
841 cache->mfc_un.res.maxvif = 0;
842 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
844 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
845 if (VIF_EXISTS(mrt, vifi) &&
846 ttls[vifi] && ttls[vifi] < 255) {
847 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
848 if (cache->mfc_un.res.minvif > vifi)
849 cache->mfc_un.res.minvif = vifi;
850 if (cache->mfc_un.res.maxvif <= vifi)
851 cache->mfc_un.res.maxvif = vifi + 1;
854 cache->mfc_un.res.lastuse = jiffies;
/* Add a vif from a user-space MRT6_ADD_MIF request: resolve/create the
 * underlying device (register vif or plain interface), enable
 * allmulti, bump netconf mc_forwarding, then publish the slot under
 * mrt_lock.  @mrtsock marks daemon-installed (non-static) vifs.
 */
857 static int mif6_add(struct net *net, struct mr_table *mrt,
858 struct mif6ctl *vifc, int mrtsock)
860 int vifi = vifc->mif6c_mifi;
861 struct vif_device *v = &mrt->vif_table[vifi];
862 struct net_device *dev;
863 struct inet6_dev *in6_dev;
867 if (VIF_EXISTS(mrt, vifi))
870 switch (vifc->mif6c_flags) {
871 #ifdef CONFIG_IPV6_PIMSM_V2
874 * Special Purpose VIF in PIM
875 * All the packets will be sent to the daemon
/* Only one register vif per table is allowed. */
877 if (mrt->mroute_reg_vif_num >= 0)
879 dev = ip6mr_reg_vif(net, mrt);
882 err = dev_set_allmulti(dev, 1);
884 unregister_netdevice(dev);
891 dev = dev_get_by_index(net, vifc->mif6c_pifi);
893 return -EADDRNOTAVAIL;
894 err = dev_set_allmulti(dev, 1);
904 in6_dev = __in6_dev_get(dev);
906 in6_dev->cnf.mc_forwarding++;
907 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
908 NETCONFA_MC_FORWARDING,
909 dev->ifindex, &in6_dev->cnf);
912 /* Fill in the VIF structures */
913 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
914 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
917 /* And finish update writing critical data */
918 write_lock_bh(&mrt_lock);
920 #ifdef CONFIG_IPV6_PIMSM_V2
921 if (v->flags & MIFF_REGISTER)
922 mrt->mroute_reg_vif_num = vifi;
924 if (vifi + 1 > mrt->maxvif)
925 mrt->maxvif = vifi + 1;
926 write_unlock_bh(&mrt_lock);
927 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
/* Exact (S,G) lookup in the resolved-route hash. */
932 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
933 const struct in6_addr *origin,
934 const struct in6_addr *mcastgrp)
936 struct mfc6_cache_cmp_arg arg = {
937 .mf6c_origin = *origin,
938 .mf6c_mcastgrp = *mcastgrp,
941 return mr_mfc_find(mrt, &arg);
944 /* Look for a (*,G) entry */
945 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
946 struct in6_addr *mcastgrp,
949 struct mfc6_cache_cmp_arg arg = {
950 .mf6c_origin = in6addr_any,
951 .mf6c_mcastgrp = *mcastgrp,
954 if (ipv6_addr_any(mcastgrp))
955 return mr_mfc_find_any_parent(mrt, mifi);
956 return mr_mfc_find_any(mrt, mifi, &arg);
959 /* Look for a (S,G,iif) entry if parent != -1 */
960 static struct mfc6_cache *
961 ip6mr_cache_find_parent(struct mr_table *mrt,
962 const struct in6_addr *origin,
963 const struct in6_addr *mcastgrp,
966 struct mfc6_cache_cmp_arg arg = {
967 .mf6c_origin = *origin,
968 .mf6c_mcastgrp = *mcastgrp,
971 return mr_mfc_find_parent(mrt, &arg, parent);
974 /* Allocate a multicast cache entry */
/* GFP_KERNEL allocation for resolved entries added from user space. */
975 static struct mfc6_cache *ip6mr_cache_alloc(void)
977 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
980 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
981 c->_c.mfc_un.res.minvif = MAXMIFS;
982 c->_c.free = ip6mr_cache_free_rcu;
983 refcount_set(&c->_c.mfc_un.res.refcount, 1);
/* GFP_ATOMIC allocation for unresolved entries created on the
 * packet-receive path; expires after 10 seconds if never resolved.
 */
987 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
989 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
992 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
993 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
998 * A cache entry has gone into a resolved state from queued
/* Replay the skbs parked on unresolved entry @uc through the freshly
 * resolved entry @c.  Version-0 "packets" are parked netlink queries:
 * answer them via mr_fill_mroute() or fail them with -EMSGSIZE; real
 * packets go through the normal forwarding path.
 */
1001 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1002 struct mfc6_cache *uc, struct mfc6_cache *c)
1004 struct sk_buff *skb;
1007 * Play the pending entries through our router
1010 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1011 if (ipv6_hdr(skb)->version == 0) {
1012 struct nlmsghdr *nlh = skb_pull(skb,
1013 sizeof(struct ipv6hdr));
1015 if (mr_fill_mroute(mrt, skb, &c->_c,
1016 nlmsg_data(nlh)) > 0) {
1017 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1019 nlh->nlmsg_type = NLMSG_ERROR;
1020 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1021 skb_trim(skb, nlh->nlmsg_len);
1022 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1024 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1026 ip6_mr_forward(net, mrt, skb, c);
1031 * Bounce a cache query up to pim6sd and netlink.
1033 * Called under mrt_lock.
/* Build an mrt6msg and queue it on the daemon's socket.
 * MRT6MSG_WHOLEPKT (PIM register path) copies the whole packet and
 * prepends the message header; other asserts (e.g. MRT6MSG_NOCACHE)
 * carry just the inner IPv6 header plus the message.
 */
1036 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1037 mifi_t mifi, int assert)
1039 struct sock *mroute6_sk;
1040 struct sk_buff *skb;
1041 struct mrt6msg *msg;
1044 #ifdef CONFIG_IPV6_PIMSM_V2
1045 if (assert == MRT6MSG_WHOLEPKT)
1046 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1050 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1055 /* I suppose that internal messages
1056 * do not require checksums */
1058 skb->ip_summed = CHECKSUM_UNNECESSARY;
1060 #ifdef CONFIG_IPV6_PIMSM_V2
1061 if (assert == MRT6MSG_WHOLEPKT) {
1062 /* Ugly, but we have no choice with this interface.
1063 Duplicate old header, fix length etc.
1064 And all this only to mangle msg->im6_msgtype and
1065 to set msg->im6_mbz to "mbz" :-)
1067 skb_push(skb, -skb_network_offset(pkt));
1069 skb_push(skb, sizeof(*msg));
1070 skb_reset_transport_header(skb);
1071 msg = (struct mrt6msg *)skb_transport_header(skb);
1073 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1074 msg->im6_mif = mrt->mroute_reg_vif_num;
1076 msg->im6_src = ipv6_hdr(pkt)->saddr;
1077 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1079 skb->ip_summed = CHECKSUM_UNNECESSARY;
1084 * Copy the IP header
1087 skb_put(skb, sizeof(struct ipv6hdr));
1088 skb_reset_network_header(skb);
1089 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1094 skb_put(skb, sizeof(*msg));
1095 skb_reset_transport_header(skb);
1096 msg = (struct mrt6msg *)skb_transport_header(skb);
1099 msg->im6_msgtype = assert;
1100 msg->im6_mif = mifi;
1102 msg->im6_src = ipv6_hdr(pkt)->saddr;
1103 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1105 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1106 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* No daemon socket registered -> drop (error path lines missing). */
1110 mroute6_sk = rcu_dereference(mrt->mroute_sk);
1117 mrt6msg_netlink_event(mrt, skb);
1119 /* Deliver to user space multicast routing algorithms */
1120 ret = sock_queue_rcv_skb(mroute6_sk, skb);
1123 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1130 /* Queue a packet for resolution. It gets locked cache entry! */
/* No route for this packet yet: find or create an unresolved entry
 * keyed by (saddr, daddr), report MRT6MSG_NOCACHE to the daemon for a
 * new entry, and park the skb (bounded to a few per entry) until user
 * space installs the route or the entry times out.
 */
1131 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1132 struct sk_buff *skb)
1134 struct mfc6_cache *c;
1138 spin_lock_bh(&mfc_unres_lock);
1139 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1140 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1141 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1149 * Create a new entry if allowable
/* Cap the unresolved queue at 10 entries. */
1152 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1153 (c = ip6mr_cache_alloc_unres()) == NULL) {
1154 spin_unlock_bh(&mfc_unres_lock);
1160 /* Fill in the new cache entry */
1161 c->_c.mfc_parent = -1;
1162 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1163 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1166 * Reflect first query at pim6sd
1168 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1170 /* If the report failed throw the cache entry
1173 spin_unlock_bh(&mfc_unres_lock);
1175 ip6mr_cache_free(c);
1180 atomic_inc(&mrt->cache_resolve_queue_len);
1181 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1182 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1184 ipmr_do_expire_process(mrt);
1187 /* See if we can append the packet */
1188 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1192 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1196 spin_unlock_bh(&mfc_unres_lock);
1201 * MFC6 cache manipulation by user space
/* MRT6_DEL_MFC: remove a resolved (S,G[,parent]) entry under RTNL —
 * unhash it, unlink it RCU-safely, notify listeners, and drop the
 * reference.
 */
1204 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1207 struct mfc6_cache *c;
1209 /* The entries are added/deleted only under RTNL */
1211 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1212 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1216 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1217 list_del_rcu(&c->_c.list);
1219 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1220 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1221 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1222 mr_cache_put(&c->_c);
/* Netdevice notifier: when a device unregisters, delete every vif in
 * every table that referenced it.
 */
1226 static int ip6mr_device_event(struct notifier_block *this,
1227 unsigned long event, void *ptr)
1229 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1230 struct net *net = dev_net(dev);
1231 struct mr_table *mrt;
1232 struct vif_device *v;
1235 if (event != NETDEV_UNREGISTER)
1238 ip6mr_for_each_table(mrt, net) {
1239 v = &mrt->vif_table[0];
1240 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1242 mif6_delete(mrt, ct, 1, NULL);
/* Combined sequence counter covering table contents and fib rules. */
1249 static unsigned int ip6mr_seq_read(struct net *net)
1253 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1256 static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1258 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1259 ip6mr_mr_table_iter, &mrt_lock);
1262 static struct notifier_block ip6_mr_notifier = {
1263 .notifier_call = ip6mr_device_event
1266 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1267 .family = RTNL_FAMILY_IP6MR,
1268 .fib_seq_read = ip6mr_seq_read,
1269 .fib_dump = ip6mr_dump,
1270 .owner = THIS_MODULE,
/* Register the per-netns fib notifier ops and reset the seq counter. */
1273 static int __net_init ip6mr_notifier_init(struct net *net)
1275 struct fib_notifier_ops *ops;
1277 net->ipv6.ipmr_seq = 0;
1279 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1281 return PTR_ERR(ops);
1283 net->ipv6.ip6mr_notifier_ops = ops;
1288 static void __net_exit ip6mr_notifier_exit(struct net *net)
1290 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1291 net->ipv6.ip6mr_notifier_ops = NULL;
1294 /* Setup for IP multicast routing */
/* Per-netns init: notifier ops, routing tables/rules, then the two
 * /proc entries; unwinds in reverse order on failure (some goto labels
 * are missing from this extraction).
 */
1295 static int __net_init ip6mr_net_init(struct net *net)
1299 err = ip6mr_notifier_init(net);
1303 err = ip6mr_rules_init(net);
1305 goto ip6mr_rules_fail;
1307 #ifdef CONFIG_PROC_FS
1309 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1310 sizeof(struct mr_vif_iter)))
1312 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1313 sizeof(struct mr_mfc_iter)))
1314 goto proc_cache_fail;
1319 #ifdef CONFIG_PROC_FS
1321 remove_proc_entry("ip6_mr_vif", net->proc_net);
1323 ip6mr_rules_exit(net);
1326 ip6mr_notifier_exit(net);
/* Per-netns teardown, mirroring ip6mr_net_init in reverse. */
1330 static void __net_exit ip6mr_net_exit(struct net *net)
1332 #ifdef CONFIG_PROC_FS
1333 remove_proc_entry("ip6_mr_cache", net->proc_net);
1334 remove_proc_entry("ip6_mr_vif", net->proc_net);
1336 ip6mr_rules_exit(net);
1337 ip6mr_notifier_exit(net);
1340 static struct pernet_operations ip6mr_net_ops = {
1341 .init = ip6mr_net_init,
1342 .exit = ip6mr_net_exit,
/* Module init: create the slab cache, register pernet ops, the
 * netdevice notifier, the PIM protocol handler (if PIMSM_V2), and the
 * RTM_GETROUTE dump hook; unwinds each step on failure.
 */
1345 int __init ip6_mr_init(void)
1349 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1350 sizeof(struct mfc6_cache),
1351 0, SLAB_HWCACHE_ALIGN,
1356 err = register_pernet_subsys(&ip6mr_net_ops);
1358 goto reg_pernet_fail;
1360 err = register_netdevice_notifier(&ip6_mr_notifier);
1362 goto reg_notif_fail;
1363 #ifdef CONFIG_IPV6_PIMSM_V2
1364 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1365 pr_err("%s: can't add PIM protocol\n", __func__);
1367 goto add_proto_fail;
1370 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1371 NULL, ip6mr_rtm_dumproute, 0);
1375 #ifdef CONFIG_IPV6_PIMSM_V2
1376 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1378 unregister_netdevice_notifier(&ip6_mr_notifier);
1381 unregister_pernet_subsys(&ip6mr_net_ops);
1383 kmem_cache_destroy(mrt_cachep);
/* Module exit: reverse of ip6_mr_init. */
1387 void ip6_mr_cleanup(void)
1389 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1390 #ifdef CONFIG_IPV6_PIMSM_V2
1391 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1393 unregister_netdevice_notifier(&ip6_mr_notifier);
1394 unregister_pernet_subsys(&ip6mr_net_ops);
1395 kmem_cache_destroy(mrt_cachep);
/* MRT6_ADD_MFC: install or update a resolved (S,G) route under RTNL.
 * An existing entry just gets new parent/TTLs; otherwise a new entry
 * is allocated, hashed, and any matching unresolved entry is drained
 * through it (queued skbs replayed via ip6mr_cache_resolve).
 * @mrtsock distinguishes daemon entries from MFC_STATIC ones.
 */
1398 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1399 struct mf6cctl *mfc, int mrtsock, int parent)
1401 unsigned char ttls[MAXMIFS];
1402 struct mfc6_cache *uc, *c;
1407 if (mfc->mf6cc_parent >= MAXMIFS)
1410 memset(ttls, 255, MAXMIFS);
1411 for (i = 0; i < MAXMIFS; i++) {
1412 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1416 /* The entries are added/deleted only under RTNL */
1418 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1419 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
/* Update path: entry already exists, refresh it in place. */
1422 write_lock_bh(&mrt_lock);
1423 c->_c.mfc_parent = mfc->mf6cc_parent;
1424 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1426 c->_c.mfc_flags |= MFC_STATIC;
1427 write_unlock_bh(&mrt_lock);
1428 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1430 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1434 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1435 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1438 c = ip6mr_cache_alloc();
1442 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1443 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1444 c->_c.mfc_parent = mfc->mf6cc_parent;
1445 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1447 c->_c.mfc_flags |= MFC_STATIC;
1449 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1452 pr_err("ip6mr: rhtable insert error %d\n", err);
1453 ip6mr_cache_free(c);
1456 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1458 /* Check to see if we resolved a queued list. If so we
1459 * need to send on the frames and tidy up.
1462 spin_lock_bh(&mfc_unres_lock);
1463 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1464 uc = (struct mfc6_cache *)_uc;
1465 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1466 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1467 list_del(&_uc->list);
1468 atomic_dec(&mrt->cache_resolve_queue_len);
1473 if (list_empty(&mrt->mfc_unres_queue))
1474 del_timer(&mrt->ipmr_expire_timer);
1475 spin_unlock_bh(&mfc_unres_lock);
1478 ip6mr_cache_resolve(net, mrt, uc, c);
1479 ip6mr_cache_free(uc);
1481 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1483 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1488 * Close the multicast socket, and clear the vif tables etc
/*
 * mroute_clean_tables - tear down the state of one multicast routing table.
 * Deletes vif entries (static ones are kept unless @all), wipes the resolved
 * MFC cache with FIB-notifier and netlink RTM_DELROUTE events, and destroys
 * any still-unresolved queued entries.
 * NOTE(review): this extract elides some lines of the original function
 * (braces, 'continue' statements, local declarations); comments below only
 * describe what is visible here.
 */
1491 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1493 struct mr_mfc *c, *tmp;
1497 /* Shut down all active vif entries */
1498 for (i = 0; i < mrt->maxvif; i++) {
1499 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1501 mif6_delete(mrt, i, 0, &list);
/* Batch-unregister any pim6reg-style devices collected by mif6_delete() */
1503 unregister_netdevice_many(&list);
1505 /* Wipe the cache */
1506 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1507 if (!all && (c->mfc_flags & MFC_STATIC))
1509 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1510 list_del_rcu(&c->list);
1511 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1512 FIB_EVENT_ENTRY_DEL,
1513 (struct mfc6_cache *)c, mrt->id);
1514 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
/* Unresolved entries live on their own queue guarded by mfc_unres_lock */
1518 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1519 spin_lock_bh(&mfc_unres_lock);
1520 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1522 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1524 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1526 spin_unlock_bh(&mfc_unres_lock);
/*
 * ip6mr_sk_init - register @sk as the multicast-routing control socket of
 * @mrt.  Fails if the table already has one (error path elided in this
 * extract).  On success, enables mc_forwarding on all devices and notifies
 * userspace via an RTM_NEWNETCONF netconf event.
 */
1530 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1533 struct net *net = sock_net(sk);
1536 write_lock_bh(&mrt_lock);
1537 if (rtnl_dereference(mrt->mroute_sk)) {
1540 rcu_assign_pointer(mrt->mroute_sk, sk);
/* SOCK_RCU_FREE defers freeing of sk past an RCU grace period, so
 * lockless readers of mrt->mroute_sk stay safe. */
1541 sock_set_flag(sk, SOCK_RCU_FREE);
1542 net->ipv6.devconf_all->mc_forwarding++;
1544 write_unlock_bh(&mrt_lock);
1547 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1548 NETCONFA_MC_FORWARDING,
1549 NETCONFA_IFINDEX_ALL,
1550 net->ipv6.devconf_all);
/*
 * ip6mr_sk_done - inverse of ip6mr_sk_init: detach @sk as the mroute
 * control socket from whichever table it owns, decrement mc_forwarding,
 * send the netconf notification and flush the table's non-static state.
 * Only raw ICMPv6 sockets can have been registered in the first place.
 */
1556 int ip6mr_sk_done(struct sock *sk)
1559 struct net *net = sock_net(sk);
1560 struct mr_table *mrt;
1562 if (sk->sk_type != SOCK_RAW ||
1563 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
/* Scan every table in this netns; @sk may be the owner of at most one */
1567 ip6mr_for_each_table(mrt, net) {
1568 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1569 write_lock_bh(&mrt_lock);
1570 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1571 /* Note that mroute_sk had SOCK_RCU_FREE set,
1572 * so the RCU grace period before sk freeing
1573 * is guaranteed by sk_destruct()
1575 net->ipv6.devconf_all->mc_forwarding--;
1576 write_unlock_bh(&mrt_lock);
1577 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1578 NETCONFA_MC_FORWARDING,
1579 NETCONFA_IFINDEX_ALL,
1580 net->ipv6.devconf_all);
/* false: keep MFC_STATIC/VIFF_STATIC entries across socket teardown */
1582 mroute_clean_tables(mrt, false);
/*
 * mroute6_is_socket - report whether the table that would route @skb
 * (selected by a fib-rules lookup on iif/oif/mark) has a multicast
 * routing control socket attached.
 */
1592 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1594 struct mr_table *mrt;
1595 struct flowi6 fl6 = {
1596 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1597 .flowi6_oif = skb->dev->ifindex,
1598 .flowi6_mark = skb->mark,
1601 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
/* Pointer used only as a boolean; no dereference, so no RCU read lock */
1604 return rcu_access_pointer(mrt->mroute_sk);
1606 EXPORT_SYMBOL(mroute6_is_socket);
1609 * Socket options and virtual interface manipulation. The whole
1610 * virtual interface system is a complete heap, but unfortunately
1611 * that's how BSD mrouted happens to think. Maybe one day with a proper
1612 * MOSPF/PIM router set up we can clean this up.
/*
 * ip6_mroute_setsockopt - handle the MRT6_* socket options that drive the
 * IPv6 multicast routing engine (init/done, mif add/delete, MFC add/delete,
 * PIM and assert modes, and table selection).  Only a raw ICMPv6 socket may
 * use these; anything except MRT6_INIT additionally requires either being
 * the registered mroute socket or CAP_NET_ADMIN.
 * NOTE(review): several case labels, error returns and the closing switch
 * structure are elided in this extract.
 */
1615 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1617 int ret, parent = 0;
1621 struct net *net = sock_net(sk);
1622 struct mr_table *mrt;
1624 if (sk->sk_type != SOCK_RAW ||
1625 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
/* Per-socket table id if set (multiple-tables build), else the default */
1628 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1632 if (optname != MRT6_INIT) {
1633 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1634 !ns_capable(net->user_ns, CAP_NET_ADMIN))
/* MRT6_INIT / MRT6_DONE: attach or detach the control socket */
1640 if (optlen < sizeof(int))
1643 return ip6mr_sk_init(mrt, sk);
1646 return ip6mr_sk_done(sk);
/* MRT6_ADD_MIF: validate and add one virtual interface */
1649 if (optlen < sizeof(vif))
1651 if (copy_from_user(&vif, optval, sizeof(vif)))
1653 if (vif.mif6c_mifi >= MAXMIFS)
1656 ret = mif6_add(net, mrt, &vif,
1657 sk == rtnl_dereference(mrt->mroute_sk));
/* MRT6_DEL_MIF: remove one virtual interface by index */
1662 if (optlen < sizeof(mifi_t))
1664 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1667 ret = mif6_delete(mrt, mifi, 0, NULL);
1672 * Manipulate the forwarding caches. These live
1673 * in a sort of kernel/user symbiosis.
1679 case MRT6_ADD_MFC_PROXY:
1680 case MRT6_DEL_MFC_PROXY:
1681 if (optlen < sizeof(mfc))
1683 if (copy_from_user(&mfc, optval, sizeof(mfc)))
/* The _PROXY variants carry an explicit parent mif (elided branch
 * presumably selects it only for proxy requests — confirm upstream) */
1686 parent = mfc.mf6cc_parent;
1688 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1689 ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1691 ret = ip6mr_mfc_add(net, mrt, &mfc,
1693 rtnl_dereference(mrt->mroute_sk),
1699 * Control PIM assert (to activate pim will activate assert)
1705 if (optlen != sizeof(v))
1707 if (get_user(v, (int __user *)optval))
1709 mrt->mroute_do_assert = v;
1713 #ifdef CONFIG_IPV6_PIMSM_V2
/* MRT6_PIM: enabling PIM also enables asserts */
1718 if (optlen != sizeof(v))
1720 if (get_user(v, (int __user *)optval))
1725 if (v != mrt->mroute_do_pim) {
1726 mrt->mroute_do_pim = v;
1727 mrt->mroute_do_assert = v;
1734 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* MRT6_TABLE: switch this socket to another (possibly new) mr table;
 * refused once the socket is already bound as a table's mroute_sk */
1739 if (optlen != sizeof(u32))
1741 if (get_user(v, (u32 __user *)optval))
1743 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1744 if (v != RT_TABLE_DEFAULT && v >= 100000000)
1746 if (sk == rcu_access_pointer(mrt->mroute_sk))
1751 mrt = ip6mr_new_table(net, v);
1755 raw6_sk(sk)->ip6mr_table = v;
1761 * Spurious command, or MRT6_VERSION which you cannot
1765 return -ENOPROTOOPT;
1770 * Getsock opt support for the multicast routing system.
/*
 * ip6_mroute_getsockopt - read back the MRT6 option state (PIM mode,
 * assert mode; other options return -ENOPROTOOPT).  Copies out at most
 * sizeof(int) bytes, clamped to the user-supplied length.
 */
1773 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1778 struct net *net = sock_net(sk);
1779 struct mr_table *mrt;
1781 if (sk->sk_type != SOCK_RAW ||
1782 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1785 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1793 #ifdef CONFIG_IPV6_PIMSM_V2
1795 val = mrt->mroute_do_pim;
1799 val = mrt->mroute_do_assert;
1802 return -ENOPROTOOPT;
/* Classic getsockopt dance: clamp length, write it back, copy value */
1805 if (get_user(olr, optlen))
1808 olr = min_t(int, olr, sizeof(int));
1812 if (put_user(olr, optlen))
1814 if (copy_to_user(optval, &val, olr))
1820 * The IP multicast ioctl support routines.
/*
 * ip6mr_ioctl - per-socket ioctls for the multicast router:
 * SIOCGETMIFCNT_IN6 returns packet/byte counters of one mif,
 * SIOCGETSGCNT_IN6 returns counters of one (S,G) cache entry.
 * NOTE(review): some error returns and the RCU locking around the
 * cache lookup are elided in this extract.
 */
1823 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1825 struct sioc_sg_req6 sr;
1826 struct sioc_mif_req6 vr;
1827 struct vif_device *vif;
1828 struct mfc6_cache *c;
1829 struct net *net = sock_net(sk);
1830 struct mr_table *mrt;
1832 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1837 case SIOCGETMIFCNT_IN6:
1838 if (copy_from_user(&vr, arg, sizeof(vr)))
1840 if (vr.mifi >= mrt->maxvif)
/* array_index_nospec(): clamp under speculation (Spectre-v1 hardening) */
1842 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1843 read_lock(&mrt_lock);
1844 vif = &mrt->vif_table[vr.mifi];
1845 if (VIF_EXISTS(mrt, vr.mifi)) {
1846 vr.icount = vif->pkt_in;
1847 vr.ocount = vif->pkt_out;
1848 vr.ibytes = vif->bytes_in;
1849 vr.obytes = vif->bytes_out;
/* Drop the lock before touching userspace memory */
1850 read_unlock(&mrt_lock);
1852 if (copy_to_user(arg, &vr, sizeof(vr)))
1856 read_unlock(&mrt_lock);
1857 return -EADDRNOTAVAIL;
1858 case SIOCGETSGCNT_IN6:
1859 if (copy_from_user(&sr, arg, sizeof(sr)))
1863 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1865 sr.pktcnt = c->_c.mfc_un.res.pkt;
1866 sr.bytecnt = c->_c.mfc_un.res.bytes;
1867 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1870 if (copy_to_user(arg, &sr, sizeof(sr)))
1875 return -EADDRNOTAVAIL;
1877 return -ENOIOCTLCMD;
/* 32-bit-ABI mirrors of sioc_sg_req6 / sioc_mif_req6: the counters are
 * compat_ulong_t (32-bit) instead of the native unsigned long, so a
 * 32-bit userspace on a 64-bit kernel gets the layout it expects. */
1881 #ifdef CONFIG_COMPAT
1882 struct compat_sioc_sg_req6 {
1883 struct sockaddr_in6 src;
1884 struct sockaddr_in6 grp;
1885 compat_ulong_t pktcnt;
1886 compat_ulong_t bytecnt;
1887 compat_ulong_t wrong_if;
1890 struct compat_sioc_mif_req6 {
1892 compat_ulong_t icount;
1893 compat_ulong_t ocount;
1894 compat_ulong_t ibytes;
1895 compat_ulong_t obytes;
/*
 * ip6mr_compat_ioctl - 32-bit compat twin of ip6mr_ioctl(); identical
 * logic but uses the compat_sioc_* structures above so the copy_to/from_user
 * sizes match the 32-bit ABI.  Keep any change here in sync with
 * ip6mr_ioctl().
 */
1898 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1900 struct compat_sioc_sg_req6 sr;
1901 struct compat_sioc_mif_req6 vr;
1902 struct vif_device *vif;
1903 struct mfc6_cache *c;
1904 struct net *net = sock_net(sk);
1905 struct mr_table *mrt;
1907 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1912 case SIOCGETMIFCNT_IN6:
1913 if (copy_from_user(&vr, arg, sizeof(vr)))
1915 if (vr.mifi >= mrt->maxvif)
/* Spectre-v1 hardening, same as the native ioctl path */
1917 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1918 read_lock(&mrt_lock);
1919 vif = &mrt->vif_table[vr.mifi];
1920 if (VIF_EXISTS(mrt, vr.mifi)) {
1921 vr.icount = vif->pkt_in;
1922 vr.ocount = vif->pkt_out;
1923 vr.ibytes = vif->bytes_in;
1924 vr.obytes = vif->bytes_out;
1925 read_unlock(&mrt_lock);
1927 if (copy_to_user(arg, &vr, sizeof(vr)))
1931 read_unlock(&mrt_lock);
1932 return -EADDRNOTAVAIL;
1933 case SIOCGETSGCNT_IN6:
1934 if (copy_from_user(&sr, arg, sizeof(sr)))
1938 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1940 sr.pktcnt = c->_c.mfc_un.res.pkt;
1941 sr.bytecnt = c->_c.mfc_un.res.bytes;
1942 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1945 if (copy_to_user(arg, &sr, sizeof(sr)))
1950 return -EADDRNOTAVAIL;
1952 return -ENOIOCTLCMD;
/*
 * ip6mr_forward2_finish - netfilter okfn for the forward hook: bump the
 * per-device forwarded-datagram and octet SNMP counters, then hand the
 * packet to dst_output() for actual transmission.
 */
1957 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1959 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1960 IPSTATS_MIB_OUTFORWDATAGRAMS);
1961 IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1962 IPSTATS_MIB_OUTOCTETS, skb->len);
1963 return dst_output(net, sk, skb);
1967 * Processing handlers for ip6mr_forward
/*
 * ip6mr_forward2 - transmit one copy of a multicast packet out via mif
 * @vifi.  A MIFF_REGISTER mif instead reports the whole packet to the
 * userland PIM daemon.  For real interfaces, a route is looked up for the
 * destination, the skb re-targeted, and the packet passed through the
 * NF_INET_FORWARD netfilter hook to ip6mr_forward2_finish().
 * NOTE(review): hop-limit decrement and several error/cleanup paths are
 * elided in this extract.
 */
1970 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1971 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1973 struct ipv6hdr *ipv6h;
1974 struct vif_device *vif = &mrt->vif_table[vifi];
1975 struct net_device *dev;
1976 struct dst_entry *dst;
1982 #ifdef CONFIG_IPV6_PIMSM_V2
/* Register mif: account the packet and punt it to pim6sd instead of
 * transmitting it */
1983 if (vif->flags & MIFF_REGISTER) {
1985 vif->bytes_out += skb->len;
1986 vif->dev->stats.tx_bytes += skb->len;
1987 vif->dev->stats.tx_packets++;
1988 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1993 ipv6h = ipv6_hdr(skb);
1995 fl6 = (struct flowi6) {
1996 .flowi6_oif = vif->link,
1997 .daddr = ipv6h->daddr,
2000 dst = ip6_route_output(net, NULL, &fl6);
/* Replace the input route with the freshly looked-up output route */
2007 skb_dst_set(skb, dst);
2010 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2011 * not only before forwarding, but after forwarding on all output
2012 * interfaces. It is clear, if mrouter runs a multicasting
2013 * program, it should receive packets not depending to what interface
2014 * program is joined.
2015 * If we will not make it, the program will have to join on all
2016 * interfaces. On the other hand, multihoming host (or router, but
2017 * not mrouter) cannot join to more than one interface - it will
2018 * result in receiving multiple packets.
2023 vif->bytes_out += skb->len;
2025 /* We are about to write */
2026 /* XXX: extension headers? */
2027 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2030 ipv6h = ipv6_hdr(skb);
2033 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2035 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2036 net, NULL, skb, skb->dev, dev,
2037 ip6mr_forward2_finish);
/*
 * ip6mr_find_vif - linear reverse scan of the vif table for the mif index
 * bound to @dev (elided tail presumably returns the index, or a negative
 * value when not found — confirm upstream).
 */
2044 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2048 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2049 if (mrt->vif_table[ct].dev == dev)
/*
 * ip6_mr_forward - core forwarding decision for one packet against one MFC
 * entry: update entry statistics, enforce RPF (wrong-interface packets may
 * trigger a PIM assert report), then replicate the packet to every output
 * mif whose TTL threshold the packet's hop limit exceeds.  The last copy
 * reuses the original skb; earlier copies are cloned.
 * NOTE(review): several branch bodies, 'goto dont_forward'-style exits and
 * the closing logic are elided in this extract.
 */
2055 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2056 struct sk_buff *skb, struct mfc6_cache *c)
2060 int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2062 vif = c->_c.mfc_parent;
2063 c->_c.mfc_un.res.pkt++;
2064 c->_c.mfc_un.res.bytes += skb->len;
2065 c->_c.mfc_un.res.lastuse = jiffies;
2067 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2068 struct mfc6_cache *cache_proxy;
2070 /* For an (*,G) entry, we only check that the incoming
2071 * interface is part of the static tree.
2074 cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2076 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2084 * Wrong interface: drop packet and (maybe) send PIM assert.
2086 if (mrt->vif_table[vif].dev != skb->dev) {
2087 c->_c.mfc_un.res.wrong_if++;
2089 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2090 /* pimsm uses asserts, when switching from RPT to SPT,
2091 so that we cannot check that packet arrived on an oif.
2092 It is bad, but otherwise we would need to move pretty
2093 large chunk of pimd to kernel. Ough... --ANK
2095 (mrt->mroute_do_pim ||
2096 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
/* Rate-limit asserts: at most one per MFC_ASSERT_THRESH per entry */
2098 c->_c.mfc_un.res.last_assert +
2099 MFC_ASSERT_THRESH)) {
2100 c->_c.mfc_un.res.last_assert = jiffies;
2101 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
/* RPF check passed: account the packet on the parent (input) mif */
2107 mrt->vif_table[vif].pkt_in++;
2108 mrt->vif_table[vif].bytes_in += skb->len;
2113 if (ipv6_addr_any(&c->mf6c_origin) &&
2114 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2115 if (true_vifi >= 0 &&
2116 true_vifi != c->_c.mfc_parent &&
2117 ipv6_hdr(skb)->hop_limit >
2118 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2119 /* It's an (*,*) entry and the packet is not coming from
2120 * the upstream: forward the packet to the upstream
2123 psend = c->_c.mfc_parent;
/* Walk output mifs high-to-low; psend delays transmission by one
 * iteration so the final copy can consume the original skb */
2128 for (ct = c->_c.mfc_un.res.maxvif - 1;
2129 ct >= c->_c.mfc_un.res.minvif; ct--) {
2130 /* For (*,G) entry, don't forward to the incoming interface */
2131 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2132 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2134 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2136 ip6mr_forward2(net, mrt, skb2,
2144 ip6mr_forward2(net, mrt, skb, c, psend);
2154 * Multicast packets for forwarding arrive here
/*
 * ip6_mr_input - entry point for multicast packets needing forwarding:
 * select the mr table via fib-rules, look up the (S,G) (falling back to
 * (*,G)/(*,*) "any" entries), queue the packet as unresolved if no entry
 * exists, otherwise hand it to ip6_mr_forward().  Runs under mrt_lock.
 */
2157 int ip6_mr_input(struct sk_buff *skb)
2159 struct mfc6_cache *cache;
2160 struct net *net = dev_net(skb->dev);
2161 struct mr_table *mrt;
2162 struct flowi6 fl6 = {
2163 .flowi6_iif = skb->dev->ifindex,
2164 .flowi6_mark = skb->mark,
2168 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2174 read_lock(&mrt_lock);
2175 cache = ip6mr_cache_find(mrt,
2176 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2178 int vif = ip6mr_find_vif(mrt, skb->dev);
/* Exact (S,G) miss: try wildcard-source entries for this group */
2181 cache = ip6mr_cache_find_any(mrt,
2182 &ipv6_hdr(skb)->daddr,
2187 * No usable cache entry
2192 vif = ip6mr_find_vif(mrt, skb->dev);
/* Queue the packet and ask the routing daemon to resolve it */
2194 int err = ip6mr_cache_unresolved(mrt, vif, skb);
2195 read_unlock(&mrt_lock);
2199 read_unlock(&mrt_lock);
2204 ip6_mr_forward(net, mrt, skb, cache);
2206 read_unlock(&mrt_lock);
/*
 * ip6mr_get_route - fill @rtm for an RTM_GETROUTE query that resolved to a
 * multicast route.  If the MFC entry is not yet resolved, synthesize a
 * minimal IPv6 header carrying only saddr/daddr and queue it on the
 * unresolved list so the daemon will create the entry.
 * NOTE(review): error returns and some header-field assignments are elided
 * in this extract.
 */
2211 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2215 struct mr_table *mrt;
2216 struct mfc6_cache *cache;
2217 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2219 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2223 read_lock(&mrt_lock);
2224 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2225 if (!cache && skb->dev) {
2226 int vif = ip6mr_find_vif(mrt, skb->dev);
2229 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2234 struct sk_buff *skb2;
2235 struct ipv6hdr *iph;
2236 struct net_device *dev;
2240 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2241 read_unlock(&mrt_lock);
2245 /* really correct? */
2246 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2248 read_unlock(&mrt_lock);
/* Build a skeleton packet: bare IPv6 header, no payload, no next hdr */
2252 NETLINK_CB(skb2).portid = portid;
2253 skb_reset_transport_header(skb2);
2255 skb_put(skb2, sizeof(struct ipv6hdr));
2256 skb_reset_network_header(skb2);
2258 iph = ipv6_hdr(skb2);
2261 iph->flow_lbl[0] = 0;
2262 iph->flow_lbl[1] = 0;
2263 iph->flow_lbl[2] = 0;
2264 iph->payload_len = 0;
2265 iph->nexthdr = IPPROTO_NONE;
2267 iph->saddr = rt->rt6i_src.addr;
2268 iph->daddr = rt->rt6i_dst.addr;
2270 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2271 read_unlock(&mrt_lock);
/* Resolved entry: fill the rtmsg from the generic mr helper */
2276 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2277 read_unlock(&mrt_lock);
/*
 * ip6mr_fill_mroute - serialize one MFC entry into a netlink RTM message:
 * rtmsg header (family RTNL_FAMILY_IP6MR, /128 src and dst), RTA_TABLE,
 * RTA_SRC/RTA_DST, then the common per-route attributes via
 * mr_fill_mroute().  An unresolved entry (-ENOENT) is still emitted so
 * dumps don't stall on it.
 */
2281 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2282 u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2285 struct nlmsghdr *nlh;
2289 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2293 rtm = nlmsg_data(nlh);
2294 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2295 rtm->rtm_dst_len = 128;
2296 rtm->rtm_src_len = 128;
2298 rtm->rtm_table = mrt->id;
2299 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2300 goto nla_put_failure;
2301 rtm->rtm_type = RTN_MULTICAST;
2302 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Static (user-installed) entries are reported as RTPROT_STATIC */
2303 if (c->_c.mfc_flags & MFC_STATIC)
2304 rtm->rtm_protocol = RTPROT_STATIC;
2306 rtm->rtm_protocol = RTPROT_MROUTED;
2309 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2310 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2311 goto nla_put_failure;
2312 err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2313 /* do not break the dump if cache is unresolved */
2314 if (err < 0 && err != -ENOENT)
2315 goto nla_put_failure;
2317 nlmsg_end(skb, nlh);
/* Roll back the partially-built message on attribute overflow */
2321 nlmsg_cancel(skb, nlh);
/* Adapter with the generic mr_mfc-based signature expected by
 * mr_rtm_dumproute(); simply downcasts and delegates to
 * ip6mr_fill_mroute(). */
2325 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2326 u32 portid, u32 seq, struct mr_mfc *c,
2329 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
/*
 * mr6_msgsize - upper bound on the netlink message size for one MFC entry,
 * summing the rtmsg header and every attribute ip6mr_fill_mroute() may emit
 * (nexthop list sized by @maxvif; the @unresolved-dependent part is elided
 * in this extract).
 */
2333 static int mr6_msgsize(bool unresolved, int maxvif)
2336 NLMSG_ALIGN(sizeof(struct rtmsg))
2337 + nla_total_size(4) /* RTA_TABLE */
2338 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2339 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2344 + nla_total_size(4) /* RTA_IIF */
2345 + nla_total_size(0) /* RTA_MULTIPATH */
2346 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2348 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
/*
 * mr6_netlink_event - broadcast an RTM_NEWROUTE/RTM_DELROUTE notification
 * for @mfc on the RTNLGRP_IPV6_MROUTE group; on allocation or fill failure
 * the error is reported via rtnl_set_sk_err().
 */
2354 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2357 struct net *net = read_pnet(&mrt->net);
2358 struct sk_buff *skb;
/* mfc_parent >= MAXMIFS marks an unresolved entry, which needs less room */
2361 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2366 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2370 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2376 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
/*
 * mrt6msg_netlink_msgsize - size estimate for an RTM_NEWCACHEREPORT
 * message carrying a cache report of @payloadlen packet bytes.
 */
2379 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2382 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2383 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */
2384 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */
2385 /* IP6MRA_CREPORT_SRC_ADDR */
2386 + nla_total_size(sizeof(struct in6_addr))
2387 /* IP6MRA_CREPORT_DST_ADDR */
2388 + nla_total_size(sizeof(struct in6_addr))
2389 /* IP6MRA_CREPORT_PKT */
2390 + nla_total_size(payloadlen)
/*
 * mrt6msg_netlink_event - mirror a kernel-to-daemon mrt6msg cache report
 * onto the RTNLGRP_IPV6_MROUTE_R netlink group as RTM_NEWCACHEREPORT, so
 * monitoring tools can observe WRONGMIF/NOCACHE events without owning the
 * mroute socket.  @pkt's transport header points at the struct mrt6msg;
 * the original packet bytes follow and are copied into IP6MRA_CREPORT_PKT.
 */
2396 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2398 struct net *net = read_pnet(&mrt->net);
2399 struct nlmsghdr *nlh;
2400 struct rtgenmsg *rtgenm;
2401 struct mrt6msg *msg;
2402 struct sk_buff *skb;
2406 payloadlen = pkt->len - sizeof(struct mrt6msg);
2407 msg = (struct mrt6msg *)skb_transport_header(pkt);
2409 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2413 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2414 sizeof(struct rtgenmsg), 0);
2417 rtgenm = nlmsg_data(nlh);
2418 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2419 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2420 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2421 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2423 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2425 goto nla_put_failure;
/* Reserve the attribute first, then copy the payload straight in */
2427 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2428 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2429 nla_data(nla), payloadlen))
2430 goto nla_put_failure;
2432 nlmsg_end(skb, nlh);
2434 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2438 nlmsg_cancel(skb, nlh);
2441 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2444 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2446 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2447 _ip6mr_fill_mroute, &mfc_unres_lock);