GNU Linux-libre 4.19.286-gnu1
[releases.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
46 #define DEBUG
47 #define NEIGH_DEBUG 1
48 #define neigh_dbg(level, fmt, ...)              \
49 do {                                            \
50         if (level <= NEIGH_DEBUG)               \
51                 pr_debug(fmt, ##__VA_ARGS__);   \
52 } while (0)
53
54 #define PNEIGH_HASHMASK         0xF
55
56 static void neigh_timer_handler(struct timer_list *t);
57 static void __neigh_notify(struct neighbour *n, int type, int flags,
58                            u32 pid);
59 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
60 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
61                                     struct net_device *dev);
62
63 #ifdef CONFIG_PROC_FS
64 static const struct seq_operations neigh_stat_seq_ops;
65 #endif
66
67 /*
68    Neighbour hash table buckets are protected with rwlock tbl->lock.
69
70    - All the scans/updates to hash buckets MUST be made under this lock.
71    - NOTHING clever should be made under this lock: no callbacks
72      to protocol backends, no attempts to send something to network.
73      It will result in deadlocks, if backend/driver wants to use neighbour
74      cache.
75    - If the entry requires some non-trivial actions, increase
76      its reference count and release table lock.
77
78    Neighbour entries are protected:
79    - with reference count.
80    - with rwlock neigh->lock
81
82    Reference count prevents destruction.
83
84    neigh->lock mainly serializes ll address data and its validity state.
85    However, the same lock is used to protect another entry fields:
86     - timer
87     - resolution queue
88
89    Again, nothing clever shall be made under neigh->lock,
90    the most complicated procedure, which we allow is dev->hard_header.
91    It is supposed, that dev->hard_header is simplistic and does
92    not make callbacks to neighbour tables.
93  */
94
/* Fallback output handler installed on dead/unresolvable entries:
 * drop the packet and report the network as down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}
100
/* Final teardown of an entry that has been unlinked from the hash table:
 * run the protocol's cleanup hook, notify userspace (RTM_DELNEIGH) and
 * in-kernel netevent listeners, then drop the table's reference.
 * Caller must already have set neigh->dead and removed the entry from
 * its bucket chain.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
        if (neigh->parms->neigh_cleanup)
                neigh->parms->neigh_cleanup(neigh);

        __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        neigh_release(neigh);
}
110
111 /*
112  * It is random distribution in the interval (1/2)*base...(3/2)*base.
113  * It corresponds to default IPv6 settings and is not overridable,
114  * because it is really reasonable choice.
115  */
116
unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        /* Uniformly distributed over [base/2, 3*base/2). */
        return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
122
123
/* Try to unlink @n from the bucket chain position @np.  The entry is
 * only removed when nobody else holds a reference and its nud_state and
 * flags do not intersect the @state/@flags exclusion masks (e.g.
 * NUD_PERMANENT / NTF_EXT_LEARNED for forced GC).
 * Returns true if the entry was unlinked and released.
 * Caller holds tbl->lock (write side).
 */
static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
                      struct neighbour __rcu **np, struct neigh_table *tbl)
{
        bool retval = false;

        write_lock(&n->lock);
        if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
            !(n->flags & flags)) {
                struct neighbour *neigh;

                neigh = rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock));
                /* Unlink from the chain; concurrent lockless readers may
                 * still see the entry until the RCU grace period ends.
                 */
                rcu_assign_pointer(*np, neigh);
                n->dead = 1;
                retval = true;
        }
        write_unlock(&n->lock);
        if (retval)
                neigh_cleanup_and_release(n);
        return retval;
}
145
/* Remove @ndel from @tbl if it is unreferenced: walk the bucket chain
 * its key hashes to and hand the actual unlink to neigh_del() (with
 * empty exclusion masks).  Returns true on removal.
 * Caller holds tbl->lock (write side).
 */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
        struct neigh_hash_table *nht;
        void *pkey = ndel->primary_key;
        u32 hash_val;
        struct neighbour *n;
        struct neighbour __rcu **np;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
        hash_val = hash_val >> (32 - nht->hash_shift);

        np = &nht->hash_buckets[hash_val];
        while ((n = rcu_dereference_protected(*np,
                                              lockdep_is_held(&tbl->lock)))) {
                if (n == ndel)
                        return neigh_del(n, 0, 0, np, tbl);
                np = &n->next;
        }
        return false;
}
168
/* Synchronous garbage collection, triggered from neigh_alloc() when the
 * table exceeds its gc_thresh2/gc_thresh3 limits.  Scans every bucket
 * and drops unreferenced entries that are neither NUD_PERMANENT nor
 * externally learned.  Returns 1 if at least one entry was freed.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[i];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Neighbour record may be discarded if:
                         * - nobody refers to it.
                         * - it is not permanent
                         */
                        if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
                                      tbl)) {
                                /* *np was advanced by the unlink itself */
                                shrunk = 1;
                                continue;
                        }
                        np = &n->next;
                }
        }

        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}
206
207 static void neigh_add_timer(struct neighbour *n, unsigned long when)
208 {
209         neigh_hold(n);
210         if (unlikely(mod_timer(&n->timer, when))) {
211                 printk("NEIGH: BUG, double timer add, state is %x\n",
212                        n->nud_state);
213                 dump_stack();
214         }
215 }
216
217 static int neigh_del_timer(struct neighbour *n)
218 {
219         if ((n->nud_state & NUD_IN_TIMER) &&
220             del_timer(&n->timer)) {
221                 neigh_release(n);
222                 return 1;
223         }
224         return 0;
225 }
226
/* Drop all proxy-delayed skbs on @list that belong to @net (all of them
 * when @net is NULL).  Done in two phases: matching skbs are moved to a
 * private list under the IRQ-disabled queue lock, then the device
 * references are dropped and the skbs freed outside that lock.
 */
static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
        struct sk_buff_head tmp;
        unsigned long flags;
        struct sk_buff *skb;

        skb_queue_head_init(&tmp);
        spin_lock_irqsave(&list->lock, flags);
        skb = skb_peek(list);
        while (skb != NULL) {
                /* grab the successor first: skb may be unlinked below */
                struct sk_buff *skb_next = skb_peek_next(skb, list);
                if (net == NULL || net_eq(dev_net(skb->dev), net)) {
                        __skb_unlink(skb, list);
                        __skb_queue_tail(&tmp, skb);
                }
                skb = skb_next;
        }
        spin_unlock_irqrestore(&list->lock, flags);

        while ((skb = __skb_dequeue(&tmp))) {
                dev_put(skb->dev);
                kfree_skb(skb);
        }
}
251
/* Unlink every entry belonging to @dev (or every entry when @dev is
 * NULL) from the hash table and release it.  Entries still referenced
 * elsewhere are neutered in place (timer killed, queue purged, output
 * blackholed) and destroyed later when the last reference is dropped.
 * Caller holds tbl->lock (write side).
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (refcount_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation.
                                   We must destroy neighbour entry,
                                   but someone still uses it.

                                   The destroy will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
                                __skb_queue_purge(&n->arp_queue);
                                n->arp_queue_len_bytes = 0;
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                neigh_dbg(2, "neigh %p is stray\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}
301
/* Flush all cached entries for @dev, e.g. after its link-layer address
 * has changed.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
309
/* Tear down all per-device state when @dev goes away: flush its cache
 * entries and proxy entries, purge its queued proxy skbs, and stop the
 * proxy timer once the queue is empty.  Note that
 * pneigh_ifdown_and_unlock() releases tbl->lock itself.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown_and_unlock(tbl, dev);
        pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
        if (skb_queue_empty_lockless(&tbl->proxy_queue))
                del_timer_sync(&tbl->proxy_timer);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
321
/* Allocate and minimally initialize a new cache entry for @tbl/@dev.
 * May run a forced GC pass when the table is over gc_thresh2/3.
 * Returns NULL when the table is full or memory is exhausted.  The new
 * entry starts with dead = 1; __neigh_create() clears that once the
 * entry is actually hashed in.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
        int entries;

        /* tbl->entries is bumped up front and rolled back on failure */
        entries = atomic_inc_return(&tbl->entries) - 1;
        if (entries >= tbl->gc_thresh3 ||
            (entries >= tbl->gc_thresh2 &&
             time_after(now, tbl->last_flush + 5 * HZ))) {
                if (!neigh_forced_gc(tbl) &&
                    entries >= tbl->gc_thresh3) {
                        net_info_ratelimited("%s: neighbor table overflow!\n",
                                             tbl->id);
                        NEIGH_CACHE_STAT_INC(tbl, table_fulls);
                        goto out_entries;
                }
        }

        n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
        if (!n)
                goto out_entries;

        __skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
        seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        seqlock_init(&n->hh.hh_lock);
        n->parms          = neigh_parms_clone(&tbl->parms);
        timer_setup(&n->timer, neigh_timer_handler, 0);

        NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        refcount_set(&n->refcnt, 1);
        n->dead           = 1;
out:
        return n;

out_entries:
        atomic_dec(&tbl->entries);
        goto out;
}
366
367 static void neigh_get_hash_rnd(u32 *x)
368 {
369         *x = get_random_u32() | 1;
370 }
371
372 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
373 {
374         size_t size = (1 << shift) * sizeof(struct neighbour *);
375         struct neigh_hash_table *ret;
376         struct neighbour __rcu **buckets;
377         int i;
378
379         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
380         if (!ret)
381                 return NULL;
382         if (size <= PAGE_SIZE) {
383                 buckets = kzalloc(size, GFP_ATOMIC);
384         } else {
385                 buckets = (struct neighbour __rcu **)
386                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
387                                            get_order(size));
388                 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
389         }
390         if (!buckets) {
391                 kfree(ret);
392                 return NULL;
393         }
394         ret->hash_buckets = buckets;
395         ret->hash_shift = shift;
396         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
397                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
398         return ret;
399 }
400
401 static void neigh_hash_free_rcu(struct rcu_head *head)
402 {
403         struct neigh_hash_table *nht = container_of(head,
404                                                     struct neigh_hash_table,
405                                                     rcu);
406         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
407         struct neighbour __rcu **buckets = nht->hash_buckets;
408
409         if (size <= PAGE_SIZE) {
410                 kfree(buckets);
411         } else {
412                 kmemleak_free(buckets);
413                 free_pages((unsigned long)buckets, get_order(size));
414         }
415         kfree(nht);
416 }
417
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry into it.  Entries are inserted at the head of their new chains
 * via rcu_assign_pointer(), and the old table is freed only after an
 * RCU grace period, so lockless readers always see a consistent chain.
 * Returns the table in use afterwards (the old one if allocation
 * failed).  Caller holds tbl->lock (write side).
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_shift)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_shift);
        if (!new_nht)
                return old_nht;

        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        hash >>= (32 - new_nht->hash_shift);
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}
458
/* Look up the entry for @pkey/@dev under RCU and take a reference on
 * it.  Returns NULL if no entry exists or if the entry's refcount
 * already dropped to zero (it is being torn down concurrently); note
 * the hits counter is bumped in that case too.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        n = __neigh_lookup_noref(tbl, pkey, dev);
        if (n) {
                if (!refcount_inc_not_zero(&n->refcnt))
                        n = NULL;
                NEIGH_CACHE_STAT_INC(tbl, hits);
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup);
478
/* Like neigh_lookup() but matches on key and network namespace only,
 * ignoring the device (the key is hashed with dev == NULL).  Returns a
 * referenced entry, or NULL if none matched or the match was already
 * being torn down.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
                                     const void *pkey)
{
        struct neighbour *n;
        unsigned int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (!memcmp(n->primary_key, pkey, key_len) &&
                    net_eq(dev_net(n->dev), net)) {
                        if (!refcount_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
509
/* Create the entry for @pkey/@dev and insert it into @tbl.  The
 * protocol constructor, the device's ndo_neigh_construct and the
 * parms->neigh_setup hooks all run before the table lock is taken.  If
 * another CPU inserted the same key in the meantime, the existing entry
 * is returned and the new one released.  On success the entry carries
 * an extra reference when @want_ref is true; on failure an ERR_PTR is
 * returned.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
                                 struct net_device *dev, bool want_ref)
{
        u32 hash_val;
        unsigned int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        if (dev->netdev_ops->ndo_neigh_construct) {
                error = dev->netdev_ops->ndo_neigh_construct(dev, n);
                if (error < 0) {
                        rc = ERR_PTR(error);
                        goto out_neigh_release;
                }
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* backdate "confirmed" so the entry does not start out fresh */
        n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

        hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        /* parms was marked dead underneath us; refuse the insert */
        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        /* Re-check under the lock for a concurrently inserted duplicate */
        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
                        if (want_ref)
                                neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        n->dead = 0;
        if (want_ref)
                neigh_hold(n);
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        neigh_dbg(2, "neigh %p is created\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        neigh_release(n);
        goto out;
}
EXPORT_SYMBOL(__neigh_create);
597
598 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
599 {
600         u32 hash_val = *(u32 *)(pkey + key_len - 4);
601         hash_val ^= (hash_val >> 16);
602         hash_val ^= hash_val >> 8;
603         hash_val ^= hash_val >> 4;
604         hash_val &= PNEIGH_HASHMASK;
605         return hash_val;
606 }
607
608 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
609                                               struct net *net,
610                                               const void *pkey,
611                                               unsigned int key_len,
612                                               struct net_device *dev)
613 {
614         while (n) {
615                 if (!memcmp(n->key, pkey, key_len) &&
616                     net_eq(pneigh_net(n), net) &&
617                     (n->dev == dev || !n->dev))
618                         return n;
619                 n = n->next;
620         }
621         return NULL;
622 }
623
/* Lockless proxy-entry lookup: unlike pneigh_lookup(), no tbl->lock is
 * taken here, so the caller must serialize against pneigh table updates
 * itself.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        unsigned int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
634
/* Look up a proxy entry, creating it when @creat is set (creation
 * requires the RTNL to be held).  Returns the entry, or NULL on lookup
 * miss (with !@creat), allocation failure, or pconstructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
                                    struct net *net, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        unsigned int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        read_lock_bh(&tbl->lock);
        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                              net, pkey, key_len, dev);
        read_unlock_bh(&tbl->lock);

        if (n || !creat)
                goto out;

        ASSERT_RTNL();

        /* the key is stored inline right after the struct */
        n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        write_pnet(&n->net, net);
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
        if (dev)
                dev_hold(dev);

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                if (dev)
                        dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
        }

        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}
EXPORT_SYMBOL(pneigh_lookup);
679
680
/* Remove the proxy entry exactly matching @pkey/@dev/@net.  The entry
 * is unlinked under tbl->lock, but its destructor, device put and kfree
 * run after the lock has been dropped.  Returns 0 on success or
 * -ENOENT if no entry matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        unsigned int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        *np = n->next;
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
        }
        write_unlock_bh(&tbl->lock);
        return -ENOENT;
}
706
/* Remove all proxy entries for @dev (all entries when @dev is NULL).
 * Called with tbl->lock write-held and RELEASES that lock before
 * running the destructors, so the callbacks execute without the table
 * lock.  Unlinked entries are collected on a private freelist first.
 * Always returns -ENOENT.
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                                    struct net_device *dev)
{
        struct pneigh_entry *n, **np, *freelist = NULL;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                *np = n->next;
                                n->next = freelist;
                                freelist = n;
                                continue;
                        }
                        np = &n->next;
                }
        }
        write_unlock_bh(&tbl->lock);
        while ((n = freelist)) {
                freelist = n->next;
                n->next = NULL;
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
                if (n->dev)
                        dev_put(n->dev);
                kfree(n);
        }
        return -ENOENT;
}
737
738 static void neigh_parms_destroy(struct neigh_parms *parms);
739
/* Drop one reference on @parms, destroying it on the final put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (refcount_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}
745
/*
 *      Final destruction of a neighbour entry, called once the last
 *      reference is dropped.  The entry must already be out of the
 *      table (neigh->dead set); the memory itself is freed only after
 *      an RCU grace period.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct net_device *dev = neigh->dev;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                pr_warn("Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        /* a dead entry must have no pending timer */
        if (neigh_del_timer(neigh))
                pr_warn("Impossible event\n");

        write_lock_bh(&neigh->lock);
        __skb_queue_purge(&neigh->arp_queue);
        write_unlock_bh(&neigh->lock);
        neigh->arp_queue_len_bytes = 0;

        if (dev->netdev_ops->ndo_neigh_destroy)
                dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

        dev_put(dev);
        neigh_parms_put(neigh->parms);

        neigh_dbg(2, "neigh %p is destroyed\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
782
783 /* Neighbour state is suspicious;
784    disable fast path.
785
786    Called with write_locked neigh.
787  */
static void neigh_suspect(struct neighbour *neigh)
{
        neigh_dbg(2, "neigh %p is suspected\n", neigh);

        /* route packets through the full (slow) resolving output path */
        neigh->output = neigh->ops->output;
}
794
795 /* Neighbour state is OK;
796    enable fast path.
797
798    Called with write_locked neigh.
799  */
static void neigh_connect(struct neighbour *neigh)
{
        neigh_dbg(2, "neigh %p is connected\n", neigh);

        /* switch to the fast output path for verified entries */
        neigh->output = neigh->ops->connected_output;
}
806
/* Periodic GC worker.  Re-randomizes each parms' reachable_time roughly
 * every 300 seconds, and (only when the table holds at least gc_thresh1
 * entries) evicts unreferenced entries that are NUD_FAILED or unused
 * for longer than GC_STALETIME, skipping permanent, in-timer and
 * externally learned ones.  tbl->lock is dropped and retaken between
 * buckets to allow rescheduling; nht is re-read afterwards because the
 * table may have grown meanwhile.  Reschedules itself every
 * BASE_REACHABLE_TIME/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
        struct neighbour *n;
        struct neighbour __rcu **np;
        unsigned int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /*
         *      periodically recompute ReachableTime from random function
         */

        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = jiffies;
                list_for_each_entry(p, &tbl->parms_list, list)
                        p->reachable_time =
                                neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }

        if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
                goto out;

        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
                np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                lockdep_is_held(&tbl->lock))) != NULL) {
                        unsigned int state;

                        write_lock(&n->lock);

                        state = n->nud_state;
                        if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
                            (n->flags & NTF_EXT_LEARNED)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        if (time_before(n->used, n->confirmed))
                                n->used = n->confirmed;

                        if (refcount_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                *np = n->next;
                                n->dead = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
                /*
                 * It's fine to release lock here, even if hash table
                 * grows while we are preempted.
                 */
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
                nht = rcu_dereference_protected(tbl->nht,
                                                lockdep_is_held(&tbl->lock));
        }
out:
        /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
         * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
         * BASE_REACHABLE_TIME.
         */
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
                              NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
        write_unlock_bh(&tbl->lock);
}
888
889 static __inline__ int neigh_max_probes(struct neighbour *n)
890 {
891         struct neigh_parms *p = n->parms;
892         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
893                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
894                 NEIGH_VAR(p, MCAST_PROBES));
895 }
896
/* Mark resolution of @neigh as failed: bump the res_failed statistic,
 * report unreachability for every skb queued on arp_queue, and flush
 * the queue.  Called and returns with neigh->lock held for writing;
 * the lock is dropped around each error_report() callback.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* Drop anything still queued (e.g. the state changed while the
	 * lock was released above).
	 */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
921
922 static void neigh_probe(struct neighbour *neigh)
923         __releases(neigh->lock)
924 {
925         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
926         /* keep skb alive even if arp_queue overflows */
927         if (skb)
928                 skb = skb_clone(skb, GFP_ATOMIC);
929         write_unlock(&neigh->lock);
930         if (neigh->ops->solicit)
931                 neigh->ops->solicit(neigh, skb);
932         atomic_inc(&neigh->probes);
933         kfree_skb(skb);
934 }
935
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine: REACHABLE entries that time out become
 * DELAY or STALE; DELAY entries either return to REACHABLE (if traffic
 * confirmed them) or move to PROBE; PROBE/INCOMPLETE entries that
 * exhaust their probe budget become FAILED.  Re-arms the timer while
 * the entry remains in a timed state, and sends the next probe for
 * INCOMPLETE/PROBE entries.
 */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Timer may race with a state transition that left NUD_IN_TIMER. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Traffic confirmed the entry while we waited. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Probe budget exhausted: give up on this entry. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		/* mod_timer() returning 0 means the timer was inactive;
		 * take a reference for the newly pending timer.
		 */
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	/* Drop the reference held by the timer that just fired. */
	neigh_release(neigh);
}
1023
/* Start (or continue) resolution of @neigh because a packet wants to
 * use it.  @skb, if non-NULL, is queued on arp_queue until resolution
 * succeeds, or freed on failure.
 *
 * Returns 0 when the caller may transmit immediately, 1 when the skb
 * was queued (or dropped because resolution cannot proceed).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already usable or already being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to probe at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Evict oldest queued skbs until the new one fits
			 * within the configured byte limit.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases neigh->lock but not BH, hence the split
	 * unlock/local_bh_enable() below matching write_lock_bh() above.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1103
1104 static void neigh_update_hhs(struct neighbour *neigh)
1105 {
1106         struct hh_cache *hh;
1107         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1108                 = NULL;
1109
1110         if (neigh->dev->header_ops)
1111                 update = neigh->dev->header_ops->cache_update;
1112
1113         if (update) {
1114                 hh = &neigh->hh;
1115                 if (READ_ONCE(hh->hh_len)) {
1116                         write_seqlock_bh(&hh->hh_lock);
1117                         update(hh, neigh->dev, neigh->ha);
1118                         write_sequnlock_bh(&hh->hh_lock);
1119                 }
1120         }
1121 }
1122
1123
1124
1125 /* Generic update routine.
1126    -- lladdr is new lladdr or NULL, if it is not supplied.
1127    -- new    is new state.
1128    -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will mark an existing "connected"
				lladdr as suspect instead of overriding it
				if it differs.
	NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known as
				a router.
1140
1141    Caller MUST hold reference count on the entry.
1142  */
1143
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	neigh_update_ext_learned(neigh, flags, &notify);

	/* Transition to an invalid state (NONE/INCOMPLETE/FAILED...):
	 * stop the timer, demote connected entries, and flush the
	 * arp_queue when a pending resolution is being declared failed.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the old lladdr but demote to STALE. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Address changed without confirmation: age the entry so
		 * it is re-verified promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	/* Entry just became valid: flush any packets queued while it
	 * was being resolved.
	 */
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1318
1319 /* Update the neigh to listen temporarily for probe responses, even if it is
1320  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1321  */
1322 void __neigh_set_probe_once(struct neighbour *neigh)
1323 {
1324         if (neigh->dead)
1325                 return;
1326         neigh->updated = jiffies;
1327         if (!(neigh->nud_state & NUD_FAILED))
1328                 return;
1329         neigh->nud_state = NUD_INCOMPLETE;
1330         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1331         neigh_add_timer(neigh,
1332                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1333 }
1334 EXPORT_SYMBOL(__neigh_set_probe_once);
1335
1336 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1337                                  u8 *lladdr, void *saddr,
1338                                  struct net_device *dev)
1339 {
1340         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1341                                                  lladdr || !dev->addr_len);
1342         if (neigh)
1343                 neigh_update(neigh, lladdr, NUD_STALE,
1344                              NEIGH_UPDATE_F_OVERRIDE, 0);
1345         return neigh;
1346 }
1347 EXPORT_SYMBOL(neigh_event_ns);
1348
1349 /* called with read_lock_bh(&n->lock); */
1350 static void neigh_hh_init(struct neighbour *n)
1351 {
1352         struct net_device *dev = n->dev;
1353         __be16 prot = n->tbl->protocol;
1354         struct hh_cache *hh = &n->hh;
1355
1356         write_lock_bh(&n->lock);
1357
1358         /* Only one thread can come in here and initialize the
1359          * hh_cache entry.
1360          */
1361         if (!hh->hh_len)
1362                 dev->header_ops->cache(n, hh, prot);
1363
1364         write_unlock_bh(&n->lock);
1365 }
1366
1367 /* Slow and careful. */
1368
1369 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1370 {
1371         int rc = 0;
1372
1373         if (!neigh_event_send(neigh, skb)) {
1374                 int err;
1375                 struct net_device *dev = neigh->dev;
1376                 unsigned int seq;
1377
1378                 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1379                         neigh_hh_init(neigh);
1380
1381                 do {
1382                         __skb_pull(skb, skb_network_offset(skb));
1383                         seq = read_seqbegin(&neigh->ha_lock);
1384                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1385                                               neigh->ha, NULL, skb->len);
1386                 } while (read_seqretry(&neigh->ha_lock, seq));
1387
1388                 if (err >= 0)
1389                         rc = dev_queue_xmit(skb);
1390                 else
1391                         goto out_kfree_skb;
1392         }
1393 out:
1394         return rc;
1395 out_kfree_skb:
1396         rc = -EINVAL;
1397         kfree_skb(skb);
1398         goto out;
1399 }
1400 EXPORT_SYMBOL(neigh_resolve_output);
1401
1402 /* As fast as possible without hh cache */
1403
1404 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1405 {
1406         struct net_device *dev = neigh->dev;
1407         unsigned int seq;
1408         int err;
1409
1410         do {
1411                 __skb_pull(skb, skb_network_offset(skb));
1412                 seq = read_seqbegin(&neigh->ha_lock);
1413                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1414                                       neigh->ha, NULL, skb->len);
1415         } while (read_seqretry(&neigh->ha_lock, seq));
1416
1417         if (err >= 0)
1418                 err = dev_queue_xmit(skb);
1419         else {
1420                 err = -EINVAL;
1421                 kfree_skb(skb);
1422         }
1423         return err;
1424 }
1425 EXPORT_SYMBOL(neigh_connected_output);
1426
/* Pass-through output path: hand the skb straight to the device queue
 * without touching any neighbour state.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1432
/* Proxy timer callback: walk tbl->proxy_queue, replay every skb whose
 * scheduled time has arrived via tbl->proxy_redo (or drop it if the
 * device is down), and re-arm the timer for the earliest remaining skb.
 */
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken by pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	/* Re-arm only if at least one skb is still pending. */
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1466
1467 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1468                     struct sk_buff *skb)
1469 {
1470         unsigned long now = jiffies;
1471
1472         unsigned long sched_next = now + (prandom_u32() %
1473                                           NEIGH_VAR(p, PROXY_DELAY));
1474
1475         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1476                 kfree_skb(skb);
1477                 return;
1478         }
1479
1480         NEIGH_CB(skb)->sched_next = sched_next;
1481         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1482
1483         spin_lock(&tbl->proxy_queue.lock);
1484         if (del_timer(&tbl->proxy_timer)) {
1485                 if (time_before(tbl->proxy_timer.expires, sched_next))
1486                         sched_next = tbl->proxy_timer.expires;
1487         }
1488         skb_dst_drop(skb);
1489         dev_hold(skb->dev);
1490         __skb_queue_tail(&tbl->proxy_queue, skb);
1491         mod_timer(&tbl->proxy_timer, sched_next);
1492         spin_unlock(&tbl->proxy_queue.lock);
1493 }
1494 EXPORT_SYMBOL(pneigh_enqueue);
1495
1496 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1497                                                       struct net *net, int ifindex)
1498 {
1499         struct neigh_parms *p;
1500
1501         list_for_each_entry(p, &tbl->parms_list, list) {
1502                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1503                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1504                         return p;
1505         }
1506
1507         return NULL;
1508 }
1509
1510 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1511                                       struct neigh_table *tbl)
1512 {
1513         struct neigh_parms *p;
1514         struct net *net = dev_net(dev);
1515         const struct net_device_ops *ops = dev->netdev_ops;
1516
1517         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1518         if (p) {
1519                 p->tbl            = tbl;
1520                 refcount_set(&p->refcnt, 1);
1521                 p->reachable_time =
1522                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1523                 dev_hold(dev);
1524                 p->dev = dev;
1525                 write_pnet(&p->net, net);
1526                 p->sysctl_table = NULL;
1527
1528                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1529                         dev_put(dev);
1530                         kfree(p);
1531                         return NULL;
1532                 }
1533
1534                 write_lock_bh(&tbl->lock);
1535                 list_add(&p->list, &tbl->parms.list);
1536                 write_unlock_bh(&tbl->lock);
1537
1538                 neigh_parms_data_state_cleanall(p);
1539         }
1540         return p;
1541 }
1542 EXPORT_SYMBOL(neigh_parms_alloc);
1543
/* RCU callback: drop the reference on the parms block once the grace
 * period started by neigh_parms_release() has elapsed.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1551
/* Unlink @parms from @tbl, mark it dead, drop its device reference and
 * release it after an RCU grace period.  The table's built-in default
 * parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1565
/* Final teardown of a parms block; callers must ensure no references
 * remain (presumably reached via neigh_parms_put() — confirm).
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1570
1571 static struct lock_class_key neigh_table_proxy_queue_class;
1572
1573 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1574
/* Initialize neighbour table @tbl and register it in neigh_tables at
 * @index.  Sets up default parms, per-cpu statistics, the proc entry,
 * the neighbour and proxy hash tables, the periodic GC work and the
 * proxy timer.  Allocation failures here are fatal (panic), as this
 * runs at subsystem init time.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Start with a 2^3-bucket neighbour hash; it grows on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	/* Push the first ReachableTime re-randomization into the future. */
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1625
/* Tear down neighbour table @tbl and unregister it from slot @index:
 * stop GC work and the proxy timer, purge queued proxy skbs, drop all
 * neighbour entries, then free the hash tables, proc entry and per-cpu
 * statistics.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL);
	neigh_ifdown(tbl, NULL);
	/* Any entry left at this point is a refcount leak. */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1652
1653 static struct neigh_table *neigh_find_table(int family)
1654 {
1655         struct neigh_table *tbl = NULL;
1656
1657         switch (family) {
1658         case AF_INET:
1659                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1660                 break;
1661         case AF_INET6:
1662                 tbl = neigh_tables[NEIGH_ND_TABLE];
1663                 break;
1664         case AF_DECnet:
1665                 tbl = neigh_tables[NEIGH_DN_TABLE];
1666                 break;
1667         }
1668
1669         return tbl;
1670 }
1671
/* RTM_DELNEIGH handler: delete the neighbour (or proxy) entry described by
 * the ndmsg header and the NDA_DST attribute.  Runs under RTNL.
 * Returns 0 on success or a negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	/* the destination address is mandatory */
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* NDA_DST must carry at least a full key for this family */
	if (nla_len(dst_attr) < (int)tbl->key_len)
		goto out;

	/* proxy entries live in the separate pneigh hash */
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	/* a real neighbour entry is always bound to a device */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* force the entry to NUD_FAILED (admin override), then unlink it */
	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	write_lock_bh(&tbl->lock);
	/* drop the reference taken by neigh_lookup(), then remove the
	 * entry from the hash table, all under tbl->lock
	 */
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1733
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry from
 * the ndmsg header plus NDA_DST / NDA_LLADDR attributes.  Honours the
 * NLM_F_CREATE / NLM_F_EXCL / NLM_F_REPLACE semantics.  Runs under RTNL.
 * Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* a supplied link-layer address must be at least as long as
		 * the device's hardware address
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* NDA_DST must carry at least a full key for this family */
	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	/* proxy entries live in the separate pneigh hash; create if needed
	 * (last argument 1) and refresh the flags
	 */
	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	/* a real neighbour entry is always bound to a device */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		/* entry does not exist yet: NLM_F_CREATE is required */
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		/* entry already exists: NLM_F_EXCL means fail */
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* without NLM_F_REPLACE an existing entry may be updated
		 * but not overridden
		 */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_USE) {
		/* NTF_USE: only mark the entry as used / kick resolution,
		 * do not change its state or address
		 */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1829
/* Emit one nested NDTA_PARMS attribute describing @parms into @skb.
 * Returns the value of nla_nest_end() on success, -ENOBUFS if the nest
 * could not be started, or -EMSGSIZE if the skb ran out of room (the
 * partially written nest is cancelled).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* one short-circuit chain: the first failing put aborts the fill */
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1877
/* Build one RTM_NEWNEIGHTBL message describing @tbl: gc thresholds,
 * NDTA_CONFIG snapshot, aggregated per-cpu NDTA_STATS and the table's
 * default parms.  The whole fill runs under tbl->lock (read side).
 * Returns 0 on success or -EMSGSIZE if @skb ran out of room.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* NDTA_CONFIG: a point-in-time snapshot of table geometry
		 * and activity; last_flush/last_rand are reported as ages
		 * in milliseconds relative to now
		 */
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* the hash table itself is RCU-managed; dereference it
		 * inside an RCU-bh read section
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* NDTA_STATS: sum the per-cpu counters into one struct */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* the table's default parms are never bound to a device */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1966
1967 static int neightbl_fill_param_info(struct sk_buff *skb,
1968                                     struct neigh_table *tbl,
1969                                     struct neigh_parms *parms,
1970                                     u32 pid, u32 seq, int type,
1971                                     unsigned int flags)
1972 {
1973         struct ndtmsg *ndtmsg;
1974         struct nlmsghdr *nlh;
1975
1976         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1977         if (nlh == NULL)
1978                 return -EMSGSIZE;
1979
1980         ndtmsg = nlmsg_data(nlh);
1981
1982         read_lock_bh(&tbl->lock);
1983         ndtmsg->ndtm_family = tbl->family;
1984         ndtmsg->ndtm_pad1   = 0;
1985         ndtmsg->ndtm_pad2   = 0;
1986
1987         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1988             neightbl_fill_parms(skb, parms) < 0)
1989                 goto errout;
1990
1991         read_unlock_bh(&tbl->lock);
1992         nlmsg_end(skb, nlh);
1993         return 0;
1994 errout:
1995         read_unlock_bh(&tbl->lock);
1996         nlmsg_cancel(skb, nlh);
1997         return -EMSGSIZE;
1998 }
1999
/* Attribute policy for the top-level RTM_SETNEIGHTBL attributes parsed in
 * neightbl_set()
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2008
/* Attribute policy for attributes nested inside NDTA_PARMS (parsed with
 * nla_parse_nested() in neightbl_set())
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
2025
/* RTM_SETNEIGHTBL handler: locate a neighbour table by NDTA_NAME (optionally
 * restricted by family) and update its gc thresholds/interval and/or one
 * neigh_parms set selected by NDTPA_IFINDEX inside NDTA_PARMS.
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	/* the table name is the lookup key and therefore mandatory */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* find the table matching name (and family, if one was given) */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every attribute that was present */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count; convert to the
				 * byte-based variable it replaced
				 */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* thresholds and gc_interval are per-table (global), so only the
	 * initial namespace may change them
	 */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2186
/* Netlink dump callback for RTM_GETNEIGHTBL: for each table emit the table
 * info followed by one message per per-device parms set in the caller's
 * namespace.  cb->args[0] holds the table index to resume from,
 * cb->args[1] the parms index within that table.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* skip the default parms (first on the list); they were
		 * already reported by neightbl_fill_info() above
		 */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	/* record resume position for the next dump round */
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2239
/* Build one RTM_NEWNEIGH message describing @neigh.  State, the hardware
 * address snapshot and the cacheinfo ages are read under neigh->lock so
 * they are mutually consistent.  Returns 0 or -EMSGSIZE.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* copy the hardware address while the lock guarantees it
		 * cannot change under us
		 */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* cacheinfo: ages (in clock_t ticks) since last use/confirm/update */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* -1: one reference is internal and not reported to userspace
	 * (NOTE(review): presumably the hash table's own reference — confirm)
	 */
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2292
2293 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2294                             u32 pid, u32 seq, int type, unsigned int flags,
2295                             struct neigh_table *tbl)
2296 {
2297         struct nlmsghdr *nlh;
2298         struct ndmsg *ndm;
2299
2300         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2301         if (nlh == NULL)
2302                 return -EMSGSIZE;
2303
2304         ndm = nlmsg_data(nlh);
2305         ndm->ndm_family  = tbl->family;
2306         ndm->ndm_pad1    = 0;
2307         ndm->ndm_pad2    = 0;
2308         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2309         ndm->ndm_type    = RTN_UNICAST;
2310         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2311         ndm->ndm_state   = NUD_NONE;
2312
2313         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2314                 goto nla_put_failure;
2315
2316         nlmsg_end(skb, nlh);
2317         return 0;
2318
2319 nla_put_failure:
2320         nlmsg_cancel(skb, nlh);
2321         return -EMSGSIZE;
2322 }
2323
/* Tell both in-kernel netevent listeners and RTM_NEWNEIGH netlink
 * subscribers that @neigh changed.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2329
2330 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2331 {
2332         struct net_device *master;
2333
2334         if (!master_idx)
2335                 return false;
2336
2337         master = netdev_master_upper_dev_get(dev);
2338         if (!master || master->ifindex != master_idx)
2339                 return true;
2340
2341         return false;
2342 }
2343
2344 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2345 {
2346         if (filter_idx && dev->ifindex != filter_idx)
2347                 return true;
2348
2349         return false;
2350 }
2351
/* Dump all neighbour entries of @tbl visible in the caller's namespace,
 * optionally filtered by NDA_IFINDEX / NDA_MASTER.  The hash table is
 * walked under rcu_read_lock_bh().  cb->args[1] holds the hash bucket and
 * cb->args[2] the index within the bucket to resume from.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* a parse failure just means the request carries no filter
	 * attributes; the dump proceeds unfiltered
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* only the first resumed bucket skips entries */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
			    neigh_master_filtered(n->dev, filter_master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* record resume position for the next dump round */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2414
/* Dump all proxy (pneigh) entries of @tbl visible in the caller's
 * namespace, walking the pneigh hash under tbl->lock (read side).
 * cb->args[3] holds the hash bucket and cb->args[4] the index within the
 * bucket to resume from.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* only the first resumed bucket skips entries */
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	/* record resume position for the next dump round */
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2452
/* RTM_GETNEIGH dump handler.  Walks every registered neigh table whose
 * family matches the request (rtgen_family == 0 means all families) and
 * dumps either the proxy entries (NTF_PROXY in ndm_flags) or the regular
 * neighbour cache.  cb->args[0] is the table index to resume from.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* moving on to a fresh table: wipe per-table resume state
		 * (args[1..]) so its dump starts from the beginning
		 */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2492
/* Invoke @cb(entry, @cookie) on every neighbour in @tbl.  Runs under
 * rcu_read_lock_bh() plus tbl->lock held as a reader so the hash table
 * cannot be resized during the walk.  @cb must not sleep or modify the
 * hash layout.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2514
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every chain and ask @cb whether each neighbour should be released.
 * Entries for which @cb returns non-zero are unlinked from the hash,
 * marked dead, and dropped via neigh_cleanup_and_release() (which may
 * free them once the last reference goes away).
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np always points at the link that leads to n, so an
		 * unlink is a single rcu_assign_pointer() through it.
		 */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* drop the table's reference outside n->lock */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2549
/* Transmit @skb to @addr on @dev through the neighbour layer.
 *
 * @index selects the table: a value below NEIGH_NR_TABLES resolves (or
 * creates) a neighbour entry and hands the skb to its ->output(); the
 * special NEIGH_LINK_TABLE value bypasses resolution and builds the
 * hard header directly before dev_queue_xmit().
 *
 * Returns ->output()/xmit result, or a negative errno; the skb is freed
 * on the labelled error paths.  NOTE(review): the early "!tbl" exit and
 * an unrecognized @index return -EAFNOSUPPORT *without* freeing the skb
 * -- presumably callers free it on that error; confirm against callers.
 */
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		if (index == NEIGH_ARP_TABLE) {
			/* ARP keys by the raw IPv4 address */
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2593
2594 #ifdef CONFIG_PROC_FS
2595
/* Return the first neighbour visible to this seq_file iteration: the
 * first entry (scanning from bucket 0) that belongs to this netns,
 * passes the optional neigh_sub_iter filter, and — when
 * NEIGH_SEQ_SKIP_NOARP is set — is not a NUD_NOARP-only entry.
 * Caller holds rcu_read_lock_bh() and tbl->lock (see neigh_seq_start).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;	/* initializer is overwritten below */

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2634
/* Advance the seq_file iteration past @n, applying the same netns /
 * sub-iterator / SKIP_NOARP filters as neigh_get_first() and moving to
 * the next hash bucket when the current chain runs out.  When @pos is
 * non-NULL it is decremented for each entry consumed (used by
 * neigh_get_idx to seek to an absolute position).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* a sub-iterator may yield several values per neighbour entry */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2682
2683 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2684 {
2685         struct neighbour *n = neigh_get_first(seq);
2686
2687         if (n) {
2688                 --(*pos);
2689                 while (*pos) {
2690                         n = neigh_get_next(seq, n, pos);
2691                         if (!n)
2692                                 break;
2693                 }
2694         }
2695         return *pos ? NULL : n;
2696 }
2697
2698 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2699 {
2700         struct neigh_seq_state *state = seq->private;
2701         struct net *net = seq_file_net(seq);
2702         struct neigh_table *tbl = state->tbl;
2703         struct pneigh_entry *pn = NULL;
2704         int bucket = state->bucket;
2705
2706         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2707         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2708                 pn = tbl->phash_buckets[bucket];
2709                 while (pn && !net_eq(pneigh_net(pn), net))
2710                         pn = pn->next;
2711                 if (pn)
2712                         break;
2713         }
2714         state->bucket = bucket;
2715
2716         return pn;
2717 }
2718
2719 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2720                                             struct pneigh_entry *pn,
2721                                             loff_t *pos)
2722 {
2723         struct neigh_seq_state *state = seq->private;
2724         struct net *net = seq_file_net(seq);
2725         struct neigh_table *tbl = state->tbl;
2726
2727         do {
2728                 pn = pn->next;
2729         } while (pn && !net_eq(pneigh_net(pn), net));
2730
2731         while (!pn) {
2732                 if (++state->bucket > PNEIGH_HASHMASK)
2733                         break;
2734                 pn = tbl->phash_buckets[state->bucket];
2735                 while (pn && !net_eq(pneigh_net(pn), net))
2736                         pn = pn->next;
2737                 if (pn)
2738                         break;
2739         }
2740
2741         if (pn && pos)
2742                 --(*pos);
2743
2744         return pn;
2745 }
2746
2747 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2748 {
2749         struct pneigh_entry *pn = pneigh_get_first(seq);
2750
2751         if (pn) {
2752                 --(*pos);
2753                 while (*pos) {
2754                         pn = pneigh_get_next(seq, pn, pos);
2755                         if (!pn)
2756                                 break;
2757                 }
2758         }
2759         return *pos ? NULL : pn;
2760 }
2761
2762 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2763 {
2764         struct neigh_seq_state *state = seq->private;
2765         void *rc;
2766         loff_t idxpos = *pos;
2767
2768         rc = neigh_get_idx(seq, &idxpos);
2769         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2770                 rc = pneigh_get_idx(seq, &idxpos);
2771
2772         return rc;
2773 }
2774
/* seq_file ->start for neighbour iteration.  Takes rcu_read_lock_bh()
 * and tbl->lock (reader) for the whole iteration; both are released in
 * neigh_seq_stop().  Returns SEQ_START_TOKEN for a fresh read, else the
 * entry at *pos.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is iteration-internal state, never caller-supplied */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);
	read_lock(&tbl->lock);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2792
2793 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2794 {
2795         struct neigh_seq_state *state;
2796         void *rc;
2797
2798         if (v == SEQ_START_TOKEN) {
2799                 rc = neigh_get_first(seq);
2800                 goto out;
2801         }
2802
2803         state = seq->private;
2804         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2805                 rc = neigh_get_next(seq, v, NULL);
2806                 if (rc)
2807                         goto out;
2808                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2809                         rc = pneigh_get_first(seq);
2810         } else {
2811                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2812                 rc = pneigh_get_next(seq, v, NULL);
2813         }
2814 out:
2815         ++(*pos);
2816         return rc;
2817 }
2818 EXPORT_SYMBOL(neigh_seq_next);
2819
/* seq_file ->stop: drop the locks taken in neigh_seq_start(), in the
 * reverse order they were acquired.
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2831
2832 /* statistics via seq_file */
2833
2834 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2835 {
2836         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2837         int cpu;
2838
2839         if (*pos == 0)
2840                 return SEQ_START_TOKEN;
2841
2842         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2843                 if (!cpu_possible(cpu))
2844                         continue;
2845                 *pos = cpu+1;
2846                 return per_cpu_ptr(tbl->stats, cpu);
2847         }
2848         return NULL;
2849 }
2850
2851 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2852 {
2853         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2854         int cpu;
2855
2856         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2857                 if (!cpu_possible(cpu))
2858                         continue;
2859                 *pos = cpu+1;
2860                 return per_cpu_ptr(tbl->stats, cpu);
2861         }
2862         (*pos)++;
2863         return NULL;
2864 }
2865
/* seq_file ->stop: nothing to release, the stats walk takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2870
/* seq_file ->show: print the header line for SEQ_START_TOKEN, else one
 * line of per-cpu counters.  Note the first field (entries) is a global
 * table-wide count repeated on every CPU's line, not a per-cpu value.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2905
/* seq_file operations backing /proc/net/stat/<table> */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2912 #endif /* CONFIG_PROC_FS */
2913
2914 static inline size_t neigh_nlmsg_size(void)
2915 {
2916         return NLMSG_ALIGN(sizeof(struct ndmsg))
2917                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2918                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2919                + nla_total_size(sizeof(struct nda_cacheinfo))
2920                + nla_total_size(4); /* NDA_PROBES */
2921 }
2922
/* Broadcast a netlink notification about @n to RTNLGRP_NEIGH listeners
 * in the device's netns.  GFP_ATOMIC because callers may hold locks /
 * run in softirq context.  On failure the error is latched on the netns
 * so the next netlink request reports it.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2947
/* Ask userspace (e.g. an arp daemon) to resolve @n by emitting an
 * RTM_GETNEIGH request notification.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
2953
#ifdef CONFIG_SYSCTL
/* Min/max bounds handed to proc_dointvec_minmax() via ctl_table.extra*. */
static int zero;
static int int_max = INT_MAX;
/* Cap for unres_qlen (in packets) so the packets->bytes conversion in
 * proc_unres_qlen() cannot overflow an int.
 */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2958
/* Sysctl handler for the legacy "unres_qlen" knob.  The backing storage
 * (QUEUE_LEN_BYTES) is in bytes, but this knob is expressed in packets,
 * so convert through SKB_TRUESIZE(ETH_FRAME_LEN) on both read and write.
 * A shadow ctl_table is used so bounds can be applied without touching
 * the shared template.  Note the conversion truncates, so reads may not
 * round-trip the exact written value.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2976
2977 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2978                                                    int family)
2979 {
2980         switch (family) {
2981         case AF_INET:
2982                 return __in_dev_arp_parms_get_rcu(dev);
2983         case AF_INET6:
2984                 return __in6_dev_nd_parms_get_rcu(dev);
2985         }
2986         return NULL;
2987 }
2988
/* Propagate a changed default parms value to every device in @net whose
 * own copy of that value has not been explicitly set (data_state bit
 * clear).  Devices that were configured individually keep their value.
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
3005
/* Post-write bookkeeping shared by all neigh sysctl handlers: mark the
 * touched parms slot as explicitly set, fire the netevent for probe-time
 * changes, and — for the "default" table (NULL dev) — copy the new value
 * to all devices that still track the default.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	/* ctl->data points into p->data[], so the difference is the slot */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
3022
3023 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3024                                            void __user *buffer,
3025                                            size_t *lenp, loff_t *ppos)
3026 {
3027         struct ctl_table tmp = *ctl;
3028         int ret;
3029
3030         tmp.extra1 = &zero;
3031         tmp.extra2 = &int_max;
3032
3033         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3034         neigh_proc_update(ctl, write);
3035         return ret;
3036 }
3037
3038 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3039                         void __user *buffer, size_t *lenp, loff_t *ppos)
3040 {
3041         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3042
3043         neigh_proc_update(ctl, write);
3044         return ret;
3045 }
3046 EXPORT_SYMBOL(neigh_proc_dointvec);
3047
3048 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3049                                 void __user *buffer,
3050                                 size_t *lenp, loff_t *ppos)
3051 {
3052         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3053
3054         neigh_proc_update(ctl, write);
3055         return ret;
3056 }
3057 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3058
3059 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3060                                               void __user *buffer,
3061                                               size_t *lenp, loff_t *ppos)
3062 {
3063         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3064
3065         neigh_proc_update(ctl, write);
3066         return ret;
3067 }
3068
3069 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3070                                    void __user *buffer,
3071                                    size_t *lenp, loff_t *ppos)
3072 {
3073         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3074
3075         neigh_proc_update(ctl, write);
3076         return ret;
3077 }
3078 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3079
3080 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3081                                           void __user *buffer,
3082                                           size_t *lenp, loff_t *ppos)
3083 {
3084         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3085
3086         neigh_proc_update(ctl, write);
3087         return ret;
3088 }
3089
3090 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3091                                           void __user *buffer,
3092                                           size_t *lenp, loff_t *ppos)
3093 {
3094         struct neigh_parms *p = ctl->extra2;
3095         int ret;
3096
3097         if (strcmp(ctl->procname, "base_reachable_time") == 0)
3098                 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3099         else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3100                 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3101         else
3102                 ret = -1;
3103
3104         if (write && ret == 0) {
3105                 /* update reachable_time as well, otherwise, the change will
3106                  * only be effective after the next time neigh_periodic_work
3107                  * decides to recompute it
3108                  */
3109                 p->reachable_time =
3110                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3111         }
3112         return ret;
3113 }
3114
/* Offset of data[index] within struct neigh_parms, encoded as a fake
 * pointer from address 0 (offsetof-style trick).  neigh_sysctl_register()
 * later adds the real parms address to turn this into a usable ->data.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build the ctl_table slot for NEIGH_VAR_<attr>, backed by the storage
 * of NEIGH_VAR_<data_attr> (usually the same variable).
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* "_REUSED" variants expose a second sysctl name (different unit) over
 * another entry's backing storage.
 */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3144
/* Template for the per-parms sysctl directory.  neigh_sysctl_register()
 * kmemdup()s this, rebases the per-parms ->data offsets onto the real
 * neigh_parms, and fills in the gc_* entries (which are per-table, not
 * per-parms, and are only exposed for the "default" directory).
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3199
/* Register the sysctl directory net/<ipv4|ipv6>/neigh/<dev|default> for
 * @p.  @dev is NULL for the per-table default parms; @handler, when
 * given, overrides the {retrans,base_reachable}_time(_ms) handlers
 * (used by e.g. decnet/ipv6 to add extra processing).
 * Returns 0 or -ENOBUFS.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Rebase the template's fake offsets (NEIGH_PARMS_DATA_OFFSET)
	 * onto this parms instance; stash dev/parms for the handlers.
	 */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		/* default dir also exposes the per-table gc knobs */
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3288
3289 void neigh_sysctl_unregister(struct neigh_parms *p)
3290 {
3291         if (p->sysctl_table) {
3292                 struct neigh_sysctl_table *t = p->sysctl_table;
3293                 p->sysctl_table = NULL;
3294                 unregister_net_sysctl_table(t->sysctl_header);
3295                 kfree(t);
3296         }
3297 }
3298 EXPORT_SYMBOL(neigh_sysctl_unregister);
3299
3300 #endif  /* CONFIG_SYSCTL */
3301
/* Register the rtnetlink message handlers for the generic neighbour
 * layer: add/delete/dump of individual neighbour entries, plus dump
 * and set of per-table parameters.  Runs once at boot as a subsystem
 * initcall so the handlers exist before protocol modules (IPv4/IPv6
 * ARP/NDISC) start using the neighbour tables.
 *
 * Returns 0 unconditionally; rtnl_register() reports its own failures
 * internally in this kernel version.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	/* RTM_GETNEIGH has no doit handler, only a dumpit. */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);