]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/core/neighbour.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[karo-tx-linux.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
40 #define NEIGH_DEBUG 1
41
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55
56 #define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86    However, the same lock is used to protect another entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
96    list of neighbour tables. This list is used only in process context,
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
/* Fallback output method installed on dead or torn-down entries: drop
 * the packet and report that the path is down.  Used by neigh_alloc()
 * as the initial output hook and by neigh_flush_dev() for stray entries.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
106
/* Final teardown for an entry that has been unlinked from its table:
 * run the optional per-parms cleanup hook, broadcast an RTM_DELNEIGH
 * netlink notification, and drop the table's reference (which may
 * free the entry via neigh_destroy()).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
115
116 /*
117  * It is random distribution in the interval (1/2)*base...(3/2)*base.
118  * It corresponds to default IPv6 settings and is not overridable,
119  * because it is really reasonable choice.
120  */
121
unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* Uniform pick from [base/2, 3*base/2); a zero base yields zero. */
	if (base == 0)
		return 0;

	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/* Synchronous shrink of the neighbour table under pressure: walk every
 * hash bucket and unlink entries that only the table still references
 * (refcnt == 1) and that are not NUD_PERMANENT.  Invoked from
 * neigh_alloc() when the entry count crosses gc_thresh2/gc_thresh3.
 * Returns 1 if at least one entry was reclaimed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink from the bucket; lockless RCU
				 * readers may still see the entry until
				 * a grace period elapses. */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Record the flush time so neigh_alloc() can rate-limit how often
	 * forced GC runs at the gc_thresh2 level. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
174
/* Arm the entry's state-machine timer for @when, taking a reference
 * that is dropped when the timer is handled or cancelled.  mod_timer()
 * returning nonzero means the timer was already pending, which the
 * state machine should never allow - complain loudly if it happens.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/* Remove every entry belonging to @dev (or all entries when @dev is
 * NULL) from the hash table.  Caller holds tbl->lock (see the
 * lockdep_is_held() annotations and neigh_changeaddr()/neigh_ifdown()).
 * Entries still referenced elsewhere are neutered in place: their
 * queues are purged and output is redirected to neigh_blackhole so the
 * remaining users cannot transmit through a dead device.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink first, then mark dead under n->lock. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
254
/* Flush all cached neighbour entries for @dev, e.g. after its link-layer
 * address changed.  Takes the table lock around neigh_flush_dev().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
262
/* Device-down handling: flush @dev's neighbour entries and its proxy
 * entries, then stop the proxy timer and drain the proxy queue.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* The proxy timer may still be pending; kill it after the lock is
	 * dropped, then free anything it had queued. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
275
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * Accounts the entry in tbl->entries up front and may trigger a forced
 * GC pass when the gc_thresh2/gc_thresh3 limits are crossed; fails with
 * NULL (and undoes the accounting) if the table is full or memory is
 * unavailable.  The entry starts with dead = 1; neigh_create() clears
 * that once the entry is actually linked into the hash table.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		/* Over the hard limit (or soft limit with a stale flush):
		 * only proceed if GC freed something, or we are merely
		 * above gc_thresh2. */
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	if (tbl->entry_size)
		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
	else {
		/* No fixed entry size: key and per-device private area
		 * are carved out behind struct neighbour. */
		int sz = sizeof(*n) + tbl->key_len;

		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
		sz += dev->neigh_priv_len;
		n = kzalloc(sz, GFP_ATOMIC);
	}
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
324
/* Seed one hash-random word.  Forcing the low bit guarantees the value
 * is odd and therefore never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
330
/* Allocate a hash table with 2^shift buckets.  Small bucket arrays come
 * from kzalloc; larger ones from whole zeroed pages (mirrored by
 * neigh_hash_free_rcu()).  Also seeds the per-table hash randoms.
 * Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
357
/* RCU callback that frees a retired hash table once all readers are
 * done with it.  Must use the same size-dependent free path that
 * neigh_hash_alloc() used for the allocation.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
372
/* Grow the hash table to 2^new_shift buckets.  Caller holds tbl->lock
 * (see the lockdep_is_held() annotations).  Every entry is rehashed
 * with the new table's randoms and pushed onto the front of its new
 * bucket; the new table is then published with rcu_assign_pointer()
 * and the old one freed after a grace period.  On allocation failure
 * the old table is returned unchanged.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* High bits of the hash select the bucket. */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
413
/* Look up the entry keyed by (@pkey, @dev) under rcu_read_lock_bh.
 * On a match, tries to take a reference with atomic_inc_not_zero();
 * if that fails the entry is concurrently being freed and NULL is
 * returned instead.  Note the hits counter is bumped on a key match
 * regardless of whether the reference grab succeeds.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
443
/* Like neigh_lookup(), but matches on (@pkey, @net) with no specific
 * device: the hash is computed with dev == NULL and candidates are
 * filtered by the namespace of their device.  Same refcount semantics
 * as neigh_lookup() - NULL if the matching entry is being freed.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
474
/* Create and insert a neighbour entry for (@pkey, @dev).
 * Runs the protocol constructor, the device's ndo_neigh_construct hook
 * and the per-parms neigh_setup hook before taking the table lock.
 * Under the lock it may grow the hash table, rejects insertion if the
 * parms block is already dead, and handles the insert race: if another
 * CPU inserted an equal entry first, that entry is returned (with a
 * reference) and the new one released.  Returns the inserted entry or
 * an ERR_PTR on failure; never NULL.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation so the entry does not look reachable. */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Insert race: someone else may have added an equal entry while
	 * we were running the constructors without the lock. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
560
561 static u32 pneigh_hash(const void *pkey, int key_len)
562 {
563         u32 hash_val = *(u32 *)(pkey + key_len - 4);
564         hash_val ^= (hash_val >> 16);
565         hash_val ^= hash_val >> 8;
566         hash_val ^= hash_val >> 4;
567         hash_val &= PNEIGH_HASHMASK;
568         return hash_val;
569 }
570
571 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
572                                               struct net *net,
573                                               const void *pkey,
574                                               int key_len,
575                                               struct net_device *dev)
576 {
577         while (n) {
578                 if (!memcmp(n->key, pkey, key_len) &&
579                     net_eq(pneigh_net(n), net) &&
580                     (n->dev == dev || !n->dev))
581                         return n;
582                 n = n->next;
583         }
584         return NULL;
585 }
586
/* Bare proxy lookup with no locking of its own; the caller is expected
 * to serialize against table updates (pneigh_lookup() wraps the same
 * walk in tbl->lock).  Returns the matching entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
597
/* Look up a proxy entry, optionally creating it when @creat is set.
 * Creation runs under RTNL (asserted) and uses GFP_KERNEL; the new
 * entry takes references on the namespace and (if given) the device,
 * runs the optional pconstructor, then is linked under tbl->lock.
 * Returns the entry, or NULL on lookup miss (with !creat) or failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the references and bail. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
643
644
/* Remove and free the proxy entry matching (@pkey, @dev, @net).
 * The entry is unlinked under tbl->lock; the destructor and the
 * reference drops happen after the lock is released.  Returns 0 on
 * success, -ENOENT if no entry matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
671
/* Drop every proxy entry for @dev (all entries when @dev is NULL).
 * Called with tbl->lock held (see neigh_ifdown()).  Always returns
 * -ENOENT; the sole caller ignores the return value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
695
696 static void neigh_parms_destroy(struct neigh_parms *parms);
697
/* Drop one reference on a parms block, destroying it when the last
 * reference goes away.
 */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
703
/*
 *	Final destructor, invoked when the last reference is dropped.
 *	The neighbour must already be out of the table (dead set by the
 *	unlink paths); a live entry here indicates a refcounting bug, so
 *	we warn and leak rather than corrupt the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* The unlink paths already cancel the timer, so a pending timer
	 * here should be impossible. */
	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
739
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	/* Route output through the generic ops->output slow path. */
	neigh->output = neigh->ops->output;
}
751
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	/* Switch output to the ops->connected_output fast path. */
	neigh->output = neigh->ops->connected_output;
}
763
764 static void neigh_periodic_work(struct work_struct *work)
765 {
766         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
767         struct neighbour *n;
768         struct neighbour __rcu **np;
769         unsigned int i;
770         struct neigh_hash_table *nht;
771
772         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
773
774         write_lock_bh(&tbl->lock);
775         nht = rcu_dereference_protected(tbl->nht,
776                                         lockdep_is_held(&tbl->lock));
777
778         /*
779          *      periodically recompute ReachableTime from random function
780          */
781
782         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
783                 struct neigh_parms *p;
784                 tbl->last_rand = jiffies;
785                 for (p = &tbl->parms; p; p = p->next)
786                         p->reachable_time =
787                                 neigh_rand_reach_time(p->base_reachable_time);
788         }
789
790         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
791                 np = &nht->hash_buckets[i];
792
793                 while ((n = rcu_dereference_protected(*np,
794                                 lockdep_is_held(&tbl->lock))) != NULL) {
795                         unsigned int state;
796
797                         write_lock(&n->lock);
798
799                         state = n->nud_state;
800                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
801                                 write_unlock(&n->lock);
802                                 goto next_elt;
803                         }
804
805                         if (time_before(n->used, n->confirmed))
806                                 n->used = n->confirmed;
807
808                         if (atomic_read(&n->refcnt) == 1 &&
809                             (state == NUD_FAILED ||
810                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
811                                 *np = n->next;
812                                 n->dead = 1;
813                                 write_unlock(&n->lock);
814                                 neigh_cleanup_and_release(n);
815                                 continue;
816                         }
817                         write_unlock(&n->lock);
818
819 next_elt:
820                         np = &n->next;
821                 }
822                 /*
823                  * It's fine to release lock here, even if hash table
824                  * grows while we are preempted.
825                  */
826                 write_unlock_bh(&tbl->lock);
827                 cond_resched();
828                 write_lock_bh(&tbl->lock);
829                 nht = rcu_dereference_protected(tbl->nht,
830                                                 lockdep_is_held(&tbl->lock));
831         }
832         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
833          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
834          * base_reachable_time.
835          */
836         schedule_delayed_work(&tbl->gc_work,
837                               tbl->parms.base_reachable_time >> 1);
838         write_unlock_bh(&tbl->lock);
839 }
840
841 static __inline__ int neigh_max_probes(struct neighbour *n)
842 {
843         struct neigh_parms *p = n->parms;
844         return (n->nud_state & NUD_PROBE) ?
845                 p->ucast_probes :
846                 p->ucast_probes + p->app_probes + p->mcast_probes;
847 }
848
/* Resolution failed: report unreachability for every queued skb and
 * flush the queue.  neigh->lock is dropped around each error_report
 * call (hence the __releases/__acquires annotations) and the NUD_FAILED
 * recheck guards against the state changing while unlocked.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
873
/* Send one solicitation for this neighbour and count the attempt.
 *
 * Called with neigh->lock write-held; the lock is released before
 * ops->solicit() runs (see __releases annotation).  The first queued
 * packet is peeked and copied rather than dequeued, so solicit() still
 * gets a valid skb even if arp_queue is purged once the lock is dropped.
 */
static void neigh_probe(struct neighbour *neigh)
        __releases(neigh->lock)
{
        struct sk_buff *skb = skb_peek(&neigh->arp_queue);
        /* keep skb alive even if arp_queue overflows */
        if (skb)
                skb = skb_copy(skb, GFP_ATOMIC);
        write_unlock(&neigh->lock);
        neigh->ops->solicit(neigh, skb);
        atomic_inc(&neigh->probes);
        /* kfree_skb(NULL) is a no-op, so the !skb / copy-failed case is fine */
        kfree_skb(skb);
}
886
887 /* Called when a timer expires for a neighbour entry. */
888
/* Per-neighbour NUD state-machine timer.
 *
 * Drives the REACHABLE -> DELAY -> PROBE -> FAILED transitions, re-arms
 * the timer for the next deadline, and kicks off probes when needed.
 * Runs with a reference held on the entry (taken when the timer was
 * armed); it is dropped by neigh_release() at the bottom.
 */
static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned int state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;        /* default: re-check in one second */

        /* Timer fired for a state that doesn't use it (e.g. it was
         * changed under us before we got the lock) - nothing to do.
         */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Still confirmed recently enough - stay REACHABLE */
                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Recently used but not confirmed: enter DELAY and
                         * give upper layers a chance to confirm passively.
                         */
                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        /* Idle and unconfirmed: demote to STALE */
                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* Confirmation arrived while we were delaying */
                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* No confirmation: start active unicast probing */
                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        /* Probe budget exhausted: declare resolution failed */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Clamp the deadline so we never re-arm closer than HZ/2 */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() drops neigh->lock for us */
                neigh_probe(neigh);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        neigh_release(neigh);
}
970
/* Slow path of neigh_event_send(): start (or continue) address resolution
 * for @neigh, queueing @skb until the entry becomes valid.
 *
 * Returns 0 if the caller may transmit immediately (entry already
 * CONNECTED/DELAY/PROBE), 1 if the skb was queued (or dropped after a
 * hard failure) and the caller must not transmit.
 *
 * Takes ownership of @skb on the paths that return 1.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        unsigned long next, now = jiffies;

                        /* Fresh entry: charge the unicast probe budget up
                         * front and enter INCOMPLETE; the first multicast
                         * probe is sent below via neigh_probe().
                         */
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(neigh->parms->retrans_time, HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* No probing configured at all: fail immediately
                         * and drop the packet.
                         */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                /* STALE: allow traffic to flow, but re-verify shortly */
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Enforce the per-neighbour byte quota by dropping
                         * the oldest queued packets first.
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               neigh->parms->queue_len_bytes) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        /* neigh_probe() releases neigh->lock itself (plain write_unlock),
         * which is why the lock and BH-disable are undone separately here
         * instead of with a single write_unlock_bh().
         */
        if (immediate_probe)
                neigh_probe(neigh);
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1036
1037 static void neigh_update_hhs(struct neighbour *neigh)
1038 {
1039         struct hh_cache *hh;
1040         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1041                 = NULL;
1042
1043         if (neigh->dev->header_ops)
1044                 update = neigh->dev->header_ops->cache_update;
1045
1046         if (update) {
1047                 hh = &neigh->hh;
1048                 if (hh->hh_len) {
1049                         write_seqlock_bh(&hh->hh_lock);
1050                         update(hh, neigh->dev, neigh->ha);
1051                         write_sequnlock_bh(&hh->hh_lock);
1052                 }
1053         }
1054 }
1055
1056
1057
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
                                lladdr instead of overriding it
                                if it is different.
                                It also allows the current state to be
                                retained if lladdr is unchanged.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates that the neighbour is known to be
                                a router.

   Caller MUST hold a reference count on the entry.
 */
1078
/* Apply a state and/or link-layer-address change to @neigh.
 * Flag semantics are documented in the comment block above.
 * Returns 0 on success, -EPERM for a non-admin change to a
 * NOARP/PERMANENT entry, -EINVAL when no lladdr is available.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* NOARP/PERMANENT entries may only be changed administratively */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        if (!(new & NUD_VALID)) {
                /* Transition to an invalid state: stop the timer and,
                 * when resolution was in flight, flush the pending queue.
                 */
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Weak override: keep the cached lladdr but
                                 * demote the entry to STALE for re-verification.
                                 */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        if (lladdr != neigh->ha) {
                /* ha_lock is a seqlock: lockless readers retry on change */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                /* Backdate 'confirmed' so an unconfirmed entry will be
                 * re-verified promptly.
                 */
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Entry just became valid: flush packets that were queued
                 * while resolution was pending.
                 */
                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        /* Lock dropped around output: callbacks may re-enter */
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
                                n1 = n2;
                        n1->output(n1, skb);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1226
1227 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1228                                  u8 *lladdr, void *saddr,
1229                                  struct net_device *dev)
1230 {
1231         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1232                                                  lladdr || !dev->addr_len);
1233         if (neigh)
1234                 neigh_update(neigh, lladdr, NUD_STALE,
1235                              NEIGH_UPDATE_F_OVERRIDE);
1236         return neigh;
1237 }
1238 EXPORT_SYMBOL(neigh_event_ns);
1239
1240 /* called with read_lock_bh(&n->lock); */
1241 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1242 {
1243         struct net_device *dev = dst->dev;
1244         __be16 prot = dst->ops->protocol;
1245         struct hh_cache *hh = &n->hh;
1246
1247         write_lock_bh(&n->lock);
1248
1249         /* Only one thread can come in here and initialize the
1250          * hh_cache entry.
1251          */
1252         if (!hh->hh_len)
1253                 dev->header_ops->cache(n, hh, prot);
1254
1255         write_unlock_bh(&n->lock);
1256 }
1257
1258 /* This function can be used in contexts, where only old dev_queue_xmit
1259  * worked, f.e. if you want to override normal output path (eql, shaper),
1260  * but resolution is not made yet.
1261  */
1262
1263 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1264 {
1265         struct net_device *dev = skb->dev;
1266
1267         __skb_pull(skb, skb_network_offset(skb));
1268
1269         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1270                             skb->len) < 0 &&
1271             dev->header_ops->rebuild(skb))
1272                 return 0;
1273
1274         return dev_queue_xmit(skb);
1275 }
1276 EXPORT_SYMBOL(neigh_compat_output);
1277
1278 /* Slow and careful. */
1279
/* Resolving transmit path: trigger/await neighbour resolution via
 * neigh_event_send(); once the entry is valid, build the hardware
 * header (under the ha seqlock) and hand the skb to the device.
 * Returns the dev_queue_xmit() result, 0 when the skb was queued for
 * resolution, or -EINVAL on error (skb freed).
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        int rc = 0;

        if (!dst)
                goto discard;

        __skb_pull(skb, skb_network_offset(skb));

        /* neigh_event_send() returns nonzero when the skb was consumed
         * (queued pending resolution, or dropped on failure).
         */
        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                if (dev->header_ops->cache && !neigh->hh.hh_len)
                        neigh_hh_init(neigh, dst);

                /* Seqlock read loop: retry if ha changed under us */
                do {
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = dev_queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        /* Note: discard deliberately falls through to out_kfree_skb */
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                      dst, neigh);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1320
1321 /* As fast as possible without hh cache */
1322
1323 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1324 {
1325         struct net_device *dev = neigh->dev;
1326         unsigned int seq;
1327         int err;
1328
1329         __skb_pull(skb, skb_network_offset(skb));
1330
1331         do {
1332                 seq = read_seqbegin(&neigh->ha_lock);
1333                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1334                                       neigh->ha, NULL, skb->len);
1335         } while (read_seqretry(&neigh->ha_lock, seq));
1336
1337         if (err >= 0)
1338                 err = dev_queue_xmit(skb);
1339         else {
1340                 err = -EINVAL;
1341                 kfree_skb(skb);
1342         }
1343         return err;
1344 }
1345 EXPORT_SYMBOL(neigh_connected_output);
1346
/* Pass-through output path: no link-layer header handling at all,
 * the skb goes straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1352
/* Proxy-queue timer handler: walk tbl->proxy_queue, re-process every
 * packet whose scheduled time has arrived via tbl->proxy_redo(), and
 * re-arm the timer for the earliest packet still pending.
 */
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;    /* smallest remaining delay; 0 = none pending */
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                /* Signed delta: <= 0 means this packet's time has come */
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        struct net_device *dev = skb->dev;

                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev)) {
                                rcu_read_lock();
                                tbl->proxy_redo(skb);
                                rcu_read_unlock();
                        } else {
                                kfree_skb(skb);
                        }

                        /* Drop the reference taken in pneigh_enqueue() */
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
1386
1387 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1388                     struct sk_buff *skb)
1389 {
1390         unsigned long now = jiffies;
1391         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1392
1393         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1394                 kfree_skb(skb);
1395                 return;
1396         }
1397
1398         NEIGH_CB(skb)->sched_next = sched_next;
1399         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1400
1401         spin_lock(&tbl->proxy_queue.lock);
1402         if (del_timer(&tbl->proxy_timer)) {
1403                 if (time_before(tbl->proxy_timer.expires, sched_next))
1404                         sched_next = tbl->proxy_timer.expires;
1405         }
1406         skb_dst_drop(skb);
1407         dev_hold(skb->dev);
1408         __skb_queue_tail(&tbl->proxy_queue, skb);
1409         mod_timer(&tbl->proxy_timer, sched_next);
1410         spin_unlock(&tbl->proxy_queue.lock);
1411 }
1412 EXPORT_SYMBOL(pneigh_enqueue);
1413
1414 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1415                                                       struct net *net, int ifindex)
1416 {
1417         struct neigh_parms *p;
1418
1419         for (p = &tbl->parms; p; p = p->next) {
1420                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1421                     (!p->dev && !ifindex))
1422                         return p;
1423         }
1424
1425         return NULL;
1426 }
1427
/* Allocate per-device neighbour parameters for @dev by cloning the
 * table's default parms, giving the driver a chance to adjust them via
 * ndo_neigh_setup(), and linking them into tbl->parms list.
 * Returns the new parms (refcount 1, holding references on @dev and its
 * netns) or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p, *ref;
        struct net *net = dev_net(dev);
        const struct net_device_ops *ops = dev->netdev_ops;

        /* Clone from the table-wide defaults (ifindex 0 entry) */
        ref = lookup_neigh_parms(tbl, net, 0);
        if (!ref)
                return NULL;

        p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
        if (p) {
                p->tbl            = tbl;
                atomic_set(&p->refcnt, 1);
                /* Randomize so not all entries expire simultaneously */
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);

                /* Let the driver veto or tweak the parameters */
                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                        kfree(p);
                        return NULL;
                }

                dev_hold(dev);
                p->dev = dev;
                write_pnet(&p->net, hold_net(net));
                p->sysctl_table = NULL;
                write_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
                write_unlock_bh(&tbl->lock);
        }
        return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1463
/* RCU callback: drop the final reference on a neigh_parms that was
 * unlinked in neigh_parms_release(), after all readers are done.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
        struct neigh_parms *parms =
                container_of(head, struct neigh_parms, rcu_head);

        neigh_parms_put(parms);
}
1471
/* Unlink @parms from the table's list, mark it dead, release the device
 * reference, and defer the final put to RCU so concurrent lockless
 * readers stay safe.  The table's default parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        struct neigh_parms **p;

        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
                        parms->dead = 1;
                        write_unlock_bh(&tbl->lock);
                        if (parms->dev)
                                dev_put(parms->dev);
                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1494
/* Final destruction of a neigh_parms: release its netns hold and free it.
 * Reached via neigh_parms_put() once the refcount drops to zero.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        release_net(neigh_parms_net(parms));
        kfree(parms);
}
1500
1501 static struct lock_class_key neigh_table_proxy_queue_class;
1502
/* Core initialization of a neigh_table: default parms, per-CPU stats,
 * proc entry, hash tables, GC work and the proxy timer/queue.
 * Allocation failures panic - this runs at subsystem init time where
 * there is no sane recovery.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        /* Randomize so entries don't all expire at the same moment */
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        /* Initial neighbour hash: 2^3 = 8 buckets, grows on demand */
        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        rwlock_init(&tbl->lock);
        INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
1541
/* Initialize @tbl and register it on the global neigh_tables list.
 * A second registration for the same address family is still linked in
 * but loudly reported - only the first table on the list will be found
 * by family lookups.
 */
void neigh_table_init(struct neigh_table *tbl)
{
        struct neigh_table *tmp;

        neigh_table_init_no_netlink(tbl);
        write_lock(&neigh_tbl_lock);
        /* Scan for an already-registered table of the same family */
        for (tmp = neigh_tables; tmp; tmp = tmp->next) {
                if (tmp->family == tbl->family)
                        break;
        }
        tbl->next       = neigh_tables;
        neigh_tables    = tbl;
        write_unlock(&neigh_tbl_lock);

        if (unlikely(tmp)) {
                printk(KERN_ERR "NEIGH: Registering multiple tables for "
                       "family %d\n", tbl->family);
                dump_stack();
        }
}
EXPORT_SYMBOL(neigh_table_init);
1563
/* Tear down @tbl: stop GC work and the proxy timer, flush all entries,
 * unlink the table from the global list, and free its hashes, proc
 * entry and statistics.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        /* All entries should be gone by now; a leak means dangling refs */
        if (atomic_read(&tbl->entries))
                printk(KERN_CRIT "neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);

        /* Free the hash table after a grace period; writers are gone */
        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
                 neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1599
/* Netlink RTM_DELNEIGH handler: delete a (proxy) neighbour entry
 * identified by family, NDA_DST and optional ifindex.  A normal entry
 * is "deleted" by forcing it into NUD_FAILED via an administrative
 * neigh_update().  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *neigh;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Found the family's table: drop the list lock; every
                 * path below exits the loop via goto out.
                 */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(dst_attr) < tbl->key_len)
                        goto out;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                        goto out;
                }

                /* Non-proxy deletion requires a device */
                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
                if (neigh == NULL) {
                        err = -ENOENT;
                        goto out;
                }

                err = neigh_update(neigh, NULL, NUD_FAILED,
                                   NEIGH_UPDATE_F_OVERRIDE |
                                   NEIGH_UPDATE_F_ADMIN);
                neigh_release(neigh);
                goto out;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;

out:
        return err;
}
1663
/* RTM_NEWNEIGH handler: create or update a neighbour cache entry, or a
 * proxy (pneigh) entry when NTF_PROXY is set in ndm_flags.
 *
 * Runs under RTNL (asserted below).  Returns 0 on success or a negative
 * errno (-EINVAL on malformed attributes, -ENODEV for an unknown
 * ifindex, -ENOENT when the entry is missing and NLM_F_CREATE is not
 * set, -EEXIST for NLM_F_EXCL on an existing entry, -EAFNOSUPPORT when
 * no table matches the address family).
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	/* A destination address is mandatory. */
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must be at least as long
		 * as the device's hardware address.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	/* Find the table matching the request's address family.  The
	 * list lock is dropped as soon as a match is found; only that
	 * single table is used below.
	 */
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		/* Proxy entries live in the per-table pneigh hash;
		 * pneigh_lookup() with creat=1 creates the entry if it
		 * does not exist yet.
		 */
		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* Non-proxy entries are always bound to a device. */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE an existing lladdr may
			 * only be confirmed, not overridden.
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE just kicks resolution for the entry;
			 * it does not change state or address.
			 */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1761
1762 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1763 {
1764         struct nlattr *nest;
1765
1766         nest = nla_nest_start(skb, NDTA_PARMS);
1767         if (nest == NULL)
1768                 return -ENOBUFS;
1769
1770         if ((parms->dev &&
1771              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1772             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1773             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1774             /* approximative value for deprecated QUEUE_LEN (in packets) */
1775             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1776                         DIV_ROUND_UP(parms->queue_len_bytes,
1777                                      SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1778             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1779             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1780             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1781             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1782             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1783             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1784                           parms->base_reachable_time) ||
1785             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1786             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1787                           parms->delay_probe_time) ||
1788             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1789             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1790             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1791             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1792                 goto nla_put_failure;
1793         return nla_nest_end(skb, nest);
1794
1795 nla_put_failure:
1796         nla_nest_cancel(skb, nest);
1797         return -EMSGSIZE;
1798 }
1799
/* Dump one neighbour table as an RTM_NEWNEIGHTBL message: name, gc
 * thresholds and interval, an NDTA_CONFIG snapshot, per-CPU statistics
 * aggregated into NDTA_STATS, and the table's default parameter set.
 * Returns the nlmsg_end() result on success or -EMSGSIZE on overflow.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
                              u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold the table lock so the dumped values form a consistent
	 * snapshot w.r.t. concurrent neightbl_set() / timer updates.
	 */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed (it may be
		 * resized concurrently), so read it under RCU.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Statistics are kept per CPU; sum them for the dump. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parameter set must never be bound to a
	 * device; per-device sets are dumped separately.
	 */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1885
1886 static int neightbl_fill_param_info(struct sk_buff *skb,
1887                                     struct neigh_table *tbl,
1888                                     struct neigh_parms *parms,
1889                                     u32 pid, u32 seq, int type,
1890                                     unsigned int flags)
1891 {
1892         struct ndtmsg *ndtmsg;
1893         struct nlmsghdr *nlh;
1894
1895         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1896         if (nlh == NULL)
1897                 return -EMSGSIZE;
1898
1899         ndtmsg = nlmsg_data(nlh);
1900
1901         read_lock_bh(&tbl->lock);
1902         ndtmsg->ndtm_family = tbl->family;
1903         ndtmsg->ndtm_pad1   = 0;
1904         ndtmsg->ndtm_pad2   = 0;
1905
1906         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1907             neightbl_fill_parms(skb, parms) < 0)
1908                 goto errout;
1909
1910         read_unlock_bh(&tbl->lock);
1911         return nlmsg_end(skb, nlh);
1912 errout:
1913         read_unlock_bh(&tbl->lock);
1914         nlmsg_cancel(skb, nlh);
1915         return -EMSGSIZE;
1916 }
1917
/* Attribute validation policy for RTM_*NEIGHTBL requests. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1926
/* Validation policy for the nested NDTA_PARMS attributes; U64 entries
 * carry times in milliseconds (see nla_get_msecs() users below).
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1942
/* RTM_SETNEIGHTBL handler: update a neighbour table's gc thresholds
 * and interval, and optionally one of its parameter sets (selected by
 * NDTPA_IFINDEX inside NDTA_PARMS; 0 or absent means the default set).
 * The table is identified by NDTA_NAME and, if non-zero, the message's
 * address family.  Returns 0 or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied parameter attribute in turn. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Legacy attribute counts packets;
				 * convert to bytes using a typical
				 * ethernet frame truesize.
				 */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2071
/* Netlink dump callback for RTM_GETNEIGHTBL: emit every neighbour
 * table (optionally filtered by family) followed by its per-device
 * parameter sets.  cb->args[0] holds the table index and cb->args[1]
 * the parms index where a previous, truncated dump stopped, so the
 * dump can resume on the next invocation.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* The default parameter set was already included by
		 * neightbl_fill_info(); only per-device sets (starting
		 * at parms.next) are dumped here.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2120
/* Fill one RTM_NEWNEIGH (or type as given) message for a neighbour
 * cache entry: ndmsg header, NDA_DST key, NDA_LLADDR when the entry is
 * valid, NDA_PROBES and NDA_CACHEINFO.  Returns the nlmsg_end() result
 * on success or -EMSGSIZE on overflow.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* Snapshot nud_state, hardware address and the timestamps under
	 * the entry's lock so they are mutually consistent.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Exclude the reference the dumper itself holds. */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2172
2173 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2174                             u32 pid, u32 seq, int type, unsigned int flags,
2175                             struct neigh_table *tbl)
2176 {
2177         struct nlmsghdr *nlh;
2178         struct ndmsg *ndm;
2179
2180         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2181         if (nlh == NULL)
2182                 return -EMSGSIZE;
2183
2184         ndm = nlmsg_data(nlh);
2185         ndm->ndm_family  = tbl->family;
2186         ndm->ndm_pad1    = 0;
2187         ndm->ndm_pad2    = 0;
2188         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2189         ndm->ndm_type    = NDA_DST;
2190         ndm->ndm_ifindex = pn->dev->ifindex;
2191         ndm->ndm_state   = NUD_NONE;
2192
2193         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2194                 goto nla_put_failure;
2195
2196         return nlmsg_end(skb, nlh);
2197
2198 nla_put_failure:
2199         nlmsg_cancel(skb, nlh);
2200         return -EMSGSIZE;
2201 }
2202
/* Propagate a neighbour change to both in-kernel listeners (netevent
 * notifier chain) and user space (RTM_NEWNEIGH via rtnetlink).
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2208
/* Dump the neighbour entries of @tbl that belong to the requesting
 * netns.  The hash table is walked under RCU.  cb->args[1] / cb->args[2]
 * record the bucket and in-bucket index where a truncated dump stopped,
 * so a subsequent call resumes there.  Returns skb->len on a complete
 * pass or -1 when the skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = 0; h < (1 << nht->hash_shift); h++) {
		if (h < s_h)
			continue;
		/* Only the bucket the previous dump stopped in keeps
		 * its saved index; later buckets start from 0.
		 */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2251
2252 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2253                              struct netlink_callback *cb)
2254 {
2255         struct pneigh_entry *n;
2256         struct net *net = sock_net(skb->sk);
2257         int rc, h, s_h = cb->args[3];
2258         int idx, s_idx = idx = cb->args[4];
2259
2260         read_lock_bh(&tbl->lock);
2261
2262         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
2263                 if (h < s_h)
2264                         continue;
2265                 if (h > s_h)
2266                         s_idx = 0;
2267                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2268                         if (dev_net(n->dev) != net)
2269                                 continue;
2270                         if (idx < s_idx)
2271                                 goto next;
2272                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2273                                             cb->nlh->nlmsg_seq,
2274                                             RTM_NEWNEIGH,
2275                                             NLM_F_MULTI, tbl) <= 0) {
2276                                 read_unlock_bh(&tbl->lock);
2277                                 rc = -1;
2278                                 goto out;
2279                         }
2280                 next:
2281                         idx++;
2282                 }
2283         }
2284
2285         read_unlock_bh(&tbl->lock);
2286         rc = skb->len;
2287 out:
2288         cb->args[3] = h;
2289         cb->args[4] = idx;
2290         return rc;
2291
2292 }
2293
/* Netlink dump callback for RTM_GETNEIGH: walk every neighbour table
 * (optionally filtered by family) and dump either its proxy entries -
 * when the request carries a full ndmsg with ndm_flags == NTF_PROXY -
 * or its regular neighbour entries.  cb->args[0] holds the table index
 * to resume from; args[1..] belong to the per-table dump helpers.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err = 0;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Starting a new table: clear the per-table resume
		 * state (args[1..]) left over from the previous one.
		 */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2330
2331 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2332 {
2333         int chain;
2334         struct neigh_hash_table *nht;
2335
2336         rcu_read_lock_bh();
2337         nht = rcu_dereference_bh(tbl->nht);
2338
2339         read_lock(&tbl->lock); /* avoid resizes */
2340         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2341                 struct neighbour *n;
2342
2343                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2344                      n != NULL;
2345                      n = rcu_dereference_bh(n->next))
2346                         cb(n, cookie);
2347         }
2348         read_unlock(&tbl->lock);
2349         rcu_read_unlock_bh();
2350 }
2351 EXPORT_SYMBOL(neigh_for_each);
2352
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every entry of @tbl and release those for which @cb returns
 * non-zero: the entry is unlinked from its hash chain, marked dead and
 * handed to neigh_cleanup_and_release() after its own lock is dropped.
 * Entries @cb rejects are left in place.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np always points at the link that leads to n, so the
		 * unlink below is a single pointer update.
		 */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Cleanup happens after n->lock is released. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2387
2388 #ifdef CONFIG_PROC_FS
2389
/* seq_file helper: return the first neighbour entry visible to this
 * iteration - same netns, accepted by neigh_sub_iter (if any), and not
 * filtered out by NEIGH_SEQ_SKIP_NOARP.  Always scans from bucket 0 and
 * records the bucket it stopped in into state->bucket.  Caller holds
 * the RCU read side (see the neigh_seq_* entry points).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			/* Give protocol-specific sub-iterators (e.g.
			 * per-entry expansion) a chance to skip n.
			 */
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2428
/* seq_file helper: advance from entry @n to the next visible one,
 * continuing into later hash buckets as needed, applying the same
 * netns / sub-iterator / NUD_NOARP filters as neigh_get_first().
 * Decrements *pos when (and only when) a next entry is found; returns
 * NULL at the end of the table.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* The sub-iterator may produce further items for the current
	 * entry before we move on to n->next.
	 */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Current bucket exhausted - move to the next one. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2476
2477 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2478 {
2479         struct neighbour *n = neigh_get_first(seq);
2480
2481         if (n) {
2482                 --(*pos);
2483                 while (*pos) {
2484                         n = neigh_get_next(seq, n, pos);
2485                         if (!n)
2486                                 break;
2487                 }
2488         }
2489         return *pos ? NULL : n;
2490 }
2491
2492 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2493 {
2494         struct neigh_seq_state *state = seq->private;
2495         struct net *net = seq_file_net(seq);
2496         struct neigh_table *tbl = state->tbl;
2497         struct pneigh_entry *pn = NULL;
2498         int bucket = state->bucket;
2499
2500         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2501         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2502                 pn = tbl->phash_buckets[bucket];
2503                 while (pn && !net_eq(pneigh_net(pn), net))
2504                         pn = pn->next;
2505                 if (pn)
2506                         break;
2507         }
2508         state->bucket = bucket;
2509
2510         return pn;
2511 }
2512
2513 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2514                                             struct pneigh_entry *pn,
2515                                             loff_t *pos)
2516 {
2517         struct neigh_seq_state *state = seq->private;
2518         struct net *net = seq_file_net(seq);
2519         struct neigh_table *tbl = state->tbl;
2520
2521         do {
2522                 pn = pn->next;
2523         } while (pn && !net_eq(pneigh_net(pn), net));
2524
2525         while (!pn) {
2526                 if (++state->bucket > PNEIGH_HASHMASK)
2527                         break;
2528                 pn = tbl->phash_buckets[state->bucket];
2529                 while (pn && !net_eq(pneigh_net(pn), net))
2530                         pn = pn->next;
2531                 if (pn)
2532                         break;
2533         }
2534
2535         if (pn && pos)
2536                 --(*pos);
2537
2538         return pn;
2539 }
2540
2541 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2542 {
2543         struct pneigh_entry *pn = pneigh_get_first(seq);
2544
2545         if (pn) {
2546                 --(*pos);
2547                 while (*pos) {
2548                         pn = pneigh_get_next(seq, pn, pos);
2549                         if (!pn)
2550                                 break;
2551                 }
2552         }
2553         return *pos ? NULL : pn;
2554 }
2555
2556 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2557 {
2558         struct neigh_seq_state *state = seq->private;
2559         void *rc;
2560         loff_t idxpos = *pos;
2561
2562         rc = neigh_get_idx(seq, &idxpos);
2563         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2564                 rc = pneigh_get_idx(seq, &idxpos);
2565
2566         return rc;
2567 }
2568
/*
 * Shared seq_file ->start() helper for protocol neighbour tables
 * (e.g. ARP/NDISC /proc files).  Takes rcu_read_lock_bh() for the
 * whole dump; the matching unlock is in neigh_seq_stop().
 *
 * @neigh_seq_flags: caller-supplied NEIGH_SEQ_NEIGH_ONLY and/or
 * NEIGH_SEQ_SKIP_NOARP.  NEIGH_SEQ_IS_PNEIGH is masked off because it
 * is an internal marker set only when the walk enters the proxy phase.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	/* *pos == 0 means the header line is emitted first */
	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2584
/*
 * seq_file ->next() helper: advance from record @v to the following
 * one.  Regular neighbour entries are walked first; once exhausted,
 * and unless NEIGH_SEQ_NEIGH_ONLY was requested, the walk continues
 * into the proxy (pneigh) entries.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		/* header was just printed; hand out the first real entry */
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* main table done; switch to the proxy entries if allowed */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* already in the pneigh phase; NEIGH_ONLY must not reach it */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2611
/* seq_file ->stop() helper: drop the BH-RCU lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2618
2619 /* statistics via seq_file */
2620
2621 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2622 {
2623         struct neigh_table *tbl = seq->private;
2624         int cpu;
2625
2626         if (*pos == 0)
2627                 return SEQ_START_TOKEN;
2628
2629         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2630                 if (!cpu_possible(cpu))
2631                         continue;
2632                 *pos = cpu+1;
2633                 return per_cpu_ptr(tbl->stats, cpu);
2634         }
2635         return NULL;
2636 }
2637
2638 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2639 {
2640         struct neigh_table *tbl = seq->private;
2641         int cpu;
2642
2643         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2644                 if (!cpu_possible(cpu))
2645                         continue;
2646                 *pos = cpu+1;
2647                 return per_cpu_ptr(tbl->stats, cpu);
2648         }
2649         return NULL;
2650 }
2651
/* ->stop() for the statistics file: nothing was locked, nothing to do. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2656
/*
 * Emit one row of /proc/net/stat/<tbl>: the column header for the
 * start token, otherwise one CPU's neigh_statistics counters in hex.
 * The first column repeats the table-wide entry count on every row.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2690
/* seq_file operations backing the per-table statistics proc file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2697
2698 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2699 {
2700         int ret = seq_open(file, &neigh_stat_seq_ops);
2701
2702         if (!ret) {
2703                 struct seq_file *sf = file->private_data;
2704                 sf->private = PDE(inode)->data;
2705         }
2706         return ret;
2707 };
2708
/* file_operations for /proc/net/stat/<tbl>; read side is plain seq_file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2716
2717 #endif /* CONFIG_PROC_FS */
2718
2719 static inline size_t neigh_nlmsg_size(void)
2720 {
2721         return NLMSG_ALIGN(sizeof(struct ndmsg))
2722                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2723                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2724                + nla_total_size(sizeof(struct nda_cacheinfo))
2725                + nla_total_size(4); /* NDA_PROBES */
2726 }
2727
/*
 * Broadcast a netlink message of @type (with @flags) about neighbour
 * @n to the RTNLGRP_NEIGH multicast group.  Uses GFP_ATOMIC, so it is
 * safe from the contexts the neighbour code runs in; on allocation or
 * fill failure the error is recorded on the rtnetlink socket via
 * rtnl_set_sk_err() instead of being returned.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2751
2752 #ifdef CONFIG_ARPD
/*
 * Ask a user-space ARP daemon to resolve @n by sending an
 * RTM_GETNEIGH request to the RTNLGRP_NEIGH group (CONFIG_ARPD only).
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2758 #endif /* CONFIG_ARPD */
2759
2760 #ifdef CONFIG_SYSCTL
2761
/*
 * sysctl handler that presents the byte-based unresolved-queue limit
 * (queue_len_bytes) as a packet count for the legacy "unres_qlen"
 * knob, using SKB_TRUESIZE(ETH_FRAME_LEN) as the per-packet estimate.
 *
 * NOTE(review): a very large written value can overflow the int in
 * size * SKB_TRUESIZE(ETH_FRAME_LEN); consider clamping - check
 * whether a later upstream fix addressed this.
 */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	ctl_table tmp = *ctl;

	tmp.data = &size;
	/* expose the current limit as packets, rounding up */
	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && !ret)
		/* convert the written packet count back to bytes */
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2775
/*
 * Indices into neigh_sysctl_template.neigh_vars[].  Per-device tables
 * are terminated early at NEIGH_VAR_GC_INTERVAL (see the memset in
 * neigh_sysctl_register()): the gc_* knobs exist only in the
 * "default" directory.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2798
/*
 * Template sysctl table, kmemdup()'d for every registered neigh_parms;
 * neigh_sysctl_register() fills in the per-instance .data pointers.
 * Indexed by the NEIGH_VAR_* enum; the trailing {} terminates it.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			/* legacy packet-count view of unres_qlen_bytes */
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2921
/*
 * Create the sysctl directory net/<p_name>/neigh/<dev|default>/ and
 * wire its entries to the fields of @p.  @handler, when non-NULL,
 * overrides the handlers of the four time knobs so protocols can react
 * to changes (extra1 carries @dev for them).
 *
 * For a real device the table is cut off before the gc_* entries:
 * those are global and appear only under "default", where their data
 * pointers reference four ints located directly after *p
 * (assumes the caller allocated them there - see (int *)(p + 1)).
 *
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* legacy unres_qlen shares queue_len_bytes; proc_unres_qlen converts */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2994
2995 void neigh_sysctl_unregister(struct neigh_parms *p)
2996 {
2997         if (p->sysctl_table) {
2998                 struct neigh_sysctl_table *t = p->sysctl_table;
2999                 p->sysctl_table = NULL;
3000                 unregister_net_sysctl_table(t->sysctl_header);
3001                 kfree(t);
3002         }
3003 }
3004 EXPORT_SYMBOL(neigh_sysctl_unregister);
3005
3006 #endif  /* CONFIG_SYSCTL */
3007
/*
 * Register the rtnetlink handlers for neighbour add/delete/dump and
 * for neighbour-table dump/configure.  Hooked in via subsys_initcall
 * so the handlers are in place early during boot.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3022