]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/core/neighbour.c
Merge branch 'master' into for-davem
[karo-tx-linux.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/*
 * Debug verbosity for this file:
 *   0 - NEIGH_PRINTK1 and NEIGH_PRINTK2 both compile to nothing
 *   1 - NEIGH_PRINTK1 prints, NEIGH_PRINTK2 compiles to nothing
 *   2 - both print
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif

/* Mask applied by pneigh_hash(); proxy buckets are indexed 0..PNEIGH_HASHMASK. */
#define PNEIGH_HASHMASK         0xF
57
/* Forward declarations for helpers that are used before their definition. */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
static void neigh_timer_handler(unsigned long arg);
static void neigh_update_notify(struct neighbour *neigh);
static void __neigh_notify(struct neighbour *n, int type, int flags);

/* File-scope state. */
static struct neigh_table *neigh_tables;

#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86    However, the same lock is used to protect another entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
   The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
   list of neighbour tables. This list is used only in process context.
97  */
98
/* Guards the list of neighbour tables (see the locking notes above). */
static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * Pick a random reachable time in the interval (1/2)*base...(3/2)*base.
 * This matches the default IPv6 settings and is deliberately not
 * overridable, because it is a reasonable choice.
 *
 * A @base of zero yields zero (and avoids a modulo by zero).
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131         int shrunk = 0;
132         int i;
133         struct neigh_hash_table *nht;
134
135         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136
137         write_lock_bh(&tbl->lock);
138         nht = rcu_dereference_protected(tbl->nht,
139                                         lockdep_is_held(&tbl->lock));
140         for (i = 0; i < (1 << nht->hash_shift); i++) {
141                 struct neighbour *n;
142                 struct neighbour __rcu **np;
143
144                 np = &nht->hash_buckets[i];
145                 while ((n = rcu_dereference_protected(*np,
146                                         lockdep_is_held(&tbl->lock))) != NULL) {
147                         /* Neighbour record may be discarded if:
148                          * - nobody refers to it.
149                          * - it is not permanent
150                          */
151                         write_lock(&n->lock);
152                         if (atomic_read(&n->refcnt) == 1 &&
153                             !(n->nud_state & NUD_PERMANENT)) {
154                                 rcu_assign_pointer(*np,
155                                         rcu_dereference_protected(n->next,
156                                                   lockdep_is_held(&tbl->lock)));
157                                 n->dead = 1;
158                                 shrunk  = 1;
159                                 write_unlock(&n->lock);
160                                 neigh_cleanup_and_release(n);
161                                 continue;
162                         }
163                         write_unlock(&n->lock);
164                         np = &n->next;
165                 }
166         }
167
168         tbl->last_flush = jiffies;
169
170         write_unlock_bh(&tbl->lock);
171
172         return shrunk;
173 }
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207         int i;
208         struct neigh_hash_table *nht;
209
210         nht = rcu_dereference_protected(tbl->nht,
211                                         lockdep_is_held(&tbl->lock));
212
213         for (i = 0; i < (1 << nht->hash_shift); i++) {
214                 struct neighbour *n;
215                 struct neighbour __rcu **np = &nht->hash_buckets[i];
216
217                 while ((n = rcu_dereference_protected(*np,
218                                         lockdep_is_held(&tbl->lock))) != NULL) {
219                         if (dev && n->dev != dev) {
220                                 np = &n->next;
221                                 continue;
222                         }
223                         rcu_assign_pointer(*np,
224                                    rcu_dereference_protected(n->next,
225                                                 lockdep_is_held(&tbl->lock)));
226                         write_lock(&n->lock);
227                         neigh_del_timer(n);
228                         n->dead = 1;
229
230                         if (atomic_read(&n->refcnt) != 1) {
231                                 /* The most unpleasant situation.
232                                    We must destroy neighbour entry,
233                                    but someone still uses it.
234
235                                    The destroy will be delayed until
236                                    the last user releases us, but
237                                    we must kill timers etc. and move
238                                    it to safe state.
239                                  */
240                                 skb_queue_purge(&n->arp_queue);
241                                 n->arp_queue_len_bytes = 0;
242                                 n->output = neigh_blackhole;
243                                 if (n->nud_state & NUD_VALID)
244                                         n->nud_state = NUD_NOARP;
245                                 else
246                                         n->nud_state = NUD_NONE;
247                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
248                         }
249                         write_unlock(&n->lock);
250                         neigh_cleanup_and_release(n);
251                 }
252         }
253 }
254
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257         write_lock_bh(&tbl->lock);
258         neigh_flush_dev(tbl, dev);
259         write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265         write_lock_bh(&tbl->lock);
266         neigh_flush_dev(tbl, dev);
267         pneigh_ifdown(tbl, dev);
268         write_unlock_bh(&tbl->lock);
269
270         del_timer_sync(&tbl->proxy_timer);
271         pneigh_queue_purge(&tbl->proxy_queue);
272         return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
277 {
278         struct neighbour *n = NULL;
279         unsigned long now = jiffies;
280         int entries;
281
282         entries = atomic_inc_return(&tbl->entries) - 1;
283         if (entries >= tbl->gc_thresh3 ||
284             (entries >= tbl->gc_thresh2 &&
285              time_after(now, tbl->last_flush + 5 * HZ))) {
286                 if (!neigh_forced_gc(tbl) &&
287                     entries >= tbl->gc_thresh3)
288                         goto out_entries;
289         }
290
291         if (tbl->entry_size)
292                 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
293         else {
294                 int sz = sizeof(*n) + tbl->key_len;
295
296                 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
297                 sz += dev->neigh_priv_len;
298                 n = kzalloc(sz, GFP_ATOMIC);
299         }
300         if (!n)
301                 goto out_entries;
302
303         skb_queue_head_init(&n->arp_queue);
304         rwlock_init(&n->lock);
305         seqlock_init(&n->ha_lock);
306         n->updated        = n->used = now;
307         n->nud_state      = NUD_NONE;
308         n->output         = neigh_blackhole;
309         seqlock_init(&n->hh.hh_lock);
310         n->parms          = neigh_parms_clone(&tbl->parms);
311         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
312
313         NEIGH_CACHE_STAT_INC(tbl, allocs);
314         n->tbl            = tbl;
315         atomic_set(&n->refcnt, 1);
316         n->dead           = 1;
317 out:
318         return n;
319
320 out_entries:
321         atomic_dec(&tbl->entries);
322         goto out;
323 }
324
325 static void neigh_get_hash_rnd(u32 *x)
326 {
327         get_random_bytes(x, sizeof(*x));
328         *x |= 1;
329 }
330
331 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
332 {
333         size_t size = (1 << shift) * sizeof(struct neighbour *);
334         struct neigh_hash_table *ret;
335         struct neighbour __rcu **buckets;
336         int i;
337
338         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
339         if (!ret)
340                 return NULL;
341         if (size <= PAGE_SIZE)
342                 buckets = kzalloc(size, GFP_ATOMIC);
343         else
344                 buckets = (struct neighbour __rcu **)
345                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
346                                            get_order(size));
347         if (!buckets) {
348                 kfree(ret);
349                 return NULL;
350         }
351         ret->hash_buckets = buckets;
352         ret->hash_shift = shift;
353         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
354                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
355         return ret;
356 }
357
358 static void neigh_hash_free_rcu(struct rcu_head *head)
359 {
360         struct neigh_hash_table *nht = container_of(head,
361                                                     struct neigh_hash_table,
362                                                     rcu);
363         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
364         struct neighbour __rcu **buckets = nht->hash_buckets;
365
366         if (size <= PAGE_SIZE)
367                 kfree(buckets);
368         else
369                 free_pages((unsigned long)buckets, get_order(size));
370         kfree(nht);
371 }
372
373 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
374                                                 unsigned long new_shift)
375 {
376         unsigned int i, hash;
377         struct neigh_hash_table *new_nht, *old_nht;
378
379         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
380
381         old_nht = rcu_dereference_protected(tbl->nht,
382                                             lockdep_is_held(&tbl->lock));
383         new_nht = neigh_hash_alloc(new_shift);
384         if (!new_nht)
385                 return old_nht;
386
387         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
388                 struct neighbour *n, *next;
389
390                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
391                                                    lockdep_is_held(&tbl->lock));
392                      n != NULL;
393                      n = next) {
394                         hash = tbl->hash(n->primary_key, n->dev,
395                                          new_nht->hash_rnd);
396
397                         hash >>= (32 - new_nht->hash_shift);
398                         next = rcu_dereference_protected(n->next,
399                                                 lockdep_is_held(&tbl->lock));
400
401                         rcu_assign_pointer(n->next,
402                                            rcu_dereference_protected(
403                                                 new_nht->hash_buckets[hash],
404                                                 lockdep_is_held(&tbl->lock)));
405                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
406                 }
407         }
408
409         rcu_assign_pointer(tbl->nht, new_nht);
410         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
411         return new_nht;
412 }
413
414 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
415                                struct net_device *dev)
416 {
417         struct neighbour *n;
418         int key_len = tbl->key_len;
419         u32 hash_val;
420         struct neigh_hash_table *nht;
421
422         NEIGH_CACHE_STAT_INC(tbl, lookups);
423
424         rcu_read_lock_bh();
425         nht = rcu_dereference_bh(tbl->nht);
426         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
427
428         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
429              n != NULL;
430              n = rcu_dereference_bh(n->next)) {
431                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
432                         if (!atomic_inc_not_zero(&n->refcnt))
433                                 n = NULL;
434                         NEIGH_CACHE_STAT_INC(tbl, hits);
435                         break;
436                 }
437         }
438
439         rcu_read_unlock_bh();
440         return n;
441 }
442 EXPORT_SYMBOL(neigh_lookup);
443
444 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
445                                      const void *pkey)
446 {
447         struct neighbour *n;
448         int key_len = tbl->key_len;
449         u32 hash_val;
450         struct neigh_hash_table *nht;
451
452         NEIGH_CACHE_STAT_INC(tbl, lookups);
453
454         rcu_read_lock_bh();
455         nht = rcu_dereference_bh(tbl->nht);
456         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
457
458         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
459              n != NULL;
460              n = rcu_dereference_bh(n->next)) {
461                 if (!memcmp(n->primary_key, pkey, key_len) &&
462                     net_eq(dev_net(n->dev), net)) {
463                         if (!atomic_inc_not_zero(&n->refcnt))
464                                 n = NULL;
465                         NEIGH_CACHE_STAT_INC(tbl, hits);
466                         break;
467                 }
468         }
469
470         rcu_read_unlock_bh();
471         return n;
472 }
473 EXPORT_SYMBOL(neigh_lookup_nodev);
474
475 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
476                                struct net_device *dev)
477 {
478         u32 hash_val;
479         int key_len = tbl->key_len;
480         int error;
481         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
482         struct neigh_hash_table *nht;
483
484         if (!n) {
485                 rc = ERR_PTR(-ENOBUFS);
486                 goto out;
487         }
488
489         memcpy(n->primary_key, pkey, key_len);
490         n->dev = dev;
491         dev_hold(dev);
492
493         /* Protocol specific setup. */
494         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
495                 rc = ERR_PTR(error);
496                 goto out_neigh_release;
497         }
498
499         if (dev->netdev_ops->ndo_neigh_construct) {
500                 error = dev->netdev_ops->ndo_neigh_construct(n);
501                 if (error < 0) {
502                         rc = ERR_PTR(error);
503                         goto out_neigh_release;
504                 }
505         }
506
507         /* Device specific setup. */
508         if (n->parms->neigh_setup &&
509             (error = n->parms->neigh_setup(n)) < 0) {
510                 rc = ERR_PTR(error);
511                 goto out_neigh_release;
512         }
513
514         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
515
516         write_lock_bh(&tbl->lock);
517         nht = rcu_dereference_protected(tbl->nht,
518                                         lockdep_is_held(&tbl->lock));
519
520         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
521                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
522
523         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
524
525         if (n->parms->dead) {
526                 rc = ERR_PTR(-EINVAL);
527                 goto out_tbl_unlock;
528         }
529
530         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
531                                             lockdep_is_held(&tbl->lock));
532              n1 != NULL;
533              n1 = rcu_dereference_protected(n1->next,
534                         lockdep_is_held(&tbl->lock))) {
535                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
536                         neigh_hold(n1);
537                         rc = n1;
538                         goto out_tbl_unlock;
539                 }
540         }
541
542         n->dead = 0;
543         neigh_hold(n);
544         rcu_assign_pointer(n->next,
545                            rcu_dereference_protected(nht->hash_buckets[hash_val],
546                                                      lockdep_is_held(&tbl->lock)));
547         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
548         write_unlock_bh(&tbl->lock);
549         NEIGH_PRINTK2("neigh %p is created.\n", n);
550         rc = n;
551 out:
552         return rc;
553 out_tbl_unlock:
554         write_unlock_bh(&tbl->lock);
555 out_neigh_release:
556         neigh_release(n);
557         goto out;
558 }
559 EXPORT_SYMBOL(neigh_create);
560
561 static u32 pneigh_hash(const void *pkey, int key_len)
562 {
563         u32 hash_val = *(u32 *)(pkey + key_len - 4);
564         hash_val ^= (hash_val >> 16);
565         hash_val ^= hash_val >> 8;
566         hash_val ^= hash_val >> 4;
567         hash_val &= PNEIGH_HASHMASK;
568         return hash_val;
569 }
570
571 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
572                                               struct net *net,
573                                               const void *pkey,
574                                               int key_len,
575                                               struct net_device *dev)
576 {
577         while (n) {
578                 if (!memcmp(n->key, pkey, key_len) &&
579                     net_eq(pneigh_net(n), net) &&
580                     (n->dev == dev || !n->dev))
581                         return n;
582                 n = n->next;
583         }
584         return NULL;
585 }
586
587 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
588                 struct net *net, const void *pkey, struct net_device *dev)
589 {
590         int key_len = tbl->key_len;
591         u32 hash_val = pneigh_hash(pkey, key_len);
592
593         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
594                                  net, pkey, key_len, dev);
595 }
596 EXPORT_SYMBOL_GPL(__pneigh_lookup);
597
598 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
599                                     struct net *net, const void *pkey,
600                                     struct net_device *dev, int creat)
601 {
602         struct pneigh_entry *n;
603         int key_len = tbl->key_len;
604         u32 hash_val = pneigh_hash(pkey, key_len);
605
606         read_lock_bh(&tbl->lock);
607         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
608                               net, pkey, key_len, dev);
609         read_unlock_bh(&tbl->lock);
610
611         if (n || !creat)
612                 goto out;
613
614         ASSERT_RTNL();
615
616         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
617         if (!n)
618                 goto out;
619
620         write_pnet(&n->net, hold_net(net));
621         memcpy(n->key, pkey, key_len);
622         n->dev = dev;
623         if (dev)
624                 dev_hold(dev);
625
626         if (tbl->pconstructor && tbl->pconstructor(n)) {
627                 if (dev)
628                         dev_put(dev);
629                 release_net(net);
630                 kfree(n);
631                 n = NULL;
632                 goto out;
633         }
634
635         write_lock_bh(&tbl->lock);
636         n->next = tbl->phash_buckets[hash_val];
637         tbl->phash_buckets[hash_val] = n;
638         write_unlock_bh(&tbl->lock);
639 out:
640         return n;
641 }
642 EXPORT_SYMBOL(pneigh_lookup);
643
644
645 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
646                   struct net_device *dev)
647 {
648         struct pneigh_entry *n, **np;
649         int key_len = tbl->key_len;
650         u32 hash_val = pneigh_hash(pkey, key_len);
651
652         write_lock_bh(&tbl->lock);
653         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
654              np = &n->next) {
655                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
656                     net_eq(pneigh_net(n), net)) {
657                         *np = n->next;
658                         write_unlock_bh(&tbl->lock);
659                         if (tbl->pdestructor)
660                                 tbl->pdestructor(n);
661                         if (n->dev)
662                                 dev_put(n->dev);
663                         release_net(pneigh_net(n));
664                         kfree(n);
665                         return 0;
666                 }
667         }
668         write_unlock_bh(&tbl->lock);
669         return -ENOENT;
670 }
671
672 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
673 {
674         struct pneigh_entry *n, **np;
675         u32 h;
676
677         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
678                 np = &tbl->phash_buckets[h];
679                 while ((n = *np) != NULL) {
680                         if (!dev || n->dev == dev) {
681                                 *np = n->next;
682                                 if (tbl->pdestructor)
683                                         tbl->pdestructor(n);
684                                 if (n->dev)
685                                         dev_put(n->dev);
686                                 release_net(pneigh_net(n));
687                                 kfree(n);
688                                 continue;
689                         }
690                         np = &n->next;
691                 }
692         }
693         return -ENOENT;
694 }
695
696 static void neigh_parms_destroy(struct neigh_parms *parms);
697
698 static inline void neigh_parms_put(struct neigh_parms *parms)
699 {
700         if (atomic_dec_and_test(&parms->refcnt))
701                 neigh_parms_destroy(parms);
702 }
703
704 /*
705  *      neighbour must already be out of the table;
706  *
707  */
708 void neigh_destroy(struct neighbour *neigh)
709 {
710         struct net_device *dev = neigh->dev;
711
712         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
713
714         if (!neigh->dead) {
715                 printk(KERN_WARNING
716                        "Destroying alive neighbour %p\n", neigh);
717                 dump_stack();
718                 return;
719         }
720
721         if (neigh_del_timer(neigh))
722                 printk(KERN_WARNING "Impossible event.\n");
723
724         skb_queue_purge(&neigh->arp_queue);
725         neigh->arp_queue_len_bytes = 0;
726
727         if (dev->netdev_ops->ndo_neigh_destroy)
728                 dev->netdev_ops->ndo_neigh_destroy(neigh);
729
730         dev_put(dev);
731         neigh_parms_put(neigh->parms);
732
733         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
734
735         atomic_dec(&neigh->tbl->entries);
736         kfree_rcu(neigh, rcu);
737 }
738 EXPORT_SYMBOL(neigh_destroy);
739
740 /* Neighbour state is suspicious;
741    disable fast path.
742
743    Called with write_locked neigh.
744  */
745 static void neigh_suspect(struct neighbour *neigh)
746 {
747         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
748
749         neigh->output = neigh->ops->output;
750 }
751
752 /* Neighbour state is OK;
753    enable fast path.
754
755    Called with write_locked neigh.
756  */
757 static void neigh_connect(struct neighbour *neigh)
758 {
759         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
760
761         neigh->output = neigh->ops->connected_output;
762 }
763
764 static void neigh_periodic_work(struct work_struct *work)
765 {
766         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
767         struct neighbour *n;
768         struct neighbour __rcu **np;
769         unsigned int i;
770         struct neigh_hash_table *nht;
771
772         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
773
774         write_lock_bh(&tbl->lock);
775         nht = rcu_dereference_protected(tbl->nht,
776                                         lockdep_is_held(&tbl->lock));
777
778         /*
779          *      periodically recompute ReachableTime from random function
780          */
781
782         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
783                 struct neigh_parms *p;
784                 tbl->last_rand = jiffies;
785                 for (p = &tbl->parms; p; p = p->next)
786                         p->reachable_time =
787                                 neigh_rand_reach_time(p->base_reachable_time);
788         }
789
790         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
791                 np = &nht->hash_buckets[i];
792
793                 while ((n = rcu_dereference_protected(*np,
794                                 lockdep_is_held(&tbl->lock))) != NULL) {
795                         unsigned int state;
796
797                         write_lock(&n->lock);
798
799                         state = n->nud_state;
800                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
801                                 write_unlock(&n->lock);
802                                 goto next_elt;
803                         }
804
805                         if (time_before(n->used, n->confirmed))
806                                 n->used = n->confirmed;
807
808                         if (atomic_read(&n->refcnt) == 1 &&
809                             (state == NUD_FAILED ||
810                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
811                                 *np = n->next;
812                                 n->dead = 1;
813                                 write_unlock(&n->lock);
814                                 neigh_cleanup_and_release(n);
815                                 continue;
816                         }
817                         write_unlock(&n->lock);
818
819 next_elt:
820                         np = &n->next;
821                 }
822                 /*
823                  * It's fine to release lock here, even if hash table
824                  * grows while we are preempted.
825                  */
826                 write_unlock_bh(&tbl->lock);
827                 cond_resched();
828                 write_lock_bh(&tbl->lock);
829                 nht = rcu_dereference_protected(tbl->nht,
830                                                 lockdep_is_held(&tbl->lock));
831         }
832         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
833          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
834          * base_reachable_time.
835          */
836         schedule_delayed_work(&tbl->gc_work,
837                               tbl->parms.base_reachable_time >> 1);
838         write_unlock_bh(&tbl->lock);
839 }
840
841 static __inline__ int neigh_max_probes(struct neighbour *n)
842 {
843         struct neigh_parms *p = n->parms;
844         return (n->nud_state & NUD_PROBE) ?
845                 p->ucast_probes :
846                 p->ucast_probes + p->app_probes + p->mcast_probes;
847 }
848
849 static void neigh_invalidate(struct neighbour *neigh)
850         __releases(neigh->lock)
851         __acquires(neigh->lock)
852 {
853         struct sk_buff *skb;
854
855         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
856         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
857         neigh->updated = jiffies;
858
859         /* It is very thin place. report_unreachable is very complicated
860            routine. Particularly, it can hit the same neighbour entry!
861
862            So that, we try to be accurate and avoid dead loop. --ANK
863          */
864         while (neigh->nud_state == NUD_FAILED &&
865                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
866                 write_unlock(&neigh->lock);
867                 neigh->ops->error_report(neigh, skb);
868                 write_lock(&neigh->lock);
869         }
870         skb_queue_purge(&neigh->arp_queue);
871         neigh->arp_queue_len_bytes = 0;
872 }
873
874 static void neigh_probe(struct neighbour *neigh)
875         __releases(neigh->lock)
876 {
877         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
878         /* keep skb alive even if arp_queue overflows */
879         if (skb)
880                 skb = skb_copy(skb, GFP_ATOMIC);
881         write_unlock(&neigh->lock);
882         neigh->ops->solicit(neigh, skb);
883         atomic_inc(&neigh->probes);
884         kfree_skb(skb);
885 }
886
887 /* Called when a timer expires for a neighbour entry. */
888
889 static void neigh_timer_handler(unsigned long arg)
890 {
891         unsigned long now, next;
892         struct neighbour *neigh = (struct neighbour *)arg;
893         unsigned state;
894         int notify = 0;
895
896         write_lock(&neigh->lock);
897
898         state = neigh->nud_state;
899         now = jiffies;
900         next = now + HZ;
901
902         if (!(state & NUD_IN_TIMER))
903                 goto out;
904
905         if (state & NUD_REACHABLE) {
906                 if (time_before_eq(now,
907                                    neigh->confirmed + neigh->parms->reachable_time)) {
908                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
909                         next = neigh->confirmed + neigh->parms->reachable_time;
910                 } else if (time_before_eq(now,
911                                           neigh->used + neigh->parms->delay_probe_time)) {
912                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
913                         neigh->nud_state = NUD_DELAY;
914                         neigh->updated = jiffies;
915                         neigh_suspect(neigh);
916                         next = now + neigh->parms->delay_probe_time;
917                 } else {
918                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
919                         neigh->nud_state = NUD_STALE;
920                         neigh->updated = jiffies;
921                         neigh_suspect(neigh);
922                         notify = 1;
923                 }
924         } else if (state & NUD_DELAY) {
925                 if (time_before_eq(now,
926                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
927                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
928                         neigh->nud_state = NUD_REACHABLE;
929                         neigh->updated = jiffies;
930                         neigh_connect(neigh);
931                         notify = 1;
932                         next = neigh->confirmed + neigh->parms->reachable_time;
933                 } else {
934                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
935                         neigh->nud_state = NUD_PROBE;
936                         neigh->updated = jiffies;
937                         atomic_set(&neigh->probes, 0);
938                         next = now + neigh->parms->retrans_time;
939                 }
940         } else {
941                 /* NUD_PROBE|NUD_INCOMPLETE */
942                 next = now + neigh->parms->retrans_time;
943         }
944
945         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
946             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
947                 neigh->nud_state = NUD_FAILED;
948                 notify = 1;
949                 neigh_invalidate(neigh);
950         }
951
952         if (neigh->nud_state & NUD_IN_TIMER) {
953                 if (time_before(next, jiffies + HZ/2))
954                         next = jiffies + HZ/2;
955                 if (!mod_timer(&neigh->timer, next))
956                         neigh_hold(neigh);
957         }
958         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
959                 neigh_probe(neigh);
960         } else {
961 out:
962                 write_unlock(&neigh->lock);
963         }
964
965         if (notify)
966                 neigh_update_notify(neigh);
967
968         neigh_release(neigh);
969 }
970
971 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
972 {
973         int rc;
974         bool immediate_probe = false;
975
976         write_lock_bh(&neigh->lock);
977
978         rc = 0;
979         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
980                 goto out_unlock_bh;
981
982         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
983                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
984                         unsigned long next, now = jiffies;
985
986                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
987                         neigh->nud_state     = NUD_INCOMPLETE;
988                         neigh->updated = now;
989                         next = now + max(neigh->parms->retrans_time, HZ/2);
990                         neigh_add_timer(neigh, next);
991                         immediate_probe = true;
992                 } else {
993                         neigh->nud_state = NUD_FAILED;
994                         neigh->updated = jiffies;
995                         write_unlock_bh(&neigh->lock);
996
997                         kfree_skb(skb);
998                         return 1;
999                 }
1000         } else if (neigh->nud_state & NUD_STALE) {
1001                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1002                 neigh->nud_state = NUD_DELAY;
1003                 neigh->updated = jiffies;
1004                 neigh_add_timer(neigh,
1005                                 jiffies + neigh->parms->delay_probe_time);
1006         }
1007
1008         if (neigh->nud_state == NUD_INCOMPLETE) {
1009                 if (skb) {
1010                         while (neigh->arp_queue_len_bytes + skb->truesize >
1011                                neigh->parms->queue_len_bytes) {
1012                                 struct sk_buff *buff;
1013
1014                                 buff = __skb_dequeue(&neigh->arp_queue);
1015                                 if (!buff)
1016                                         break;
1017                                 neigh->arp_queue_len_bytes -= buff->truesize;
1018                                 kfree_skb(buff);
1019                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1020                         }
1021                         skb_dst_force(skb);
1022                         __skb_queue_tail(&neigh->arp_queue, skb);
1023                         neigh->arp_queue_len_bytes += skb->truesize;
1024                 }
1025                 rc = 1;
1026         }
1027 out_unlock_bh:
1028         if (immediate_probe)
1029                 neigh_probe(neigh);
1030         else
1031                 write_unlock(&neigh->lock);
1032         local_bh_enable();
1033         return rc;
1034 }
1035 EXPORT_SYMBOL(__neigh_event_send);
1036
1037 static void neigh_update_hhs(struct neighbour *neigh)
1038 {
1039         struct hh_cache *hh;
1040         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1041                 = NULL;
1042
1043         if (neigh->dev->header_ops)
1044                 update = neigh->dev->header_ops->cache_update;
1045
1046         if (update) {
1047                 hh = &neigh->hh;
1048                 if (hh->hh_len) {
1049                         write_seqlock_bh(&hh->hh_lock);
1050                         update(hh, neigh->dev, neigh->ha);
1051                         write_sequnlock_bh(&hh->hh_lock);
1052                 }
1053         }
1054 }
1055
1056
1057
1058 /* Generic update routine.
1059    -- lladdr is new lladdr or NULL, if it is not supplied.
1060    -- new    is new state.
1061    -- flags
1062         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1063                                 if it is different.
1064         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1065                                 lladdr instead of overriding it
1066                                 if it is different.
1067                                 It also allows to retain current state
1068                                 if lladdr is unchanged.
1069         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1070
1071         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1072                                 NTF_ROUTER flag.
1073         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1074                                 a router.
1075
1076    Caller MUST hold reference count on the entry.
1077  */
1078
1079 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1080                  u32 flags)
1081 {
1082         u8 old;
1083         int err;
1084         int notify = 0;
1085         struct net_device *dev;
1086         int update_isrouter = 0;
1087
1088         write_lock_bh(&neigh->lock);
1089
1090         dev    = neigh->dev;
1091         old    = neigh->nud_state;
1092         err    = -EPERM;
1093
1094         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1095             (old & (NUD_NOARP | NUD_PERMANENT)))
1096                 goto out;
1097
1098         if (!(new & NUD_VALID)) {
1099                 neigh_del_timer(neigh);
1100                 if (old & NUD_CONNECTED)
1101                         neigh_suspect(neigh);
1102                 neigh->nud_state = new;
1103                 err = 0;
1104                 notify = old & NUD_VALID;
1105                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1106                     (new & NUD_FAILED)) {
1107                         neigh_invalidate(neigh);
1108                         notify = 1;
1109                 }
1110                 goto out;
1111         }
1112
1113         /* Compare new lladdr with cached one */
1114         if (!dev->addr_len) {
1115                 /* First case: device needs no address. */
1116                 lladdr = neigh->ha;
1117         } else if (lladdr) {
1118                 /* The second case: if something is already cached
1119                    and a new address is proposed:
1120                    - compare new & old
1121                    - if they are different, check override flag
1122                  */
1123                 if ((old & NUD_VALID) &&
1124                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1125                         lladdr = neigh->ha;
1126         } else {
1127                 /* No address is supplied; if we know something,
1128                    use it, otherwise discard the request.
1129                  */
1130                 err = -EINVAL;
1131                 if (!(old & NUD_VALID))
1132                         goto out;
1133                 lladdr = neigh->ha;
1134         }
1135
1136         if (new & NUD_CONNECTED)
1137                 neigh->confirmed = jiffies;
1138         neigh->updated = jiffies;
1139
1140         /* If entry was valid and address is not changed,
1141            do not change entry state, if new one is STALE.
1142          */
1143         err = 0;
1144         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1145         if (old & NUD_VALID) {
1146                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1147                         update_isrouter = 0;
1148                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1149                             (old & NUD_CONNECTED)) {
1150                                 lladdr = neigh->ha;
1151                                 new = NUD_STALE;
1152                         } else
1153                                 goto out;
1154                 } else {
1155                         if (lladdr == neigh->ha && new == NUD_STALE &&
1156                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1157                              (old & NUD_CONNECTED))
1158                             )
1159                                 new = old;
1160                 }
1161         }
1162
1163         if (new != old) {
1164                 neigh_del_timer(neigh);
1165                 if (new & NUD_IN_TIMER)
1166                         neigh_add_timer(neigh, (jiffies +
1167                                                 ((new & NUD_REACHABLE) ?
1168                                                  neigh->parms->reachable_time :
1169                                                  0)));
1170                 neigh->nud_state = new;
1171         }
1172
1173         if (lladdr != neigh->ha) {
1174                 write_seqlock(&neigh->ha_lock);
1175                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1176                 write_sequnlock(&neigh->ha_lock);
1177                 neigh_update_hhs(neigh);
1178                 if (!(new & NUD_CONNECTED))
1179                         neigh->confirmed = jiffies -
1180                                       (neigh->parms->base_reachable_time << 1);
1181                 notify = 1;
1182         }
1183         if (new == old)
1184                 goto out;
1185         if (new & NUD_CONNECTED)
1186                 neigh_connect(neigh);
1187         else
1188                 neigh_suspect(neigh);
1189         if (!(old & NUD_VALID)) {
1190                 struct sk_buff *skb;
1191
1192                 /* Again: avoid dead loop if something went wrong */
1193
1194                 while (neigh->nud_state & NUD_VALID &&
1195                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1196                         struct dst_entry *dst = skb_dst(skb);
1197                         struct neighbour *n2, *n1 = neigh;
1198                         write_unlock_bh(&neigh->lock);
1199
1200                         rcu_read_lock();
1201                         /* On shaper/eql skb->dst->neighbour != neigh :( */
1202                         if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
1203                                 n1 = n2;
1204                         n1->output(n1, skb);
1205                         rcu_read_unlock();
1206
1207                         write_lock_bh(&neigh->lock);
1208                 }
1209                 skb_queue_purge(&neigh->arp_queue);
1210                 neigh->arp_queue_len_bytes = 0;
1211         }
1212 out:
1213         if (update_isrouter) {
1214                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1215                         (neigh->flags | NTF_ROUTER) :
1216                         (neigh->flags & ~NTF_ROUTER);
1217         }
1218         write_unlock_bh(&neigh->lock);
1219
1220         if (notify)
1221                 neigh_update_notify(neigh);
1222
1223         return err;
1224 }
1225 EXPORT_SYMBOL(neigh_update);
1226
1227 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1228                                  u8 *lladdr, void *saddr,
1229                                  struct net_device *dev)
1230 {
1231         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1232                                                  lladdr || !dev->addr_len);
1233         if (neigh)
1234                 neigh_update(neigh, lladdr, NUD_STALE,
1235                              NEIGH_UPDATE_F_OVERRIDE);
1236         return neigh;
1237 }
1238 EXPORT_SYMBOL(neigh_event_ns);
1239
1240 /* called with read_lock_bh(&n->lock); */
1241 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1242 {
1243         struct net_device *dev = dst->dev;
1244         __be16 prot = dst->ops->protocol;
1245         struct hh_cache *hh = &n->hh;
1246
1247         write_lock_bh(&n->lock);
1248
1249         /* Only one thread can come in here and initialize the
1250          * hh_cache entry.
1251          */
1252         if (!hh->hh_len)
1253                 dev->header_ops->cache(n, hh, prot);
1254
1255         write_unlock_bh(&n->lock);
1256 }
1257
1258 /* This function can be used in contexts, where only old dev_queue_xmit
1259  * worked, f.e. if you want to override normal output path (eql, shaper),
1260  * but resolution is not made yet.
1261  */
1262
1263 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1264 {
1265         struct net_device *dev = skb->dev;
1266
1267         __skb_pull(skb, skb_network_offset(skb));
1268
1269         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1270                             skb->len) < 0 &&
1271             dev->header_ops->rebuild(skb))
1272                 return 0;
1273
1274         return dev_queue_xmit(skb);
1275 }
1276 EXPORT_SYMBOL(neigh_compat_output);
1277
1278 /* Slow and careful. */
1279
1280 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1281 {
1282         struct dst_entry *dst = skb_dst(skb);
1283         int rc = 0;
1284
1285         if (!dst)
1286                 goto discard;
1287
1288         __skb_pull(skb, skb_network_offset(skb));
1289
1290         if (!neigh_event_send(neigh, skb)) {
1291                 int err;
1292                 struct net_device *dev = neigh->dev;
1293                 unsigned int seq;
1294
1295                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1296                         neigh_hh_init(neigh, dst);
1297
1298                 do {
1299                         seq = read_seqbegin(&neigh->ha_lock);
1300                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1301                                               neigh->ha, NULL, skb->len);
1302                 } while (read_seqretry(&neigh->ha_lock, seq));
1303
1304                 if (err >= 0)
1305                         rc = dev_queue_xmit(skb);
1306                 else
1307                         goto out_kfree_skb;
1308         }
1309 out:
1310         return rc;
1311 discard:
1312         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1313                       dst, neigh);
1314 out_kfree_skb:
1315         rc = -EINVAL;
1316         kfree_skb(skb);
1317         goto out;
1318 }
1319 EXPORT_SYMBOL(neigh_resolve_output);
1320
1321 /* As fast as possible without hh cache */
1322
1323 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1324 {
1325         struct net_device *dev = neigh->dev;
1326         unsigned int seq;
1327         int err;
1328
1329         __skb_pull(skb, skb_network_offset(skb));
1330
1331         do {
1332                 seq = read_seqbegin(&neigh->ha_lock);
1333                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1334                                       neigh->ha, NULL, skb->len);
1335         } while (read_seqretry(&neigh->ha_lock, seq));
1336
1337         if (err >= 0)
1338                 err = dev_queue_xmit(skb);
1339         else {
1340                 err = -EINVAL;
1341                 kfree_skb(skb);
1342         }
1343         return err;
1344 }
1345 EXPORT_SYMBOL(neigh_connected_output);
1346
/* Output path that adds no link-layer header: @skb is queued as is. */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1352
1353 static void neigh_proxy_process(unsigned long arg)
1354 {
1355         struct neigh_table *tbl = (struct neigh_table *)arg;
1356         long sched_next = 0;
1357         unsigned long now = jiffies;
1358         struct sk_buff *skb, *n;
1359
1360         spin_lock(&tbl->proxy_queue.lock);
1361
1362         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1363                 long tdif = NEIGH_CB(skb)->sched_next - now;
1364
1365                 if (tdif <= 0) {
1366                         struct net_device *dev = skb->dev;
1367
1368                         __skb_unlink(skb, &tbl->proxy_queue);
1369                         if (tbl->proxy_redo && netif_running(dev)) {
1370                                 rcu_read_lock();
1371                                 tbl->proxy_redo(skb);
1372                                 rcu_read_unlock();
1373                         } else {
1374                                 kfree_skb(skb);
1375                         }
1376
1377                         dev_put(dev);
1378                 } else if (!sched_next || tdif < sched_next)
1379                         sched_next = tdif;
1380         }
1381         del_timer(&tbl->proxy_timer);
1382         if (sched_next)
1383                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1384         spin_unlock(&tbl->proxy_queue.lock);
1385 }
1386
1387 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1388                     struct sk_buff *skb)
1389 {
1390         unsigned long now = jiffies;
1391         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1392
1393         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1394                 kfree_skb(skb);
1395                 return;
1396         }
1397
1398         NEIGH_CB(skb)->sched_next = sched_next;
1399         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1400
1401         spin_lock(&tbl->proxy_queue.lock);
1402         if (del_timer(&tbl->proxy_timer)) {
1403                 if (time_before(tbl->proxy_timer.expires, sched_next))
1404                         sched_next = tbl->proxy_timer.expires;
1405         }
1406         skb_dst_drop(skb);
1407         dev_hold(skb->dev);
1408         __skb_queue_tail(&tbl->proxy_queue, skb);
1409         mod_timer(&tbl->proxy_timer, sched_next);
1410         spin_unlock(&tbl->proxy_queue.lock);
1411 }
1412 EXPORT_SYMBOL(pneigh_enqueue);
1413
1414 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1415                                                       struct net *net, int ifindex)
1416 {
1417         struct neigh_parms *p;
1418
1419         for (p = &tbl->parms; p; p = p->next) {
1420                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1421                     (!p->dev && !ifindex))
1422                         return p;
1423         }
1424
1425         return NULL;
1426 }
1427
1428 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1429                                       struct neigh_table *tbl)
1430 {
1431         struct neigh_parms *p, *ref;
1432         struct net *net = dev_net(dev);
1433         const struct net_device_ops *ops = dev->netdev_ops;
1434
1435         ref = lookup_neigh_parms(tbl, net, 0);
1436         if (!ref)
1437                 return NULL;
1438
1439         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1440         if (p) {
1441                 p->tbl            = tbl;
1442                 atomic_set(&p->refcnt, 1);
1443                 p->reachable_time =
1444                                 neigh_rand_reach_time(p->base_reachable_time);
1445
1446                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1447                         kfree(p);
1448                         return NULL;
1449                 }
1450
1451                 dev_hold(dev);
1452                 p->dev = dev;
1453                 write_pnet(&p->net, hold_net(net));
1454                 p->sysctl_table = NULL;
1455                 write_lock_bh(&tbl->lock);
1456                 p->next         = tbl->parms.next;
1457                 tbl->parms.next = p;
1458                 write_unlock_bh(&tbl->lock);
1459         }
1460         return p;
1461 }
1462 EXPORT_SYMBOL(neigh_parms_alloc);
1463
1464 static void neigh_rcu_free_parms(struct rcu_head *head)
1465 {
1466         struct neigh_parms *parms =
1467                 container_of(head, struct neigh_parms, rcu_head);
1468
1469         neigh_parms_put(parms);
1470 }
1471
1472 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1473 {
1474         struct neigh_parms **p;
1475
1476         if (!parms || parms == &tbl->parms)
1477                 return;
1478         write_lock_bh(&tbl->lock);
1479         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1480                 if (*p == parms) {
1481                         *p = parms->next;
1482                         parms->dead = 1;
1483                         write_unlock_bh(&tbl->lock);
1484                         if (parms->dev)
1485                                 dev_put(parms->dev);
1486                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1487                         return;
1488                 }
1489         }
1490         write_unlock_bh(&tbl->lock);
1491         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1492 }
1493 EXPORT_SYMBOL(neigh_parms_release);
1494
/* Release the namespace reference held by @parms and free the set. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1500
1501 static struct lock_class_key neigh_table_proxy_queue_class;
1502
1503 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1504 {
1505         unsigned long now = jiffies;
1506         unsigned long phsize;
1507
1508         write_pnet(&tbl->parms.net, &init_net);
1509         atomic_set(&tbl->parms.refcnt, 1);
1510         tbl->parms.reachable_time =
1511                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1512
1513         tbl->stats = alloc_percpu(struct neigh_statistics);
1514         if (!tbl->stats)
1515                 panic("cannot create neighbour cache statistics");
1516
1517 #ifdef CONFIG_PROC_FS
1518         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1519                               &neigh_stat_seq_fops, tbl))
1520                 panic("cannot create neighbour proc dir entry");
1521 #endif
1522
1523         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1524
1525         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1526         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1527
1528         if (!tbl->nht || !tbl->phash_buckets)
1529                 panic("cannot allocate neighbour cache hashes");
1530
1531         rwlock_init(&tbl->lock);
1532         INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1533         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1534         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1535         skb_queue_head_init_class(&tbl->proxy_queue,
1536                         &neigh_table_proxy_queue_class);
1537
1538         tbl->last_flush = now;
1539         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1540 }
1541 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1542
1543 void neigh_table_init(struct neigh_table *tbl)
1544 {
1545         struct neigh_table *tmp;
1546
1547         neigh_table_init_no_netlink(tbl);
1548         write_lock(&neigh_tbl_lock);
1549         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1550                 if (tmp->family == tbl->family)
1551                         break;
1552         }
1553         tbl->next       = neigh_tables;
1554         neigh_tables    = tbl;
1555         write_unlock(&neigh_tbl_lock);
1556
1557         if (unlikely(tmp)) {
1558                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1559                        "family %d\n", tbl->family);
1560                 dump_stack();
1561         }
1562 }
1563 EXPORT_SYMBOL(neigh_table_init);
1564
1565 int neigh_table_clear(struct neigh_table *tbl)
1566 {
1567         struct neigh_table **tp;
1568
1569         /* It is not clean... Fix it to unload IPv6 module safely */
1570         cancel_delayed_work_sync(&tbl->gc_work);
1571         del_timer_sync(&tbl->proxy_timer);
1572         pneigh_queue_purge(&tbl->proxy_queue);
1573         neigh_ifdown(tbl, NULL);
1574         if (atomic_read(&tbl->entries))
1575                 printk(KERN_CRIT "neighbour leakage\n");
1576         write_lock(&neigh_tbl_lock);
1577         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1578                 if (*tp == tbl) {
1579                         *tp = tbl->next;
1580                         break;
1581                 }
1582         }
1583         write_unlock(&neigh_tbl_lock);
1584
1585         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1586                  neigh_hash_free_rcu);
1587         tbl->nht = NULL;
1588
1589         kfree(tbl->phash_buckets);
1590         tbl->phash_buckets = NULL;
1591
1592         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1593
1594         free_percpu(tbl->stats);
1595         tbl->stats = NULL;
1596
1597         return 0;
1598 }
1599 EXPORT_SYMBOL(neigh_table_clear);
1600
1601 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1602 {
1603         struct net *net = sock_net(skb->sk);
1604         struct ndmsg *ndm;
1605         struct nlattr *dst_attr;
1606         struct neigh_table *tbl;
1607         struct net_device *dev = NULL;
1608         int err = -EINVAL;
1609
1610         ASSERT_RTNL();
1611         if (nlmsg_len(nlh) < sizeof(*ndm))
1612                 goto out;
1613
1614         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1615         if (dst_attr == NULL)
1616                 goto out;
1617
1618         ndm = nlmsg_data(nlh);
1619         if (ndm->ndm_ifindex) {
1620                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1621                 if (dev == NULL) {
1622                         err = -ENODEV;
1623                         goto out;
1624                 }
1625         }
1626
1627         read_lock(&neigh_tbl_lock);
1628         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1629                 struct neighbour *neigh;
1630
1631                 if (tbl->family != ndm->ndm_family)
1632                         continue;
1633                 read_unlock(&neigh_tbl_lock);
1634
1635                 if (nla_len(dst_attr) < tbl->key_len)
1636                         goto out;
1637
1638                 if (ndm->ndm_flags & NTF_PROXY) {
1639                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1640                         goto out;
1641                 }
1642
1643                 if (dev == NULL)
1644                         goto out;
1645
1646                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1647                 if (neigh == NULL) {
1648                         err = -ENOENT;
1649                         goto out;
1650                 }
1651
1652                 err = neigh_update(neigh, NULL, NUD_FAILED,
1653                                    NEIGH_UPDATE_F_OVERRIDE |
1654                                    NEIGH_UPDATE_F_ADMIN);
1655                 neigh_release(neigh);
1656                 goto out;
1657         }
1658         read_unlock(&neigh_tbl_lock);
1659         err = -EAFNOSUPPORT;
1660
1661 out:
1662         return err;
1663 }
1664
1665 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1666 {
1667         struct net *net = sock_net(skb->sk);
1668         struct ndmsg *ndm;
1669         struct nlattr *tb[NDA_MAX+1];
1670         struct neigh_table *tbl;
1671         struct net_device *dev = NULL;
1672         int err;
1673
1674         ASSERT_RTNL();
1675         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1676         if (err < 0)
1677                 goto out;
1678
1679         err = -EINVAL;
1680         if (tb[NDA_DST] == NULL)
1681                 goto out;
1682
1683         ndm = nlmsg_data(nlh);
1684         if (ndm->ndm_ifindex) {
1685                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1686                 if (dev == NULL) {
1687                         err = -ENODEV;
1688                         goto out;
1689                 }
1690
1691                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1692                         goto out;
1693         }
1694
1695         read_lock(&neigh_tbl_lock);
1696         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1697                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1698                 struct neighbour *neigh;
1699                 void *dst, *lladdr;
1700
1701                 if (tbl->family != ndm->ndm_family)
1702                         continue;
1703                 read_unlock(&neigh_tbl_lock);
1704
1705                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1706                         goto out;
1707                 dst = nla_data(tb[NDA_DST]);
1708                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1709
1710                 if (ndm->ndm_flags & NTF_PROXY) {
1711                         struct pneigh_entry *pn;
1712
1713                         err = -ENOBUFS;
1714                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1715                         if (pn) {
1716                                 pn->flags = ndm->ndm_flags;
1717                                 err = 0;
1718                         }
1719                         goto out;
1720                 }
1721
1722                 if (dev == NULL)
1723                         goto out;
1724
1725                 neigh = neigh_lookup(tbl, dst, dev);
1726                 if (neigh == NULL) {
1727                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1728                                 err = -ENOENT;
1729                                 goto out;
1730                         }
1731
1732                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1733                         if (IS_ERR(neigh)) {
1734                                 err = PTR_ERR(neigh);
1735                                 goto out;
1736                         }
1737                 } else {
1738                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1739                                 err = -EEXIST;
1740                                 neigh_release(neigh);
1741                                 goto out;
1742                         }
1743
1744                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1745                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1746                 }
1747
1748                 if (ndm->ndm_flags & NTF_USE) {
1749                         neigh_event_send(neigh, NULL);
1750                         err = 0;
1751                 } else
1752                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1753                 neigh_release(neigh);
1754                 goto out;
1755         }
1756
1757         read_unlock(&neigh_tbl_lock);
1758         err = -EAFNOSUPPORT;
1759 out:
1760         return err;
1761 }
1762
1763 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1764 {
1765         struct nlattr *nest;
1766
1767         nest = nla_nest_start(skb, NDTA_PARMS);
1768         if (nest == NULL)
1769                 return -ENOBUFS;
1770
1771         if ((parms->dev &&
1772              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1773             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1774             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1775             /* approximative value for deprecated QUEUE_LEN (in packets) */
1776             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1777                         DIV_ROUND_UP(parms->queue_len_bytes,
1778                                      SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1779             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1780             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1781             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1782             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1783             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1784             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1785                           parms->base_reachable_time) ||
1786             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1787             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1788                           parms->delay_probe_time) ||
1789             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1790             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1791             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1792             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1793                 goto nla_put_failure;
1794         return nla_nest_end(skb, nest);
1795
1796 nla_put_failure:
1797         nla_nest_cancel(skb, nest);
1798         return -EMSGSIZE;
1799 }
1800
1801 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1802                               u32 pid, u32 seq, int type, int flags)
1803 {
1804         struct nlmsghdr *nlh;
1805         struct ndtmsg *ndtmsg;
1806
1807         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1808         if (nlh == NULL)
1809                 return -EMSGSIZE;
1810
1811         ndtmsg = nlmsg_data(nlh);
1812
1813         read_lock_bh(&tbl->lock);
1814         ndtmsg->ndtm_family = tbl->family;
1815         ndtmsg->ndtm_pad1   = 0;
1816         ndtmsg->ndtm_pad2   = 0;
1817
1818         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1819             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1820             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1821             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1822             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1823                 goto nla_put_failure;
1824         {
1825                 unsigned long now = jiffies;
1826                 unsigned int flush_delta = now - tbl->last_flush;
1827                 unsigned int rand_delta = now - tbl->last_rand;
1828                 struct neigh_hash_table *nht;
1829                 struct ndt_config ndc = {
1830                         .ndtc_key_len           = tbl->key_len,
1831                         .ndtc_entry_size        = tbl->entry_size,
1832                         .ndtc_entries           = atomic_read(&tbl->entries),
1833                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1834                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1835                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1836                 };
1837
1838                 rcu_read_lock_bh();
1839                 nht = rcu_dereference_bh(tbl->nht);
1840                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1841                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1842                 rcu_read_unlock_bh();
1843
1844                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1845                         goto nla_put_failure;
1846         }
1847
1848         {
1849                 int cpu;
1850                 struct ndt_stats ndst;
1851
1852                 memset(&ndst, 0, sizeof(ndst));
1853
1854                 for_each_possible_cpu(cpu) {
1855                         struct neigh_statistics *st;
1856
1857                         st = per_cpu_ptr(tbl->stats, cpu);
1858                         ndst.ndts_allocs                += st->allocs;
1859                         ndst.ndts_destroys              += st->destroys;
1860                         ndst.ndts_hash_grows            += st->hash_grows;
1861                         ndst.ndts_res_failed            += st->res_failed;
1862                         ndst.ndts_lookups               += st->lookups;
1863                         ndst.ndts_hits                  += st->hits;
1864                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1865                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1866                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1867                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1868                 }
1869
1870                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1871                         goto nla_put_failure;
1872         }
1873
1874         BUG_ON(tbl->parms.dev);
1875         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1876                 goto nla_put_failure;
1877
1878         read_unlock_bh(&tbl->lock);
1879         return nlmsg_end(skb, nlh);
1880
1881 nla_put_failure:
1882         read_unlock_bh(&tbl->lock);
1883         nlmsg_cancel(skb, nlh);
1884         return -EMSGSIZE;
1885 }
1886
1887 static int neightbl_fill_param_info(struct sk_buff *skb,
1888                                     struct neigh_table *tbl,
1889                                     struct neigh_parms *parms,
1890                                     u32 pid, u32 seq, int type,
1891                                     unsigned int flags)
1892 {
1893         struct ndtmsg *ndtmsg;
1894         struct nlmsghdr *nlh;
1895
1896         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1897         if (nlh == NULL)
1898                 return -EMSGSIZE;
1899
1900         ndtmsg = nlmsg_data(nlh);
1901
1902         read_lock_bh(&tbl->lock);
1903         ndtmsg->ndtm_family = tbl->family;
1904         ndtmsg->ndtm_pad1   = 0;
1905         ndtmsg->ndtm_pad2   = 0;
1906
1907         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1908             neightbl_fill_parms(skb, parms) < 0)
1909                 goto errout;
1910
1911         read_unlock_bh(&tbl->lock);
1912         return nlmsg_end(skb, nlh);
1913 errout:
1914         read_unlock_bh(&tbl->lock);
1915         nlmsg_cancel(skb, nlh);
1916         return -EMSGSIZE;
1917 }
1918
1919 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1920         [NDTA_NAME]             = { .type = NLA_STRING },
1921         [NDTA_THRESH1]          = { .type = NLA_U32 },
1922         [NDTA_THRESH2]          = { .type = NLA_U32 },
1923         [NDTA_THRESH3]          = { .type = NLA_U32 },
1924         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1925         [NDTA_PARMS]            = { .type = NLA_NESTED },
1926 };
1927
1928 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1929         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1930         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1931         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1932         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1933         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1934         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1935         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1936         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1937         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1938         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1939         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1940         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1941         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1942 };
1943
1944 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1945 {
1946         struct net *net = sock_net(skb->sk);
1947         struct neigh_table *tbl;
1948         struct ndtmsg *ndtmsg;
1949         struct nlattr *tb[NDTA_MAX+1];
1950         int err;
1951
1952         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1953                           nl_neightbl_policy);
1954         if (err < 0)
1955                 goto errout;
1956
1957         if (tb[NDTA_NAME] == NULL) {
1958                 err = -EINVAL;
1959                 goto errout;
1960         }
1961
1962         ndtmsg = nlmsg_data(nlh);
1963         read_lock(&neigh_tbl_lock);
1964         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1965                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1966                         continue;
1967
1968                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1969                         break;
1970         }
1971
1972         if (tbl == NULL) {
1973                 err = -ENOENT;
1974                 goto errout_locked;
1975         }
1976
1977         /*
1978          * We acquire tbl->lock to be nice to the periodic timers and
1979          * make sure they always see a consistent set of values.
1980          */
1981         write_lock_bh(&tbl->lock);
1982
1983         if (tb[NDTA_PARMS]) {
1984                 struct nlattr *tbp[NDTPA_MAX+1];
1985                 struct neigh_parms *p;
1986                 int i, ifindex = 0;
1987
1988                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1989                                        nl_ntbl_parm_policy);
1990                 if (err < 0)
1991                         goto errout_tbl_lock;
1992
1993                 if (tbp[NDTPA_IFINDEX])
1994                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1995
1996                 p = lookup_neigh_parms(tbl, net, ifindex);
1997                 if (p == NULL) {
1998                         err = -ENOENT;
1999                         goto errout_tbl_lock;
2000                 }
2001
2002                 for (i = 1; i <= NDTPA_MAX; i++) {
2003                         if (tbp[i] == NULL)
2004                                 continue;
2005
2006                         switch (i) {
2007                         case NDTPA_QUEUE_LEN:
2008                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2009                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2010                                 break;
2011                         case NDTPA_QUEUE_LENBYTES:
2012                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2013                                 break;
2014                         case NDTPA_PROXY_QLEN:
2015                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2016                                 break;
2017                         case NDTPA_APP_PROBES:
2018                                 p->app_probes = nla_get_u32(tbp[i]);
2019                                 break;
2020                         case NDTPA_UCAST_PROBES:
2021                                 p->ucast_probes = nla_get_u32(tbp[i]);
2022                                 break;
2023                         case NDTPA_MCAST_PROBES:
2024                                 p->mcast_probes = nla_get_u32(tbp[i]);
2025                                 break;
2026                         case NDTPA_BASE_REACHABLE_TIME:
2027                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2028                                 break;
2029                         case NDTPA_GC_STALETIME:
2030                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2031                                 break;
2032                         case NDTPA_DELAY_PROBE_TIME:
2033                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2034                                 break;
2035                         case NDTPA_RETRANS_TIME:
2036                                 p->retrans_time = nla_get_msecs(tbp[i]);
2037                                 break;
2038                         case NDTPA_ANYCAST_DELAY:
2039                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2040                                 break;
2041                         case NDTPA_PROXY_DELAY:
2042                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2043                                 break;
2044                         case NDTPA_LOCKTIME:
2045                                 p->locktime = nla_get_msecs(tbp[i]);
2046                                 break;
2047                         }
2048                 }
2049         }
2050
2051         if (tb[NDTA_THRESH1])
2052                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2053
2054         if (tb[NDTA_THRESH2])
2055                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2056
2057         if (tb[NDTA_THRESH3])
2058                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2059
2060         if (tb[NDTA_GC_INTERVAL])
2061                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2062
2063         err = 0;
2064
2065 errout_tbl_lock:
2066         write_unlock_bh(&tbl->lock);
2067 errout_locked:
2068         read_unlock(&neigh_tbl_lock);
2069 errout:
2070         return err;
2071 }
2072
2073 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2074 {
2075         struct net *net = sock_net(skb->sk);
2076         int family, tidx, nidx = 0;
2077         int tbl_skip = cb->args[0];
2078         int neigh_skip = cb->args[1];
2079         struct neigh_table *tbl;
2080
2081         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2082
2083         read_lock(&neigh_tbl_lock);
2084         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2085                 struct neigh_parms *p;
2086
2087                 if (tidx < tbl_skip || (family && tbl->family != family))
2088                         continue;
2089
2090                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2091                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2092                                        NLM_F_MULTI) <= 0)
2093                         break;
2094
2095                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2096                         if (!net_eq(neigh_parms_net(p), net))
2097                                 continue;
2098
2099                         if (nidx < neigh_skip)
2100                                 goto next;
2101
2102                         if (neightbl_fill_param_info(skb, tbl, p,
2103                                                      NETLINK_CB(cb->skb).pid,
2104                                                      cb->nlh->nlmsg_seq,
2105                                                      RTM_NEWNEIGHTBL,
2106                                                      NLM_F_MULTI) <= 0)
2107                                 goto out;
2108                 next:
2109                         nidx++;
2110                 }
2111
2112                 neigh_skip = 0;
2113         }
2114 out:
2115         read_unlock(&neigh_tbl_lock);
2116         cb->args[0] = tidx;
2117         cb->args[1] = nidx;
2118
2119         return skb->len;
2120 }
2121
2122 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2123                            u32 pid, u32 seq, int type, unsigned int flags)
2124 {
2125         unsigned long now = jiffies;
2126         struct nda_cacheinfo ci;
2127         struct nlmsghdr *nlh;
2128         struct ndmsg *ndm;
2129
2130         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2131         if (nlh == NULL)
2132                 return -EMSGSIZE;
2133
2134         ndm = nlmsg_data(nlh);
2135         ndm->ndm_family  = neigh->ops->family;
2136         ndm->ndm_pad1    = 0;
2137         ndm->ndm_pad2    = 0;
2138         ndm->ndm_flags   = neigh->flags;
2139         ndm->ndm_type    = neigh->type;
2140         ndm->ndm_ifindex = neigh->dev->ifindex;
2141
2142         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2143                 goto nla_put_failure;
2144
2145         read_lock_bh(&neigh->lock);
2146         ndm->ndm_state   = neigh->nud_state;
2147         if (neigh->nud_state & NUD_VALID) {
2148                 char haddr[MAX_ADDR_LEN];
2149
2150                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2151                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2152                         read_unlock_bh(&neigh->lock);
2153                         goto nla_put_failure;
2154                 }
2155         }
2156
2157         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2158         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2159         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2160         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2161         read_unlock_bh(&neigh->lock);
2162
2163         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2164             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2165                 goto nla_put_failure;
2166
2167         return nlmsg_end(skb, nlh);
2168
2169 nla_put_failure:
2170         nlmsg_cancel(skb, nlh);
2171         return -EMSGSIZE;
2172 }
2173
2174 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2175                             u32 pid, u32 seq, int type, unsigned int flags,
2176                             struct neigh_table *tbl)
2177 {
2178         struct nlmsghdr *nlh;
2179         struct ndmsg *ndm;
2180
2181         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2182         if (nlh == NULL)
2183                 return -EMSGSIZE;
2184
2185         ndm = nlmsg_data(nlh);
2186         ndm->ndm_family  = tbl->family;
2187         ndm->ndm_pad1    = 0;
2188         ndm->ndm_pad2    = 0;
2189         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2190         ndm->ndm_type    = NDA_DST;
2191         ndm->ndm_ifindex = pn->dev->ifindex;
2192         ndm->ndm_state   = NUD_NONE;
2193
2194         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2195                 goto nla_put_failure;
2196
2197         return nlmsg_end(skb, nlh);
2198
2199 nla_put_failure:
2200         nlmsg_cancel(skb, nlh);
2201         return -EMSGSIZE;
2202 }
2203
2204 static void neigh_update_notify(struct neighbour *neigh)
2205 {
2206         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2207         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2208 }
2209
2210 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2211                             struct netlink_callback *cb)
2212 {
2213         struct net *net = sock_net(skb->sk);
2214         struct neighbour *n;
2215         int rc, h, s_h = cb->args[1];
2216         int idx, s_idx = idx = cb->args[2];
2217         struct neigh_hash_table *nht;
2218
2219         rcu_read_lock_bh();
2220         nht = rcu_dereference_bh(tbl->nht);
2221
2222         for (h = 0; h < (1 << nht->hash_shift); h++) {
2223                 if (h < s_h)
2224                         continue;
2225                 if (h > s_h)
2226                         s_idx = 0;
2227                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2228                      n != NULL;
2229                      n = rcu_dereference_bh(n->next)) {
2230                         if (!net_eq(dev_net(n->dev), net))
2231                                 continue;
2232                         if (idx < s_idx)
2233                                 goto next;
2234                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2235                                             cb->nlh->nlmsg_seq,
2236                                             RTM_NEWNEIGH,
2237                                             NLM_F_MULTI) <= 0) {
2238                                 rc = -1;
2239                                 goto out;
2240                         }
2241 next:
2242                         idx++;
2243                 }
2244         }
2245         rc = skb->len;
2246 out:
2247         rcu_read_unlock_bh();
2248         cb->args[1] = h;
2249         cb->args[2] = idx;
2250         return rc;
2251 }
2252
2253 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2254                              struct netlink_callback *cb)
2255 {
2256         struct pneigh_entry *n;
2257         struct net *net = sock_net(skb->sk);
2258         int rc, h, s_h = cb->args[3];
2259         int idx, s_idx = idx = cb->args[4];
2260
2261         read_lock_bh(&tbl->lock);
2262
2263         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
2264                 if (h < s_h)
2265                         continue;
2266                 if (h > s_h)
2267                         s_idx = 0;
2268                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2269                         if (dev_net(n->dev) != net)
2270                                 continue;
2271                         if (idx < s_idx)
2272                                 goto next;
2273                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2274                                             cb->nlh->nlmsg_seq,
2275                                             RTM_NEWNEIGH,
2276                                             NLM_F_MULTI, tbl) <= 0) {
2277                                 read_unlock_bh(&tbl->lock);
2278                                 rc = -1;
2279                                 goto out;
2280                         }
2281                 next:
2282                         idx++;
2283                 }
2284         }
2285
2286         read_unlock_bh(&tbl->lock);
2287         rc = skb->len;
2288 out:
2289         cb->args[3] = h;
2290         cb->args[4] = idx;
2291         return rc;
2292
2293 }
2294
2295 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2296 {
2297         struct neigh_table *tbl;
2298         int t, family, s_t;
2299         int proxy = 0;
2300         int err = 0;
2301
2302         read_lock(&neigh_tbl_lock);
2303         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2304
2305         /* check for full ndmsg structure presence, family member is
2306          * the same for both structures
2307          */
2308         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2309             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2310                 proxy = 1;
2311
2312         s_t = cb->args[0];
2313
2314         for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
2315              tbl = tbl->next, t++) {
2316                 if (t < s_t || (family && tbl->family != family))
2317                         continue;
2318                 if (t > s_t)
2319                         memset(&cb->args[1], 0, sizeof(cb->args) -
2320                                                 sizeof(cb->args[0]));
2321                 if (proxy)
2322                         err = pneigh_dump_table(tbl, skb, cb);
2323                 else
2324                         err = neigh_dump_table(tbl, skb, cb);
2325         }
2326         read_unlock(&neigh_tbl_lock);
2327
2328         cb->args[0] = t;
2329         return skb->len;
2330 }
2331
2332 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2333 {
2334         int chain;
2335         struct neigh_hash_table *nht;
2336
2337         rcu_read_lock_bh();
2338         nht = rcu_dereference_bh(tbl->nht);
2339
2340         read_lock(&tbl->lock); /* avoid resizes */
2341         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2342                 struct neighbour *n;
2343
2344                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2345                      n != NULL;
2346                      n = rcu_dereference_bh(n->next))
2347                         cb(n, cookie);
2348         }
2349         read_unlock(&tbl->lock);
2350         rcu_read_unlock_bh();
2351 }
2352 EXPORT_SYMBOL(neigh_for_each);
2353
2354 /* The tbl->lock must be held as a writer and BH disabled. */
2355 void __neigh_for_each_release(struct neigh_table *tbl,
2356                               int (*cb)(struct neighbour *))
2357 {
2358         int chain;
2359         struct neigh_hash_table *nht;
2360
2361         nht = rcu_dereference_protected(tbl->nht,
2362                                         lockdep_is_held(&tbl->lock));
2363         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2364                 struct neighbour *n;
2365                 struct neighbour __rcu **np;
2366
2367                 np = &nht->hash_buckets[chain];
2368                 while ((n = rcu_dereference_protected(*np,
2369                                         lockdep_is_held(&tbl->lock))) != NULL) {
2370                         int release;
2371
2372                         write_lock(&n->lock);
2373                         release = cb(n);
2374                         if (release) {
2375                                 rcu_assign_pointer(*np,
2376                                         rcu_dereference_protected(n->next,
2377                                                 lockdep_is_held(&tbl->lock)));
2378                                 n->dead = 1;
2379                         } else
2380                                 np = &n->next;
2381                         write_unlock(&n->lock);
2382                         if (release)
2383                                 neigh_cleanup_and_release(n);
2384                 }
2385         }
2386 }
2387 EXPORT_SYMBOL(__neigh_for_each_release);
2388
2389 #ifdef CONFIG_PROC_FS
2390
2391 static struct neighbour *neigh_get_first(struct seq_file *seq)
2392 {
2393         struct neigh_seq_state *state = seq->private;
2394         struct net *net = seq_file_net(seq);
2395         struct neigh_hash_table *nht = state->nht;
2396         struct neighbour *n = NULL;
2397         int bucket = state->bucket;
2398
2399         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2400         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2401                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2402
2403                 while (n) {
2404                         if (!net_eq(dev_net(n->dev), net))
2405                                 goto next;
2406                         if (state->neigh_sub_iter) {
2407                                 loff_t fakep = 0;
2408                                 void *v;
2409
2410                                 v = state->neigh_sub_iter(state, n, &fakep);
2411                                 if (!v)
2412                                         goto next;
2413                         }
2414                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2415                                 break;
2416                         if (n->nud_state & ~NUD_NOARP)
2417                                 break;
2418 next:
2419                         n = rcu_dereference_bh(n->next);
2420                 }
2421
2422                 if (n)
2423                         break;
2424         }
2425         state->bucket = bucket;
2426
2427         return n;
2428 }
2429
2430 static struct neighbour *neigh_get_next(struct seq_file *seq,
2431                                         struct neighbour *n,
2432                                         loff_t *pos)
2433 {
2434         struct neigh_seq_state *state = seq->private;
2435         struct net *net = seq_file_net(seq);
2436         struct neigh_hash_table *nht = state->nht;
2437
2438         if (state->neigh_sub_iter) {
2439                 void *v = state->neigh_sub_iter(state, n, pos);
2440                 if (v)
2441                         return n;
2442         }
2443         n = rcu_dereference_bh(n->next);
2444
2445         while (1) {
2446                 while (n) {
2447                         if (!net_eq(dev_net(n->dev), net))
2448                                 goto next;
2449                         if (state->neigh_sub_iter) {
2450                                 void *v = state->neigh_sub_iter(state, n, pos);
2451                                 if (v)
2452                                         return n;
2453                                 goto next;
2454                         }
2455                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2456                                 break;
2457
2458                         if (n->nud_state & ~NUD_NOARP)
2459                                 break;
2460 next:
2461                         n = rcu_dereference_bh(n->next);
2462                 }
2463
2464                 if (n)
2465                         break;
2466
2467                 if (++state->bucket >= (1 << nht->hash_shift))
2468                         break;
2469
2470                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2471         }
2472
2473         if (n && pos)
2474                 --(*pos);
2475         return n;
2476 }
2477
2478 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2479 {
2480         struct neighbour *n = neigh_get_first(seq);
2481
2482         if (n) {
2483                 --(*pos);
2484                 while (*pos) {
2485                         n = neigh_get_next(seq, n, pos);
2486                         if (!n)
2487                                 break;
2488                 }
2489         }
2490         return *pos ? NULL : n;
2491 }
2492
2493 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2494 {
2495         struct neigh_seq_state *state = seq->private;
2496         struct net *net = seq_file_net(seq);
2497         struct neigh_table *tbl = state->tbl;
2498         struct pneigh_entry *pn = NULL;
2499         int bucket = state->bucket;
2500
2501         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2502         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2503                 pn = tbl->phash_buckets[bucket];
2504                 while (pn && !net_eq(pneigh_net(pn), net))
2505                         pn = pn->next;
2506                 if (pn)
2507                         break;
2508         }
2509         state->bucket = bucket;
2510
2511         return pn;
2512 }
2513
2514 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2515                                             struct pneigh_entry *pn,
2516                                             loff_t *pos)
2517 {
2518         struct neigh_seq_state *state = seq->private;
2519         struct net *net = seq_file_net(seq);
2520         struct neigh_table *tbl = state->tbl;
2521
2522         do {
2523                 pn = pn->next;
2524         } while (pn && !net_eq(pneigh_net(pn), net));
2525
2526         while (!pn) {
2527                 if (++state->bucket > PNEIGH_HASHMASK)
2528                         break;
2529                 pn = tbl->phash_buckets[state->bucket];
2530                 while (pn && !net_eq(pneigh_net(pn), net))
2531                         pn = pn->next;
2532                 if (pn)
2533                         break;
2534         }
2535
2536         if (pn && pos)
2537                 --(*pos);
2538
2539         return pn;
2540 }
2541
2542 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2543 {
2544         struct pneigh_entry *pn = pneigh_get_first(seq);
2545
2546         if (pn) {
2547                 --(*pos);
2548                 while (*pos) {
2549                         pn = pneigh_get_next(seq, pn, pos);
2550                         if (!pn)
2551                                 break;
2552                 }
2553         }
2554         return *pos ? NULL : pn;
2555 }
2556
2557 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2558 {
2559         struct neigh_seq_state *state = seq->private;
2560         void *rc;
2561         loff_t idxpos = *pos;
2562
2563         rc = neigh_get_idx(seq, &idxpos);
2564         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2565                 rc = pneigh_get_idx(seq, &idxpos);
2566
2567         return rc;
2568 }
2569
2570 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2571         __acquires(rcu_bh)
2572 {
2573         struct neigh_seq_state *state = seq->private;
2574
2575         state->tbl = tbl;
2576         state->bucket = 0;
2577         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2578
2579         rcu_read_lock_bh();
2580         state->nht = rcu_dereference_bh(tbl->nht);
2581
2582         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2583 }
2584 EXPORT_SYMBOL(neigh_seq_start);
2585
2586 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2587 {
2588         struct neigh_seq_state *state;
2589         void *rc;
2590
2591         if (v == SEQ_START_TOKEN) {
2592                 rc = neigh_get_first(seq);
2593                 goto out;
2594         }
2595
2596         state = seq->private;
2597         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2598                 rc = neigh_get_next(seq, v, NULL);
2599                 if (rc)
2600                         goto out;
2601                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2602                         rc = pneigh_get_first(seq);
2603         } else {
2604                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2605                 rc = pneigh_get_next(seq, v, NULL);
2606         }
2607 out:
2608         ++(*pos);
2609         return rc;
2610 }
2611 EXPORT_SYMBOL(neigh_seq_next);
2612
/*
 * seq_file ->stop helper for neighbour tables.
 *
 * Drops the RCU-bh read lock that neigh_seq_start() took; neither @seq
 * nor @v is used.
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(rcu_bh)
{
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2619
2620 /* statistics via seq_file */
2621
2622 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2623 {
2624         struct neigh_table *tbl = seq->private;
2625         int cpu;
2626
2627         if (*pos == 0)
2628                 return SEQ_START_TOKEN;
2629
2630         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2631                 if (!cpu_possible(cpu))
2632                         continue;
2633                 *pos = cpu+1;
2634                 return per_cpu_ptr(tbl->stats, cpu);
2635         }
2636         return NULL;
2637 }
2638
2639 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2640 {
2641         struct neigh_table *tbl = seq->private;
2642         int cpu;
2643
2644         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2645                 if (!cpu_possible(cpu))
2646                         continue;
2647                 *pos = cpu+1;
2648                 return per_cpu_ptr(tbl->stats, cpu);
2649         }
2650         return NULL;
2651 }
2652
/*
 * ->stop for the per-CPU statistics file.
 *
 * Intentionally empty: neigh_stat_seq_start()/neigh_stat_seq_next() take
 * no lock and allocate nothing, so there is nothing to undo here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2657
2658 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2659 {
2660         struct neigh_table *tbl = seq->private;
2661         struct neigh_statistics *st = v;
2662
2663         if (v == SEQ_START_TOKEN) {
2664                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2665                 return 0;
2666         }
2667
2668         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2669                         "%08lx %08lx  %08lx %08lx %08lx\n",
2670                    atomic_read(&tbl->entries),
2671
2672                    st->allocs,
2673                    st->destroys,
2674                    st->hash_grows,
2675
2676                    st->lookups,
2677                    st->hits,
2678
2679                    st->res_failed,
2680
2681                    st->rcv_probes_mcast,
2682                    st->rcv_probes_ucast,
2683
2684                    st->periodic_gc_runs,
2685                    st->forced_gc_runs,
2686                    st->unres_discards
2687                    );
2688
2689         return 0;
2690 }
2691
/*
 * seq_file iterator callbacks for the per-CPU statistics file; handed to
 * seq_open() by neigh_stat_seq_open().
 */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2698
2699 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2700 {
2701         int ret = seq_open(file, &neigh_stat_seq_ops);
2702
2703         if (!ret) {
2704                 struct seq_file *sf = file->private_data;
2705                 sf->private = PDE(inode)->data;
2706         }
2707         return ret;
2708 };
2709
/*
 * File operations for the per-CPU statistics file: only ->open is
 * specific to this file, reading, seeking and release use the generic
 * seq_file helpers.
 */
static const struct file_operations neigh_stat_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = neigh_stat_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2717
2718 #endif /* CONFIG_PROC_FS */
2719
2720 static inline size_t neigh_nlmsg_size(void)
2721 {
2722         return NLMSG_ALIGN(sizeof(struct ndmsg))
2723                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2724                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2725                + nla_total_size(sizeof(struct nda_cacheinfo))
2726                + nla_total_size(4); /* NDA_PROBES */
2727 }
2728
2729 static void __neigh_notify(struct neighbour *n, int type, int flags)
2730 {
2731         struct net *net = dev_net(n->dev);
2732         struct sk_buff *skb;
2733         int err = -ENOBUFS;
2734
2735         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2736         if (skb == NULL)
2737                 goto errout;
2738
2739         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2740         if (err < 0) {
2741                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2742                 WARN_ON(err == -EMSGSIZE);
2743                 kfree_skb(skb);
2744                 goto errout;
2745         }
2746         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2747         return;
2748 errout:
2749         if (err < 0)
2750                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2751 }
2752
2753 #ifdef CONFIG_ARPD
/*
 * Emit an RTM_GETNEIGH netlink message flagged NLM_F_REQUEST for
 * neighbour @n via __neigh_notify().  Only built with CONFIG_ARPD.
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2759 #endif /* CONFIG_ARPD */
2760
2761 #ifdef CONFIG_SYSCTL
2762
2763 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2764                            size_t *lenp, loff_t *ppos)
2765 {
2766         int size, ret;
2767         ctl_table tmp = *ctl;
2768
2769         tmp.data = &size;
2770         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2771         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2772         if (write && !ret)
2773                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2774         return ret;
2775 }
2776
/*
 * Indices into neigh_sysctl_table.neigh_vars[].
 *
 * NEIGH_VAR_MAX is the number of real entries; the array is declared with
 * NEIGH_VAR_MAX + 1 slots so that an empty terminator follows them.  The
 * GC_* entries come last because neigh_sysctl_register() cuts the table
 * off at NEIGH_VAR_GC_INTERVAL when registering for a specific device.
 */
enum {
        NEIGH_VAR_MCAST_PROBE,
        NEIGH_VAR_UCAST_PROBE,
        NEIGH_VAR_APP_PROBE,
        NEIGH_VAR_RETRANS_TIME,
        NEIGH_VAR_BASE_REACHABLE_TIME,
        NEIGH_VAR_DELAY_PROBE_TIME,
        NEIGH_VAR_GC_STALETIME,
        NEIGH_VAR_QUEUE_LEN,
        NEIGH_VAR_QUEUE_LEN_BYTES,
        NEIGH_VAR_PROXY_QLEN,
        NEIGH_VAR_ANYCAST_DELAY,
        NEIGH_VAR_PROXY_DELAY,
        NEIGH_VAR_LOCKTIME,
        NEIGH_VAR_RETRANS_TIME_MS,
        NEIGH_VAR_BASE_REACHABLE_TIME_MS,
        NEIGH_VAR_GC_INTERVAL,
        NEIGH_VAR_GC_THRESH1,
        NEIGH_VAR_GC_THRESH2,
        NEIGH_VAR_GC_THRESH3,
        NEIGH_VAR_MAX
};
2799
2800 static struct neigh_sysctl_table {
2801         struct ctl_table_header *sysctl_header;
2802         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2803         char *dev_name;
2804 } neigh_sysctl_template __read_mostly = {
2805         .neigh_vars = {
2806                 [NEIGH_VAR_MCAST_PROBE] = {
2807                         .procname       = "mcast_solicit",
2808                         .maxlen         = sizeof(int),
2809                         .mode           = 0644,
2810                         .proc_handler   = proc_dointvec,
2811                 },
2812                 [NEIGH_VAR_UCAST_PROBE] = {
2813                         .procname       = "ucast_solicit",
2814                         .maxlen         = sizeof(int),
2815                         .mode           = 0644,
2816                         .proc_handler   = proc_dointvec,
2817                 },
2818                 [NEIGH_VAR_APP_PROBE] = {
2819                         .procname       = "app_solicit",
2820                         .maxlen         = sizeof(int),
2821                         .mode           = 0644,
2822                         .proc_handler   = proc_dointvec,
2823                 },
2824                 [NEIGH_VAR_RETRANS_TIME] = {
2825                         .procname       = "retrans_time",
2826                         .maxlen         = sizeof(int),
2827                         .mode           = 0644,
2828                         .proc_handler   = proc_dointvec_userhz_jiffies,
2829                 },
2830                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2831                         .procname       = "base_reachable_time",
2832                         .maxlen         = sizeof(int),
2833                         .mode           = 0644,
2834                         .proc_handler   = proc_dointvec_jiffies,
2835                 },
2836                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2837                         .procname       = "delay_first_probe_time",
2838                         .maxlen         = sizeof(int),
2839                         .mode           = 0644,
2840                         .proc_handler   = proc_dointvec_jiffies,
2841                 },
2842                 [NEIGH_VAR_GC_STALETIME] = {
2843                         .procname       = "gc_stale_time",
2844                         .maxlen         = sizeof(int),
2845                         .mode           = 0644,
2846                         .proc_handler   = proc_dointvec_jiffies,
2847                 },
2848                 [NEIGH_VAR_QUEUE_LEN] = {
2849                         .procname       = "unres_qlen",
2850                         .maxlen         = sizeof(int),
2851                         .mode           = 0644,
2852                         .proc_handler   = proc_unres_qlen,
2853                 },
2854                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2855                         .procname       = "unres_qlen_bytes",
2856                         .maxlen         = sizeof(int),
2857                         .mode           = 0644,
2858                         .proc_handler   = proc_dointvec,
2859                 },
2860                 [NEIGH_VAR_PROXY_QLEN] = {
2861                         .procname       = "proxy_qlen",
2862                         .maxlen         = sizeof(int),
2863                         .mode           = 0644,
2864                         .proc_handler   = proc_dointvec,
2865                 },
2866                 [NEIGH_VAR_ANYCAST_DELAY] = {
2867                         .procname       = "anycast_delay",
2868                         .maxlen         = sizeof(int),
2869                         .mode           = 0644,
2870                         .proc_handler   = proc_dointvec_userhz_jiffies,
2871                 },
2872                 [NEIGH_VAR_PROXY_DELAY] = {
2873                         .procname       = "proxy_delay",
2874                         .maxlen         = sizeof(int),
2875                         .mode           = 0644,
2876                         .proc_handler   = proc_dointvec_userhz_jiffies,
2877                 },
2878                 [NEIGH_VAR_LOCKTIME] = {
2879                         .procname       = "locktime",
2880                         .maxlen         = sizeof(int),
2881                         .mode           = 0644,
2882                         .proc_handler   = proc_dointvec_userhz_jiffies,
2883                 },
2884                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2885                         .procname       = "retrans_time_ms",
2886                         .maxlen         = sizeof(int),
2887                         .mode           = 0644,
2888                         .proc_handler   = proc_dointvec_ms_jiffies,
2889                 },
2890                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2891                         .procname       = "base_reachable_time_ms",
2892                         .maxlen         = sizeof(int),
2893                         .mode           = 0644,
2894                         .proc_handler   = proc_dointvec_ms_jiffies,
2895                 },
2896                 [NEIGH_VAR_GC_INTERVAL] = {
2897                         .procname       = "gc_interval",
2898                         .maxlen         = sizeof(int),
2899                         .mode           = 0644,
2900                         .proc_handler   = proc_dointvec_jiffies,
2901                 },
2902                 [NEIGH_VAR_GC_THRESH1] = {
2903                         .procname       = "gc_thresh1",
2904                         .maxlen         = sizeof(int),
2905                         .mode           = 0644,
2906                         .proc_handler   = proc_dointvec,
2907                 },
2908                 [NEIGH_VAR_GC_THRESH2] = {
2909                         .procname       = "gc_thresh2",
2910                         .maxlen         = sizeof(int),
2911                         .mode           = 0644,
2912                         .proc_handler   = proc_dointvec,
2913                 },
2914                 [NEIGH_VAR_GC_THRESH3] = {
2915                         .procname       = "gc_thresh3",
2916                         .maxlen         = sizeof(int),
2917                         .mode           = 0644,
2918                         .proc_handler   = proc_dointvec,
2919                 },
2920                 {},
2921         },
2922 };
2923
2924 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2925                           char *p_name, proc_handler *handler)
2926 {
2927         struct neigh_sysctl_table *t;
2928         const char *dev_name_source = NULL;
2929
2930 #define NEIGH_CTL_PATH_ROOT     0
2931 #define NEIGH_CTL_PATH_PROTO    1
2932 #define NEIGH_CTL_PATH_NEIGH    2
2933 #define NEIGH_CTL_PATH_DEV      3
2934
2935         struct ctl_path neigh_path[] = {
2936                 { .procname = "net",     },
2937                 { .procname = "proto",   },
2938                 { .procname = "neigh",   },
2939                 { .procname = "default", },
2940                 { },
2941         };
2942
2943         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2944         if (!t)
2945                 goto err;
2946
2947         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2948         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2949         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2950         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2951         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2952         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2953         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2954         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2955         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2956         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2957         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2958         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2959         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2960         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2961         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2962
2963         if (dev) {
2964                 dev_name_source = dev->name;
2965                 /* Terminate the table early */
2966                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2967                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2968         } else {
2969                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2970                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2971                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2972                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2973                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2974         }
2975
2976
2977         if (handler) {
2978                 /* RetransTime */
2979                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2980                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2981                 /* ReachableTime */
2982                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2983                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2984                 /* RetransTime (in milliseconds)*/
2985                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2986                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2987                 /* ReachableTime (in milliseconds) */
2988                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2989                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2990         }
2991
2992         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2993         if (!t->dev_name)
2994                 goto free;
2995
2996         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2997         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2998
2999         t->sysctl_header =
3000                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
3001         if (!t->sysctl_header)
3002                 goto free_procname;
3003
3004         p->sysctl_table = t;
3005         return 0;
3006
3007 free_procname:
3008         kfree(t->dev_name);
3009 free:
3010         kfree(t);
3011 err:
3012         return -ENOBUFS;
3013 }
3014 EXPORT_SYMBOL(neigh_sysctl_register);
3015
3016 void neigh_sysctl_unregister(struct neigh_parms *p)
3017 {
3018         if (p->sysctl_table) {
3019                 struct neigh_sysctl_table *t = p->sysctl_table;
3020                 p->sysctl_table = NULL;
3021                 unregister_sysctl_table(t->sysctl_header);
3022                 kfree(t->dev_name);
3023                 kfree(t);
3024         }
3025 }
3026 EXPORT_SYMBOL(neigh_sysctl_unregister);
3027
3028 #endif  /* CONFIG_SYSCTL */
3029
/*
 * Hook the neighbour code into rtnetlink for PF_UNSPEC.
 *
 * RTM_NEWNEIGH, RTM_DELNEIGH and RTM_SETNEIGHTBL each get a handler in the
 * third argument slot; RTM_GETNEIGH and RTM_GETNEIGHTBL get theirs in the
 * fourth slot (presumably the dump callback - confirm against
 * rtnl_register()).  Always returns 0.
 */
static int __init neigh_init(void)
{
        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
                      NULL);
        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

        return 0;
}

/* Run neigh_init() at subsys initcall time. */
subsys_initcall(neigh_init);
3044