]> git.karo-electronics.de Git - linux-beck.git/blob - net/ipv4/devinet.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-beck.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/*
 * Baseline IPv4 device configuration.  Slots in .data are indexed by
 * IPV4_DEVCONF_<name> - 1; every slot not listed here is zero-initialized.
 * NOTE(review): presumably the backing store for the "all" settings --
 * confirm against the netns init code, which is outside this view.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
78
/*
 * Same initial values as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE enabled.
 * NOTE(review): presumably what net->ipv4.devconf_dflt starts from (the
 * template inetdev_init() copies into each new in_device) -- confirm where
 * the pointer is assigned; that code is outside this view.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
88
/* Read setting @attr from the per-namespace default devconf of @net. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
91
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR handlers in this file.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]		= { .type = NLA_U32 },
	[IFA_ADDRESS]		= { .type = NLA_U32 },
	[IFA_BROADCAST]		= { .type = NLA_U32 },
	/* label is bounded so it fits a buffer of IFNAMSIZ including the NUL */
	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
};
99
/* The address hash has 2^IN4_ADDR_HSIZE_SHIFT (256) buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash of all configured in_ifaddr entries, keyed by inet_addr_hash().
 * Writers serialize on inet_addr_hash_lock; readers in this file walk the
 * buckets with the _rcu list iterators.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145                 if (ifa->ifa_local == addr) {
146                         struct net_device *dev = ifa->ifa_dev->dev;
147
148                         if (!net_eq(dev_net(dev), net))
149                                 continue;
150                         result = dev;
151                         break;
152                 }
153         }
154         if (!result) {
155                 struct flowi4 fl4 = { .daddr = addr };
156                 struct fib_result res = { 0 };
157                 struct fib_table *local;
158
159                 /* Fallback to FIB local table so that communication
160                  * over loopback subnets work.
161                  */
162                 local = fib_get_table(net, RT_TABLE_LOCAL);
163                 if (local &&
164                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165                     res.type == RTN_LOCAL)
166                         result = FIB_RES_DEV(res);
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
/* Defined later in the file; used here to announce address changes. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or deleted in this file.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are no-ops. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
191
192 /* Locks all the inet devices. */
193
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202         if (ifa->ifa_dev)
203                 in_dev_put(ifa->ifa_dev);
204         kfree(ifa);
205 }
206
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214         struct net_device *dev = idev->dev;
215
216         WARN_ON(idev->ifa_list);
217         WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221         dev_put(dev);
222         if (!idev->dead)
223                 pr_err("Freeing alive in_device %p\n", idev);
224         else
225                 kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231         struct in_device *in_dev;
232
233         ASSERT_RTNL();
234
235         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236         if (!in_dev)
237                 goto out;
238         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239                         sizeof(in_dev->cnf));
240         in_dev->cnf.sysctl = NULL;
241         in_dev->dev = dev;
242         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243         if (!in_dev->arp_parms)
244                 goto out_kfree;
245         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246                 dev_disable_lro(dev);
247         /* Reference in_dev->dev */
248         dev_hold(dev);
249         /* Account for reference dev->ip_ptr (below) */
250         in_dev_hold(in_dev);
251
252         devinet_sysctl_register(in_dev);
253         ip_mc_init_dev(in_dev);
254         if (dev->flags & IFF_UP)
255                 ip_mc_up(in_dev);
256
257         /* we can receive as soon as ip_ptr is set -- do this last */
258         rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260         return in_dev;
261 out_kfree:
262         kfree(in_dev);
263         in_dev = NULL;
264         goto out;
265 }
266
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269         struct in_device *idev = container_of(head, struct in_device, rcu_head);
270         in_dev_put(idev);
271 }
272
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275         struct in_ifaddr *ifa;
276         struct net_device *dev;
277
278         ASSERT_RTNL();
279
280         dev = in_dev->dev;
281
282         in_dev->dead = 1;
283
284         ip_mc_destroy_dev(in_dev);
285
286         while ((ifa = in_dev->ifa_list) != NULL) {
287                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288                 inet_free_ifa(ifa);
289         }
290
291         RCU_INIT_POINTER(dev->ip_ptr, NULL);
292
293         devinet_sysctl_unregister(in_dev);
294         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295         arp_ifdown(dev);
296
297         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302         rcu_read_lock();
303         for_primary_ifa(in_dev) {
304                 if (inet_ifa_match(a, ifa)) {
305                         if (!b || inet_ifa_match(b, ifa)) {
306                                 rcu_read_unlock();
307                                 return 1;
308                         }
309                 }
310         } endfor_ifa(in_dev);
311         rcu_read_unlock();
312         return 0;
313 }
314
/*
 * Remove the address *@ifap from @in_dev's address list.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the ->ifa_next / list head slot) that currently
 *           points at the address to delete
 * @destroy: non-zero to also free the deleted address via inet_free_ifa()
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * Deleting a primary address also handles its secondaries on the same
 * subnet: they are either deleted too, or -- when secondary promotion is
 * enabled for the device -- the first one is promoted to primary.
 * Must be called under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary seen with scope >= ifa1's. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's subnet. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: unlink, announce and free it. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from the subnet are changing
	 * the primary IP, so we must remove all their routes silently
	 * and add them back later with the new prefsrc. Do this
	 * while all addresses are still on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the follower before promote is moved in the list. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* Move promote right after the last suitable primary. */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes of the remaining secondaries of this subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
414
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416                          int destroy)
417 {
418         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420
static void check_lifetime(struct work_struct *work);

/* Delayed work that runs check_lifetime() to expire/deprecate addresses. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426                              u32 portid)
427 {
428         struct in_device *in_dev = ifa->ifa_dev;
429         struct in_ifaddr *ifa1, **ifap, **last_primary;
430
431         ASSERT_RTNL();
432
433         if (!ifa->ifa_local) {
434                 inet_free_ifa(ifa);
435                 return 0;
436         }
437
438         ifa->ifa_flags &= ~IFA_F_SECONDARY;
439         last_primary = &in_dev->ifa_list;
440
441         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442              ifap = &ifa1->ifa_next) {
443                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444                     ifa->ifa_scope <= ifa1->ifa_scope)
445                         last_primary = &ifa1->ifa_next;
446                 if (ifa1->ifa_mask == ifa->ifa_mask &&
447                     inet_ifa_match(ifa1->ifa_address, ifa)) {
448                         if (ifa1->ifa_local == ifa->ifa_local) {
449                                 inet_free_ifa(ifa);
450                                 return -EEXIST;
451                         }
452                         if (ifa1->ifa_scope != ifa->ifa_scope) {
453                                 inet_free_ifa(ifa);
454                                 return -EINVAL;
455                         }
456                         ifa->ifa_flags |= IFA_F_SECONDARY;
457                 }
458         }
459
460         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461                 net_srandom(ifa->ifa_local);
462                 ifap = last_primary;
463         }
464
465         ifa->ifa_next = *ifap;
466         *ifap = ifa;
467
468         inet_hash_insert(dev_net(in_dev->dev), ifa);
469
470         cancel_delayed_work(&check_lifetime_work);
471         schedule_delayed_work(&check_lifetime_work, 0);
472
473         /* Send message first, then call notifier.
474            Notifier will trigger FIB update, so that
475            listeners of netlink will know about new ifaddr */
476         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478
479         return 0;
480 }
481
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484         return __inet_insert_ifa(ifa, NULL, 0);
485 }
486
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489         struct in_device *in_dev = __in_dev_get_rtnl(dev);
490
491         ASSERT_RTNL();
492
493         if (!in_dev) {
494                 inet_free_ifa(ifa);
495                 return -ENOBUFS;
496         }
497         ipv4_devconf_setall(in_dev);
498         if (ifa->ifa_dev != in_dev) {
499                 WARN_ON(ifa->ifa_dev);
500                 in_dev_hold(in_dev);
501                 ifa->ifa_dev = in_dev;
502         }
503         if (ipv4_is_loopback(ifa->ifa_local))
504                 ifa->ifa_scope = RT_SCOPE_HOST;
505         return inet_insert_ifa(ifa);
506 }
507
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513         struct net_device *dev;
514         struct in_device *in_dev = NULL;
515
516         rcu_read_lock();
517         dev = dev_get_by_index_rcu(net, ifindex);
518         if (dev)
519                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520         rcu_read_unlock();
521         return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524
525 /* Called only from RTNL semaphored context. No locks. */
526
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528                                     __be32 mask)
529 {
530         ASSERT_RTNL();
531
532         for_primary_ifa(in_dev) {
533                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534                         return ifa;
535         } endfor_ifa(in_dev);
536         return NULL;
537 }
538
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540 {
541         struct net *net = sock_net(skb->sk);
542         struct nlattr *tb[IFA_MAX+1];
543         struct in_device *in_dev;
544         struct ifaddrmsg *ifm;
545         struct in_ifaddr *ifa, **ifap;
546         int err = -EINVAL;
547
548         ASSERT_RTNL();
549
550         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551         if (err < 0)
552                 goto errout;
553
554         ifm = nlmsg_data(nlh);
555         in_dev = inetdev_by_index(net, ifm->ifa_index);
556         if (in_dev == NULL) {
557                 err = -ENODEV;
558                 goto errout;
559         }
560
561         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562              ifap = &ifa->ifa_next) {
563                 if (tb[IFA_LOCAL] &&
564                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565                         continue;
566
567                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568                         continue;
569
570                 if (tb[IFA_ADDRESS] &&
571                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573                         continue;
574
575                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576                 return 0;
577         }
578
579         err = -EADDRNOTAVAIL;
580 errout:
581         return err;
582 }
583
/* Lifetime value that check_lifetime() treats as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
585
586 static void check_lifetime(struct work_struct *work)
587 {
588         unsigned long now, next, next_sec, next_sched;
589         struct in_ifaddr *ifa;
590         int i;
591
592         now = jiffies;
593         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594
595         rcu_read_lock();
596         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598                         unsigned long age;
599
600                         if (ifa->ifa_flags & IFA_F_PERMANENT)
601                                 continue;
602
603                         /* We try to batch several events at once. */
604                         age = (now - ifa->ifa_tstamp +
605                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
606
607                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608                             age >= ifa->ifa_valid_lft) {
609                                 struct in_ifaddr **ifap ;
610
611                                 rtnl_lock();
612                                 for (ifap = &ifa->ifa_dev->ifa_list;
613                                      *ifap != NULL; ifap = &ifa->ifa_next) {
614                                         if (*ifap == ifa)
615                                                 inet_del_ifa(ifa->ifa_dev,
616                                                              ifap, 1);
617                                 }
618                                 rtnl_unlock();
619                         } else if (ifa->ifa_preferred_lft ==
620                                    INFINITY_LIFE_TIME) {
621                                 continue;
622                         } else if (age >= ifa->ifa_preferred_lft) {
623                                 if (time_before(ifa->ifa_tstamp +
624                                                 ifa->ifa_valid_lft * HZ, next))
625                                         next = ifa->ifa_tstamp +
626                                                ifa->ifa_valid_lft * HZ;
627
628                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629                                         ifa->ifa_flags |= IFA_F_DEPRECATED;
630                                         rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631                                 }
632                         } else if (time_before(ifa->ifa_tstamp +
633                                                ifa->ifa_preferred_lft * HZ,
634                                                next)) {
635                                 next = ifa->ifa_tstamp +
636                                        ifa->ifa_preferred_lft * HZ;
637                         }
638                 }
639         }
640         rcu_read_unlock();
641
642         next_sec = round_jiffies_up(next);
643         next_sched = next;
644
645         /* If rounded timeout is accurate enough, accept it. */
646         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647                 next_sched = next_sec;
648
649         now = jiffies;
650         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
653
654         schedule_delayed_work(&check_lifetime_work, next_sched - now);
655 }
656
657 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
658                              __u32 prefered_lft)
659 {
660         unsigned long timeout;
661
662         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
663
664         timeout = addrconf_timeout_fixup(valid_lft, HZ);
665         if (addrconf_finite_timeout(timeout))
666                 ifa->ifa_valid_lft = timeout;
667         else
668                 ifa->ifa_flags |= IFA_F_PERMANENT;
669
670         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671         if (addrconf_finite_timeout(timeout)) {
672                 if (timeout == 0)
673                         ifa->ifa_flags |= IFA_F_DEPRECATED;
674                 ifa->ifa_preferred_lft = timeout;
675         }
676         ifa->ifa_tstamp = jiffies;
677         if (!ifa->ifa_cstamp)
678                 ifa->ifa_cstamp = ifa->ifa_tstamp;
679 }
680
681 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
683 {
684         struct nlattr *tb[IFA_MAX+1];
685         struct in_ifaddr *ifa;
686         struct ifaddrmsg *ifm;
687         struct net_device *dev;
688         struct in_device *in_dev;
689         int err;
690
691         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
692         if (err < 0)
693                 goto errout;
694
695         ifm = nlmsg_data(nlh);
696         err = -EINVAL;
697         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
698                 goto errout;
699
700         dev = __dev_get_by_index(net, ifm->ifa_index);
701         err = -ENODEV;
702         if (dev == NULL)
703                 goto errout;
704
705         in_dev = __in_dev_get_rtnl(dev);
706         err = -ENOBUFS;
707         if (in_dev == NULL)
708                 goto errout;
709
710         ifa = inet_alloc_ifa();
711         if (ifa == NULL)
712                 /*
713                  * A potential indev allocation can be left alive, it stays
714                  * assigned to its device and is destroy with it.
715                  */
716                 goto errout;
717
718         ipv4_devconf_setall(in_dev);
719         in_dev_hold(in_dev);
720
721         if (tb[IFA_ADDRESS] == NULL)
722                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
723
724         INIT_HLIST_NODE(&ifa->hash);
725         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727         ifa->ifa_flags = ifm->ifa_flags;
728         ifa->ifa_scope = ifm->ifa_scope;
729         ifa->ifa_dev = in_dev;
730
731         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
733
734         if (tb[IFA_BROADCAST])
735                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
736
737         if (tb[IFA_LABEL])
738                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
739         else
740                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
741
742         if (tb[IFA_CACHEINFO]) {
743                 struct ifa_cacheinfo *ci;
744
745                 ci = nla_data(tb[IFA_CACHEINFO]);
746                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
747                         err = -EINVAL;
748                         goto errout;
749                 }
750                 *pvalid_lft = ci->ifa_valid;
751                 *pprefered_lft = ci->ifa_prefered;
752         }
753
754         return ifa;
755
756 errout:
757         return ERR_PTR(err);
758 }
759
760 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
761 {
762         struct in_device *in_dev = ifa->ifa_dev;
763         struct in_ifaddr *ifa1, **ifap;
764
765         if (!ifa->ifa_local)
766                 return NULL;
767
768         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769              ifap = &ifa1->ifa_next) {
770                 if (ifa1->ifa_mask == ifa->ifa_mask &&
771                     inet_ifa_match(ifa1->ifa_address, ifa) &&
772                     ifa1->ifa_local == ifa->ifa_local)
773                         return ifa1;
774         }
775         return NULL;
776 }
777
778 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
779 {
780         struct net *net = sock_net(skb->sk);
781         struct in_ifaddr *ifa;
782         struct in_ifaddr *ifa_existing;
783         __u32 valid_lft = INFINITY_LIFE_TIME;
784         __u32 prefered_lft = INFINITY_LIFE_TIME;
785
786         ASSERT_RTNL();
787
788         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
789         if (IS_ERR(ifa))
790                 return PTR_ERR(ifa);
791
792         ifa_existing = find_matching_ifa(ifa);
793         if (!ifa_existing) {
794                 /* It would be best to check for !NLM_F_CREATE here but
795                  * userspace alreay relies on not having to provide this.
796                  */
797                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
799         } else {
800                 inet_free_ifa(ifa);
801
802                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
803                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
804                         return -EEXIST;
805                 ifa = ifa_existing;
806                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
807                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
808                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
809         }
810         return 0;
811 }
812
813 /*
814  *      Determine a default network mask, based on the IP address.
815  */
816
817 static int inet_abc_len(__be32 addr)
818 {
819         int rc = -1;    /* Something else, probably a multicast. */
820
821         if (ipv4_is_zeronet(addr))
822                 rc = 0;
823         else {
824                 __u32 haddr = ntohl(addr);
825
826                 if (IN_CLASSA(haddr))
827                         rc = 8;
828                 else if (IN_CLASSB(haddr))
829                         rc = 16;
830                 else if (IN_CLASSC(haddr))
831                         rc = 24;
832         }
833
834         return rc;
835 }
836
837
/*
 *      Handle the IPv4 interface-address ioctls (SIOC[GS]IFADDR,
 *      SIOC[GS]IFBRDADDR, SIOC[GS]IFDSTADDR, SIOC[GS]IFNETMASK and
 *      SIOCSIFFLAGS).
 *
 *      @arg points to a user-space struct ifreq.  The device is looked up
 *      by ifr_name with any ":alias" suffix cut off; the address entry is
 *      then looked up by the full label (and, for the "get" commands with an
 *      AF_INET address supplied, by label plus address first).
 *
 *      Returns 0 on success or a negative errno.  The "get" commands copy
 *      the filled-in ifreq back to @arg.  Any command not listed above
 *      yields -EINVAL.
 */
838 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
839 {
840         struct ifreq ifr;
841         struct sockaddr_in sin_orig;
842         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
843         struct in_device *in_dev;
844         struct in_ifaddr **ifap = NULL;
845         struct in_ifaddr *ifa = NULL;
846         struct net_device *dev;
847         char *colon;
848         int ret = -EFAULT;
849         int tryaddrmatch = 0;
850
851         /*
852          *      Fetch the caller's info block into kernel space
853          */
854
855         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
856                 goto out;
            /* The name came from user space: force NUL termination. */
857         ifr.ifr_name[IFNAMSIZ - 1] = 0;
858
859         /* save original address for comparison */
860         memcpy(&sin_orig, sin, sizeof(*sin));
861
            /* Cut the name at ':' so that the device lookups below see the
             * base device name; the ':' is put back once the device is found.
             */
862         colon = strchr(ifr.ifr_name, ':');
863         if (colon)
864                 *colon = 0;
865
866         dev_load(net, ifr.ifr_name);
867
            /* First pass over cmd: permission and argument checks only,
             * done before rtnl_lock() is taken.
             */
868         switch (cmd) {
869         case SIOCGIFADDR:       /* Get interface address */
870         case SIOCGIFBRDADDR:    /* Get the broadcast address */
871         case SIOCGIFDSTADDR:    /* Get the destination address */
872         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
873                 /* Note that these ioctls will not sleep,
874                    so that we do not impose a lock.
875                    One day we will be forced to put shlock here (I mean SMP)
876                  */
877                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
878                 memset(sin, 0, sizeof(*sin));
879                 sin->sin_family = AF_INET;
880                 break;
881
882         case SIOCSIFFLAGS:
883                 ret = -EPERM;
884                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
885                         goto out;
886                 break;
887         case SIOCSIFADDR:       /* Set interface address (and family) */
888         case SIOCSIFBRDADDR:    /* Set the broadcast address */
889         case SIOCSIFDSTADDR:    /* Set the destination address */
890         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
891                 ret = -EPERM;
892                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
893                         goto out;
894                 ret = -EINVAL;
895                 if (sin->sin_family != AF_INET)
896                         goto out;
897                 break;
898         default:
899                 ret = -EINVAL;
900                 goto out;
901         }
902
903         rtnl_lock();
904
905         ret = -ENODEV;
906         dev = __dev_get_by_name(net, ifr.ifr_name);
907         if (!dev)
908                 goto done;
909
            /* Restore the full "dev:alias" label for the label comparisons. */
910         if (colon)
911                 *colon = ':';
912
913         in_dev = __in_dev_get_rtnl(dev);
914         if (in_dev) {
915                 if (tryaddrmatch) {
916                         /* Matthias Andree */
917                         /* compare label and address (4.4BSD style) */
918                         /* note: we only do this for a limited set of ioctls
919                            and only if the original address family was AF_INET.
920                            This is checked above. */
921                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
922                              ifap = &ifa->ifa_next) {
923                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
924                                     sin_orig.sin_addr.s_addr ==
925                                                         ifa->ifa_local) {
926                                         break; /* found */
927                                 }
928                         }
929                 }
930                 /* we didn't get a match, maybe the application is
931                    4.3BSD-style and passed in junk so we fall back to
932                    comparing just the label */
933                 if (!ifa) {
934                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
935                              ifap = &ifa->ifa_next)
936                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
937                                         break;
938                 }
939         }
940
            /* Only SIOCSIFADDR (which allocates a new entry) and SIOCSIFFLAGS
             * (which re-checks ifa itself) may proceed without a matching
             * address; every other command dereferences ifa below.
             */
941         ret = -EADDRNOTAVAIL;
942         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
943                 goto done;
944
945         switch (cmd) {
946         case SIOCGIFADDR:       /* Get interface address */
947                 sin->sin_addr.s_addr = ifa->ifa_local;
948                 goto rarok;
949
950         case SIOCGIFBRDADDR:    /* Get the broadcast address */
951                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
952                 goto rarok;
953
954         case SIOCGIFDSTADDR:    /* Get the destination address */
955                 sin->sin_addr.s_addr = ifa->ifa_address;
956                 goto rarok;
957
958         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
959                 sin->sin_addr.s_addr = ifa->ifa_mask;
960                 goto rarok;
961
962         case SIOCSIFFLAGS:
                    /* Flags on an alias label ("dev:N"): clearing IFF_UP
                     * deletes that address, any other request is accepted
                     * without touching the device.  Without an alias the
                     * flags are applied to the device itself.
                     */
963                 if (colon) {
964                         ret = -EADDRNOTAVAIL;
965                         if (!ifa)
966                                 break;
967                         ret = 0;
968                         if (!(ifr.ifr_flags & IFF_UP))
969                                 inet_del_ifa(in_dev, ifap, 1);
970                         break;
971                 }
972                 ret = dev_change_flags(dev, ifr.ifr_flags);
973                 break;
974
975         case SIOCSIFADDR:       /* Set interface address (and family) */
976                 ret = -EINVAL;
977                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
978                         break;
979
980                 if (!ifa) {
981                         ret = -ENOBUFS;
982                         ifa = inet_alloc_ifa();
983                         if (!ifa)
984                                 break;
985                         INIT_HLIST_NODE(&ifa->hash);
986                         if (colon)
987                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
988                         else
989                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
990                 } else {
991                         ret = 0;
992                         if (ifa->ifa_local == sin->sin_addr.s_addr)
993                                 break;
                            /* NOTE(review): the last argument is 0 here and 1
                             * in the SIOCSIFFLAGS case; presumably 0 unlinks
                             * the entry without freeing it so it can be
                             * modified and re-inserted - confirm against
                             * inet_del_ifa().
                             */
994                         inet_del_ifa(in_dev, ifap, 0);
995                         ifa->ifa_broadcast = 0;
996                         ifa->ifa_scope = 0;
997                 }
998
999                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1000
                     /* Derive prefix length, mask and broadcast from the
                      * address class, except on point-to-point devices which
                      * get a /32.
                      */
1001                 if (!(dev->flags & IFF_POINTOPOINT)) {
1002                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1003                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1004                         if ((dev->flags & IFF_BROADCAST) &&
1005                             ifa->ifa_prefixlen < 31)
1006                                 ifa->ifa_broadcast = ifa->ifa_address |
1007                                                      ~ifa->ifa_mask;
1008                 } else {
1009                         ifa->ifa_prefixlen = 32;
1010                         ifa->ifa_mask = inet_make_mask(32);
1011                 }
1012                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1013                 ret = inet_set_ifa(dev, ifa);
1014                 break;
1015
1016         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1017                 ret = 0;
1018                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1019                         inet_del_ifa(in_dev, ifap, 0);
1020                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1021                         inet_insert_ifa(ifa);
1022                 }
1023                 break;
1024
1025         case SIOCSIFDSTADDR:    /* Set the destination address */
1026                 ret = 0;
1027                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1028                         break;
1029                 ret = -EINVAL;
1030                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1031                         break;
1032                 ret = 0;
1033                 inet_del_ifa(in_dev, ifap, 0);
1034                 ifa->ifa_address = sin->sin_addr.s_addr;
1035                 inet_insert_ifa(ifa);
1036                 break;
1037
1038         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1039
1040                 /*
1041                  *      The mask we set must be legal.
1042                  */
1043                 ret = -EINVAL;
1044                 if (bad_mask(sin->sin_addr.s_addr, 0))
1045                         break;
1046                 ret = 0;
1047                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1048                         __be32 old_mask = ifa->ifa_mask;
1049                         inet_del_ifa(in_dev, ifap, 0);
1050                         ifa->ifa_mask = sin->sin_addr.s_addr;
1051                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1052
1053                         /* See if current broadcast address matches
1054                          * with current netmask, then recalculate
1055                          * the broadcast address. Otherwise it's a
1056                          * funny address, so don't touch it since
1057                          * the user seems to know what (s)he's doing...
1058                          */
1059                         if ((dev->flags & IFF_BROADCAST) &&
1060                             (ifa->ifa_prefixlen < 31) &&
1061                             (ifa->ifa_broadcast ==
1062                              (ifa->ifa_local|~old_mask))) {
1063                                 ifa->ifa_broadcast = (ifa->ifa_local |
1064                                                       ~sin->sin_addr.s_addr);
1065                         }
1066                         inet_insert_ifa(ifa);
1067                 }
1068                 break;
1069         }
1070 done:
1071         rtnl_unlock();
1072 out:
1073         return ret;
     /* Exit path of the "get" commands: drop the RTNL lock first, then copy
      * the answer back to user space.
      */
1074 rarok:
1075         rtnl_unlock();
1076         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1077         goto out;
1078 }
1079
1080 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1081 {
1082         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1083         struct in_ifaddr *ifa;
1084         struct ifreq ifr;
1085         int done = 0;
1086
1087         if (!in_dev)
1088                 goto out;
1089
1090         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1091                 if (!buf) {
1092                         done += sizeof(ifr);
1093                         continue;
1094                 }
1095                 if (len < (int) sizeof(ifr))
1096                         break;
1097                 memset(&ifr, 0, sizeof(struct ifreq));
1098                 if (ifa->ifa_label)
1099                         strcpy(ifr.ifr_name, ifa->ifa_label);
1100                 else
1101                         strcpy(ifr.ifr_name, dev->name);
1102
1103                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1104                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1105                                                                 ifa->ifa_local;
1106
1107                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1108                         done = -EFAULT;
1109                         break;
1110                 }
1111                 buf  += sizeof(struct ifreq);
1112                 len  -= sizeof(struct ifreq);
1113                 done += sizeof(struct ifreq);
1114         }
1115 out:
1116         return done;
1117 }
1118
/*
 * Pick a local address to use on @dev.
 *
 * Among the primary addresses of @dev whose scope value does not exceed
 * @scope, an address matching @dst (per inet_ifa_match()) wins; with @dst
 * == 0 the first such address wins.  If none matches, the first address
 * within scope is used.  If @dev yields nothing, every device of the
 * namespace is scanned for a primary address that is within @scope and not
 * RT_SCOPE_LINK.
 *
 * Runs under rcu_read_lock().  Returns the address, or 0 if none was found.
 */
1119 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1120 {
1121         __be32 addr = 0;
1122         struct in_device *in_dev;
1123         struct net *net = dev_net(dev);
1124
1125         rcu_read_lock();
1126         in_dev = __in_dev_get_rcu(dev);
1127         if (!in_dev)
1128                 goto no_in_dev;
1129
1130         for_primary_ifa(in_dev) {
1131                 if (ifa->ifa_scope > scope)
1132                         continue;
1133                 if (!dst || inet_ifa_match(dst, ifa)) {
1134                         addr = ifa->ifa_local;
1135                         break;
1136                 }
                     /* Remember the first in-scope address as a fallback. */
1137                 if (!addr)
1138                         addr = ifa->ifa_local;
1139         } endfor_ifa(in_dev);
1140
1141         if (addr)
1142                 goto out_unlock;
1143 no_in_dev:
1144
1145         /* Not loopback addresses on loopback should be preferred
1146            in this case. It is important that lo is the first interface
1147            in dev_base list.
1148          */
1149         for_each_netdev_rcu(net, dev) {
1150                 in_dev = __in_dev_get_rcu(dev);
1151                 if (!in_dev)
1152                         continue;
1153
1154                 for_primary_ifa(in_dev) {
1155                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1156                             ifa->ifa_scope <= scope) {
1157                                 addr = ifa->ifa_local;
1158                                 goto out_unlock;
1159                         }
1160                 } endfor_ifa(in_dev);
1161         }
1162 out_unlock:
1163         rcu_read_unlock();
1164         return addr;
1165 }
1166 EXPORT_SYMBOL(inet_select_addr);
1167
/*
 * Walk the addresses of @in_dev looking for a local address that satisfies
 * the (@dst, @local, @scope) constraints; a zero @dst or @local acts as a
 * wildcard.
 *
 * Two pieces of state are tracked across the walk:
 *   addr - candidate address: the first ifa_local that equals @local (or
 *          any, if @local is 0) and whose scope value is <= @scope;
 *   same - set once some entry matches @local (if given) and @dst (if
 *          given) according to inet_ifa_match().
 *
 * Returns the candidate address if a matching entry was also seen,
 * otherwise 0.
 */
1168 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1169                               __be32 local, int scope)
1170 {
1171         int same = 0;
1172         __be32 addr = 0;
1173
1174         for_ifa(in_dev) {
1175                 if (!addr &&
1176                     (local == ifa->ifa_local || !local) &&
1177                     ifa->ifa_scope <= scope) {
1178                         addr = ifa->ifa_local;
                             /* A match was already seen earlier: done. */
1179                         if (same)
1180                                 break;
1181                 }
1182                 if (!same) {
1183                         same = (!local || inet_ifa_match(local, ifa)) &&
1184                                 (!dst || inet_ifa_match(dst, ifa));
1185                         if (same && addr) {
1186                                 if (local || !dst)
1187                                         break;
1188                                 /* Is the selected addr into dst subnet? */
1189                                 if (inet_ifa_match(addr, ifa))
1190                                         break;
1191                                 /* No, then can we use new local src? */
1192                                 if (ifa->ifa_scope <= scope) {
1193                                         addr = ifa->ifa_local;
1194                                         break;
1195                                 }
1196                                 /* search for large dst subnet for addr */
1197                                 same = 0;
1198                         }
1199                 }
1200         } endfor_ifa(in_dev);
1201
1202         return same ? addr : 0;
1203 }
1204
1205 /*
1206  * Confirm that local IP address exists using wildcards:
1207  * - in_dev: only on this interface, 0=any interface
1208  * - dst: only in the same subnet as dst, 0=any dst
1209  * - local: address, 0=autoselect the local address
1210  * - scope: maximum allowed scope value for the local address
1211  */
1212 __be32 inet_confirm_addr(struct in_device *in_dev,
1213                          __be32 dst, __be32 local, int scope)
1214 {
1215         __be32 addr = 0;
1216         struct net_device *dev;
1217         struct net *net;
1218
1219         if (scope != RT_SCOPE_LINK)
1220                 return confirm_addr_indev(in_dev, dst, local, scope);
1221
1222         net = dev_net(in_dev->dev);
1223         rcu_read_lock();
1224         for_each_netdev_rcu(net, dev) {
1225                 in_dev = __in_dev_get_rcu(dev);
1226                 if (in_dev) {
1227                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1228                         if (addr)
1229                                 break;
1230                 }
1231         }
1232         rcu_read_unlock();
1233
1234         return addr;
1235 }
1236 EXPORT_SYMBOL(inet_confirm_addr);
1237
1238 /*
1239  *      Device notifier
1240  */
1241
1242 int register_inetaddr_notifier(struct notifier_block *nb)
1243 {
1244         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1245 }
1246 EXPORT_SYMBOL(register_inetaddr_notifier);
1247
1248 int unregister_inetaddr_notifier(struct notifier_block *nb)
1249 {
1250         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1251 }
1252 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1253
1254 /* Rename ifa_labels for a device name change. Make some effort to preserve
1255  * existing alias numbering and to create unique labels if possible.
1256 */
1257 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1258 {
1259         struct in_ifaddr *ifa;
1260         int named = 0;
1261
1262         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1263                 char old[IFNAMSIZ], *dot;
1264
1265                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1266                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1267                 if (named++ == 0)
1268                         goto skip;
1269                 dot = strchr(old, ':');
1270                 if (dot == NULL) {
1271                         sprintf(old, ":%d", named);
1272                         dot = old;
1273                 }
1274                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1275                         strcat(ifa->ifa_label, dot);
1276                 else
1277                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1278 skip:
1279                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1280         }
1281 }
1282
/*
 * Tell whether @mtu is large enough for IPv4 to be run on a device.
 * The threshold is 68 (presumably the minimum datagram size every IPv4
 * module must handle per RFC 791 - confirm).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1287
1288 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1289                                         struct in_device *in_dev)
1290
1291 {
1292         struct in_ifaddr *ifa;
1293
1294         for (ifa = in_dev->ifa_list; ifa;
1295              ifa = ifa->ifa_next) {
1296                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1297                          ifa->ifa_local, dev,
1298                          ifa->ifa_local, NULL,
1299                          dev->dev_addr, NULL);
1300         }
1301 }
1302
1303 /* Called only under RTNL semaphore */
1304
/*
 * Netdevice notifier callback: keeps the IPv4 per-device state (in_device)
 * of the device passed in @ptr in step with the device @event.
 *
 * Returns NOTIFY_DONE, except when creating the in_device fails on
 * NETDEV_REGISTER, in which case -ENOMEM is reported through
 * notifier_from_errno().
 */
1305 static int inetdev_event(struct notifier_block *this, unsigned long event,
1306                          void *ptr)
1307 {
1308         struct net_device *dev = ptr;
1309         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1310
1311         ASSERT_RTNL();
1312
             /* No IPv4 state yet: create it on registration, or when the MTU
              * becomes valid again; every other event is ignored.
              */
1313         if (!in_dev) {
1314                 if (event == NETDEV_REGISTER) {
1315                         in_dev = inetdev_init(dev);
1316                         if (!in_dev)
1317                                 return notifier_from_errno(-ENOMEM);
1318                         if (dev->flags & IFF_LOOPBACK) {
1319                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1320                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1321                         }
1322                 } else if (event == NETDEV_CHANGEMTU) {
1323                         /* Re-enabling IP */
1324                         if (inetdev_valid_mtu(dev->mtu))
1325                                 in_dev = inetdev_init(dev);
1326                 }
1327                 goto out;
1328         }
1329
1330         switch (event) {
1331         case NETDEV_REGISTER:
                     /* Registration of a device that already has an in_device
                      * is unexpected: log it and clear the pointer.
                      */
1332                 pr_debug("%s: bug\n", __func__);
1333                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1334                 break;
1335         case NETDEV_UP:
1336                 if (!inetdev_valid_mtu(dev->mtu))
1337                         break;
                     /* A loopback device gets INADDR_LOOPBACK with an 8 bit
                      * prefix and host scope configured automatically.
                      */
1338                 if (dev->flags & IFF_LOOPBACK) {
1339                         struct in_ifaddr *ifa = inet_alloc_ifa();
1340
1341                         if (ifa) {
1342                                 INIT_HLIST_NODE(&ifa->hash);
1343                                 ifa->ifa_local =
1344                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1345                                 ifa->ifa_prefixlen = 8;
1346                                 ifa->ifa_mask = inet_make_mask(8);
1347                                 in_dev_hold(in_dev);
1348                                 ifa->ifa_dev = in_dev;
1349                                 ifa->ifa_scope = RT_SCOPE_HOST;
1350                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1351                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1352                                                  INFINITY_LIFE_TIME);
1353                                 inet_insert_ifa(ifa);
1354                         }
1355                 }
1356                 ip_mc_up(in_dev);
1357                 /* fall through */
1358         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * ARP_NOTIFY setting is enabled for the device.
                      */
1359                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1360                         break;
1361                 /* fall through */
1362         case NETDEV_NOTIFY_PEERS:
1363                 /* Send gratuitous ARP to notify of link change */
1364                 inetdev_send_gratuitous_arp(dev, in_dev);
1365                 break;
1366         case NETDEV_DOWN:
1367                 ip_mc_down(in_dev);
1368                 break;
1369         case NETDEV_PRE_TYPE_CHANGE:
1370                 ip_mc_unmap(in_dev);
1371                 break;
1372         case NETDEV_POST_TYPE_CHANGE:
1373                 ip_mc_remap(in_dev);
1374                 break;
1375         case NETDEV_CHANGEMTU:
1376                 if (inetdev_valid_mtu(dev->mtu))
1377                         break;
1378                 /* disable IP when MTU is not enough */
                     /* fall through */
1379         case NETDEV_UNREGISTER:
1380                 inetdev_destroy(in_dev);
1381                 break;
1382         case NETDEV_CHANGENAME:
1383                 /* Do not notify about label change, this event is
1384                  * not interesting to applications using netlink.
1385                  */
1386                 inetdev_changename(dev, in_dev);
1387
                     /* Re-register the sysctl entries after the rename. */
1388                 devinet_sysctl_unregister(in_dev);
1389                 devinet_sysctl_register(in_dev);
1390                 break;
1391         }
1392 out:
1393         return NOTIFY_DONE;
1394 }
1395
/* Notifier block whose callback is inetdev_event(). */
1396 static struct notifier_block ip_netdev_notifier = {
1397         .notifier_call = inetdev_event,
1398 };
1399
1400 static size_t inet_nlmsg_size(void)
1401 {
1402         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1403                + nla_total_size(4) /* IFA_ADDRESS */
1404                + nla_total_size(4) /* IFA_LOCAL */
1405                + nla_total_size(4) /* IFA_BROADCAST */
1406                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1407 }
1408
1409 static inline u32 cstamp_delta(unsigned long cstamp)
1410 {
1411         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1412 }
1413
1414 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1415                          unsigned long tstamp, u32 preferred, u32 valid)
1416 {
1417         struct ifa_cacheinfo ci;
1418
1419         ci.cstamp = cstamp_delta(cstamp);
1420         ci.tstamp = cstamp_delta(tstamp);
1421         ci.ifa_prefered = preferred;
1422         ci.ifa_valid = valid;
1423
1424         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1425 }
1426
/*
 * Append one address message of type @event describing @ifa to @skb.
 *
 * The message consists of a struct ifaddrmsg followed by the IFA_ADDRESS,
 * IFA_LOCAL, IFA_BROADCAST and IFA_LABEL attributes (each only if the
 * corresponding field is non-zero / non-empty) and an IFA_CACHEINFO
 * attribute, which is always added.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE if the header
 * or any attribute does not fit; in the latter case the partial message is
 * cancelled.
 */
1427 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1428                             u32 portid, u32 seq, int event, unsigned int flags)
1429 {
1430         struct ifaddrmsg *ifm;
1431         struct nlmsghdr  *nlh;
1432         u32 preferred, valid;
1433
1434         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1435         if (nlh == NULL)
1436                 return -EMSGSIZE;
1437
1438         ifm = nlmsg_data(nlh);
1439         ifm->ifa_family = AF_INET;
1440         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1441         ifm->ifa_flags = ifa->ifa_flags;
1442         ifm->ifa_scope = ifa->ifa_scope;
1443         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1444
             /* Report remaining lifetimes: subtract the seconds elapsed since
              * ifa_tstamp, clamping at 0.  Note that "valid" is only adjusted
              * when "preferred" is finite.  Permanent addresses always report
              * infinite lifetimes.
              */
1445         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1446                 preferred = ifa->ifa_preferred_lft;
1447                 valid = ifa->ifa_valid_lft;
1448                 if (preferred != INFINITY_LIFE_TIME) {
1449                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1450
1451                         if (preferred > tval)
1452                                 preferred -= tval;
1453                         else
1454                                 preferred = 0;
1455                         if (valid != INFINITY_LIFE_TIME) {
1456                                 if (valid > tval)
1457                                         valid -= tval;
1458                                 else
1459                                         valid = 0;
1460                         }
1461                 }
1462         } else {
1463                 preferred = INFINITY_LIFE_TIME;
1464                 valid = INFINITY_LIFE_TIME;
1465         }
             /* Each nla_put*() returns non-zero on failure, so the first
              * attribute that does not fit aborts the whole message.
              */
1466         if ((ifa->ifa_address &&
1467              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1468             (ifa->ifa_local &&
1469              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1470             (ifa->ifa_broadcast &&
1471              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1472             (ifa->ifa_label[0] &&
1473              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1474             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1475                           preferred, valid))
1476                 goto nla_put_failure;
1477
1478         return nlmsg_end(skb, nlh);
1479
1480 nla_put_failure:
1481         nlmsg_cancel(skb, nlh);
1482         return -EMSGSIZE;
1483 }
1484
/*
 * Netlink dump callback: emit an RTM_NEWADDR message (NLM_F_MULTI) for every
 * IPv4 address of every device in the socket's namespace.
 *
 * The dump is resumable.  The position is kept in cb->args[]:
 *   args[0] - bucket index into net->dev_index_head[],
 *   args[1] - index of the device within that bucket,
 *   args[2] - index of the address within that device.
 * When a message cannot be added the walk stops and the current position
 * is stored for the next call.
 *
 * Returns skb->len.
 */
1485 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1486 {
1487         struct net *net = sock_net(skb->sk);
1488         int h, s_h;
1489         int idx, s_idx;
1490         int ip_idx, s_ip_idx;
1491         struct net_device *dev;
1492         struct in_device *in_dev;
1493         struct in_ifaddr *ifa;
1494         struct hlist_head *head;
1495
1496         s_h = cb->args[0];
1497         s_idx = idx = cb->args[1];
1498         s_ip_idx = ip_idx = cb->args[2];
1499
1500         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1501                 idx = 0;
1502                 head = &net->dev_index_head[h];
1503                 rcu_read_lock();
                     /* Sequence value handed to nl_dump_check_consistent()
                      * below; presumably lets it flag address or device list
                      * changes between dump passes - confirm.
                      */
1504                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1505                           net->dev_base_seq;
1506                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1507                         if (idx < s_idx)
1508                                 goto cont;
                             /* Past the device where the previous pass
                              * stopped: start from its first address.
                              */
1509                         if (h > s_h || idx > s_idx)
1510                                 s_ip_idx = 0;
1511                         in_dev = __in_dev_get_rcu(dev);
1512                         if (!in_dev)
1513                                 goto cont;
1514
1515                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1516                              ifa = ifa->ifa_next, ip_idx++) {
1517                                 if (ip_idx < s_ip_idx)
1518                                         continue;
1519                                 if (inet_fill_ifaddr(skb, ifa,
1520                                              NETLINK_CB(cb->skb).portid,
1521                                              cb->nlh->nlmsg_seq,
1522                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1523                                         rcu_read_unlock();
1524                                         goto done;
1525                                 }
1526                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1527                         }
1528 cont:
1529                         idx++;
1530                 }
1531                 rcu_read_unlock();
1532         }
1533
1534 done:
             /* Save the cursor for the next invocation. */
1535         cb->args[0] = h;
1536         cb->args[1] = idx;
1537         cb->args[2] = ip_idx;
1538
1539         return skb->len;
1540 }
1541
1542 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1543                       u32 portid)
1544 {
1545         struct sk_buff *skb;
1546         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1547         int err = -ENOBUFS;
1548         struct net *net;
1549
1550         net = dev_net(ifa->ifa_dev->dev);
1551         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1552         if (skb == NULL)
1553                 goto errout;
1554
1555         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1556         if (err < 0) {
1557                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1558                 WARN_ON(err == -EMSGSIZE);
1559                 kfree_skb(skb);
1560                 goto errout;
1561         }
1562         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1563         return;
1564 errout:
1565         if (err < 0)
1566                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1567 }
1568
1569 static size_t inet_get_link_af_size(const struct net_device *dev)
1570 {
1571         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1572
1573         if (!in_dev)
1574                 return 0;
1575
1576         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1577 }
1578
1579 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1580 {
1581         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1582         struct nlattr *nla;
1583         int i;
1584
1585         if (!in_dev)
1586                 return -ENODATA;
1587
1588         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1589         if (nla == NULL)
1590                 return -EMSGSIZE;
1591
1592         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1593                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1594
1595         return 0;
1596 }
1597
/* Attribute policy applied by inet_validate_link_af(): IFLA_INET_CONF must
 * be a nested attribute.
 */
1598 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1599         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1600 };
1601
1602 static int inet_validate_link_af(const struct net_device *dev,
1603                                  const struct nlattr *nla)
1604 {
1605         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1606         int err, rem;
1607
1608         if (dev && !__in_dev_get_rtnl(dev))
1609                 return -EAFNOSUPPORT;
1610
1611         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1612         if (err < 0)
1613                 return err;
1614
1615         if (tb[IFLA_INET_CONF]) {
1616                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1617                         int cfgid = nla_type(a);
1618
1619                         if (nla_len(a) < 4)
1620                                 return -EINVAL;
1621
1622                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1623                                 return -EINVAL;
1624                 }
1625         }
1626
1627         return 0;
1628 }
1629
1630 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1631 {
1632         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1633         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1634         int rem;
1635
1636         if (!in_dev)
1637                 return -EAFNOSUPPORT;
1638
1639         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1640                 BUG();
1641
1642         if (tb[IFLA_INET_CONF]) {
1643                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1644                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1645         }
1646
1647         return 0;
1648 }
1649
1650 static int inet_netconf_msgsize_devconf(int type)
1651 {
1652         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1653                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1654
1655         /* type -1 is used for ALL */
1656         if (type == -1 || type == NETCONFA_FORWARDING)
1657                 size += nla_total_size(4);
1658         if (type == -1 || type == NETCONFA_RP_FILTER)
1659                 size += nla_total_size(4);
1660         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1661                 size += nla_total_size(4);
1662
1663         return size;
1664 }
1665
1666 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1667                                      struct ipv4_devconf *devconf, u32 portid,
1668                                      u32 seq, int event, unsigned int flags,
1669                                      int type)
1670 {
1671         struct nlmsghdr  *nlh;
1672         struct netconfmsg *ncm;
1673
1674         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1675                         flags);
1676         if (nlh == NULL)
1677                 return -EMSGSIZE;
1678
1679         ncm = nlmsg_data(nlh);
1680         ncm->ncm_family = AF_INET;
1681
1682         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1683                 goto nla_put_failure;
1684
1685         /* type -1 is used for ALL */
1686         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1687             nla_put_s32(skb, NETCONFA_FORWARDING,
1688                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1689                 goto nla_put_failure;
1690         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1691             nla_put_s32(skb, NETCONFA_RP_FILTER,
1692                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1693                 goto nla_put_failure;
1694         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1695             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1696                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1697                 goto nla_put_failure;
1698
1699         return nlmsg_end(skb, nlh);
1700
1701 nla_put_failure:
1702         nlmsg_cancel(skb, nlh);
1703         return -EMSGSIZE;
1704 }
1705
1706 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1707                                  struct ipv4_devconf *devconf)
1708 {
1709         struct sk_buff *skb;
1710         int err = -ENOBUFS;
1711
1712         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1713         if (skb == NULL)
1714                 goto errout;
1715
1716         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1717                                         RTM_NEWNETCONF, 0, type);
1718         if (err < 0) {
1719                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1720                 WARN_ON(err == -EMSGSIZE);
1721                 kfree_skb(skb);
1722                 goto errout;
1723         }
1724         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1725         return;
1726 errout:
1727         if (err < 0)
1728                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1729 }
1730
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Every listed attribute is declared with a length of sizeof(int).
 * NOTE(review): NETCONFA_MC_FORWARDING is emitted by
 * inet_netconf_fill_devconf() but has no entry here; only
 * NETCONFA_IFINDEX is read from requests in this file.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]      = { .len = sizeof(int) },
	[NETCONFA_FORWARDING]   = { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
};
1736
1737 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1738                                     struct nlmsghdr *nlh)
1739 {
1740         struct net *net = sock_net(in_skb->sk);
1741         struct nlattr *tb[NETCONFA_MAX+1];
1742         struct netconfmsg *ncm;
1743         struct sk_buff *skb;
1744         struct ipv4_devconf *devconf;
1745         struct in_device *in_dev;
1746         struct net_device *dev;
1747         int ifindex;
1748         int err;
1749
1750         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1751                           devconf_ipv4_policy);
1752         if (err < 0)
1753                 goto errout;
1754
1755         err = EINVAL;
1756         if (!tb[NETCONFA_IFINDEX])
1757                 goto errout;
1758
1759         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1760         switch (ifindex) {
1761         case NETCONFA_IFINDEX_ALL:
1762                 devconf = net->ipv4.devconf_all;
1763                 break;
1764         case NETCONFA_IFINDEX_DEFAULT:
1765                 devconf = net->ipv4.devconf_dflt;
1766                 break;
1767         default:
1768                 dev = __dev_get_by_index(net, ifindex);
1769                 if (dev == NULL)
1770                         goto errout;
1771                 in_dev = __in_dev_get_rtnl(dev);
1772                 if (in_dev == NULL)
1773                         goto errout;
1774                 devconf = &in_dev->cnf;
1775                 break;
1776         }
1777
1778         err = -ENOBUFS;
1779         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1780         if (skb == NULL)
1781                 goto errout;
1782
1783         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1784                                         NETLINK_CB(in_skb).portid,
1785                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1786                                         -1);
1787         if (err < 0) {
1788                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1789                 WARN_ON(err == -EMSGSIZE);
1790                 kfree_skb(skb);
1791                 goto errout;
1792         }
1793         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1794 errout:
1795         return err;
1796 }
1797
1798 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1799                                      struct netlink_callback *cb)
1800 {
1801         struct net *net = sock_net(skb->sk);
1802         int h, s_h;
1803         int idx, s_idx;
1804         struct net_device *dev;
1805         struct in_device *in_dev;
1806         struct hlist_head *head;
1807
1808         s_h = cb->args[0];
1809         s_idx = idx = cb->args[1];
1810
1811         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1812                 idx = 0;
1813                 head = &net->dev_index_head[h];
1814                 rcu_read_lock();
1815                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1816                           net->dev_base_seq;
1817                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1818                         if (idx < s_idx)
1819                                 goto cont;
1820                         in_dev = __in_dev_get_rcu(dev);
1821                         if (!in_dev)
1822                                 goto cont;
1823
1824                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1825                                                       &in_dev->cnf,
1826                                                       NETLINK_CB(cb->skb).portid,
1827                                                       cb->nlh->nlmsg_seq,
1828                                                       RTM_NEWNETCONF,
1829                                                       NLM_F_MULTI,
1830                                                       -1) <= 0) {
1831                                 rcu_read_unlock();
1832                                 goto done;
1833                         }
1834                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1835 cont:
1836                         idx++;
1837                 }
1838                 rcu_read_unlock();
1839         }
1840         if (h == NETDEV_HASHENTRIES) {
1841                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1842                                               net->ipv4.devconf_all,
1843                                               NETLINK_CB(cb->skb).portid,
1844                                               cb->nlh->nlmsg_seq,
1845                                               RTM_NEWNETCONF, NLM_F_MULTI,
1846                                               -1) <= 0)
1847                         goto done;
1848                 else
1849                         h++;
1850         }
1851         if (h == NETDEV_HASHENTRIES + 1) {
1852                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1853                                               net->ipv4.devconf_dflt,
1854                                               NETLINK_CB(cb->skb).portid,
1855                                               cb->nlh->nlmsg_seq,
1856                                               RTM_NEWNETCONF, NLM_F_MULTI,
1857                                               -1) <= 0)
1858                         goto done;
1859                 else
1860                         h++;
1861         }
1862 done:
1863         cb->args[0] = h;
1864         cb->args[1] = idx;
1865
1866         return skb->len;
1867 }
1868
1869 #ifdef CONFIG_SYSCTL
1870
1871 static void devinet_copy_dflt_conf(struct net *net, int i)
1872 {
1873         struct net_device *dev;
1874
1875         rcu_read_lock();
1876         for_each_netdev_rcu(net, dev) {
1877                 struct in_device *in_dev;
1878
1879                 in_dev = __in_dev_get_rcu(dev);
1880                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1881                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1882         }
1883         rcu_read_unlock();
1884 }
1885
1886 /* called with RTNL locked */
1887 static void inet_forward_change(struct net *net)
1888 {
1889         struct net_device *dev;
1890         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1891
1892         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1893         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1894         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1895                                     NETCONFA_IFINDEX_ALL,
1896                                     net->ipv4.devconf_all);
1897         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1898                                     NETCONFA_IFINDEX_DEFAULT,
1899                                     net->ipv4.devconf_dflt);
1900
1901         for_each_netdev(net, dev) {
1902                 struct in_device *in_dev;
1903                 if (on)
1904                         dev_disable_lro(dev);
1905                 rcu_read_lock();
1906                 in_dev = __in_dev_get_rcu(dev);
1907                 if (in_dev) {
1908                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1909                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1910                                                     dev->ifindex, &in_dev->cnf);
1911                 }
1912                 rcu_read_unlock();
1913         }
1914 }
1915
1916 static int devinet_conf_proc(ctl_table *ctl, int write,
1917                              void __user *buffer,
1918                              size_t *lenp, loff_t *ppos)
1919 {
1920         int old_value = *(int *)ctl->data;
1921         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1922         int new_value = *(int *)ctl->data;
1923
1924         if (write) {
1925                 struct ipv4_devconf *cnf = ctl->extra1;
1926                 struct net *net = ctl->extra2;
1927                 int i = (int *)ctl->data - cnf->data;
1928
1929                 set_bit(i, cnf->state);
1930
1931                 if (cnf == net->ipv4.devconf_dflt)
1932                         devinet_copy_dflt_conf(net, i);
1933                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1934                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1935                         if ((new_value == 0) && (old_value != 0))
1936                                 rt_cache_flush(net);
1937                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1938                     new_value != old_value) {
1939                         int ifindex;
1940
1941                         if (cnf == net->ipv4.devconf_dflt)
1942                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1943                         else if (cnf == net->ipv4.devconf_all)
1944                                 ifindex = NETCONFA_IFINDEX_ALL;
1945                         else {
1946                                 struct in_device *idev =
1947                                         container_of(cnf, struct in_device,
1948                                                      cnf);
1949                                 ifindex = idev->dev->ifindex;
1950                         }
1951                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1952                                                     ifindex, cnf);
1953                 }
1954         }
1955
1956         return ret;
1957 }
1958
1959 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1960                                   void __user *buffer,
1961                                   size_t *lenp, loff_t *ppos)
1962 {
1963         int *valp = ctl->data;
1964         int val = *valp;
1965         loff_t pos = *ppos;
1966         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1967
1968         if (write && *valp != val) {
1969                 struct net *net = ctl->extra2;
1970
1971                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1972                         if (!rtnl_trylock()) {
1973                                 /* Restore the original values before restarting */
1974                                 *valp = val;
1975                                 *ppos = pos;
1976                                 return restart_syscall();
1977                         }
1978                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1979                                 inet_forward_change(net);
1980                         } else {
1981                                 struct ipv4_devconf *cnf = ctl->extra1;
1982                                 struct in_device *idev =
1983                                         container_of(cnf, struct in_device, cnf);
1984                                 if (*valp)
1985                                         dev_disable_lro(idev->dev);
1986                                 inet_netconf_notify_devconf(net,
1987                                                             NETCONFA_FORWARDING,
1988                                                             idev->dev->ifindex,
1989                                                             cnf);
1990                         }
1991                         rtnl_unlock();
1992                         rt_cache_flush(net);
1993                 } else
1994                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1995                                                     NETCONFA_IFINDEX_DEFAULT,
1996                                                     net->ipv4.devconf_dflt);
1997         }
1998
1999         return ret;
2000 }
2001
2002 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2003                                 void __user *buffer,
2004                                 size_t *lenp, loff_t *ppos)
2005 {
2006         int *valp = ctl->data;
2007         int val = *valp;
2008         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2009         struct net *net = ctl->extra2;
2010
2011         if (write && *valp != val)
2012                 rt_cache_flush(net);
2013
2014         return ret;
2015 }
2016
/*
 * Build one ctl_table initializer for the devconf attribute
 * IPV4_DEVCONF_<attr>.  The template points at the global ipv4_devconf;
 * __devinet_sysctl_register() rebases .data/.extra1 on a copy of the table.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2039
/*
 * Template sysctl table for one ipv4_devconf.  It is duplicated by
 * __devinet_sysctl_register(), which fixes up the data/extra pointers of the
 * copy and stores the registration handle in sysctl_header.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler; mc_forwarding is read-only */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* read-write entries handled by devinet_conf_proc() */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries handled by ipv4_doint_and_flush() */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2080
2081 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2082                                         struct ipv4_devconf *p)
2083 {
2084         int i;
2085         struct devinet_sysctl_table *t;
2086         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2087
2088         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2089         if (!t)
2090                 goto out;
2091
2092         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2093                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2094                 t->devinet_vars[i].extra1 = p;
2095                 t->devinet_vars[i].extra2 = net;
2096         }
2097
2098         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2099
2100         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2101         if (!t->sysctl_header)
2102                 goto free;
2103
2104         p->sysctl = t;
2105         return 0;
2106
2107 free:
2108         kfree(t);
2109 out:
2110         return -ENOBUFS;
2111 }
2112
2113 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2114 {
2115         struct devinet_sysctl_table *t = cnf->sysctl;
2116
2117         if (t == NULL)
2118                 return;
2119
2120         cnf->sysctl = NULL;
2121         unregister_net_sysctl_table(t->sysctl_header);
2122         kfree(t);
2123 }
2124
2125 static void devinet_sysctl_register(struct in_device *idev)
2126 {
2127         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2128         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2129                                         &idev->cnf);
2130 }
2131
2132 static void devinet_sysctl_unregister(struct in_device *idev)
2133 {
2134         __devinet_sysctl_unregister(&idev->cnf);
2135         neigh_sysctl_unregister(idev->arp_parms);
2136 }
2137
2138 static struct ctl_table ctl_forward_entry[] = {
2139         {
2140                 .procname       = "ip_forward",
2141                 .data           = &ipv4_devconf.data[
2142                                         IPV4_DEVCONF_FORWARDING - 1],
2143                 .maxlen         = sizeof(int),
2144                 .mode           = 0644,
2145                 .proc_handler   = devinet_sysctl_forward,
2146                 .extra1         = &ipv4_devconf,
2147                 .extra2         = &init_net,
2148         },
2149         { },
2150 };
2151 #endif
2152
2153 static __net_init int devinet_init_net(struct net *net)
2154 {
2155         int err;
2156         struct ipv4_devconf *all, *dflt;
2157 #ifdef CONFIG_SYSCTL
2158         struct ctl_table *tbl = ctl_forward_entry;
2159         struct ctl_table_header *forw_hdr;
2160 #endif
2161
2162         err = -ENOMEM;
2163         all = &ipv4_devconf;
2164         dflt = &ipv4_devconf_dflt;
2165
2166         if (!net_eq(net, &init_net)) {
2167                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2168                 if (all == NULL)
2169                         goto err_alloc_all;
2170
2171                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2172                 if (dflt == NULL)
2173                         goto err_alloc_dflt;
2174
2175 #ifdef CONFIG_SYSCTL
2176                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2177                 if (tbl == NULL)
2178                         goto err_alloc_ctl;
2179
2180                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2181                 tbl[0].extra1 = all;
2182                 tbl[0].extra2 = net;
2183 #endif
2184         }
2185
2186 #ifdef CONFIG_SYSCTL
2187         err = __devinet_sysctl_register(net, "all", all);
2188         if (err < 0)
2189                 goto err_reg_all;
2190
2191         err = __devinet_sysctl_register(net, "default", dflt);
2192         if (err < 0)
2193                 goto err_reg_dflt;
2194
2195         err = -ENOMEM;
2196         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2197         if (forw_hdr == NULL)
2198                 goto err_reg_ctl;
2199         net->ipv4.forw_hdr = forw_hdr;
2200 #endif
2201
2202         net->ipv4.devconf_all = all;
2203         net->ipv4.devconf_dflt = dflt;
2204         return 0;
2205
2206 #ifdef CONFIG_SYSCTL
2207 err_reg_ctl:
2208         __devinet_sysctl_unregister(dflt);
2209 err_reg_dflt:
2210         __devinet_sysctl_unregister(all);
2211 err_reg_all:
2212         if (tbl != ctl_forward_entry)
2213                 kfree(tbl);
2214 err_alloc_ctl:
2215 #endif
2216         if (dflt != &ipv4_devconf_dflt)
2217                 kfree(dflt);
2218 err_alloc_dflt:
2219         if (all != &ipv4_devconf)
2220                 kfree(all);
2221 err_alloc_all:
2222         return err;
2223 }
2224
2225 static __net_exit void devinet_exit_net(struct net *net)
2226 {
2227 #ifdef CONFIG_SYSCTL
2228         struct ctl_table *tbl;
2229
2230         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2231         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2232         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2233         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2234         kfree(tbl);
2235 #endif
2236         kfree(net->ipv4.devconf_dflt);
2237         kfree(net->ipv4.devconf_all);
2238 }
2239
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2244
/*
 * AF_INET callbacks for address-family specific link attributes,
 * registered from devinet_init() via rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family           = AF_INET,
	.fill_link_af     = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af      = inet_set_link_af,
};
2252
2253 void __init devinet_init(void)
2254 {
2255         int i;
2256
2257         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2258                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2259
2260         register_pernet_subsys(&devinet_ops);
2261
2262         register_gifconf(PF_INET, inet_gifconf);
2263         register_netdevice_notifier(&ip_netdev_notifier);
2264
2265         schedule_delayed_work(&check_lifetime_work, 0);
2266
2267         rtnl_af_register(&inet_af_ops);
2268
2269         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2270         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2271         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2272         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2273                       inet_netconf_dump_devconf, NULL);
2274 }
2275