]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv4/devinet.c
Merge remote-tracking branch 'mips/mips-for-linux-next'
[karo-tx-linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Access configuration attribute @attr in the devconf_dflt table of
 * namespace pointer @net.
 *
 * @net is parenthesized so that any pointer expression (e.g. "p + 1")
 * binds correctly to the "->" operator.  @attr is forwarded untouched to
 * IPV4_DEVCONF().
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102 };
103
104 #define IN4_ADDR_HSIZE_SHIFT    8
105 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
106
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 static DEFINE_SPINLOCK(inet_addr_hash_lock);
109
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         spin_lock(&inet_addr_hash_lock);
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123         spin_unlock(&inet_addr_hash_lock);
124 }
125
126 static void inet_hash_remove(struct in_ifaddr *ifa)
127 {
128         spin_lock(&inet_addr_hash_lock);
129         hlist_del_init_rcu(&ifa->hash);
130         spin_unlock(&inet_addr_hash_lock);
131 }
132
133 /**
134  * __ip_dev_find - find the first device with a given source address.
135  * @net: the net namespace
136  * @addr: the source address
137  * @devref: if true, take a reference on the found device
138  *
139  * If a caller uses devref=false, it should be protected by RCU, or RTNL
140  */
141 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
142 {
143         u32 hash = inet_addr_hash(net, addr);
144         struct net_device *result = NULL;
145         struct in_ifaddr *ifa;
146
147         rcu_read_lock();
148         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
149                 if (ifa->ifa_local == addr) {
150                         struct net_device *dev = ifa->ifa_dev->dev;
151
152                         if (!net_eq(dev_net(dev), net))
153                                 continue;
154                         result = dev;
155                         break;
156                 }
157         }
158         if (!result) {
159                 struct flowi4 fl4 = { .daddr = addr };
160                 struct fib_result res = { 0 };
161                 struct fib_table *local;
162
163                 /* Fallback to FIB local table so that communication
164                  * over loopback subnets work.
165                  */
166                 local = fib_get_table(net, RT_TABLE_LOCAL);
167                 if (local &&
168                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169                     res.type == RTN_LOCAL)
170                         result = FIB_RES_DEV(res);
171         }
172         if (result && devref)
173                 dev_hold(result);
174         rcu_read_unlock();
175         return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178
179 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
180
181 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
182 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
183                          int destroy);
184 #ifdef CONFIG_SYSCTL
185 static void devinet_sysctl_register(struct in_device *idev);
186 static void devinet_sysctl_unregister(struct in_device *idev);
187 #else
188 static void devinet_sysctl_register(struct in_device *idev)
189 {
190 }
191 static void devinet_sysctl_unregister(struct in_device *idev)
192 {
193 }
194 #endif
195
196 /* Locks all the inet devices. */
197
198 static struct in_ifaddr *inet_alloc_ifa(void)
199 {
200         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 }
202
203 static void inet_rcu_free_ifa(struct rcu_head *head)
204 {
205         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206         if (ifa->ifa_dev)
207                 in_dev_put(ifa->ifa_dev);
208         kfree(ifa);
209 }
210
211 static void inet_free_ifa(struct in_ifaddr *ifa)
212 {
213         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
214 }
215
216 void in_dev_finish_destroy(struct in_device *idev)
217 {
218         struct net_device *dev = idev->dev;
219
220         WARN_ON(idev->ifa_list);
221         WARN_ON(idev->mc_list);
222         kfree(rcu_dereference_protected(idev->mc_hash, 1));
223 #ifdef NET_REFCNT_DEBUG
224         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
225 #endif
226         dev_put(dev);
227         if (!idev->dead)
228                 pr_err("Freeing alive in_device %p\n", idev);
229         else
230                 kfree(idev);
231 }
232 EXPORT_SYMBOL(in_dev_finish_destroy);
233
234 static struct in_device *inetdev_init(struct net_device *dev)
235 {
236         struct in_device *in_dev;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         devinet_sysctl_register(in_dev);
258         ip_mc_init_dev(in_dev);
259         if (dev->flags & IFF_UP)
260                 ip_mc_up(in_dev);
261
262         /* we can receive as soon as ip_ptr is set -- do this last */
263         rcu_assign_pointer(dev->ip_ptr, in_dev);
264 out:
265         return in_dev;
266 out_kfree:
267         kfree(in_dev);
268         in_dev = NULL;
269         goto out;
270 }
271
272 static void in_dev_rcu_put(struct rcu_head *head)
273 {
274         struct in_device *idev = container_of(head, struct in_device, rcu_head);
275         in_dev_put(idev);
276 }
277
278 static void inetdev_destroy(struct in_device *in_dev)
279 {
280         struct in_ifaddr *ifa;
281         struct net_device *dev;
282
283         ASSERT_RTNL();
284
285         dev = in_dev->dev;
286
287         in_dev->dead = 1;
288
289         ip_mc_destroy_dev(in_dev);
290
291         while ((ifa = in_dev->ifa_list) != NULL) {
292                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
293                 inet_free_ifa(ifa);
294         }
295
296         RCU_INIT_POINTER(dev->ip_ptr, NULL);
297
298         devinet_sysctl_unregister(in_dev);
299         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
300         arp_ifdown(dev);
301
302         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
303 }
304
305 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
306 {
307         rcu_read_lock();
308         for_primary_ifa(in_dev) {
309                 if (inet_ifa_match(a, ifa)) {
310                         if (!b || inet_ifa_match(b, ifa)) {
311                                 rcu_read_unlock();
312                                 return 1;
313                         }
314                 }
315         } endfor_ifa(in_dev);
316         rcu_read_unlock();
317         return 0;
318 }
319
/*
 * Remove the address stored in *@ifap from @in_dev.  RTNL must be held.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link slot that currently points at the address to delete
 * @destroy: when non-zero the deleted address is freed via inet_free_ifa()
 * @nlh, @portid: forwarded to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address is deleted, its secondaries on the same subnet
 * are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first one is promoted to primary.
 */
320 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321                          int destroy, struct nlmsghdr *nlh, u32 portid)
322 {
323         struct in_ifaddr *promote = NULL;
324         struct in_ifaddr *ifa, *ifa1 = *ifap;
325         struct in_ifaddr *last_prim = in_dev->ifa_list;
326         struct in_ifaddr *prev_prom = NULL;
327         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
328
329         ASSERT_RTNL();
330
331         /* 1. Deleting primary ifaddr forces deletion all secondaries
332          * unless alias promotion is set
333          **/
334
335         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
336                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
337
338                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not
                         * below ifa1's; a promoted address is re-linked
                         * right after it.
                         */
339                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
340                             ifa1->ifa_scope <= ifa->ifa_scope)
341                                 last_prim = ifa;
342
                        /* Skip primaries and addresses outside ifa1's
                         * subnet, remembering the last entry skipped.
                         */
343                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
344                             ifa1->ifa_mask != ifa->ifa_mask ||
345                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
346                                 ifap1 = &ifa->ifa_next;
347                                 prev_prom = ifa;
348                                 continue;
349                         }
350
                        /* ifa is a secondary of ifa1: delete it, or pick
                         * it for promotion and stop scanning.
                         */
351                         if (!do_promote) {
352                                 inet_hash_remove(ifa);
353                                 *ifap1 = ifa->ifa_next;
354
355                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
356                                 blocking_notifier_call_chain(&inetaddr_chain,
357                                                 NETDEV_DOWN, ifa);
358                                 inet_free_ifa(ifa);
359                         } else {
360                                 promote = ifa;
361                                 break;
362                         }
363                 }
364         }
365
366         /* On promotion all secondaries from subnet are changing
367          * the primary IP, we must remove all their routes silently
368          * and later to add them back with new prefsrc. Do this
369          * while all addresses are on the device list.
370          */
371         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
372                 if (ifa1->ifa_mask == ifa->ifa_mask &&
373                     inet_ifa_match(ifa1->ifa_address, ifa))
374                         fib_del_ifaddr(ifa, ifa1);
375         }
376
377         /* 2. Unlink it */
378
379         *ifap = ifa1->ifa_next;
380         inet_hash_remove(ifa1);
381
382         /* 3. Announce address deletion */
383
384         /* Send message first, then call notifier.
385            At first sight, FIB update triggered by notifier
386            will refer to already deleted ifaddr, that could confuse
387            netlink listeners. It is not true: look, gated sees
388            that route deleted and if it still thinks that ifaddr
389            is valid, it will try to restore deleted routes... Grr.
390            So that, this order is correct.
391          */
392         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
393         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
394
395         if (promote) {
                /* Saved before promote may be re-linked below. */
396                 struct in_ifaddr *next_sec = promote->ifa_next;
397
                /* Move promote from its current position to just after
                 * last_prim; done only when an entry was skipped before
                 * it during the scan above.
                 */
398                 if (prev_prom) {
399                         prev_prom->ifa_next = promote->ifa_next;
400                         promote->ifa_next = last_prim->ifa_next;
401                         last_prim->ifa_next = promote;
402                 }
403
404                 promote->ifa_flags &= ~IFA_F_SECONDARY;
405                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
406                 blocking_notifier_call_chain(&inetaddr_chain,
407                                 NETDEV_UP, promote);
                /* Re-add the routes of the remaining same-subnet
                 * addresses that were removed before the unlink.
                 */
408                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
409                         if (ifa1->ifa_mask != ifa->ifa_mask ||
410                             !inet_ifa_match(ifa1->ifa_address, ifa))
411                                         continue;
412                         fib_add_ifaddr(ifa);
413                 }
414
415         }
416         if (destroy)
417                 inet_free_ifa(ifa1);
418 }
419
420 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
421                          int destroy)
422 {
423         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
424 }
425
/* Delayed work item that runs check_lifetime(), defined further down. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
429
430 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
431                              u32 portid)
432 {
433         struct in_device *in_dev = ifa->ifa_dev;
434         struct in_ifaddr *ifa1, **ifap, **last_primary;
435
436         ASSERT_RTNL();
437
438         if (!ifa->ifa_local) {
439                 inet_free_ifa(ifa);
440                 return 0;
441         }
442
443         ifa->ifa_flags &= ~IFA_F_SECONDARY;
444         last_primary = &in_dev->ifa_list;
445
446         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
447              ifap = &ifa1->ifa_next) {
448                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
449                     ifa->ifa_scope <= ifa1->ifa_scope)
450                         last_primary = &ifa1->ifa_next;
451                 if (ifa1->ifa_mask == ifa->ifa_mask &&
452                     inet_ifa_match(ifa1->ifa_address, ifa)) {
453                         if (ifa1->ifa_local == ifa->ifa_local) {
454                                 inet_free_ifa(ifa);
455                                 return -EEXIST;
456                         }
457                         if (ifa1->ifa_scope != ifa->ifa_scope) {
458                                 inet_free_ifa(ifa);
459                                 return -EINVAL;
460                         }
461                         ifa->ifa_flags |= IFA_F_SECONDARY;
462                 }
463         }
464
465         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
466                 net_srandom(ifa->ifa_local);
467                 ifap = last_primary;
468         }
469
470         ifa->ifa_next = *ifap;
471         *ifap = ifa;
472
473         inet_hash_insert(dev_net(in_dev->dev), ifa);
474
475         cancel_delayed_work(&check_lifetime_work);
476         schedule_delayed_work(&check_lifetime_work, 0);
477
478         /* Send message first, then call notifier.
479            Notifier will trigger FIB update, so that
480            listeners of netlink will know about new ifaddr */
481         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
482         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
483
484         return 0;
485 }
486
487 static int inet_insert_ifa(struct in_ifaddr *ifa)
488 {
489         return __inet_insert_ifa(ifa, NULL, 0);
490 }
491
492 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
493 {
494         struct in_device *in_dev = __in_dev_get_rtnl(dev);
495
496         ASSERT_RTNL();
497
498         if (!in_dev) {
499                 inet_free_ifa(ifa);
500                 return -ENOBUFS;
501         }
502         ipv4_devconf_setall(in_dev);
503         if (ifa->ifa_dev != in_dev) {
504                 WARN_ON(ifa->ifa_dev);
505                 in_dev_hold(in_dev);
506                 ifa->ifa_dev = in_dev;
507         }
508         if (ipv4_is_loopback(ifa->ifa_local))
509                 ifa->ifa_scope = RT_SCOPE_HOST;
510         return inet_insert_ifa(ifa);
511 }
512
513 /* Caller must hold RCU or RTNL :
514  * We dont take a reference on found in_device
515  */
516 struct in_device *inetdev_by_index(struct net *net, int ifindex)
517 {
518         struct net_device *dev;
519         struct in_device *in_dev = NULL;
520
521         rcu_read_lock();
522         dev = dev_get_by_index_rcu(net, ifindex);
523         if (dev)
524                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
525         rcu_read_unlock();
526         return in_dev;
527 }
528 EXPORT_SYMBOL(inetdev_by_index);
529
530 /* Called only from RTNL semaphored context. No locks. */
531
532 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
533                                     __be32 mask)
534 {
535         ASSERT_RTNL();
536
537         for_primary_ifa(in_dev) {
538                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
539                         return ifa;
540         } endfor_ifa(in_dev);
541         return NULL;
542 }
543
544 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
545 {
546         struct net *net = sock_net(skb->sk);
547         struct nlattr *tb[IFA_MAX+1];
548         struct in_device *in_dev;
549         struct ifaddrmsg *ifm;
550         struct in_ifaddr *ifa, **ifap;
551         int err = -EINVAL;
552
553         ASSERT_RTNL();
554
555         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
556         if (err < 0)
557                 goto errout;
558
559         ifm = nlmsg_data(nlh);
560         in_dev = inetdev_by_index(net, ifm->ifa_index);
561         if (in_dev == NULL) {
562                 err = -ENODEV;
563                 goto errout;
564         }
565
566         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
567              ifap = &ifa->ifa_next) {
568                 if (tb[IFA_LOCAL] &&
569                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
570                         continue;
571
572                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
573                         continue;
574
575                 if (tb[IFA_ADDRESS] &&
576                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
577                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
578                         continue;
579
580                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
581                 return 0;
582         }
583
584         err = -EADDRNOTAVAIL;
585 errout:
586         return err;
587 }
588
/* Lifetime value meaning "never expires". */
589 #define INFINITY_LIFE_TIME      0xFFFFFFFF
590
/*
 * Handler of check_lifetime_work: expire and deprecate addresses whose
 * lifetimes have run out, then re-arm the work for the next deadline.
 *
 * Each hash bucket is scanned twice.  The first pass runs under
 * rcu_read_lock() only; it computes the next wake-up time and decides
 * whether the bucket needs changes.  Only then is RTNL taken for the
 * second pass, which deletes expired addresses and flags newly
 * deprecated ones.
 */
591 static void check_lifetime(struct work_struct *work)
592 {
593         unsigned long now, next, next_sec, next_sched;
594         struct in_ifaddr *ifa;
595         struct hlist_node *n;
596         int i;
597
598         now = jiffies;
599         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
600
601         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
602                 bool change_needed = false;
603
                /* Pass 1: read-only scan, no RTNL. */
604                 rcu_read_lock();
605                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
606                         unsigned long age;
607
608                         if (ifa->ifa_flags & IFA_F_PERMANENT)
609                                 continue;
610
611                         /* We try to batch several events at once. */
612                         age = (now - ifa->ifa_tstamp +
613                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
614
                        /* Valid lifetime over: must be deleted. */
615                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
616                             age >= ifa->ifa_valid_lft) {
617                                 change_needed = true;
618                         } else if (ifa->ifa_preferred_lft ==
619                                    INFINITY_LIFE_TIME) {
620                                 continue;
                        /* Preferred lifetime over: next deadline is the
                         * end of the valid lifetime.
                         */
621                         } else if (age >= ifa->ifa_preferred_lft) {
622                                 if (time_before(ifa->ifa_tstamp +
623                                                 ifa->ifa_valid_lft * HZ, next))
624                                         next = ifa->ifa_tstamp +
625                                                ifa->ifa_valid_lft * HZ;
626
627                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
628                                         change_needed = true;
                        /* Still preferred: next deadline is the end of
                         * the preferred lifetime.
                         */
629                         } else if (time_before(ifa->ifa_tstamp +
630                                                ifa->ifa_preferred_lft * HZ,
631                                                next)) {
632                                 next = ifa->ifa_tstamp +
633                                        ifa->ifa_preferred_lft * HZ;
634                         }
635                 }
636                 rcu_read_unlock();
637                 if (!change_needed)
638                         continue;
                /* Pass 2: apply the changes under RTNL. */
639                 rtnl_lock();
640                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
641                         unsigned long age;
642
643                         if (ifa->ifa_flags & IFA_F_PERMANENT)
644                                 continue;
645
646                         /* We try to batch several events at once. */
647                         age = (now - ifa->ifa_tstamp +
648                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
649
650                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
651                             age >= ifa->ifa_valid_lft) {
652                                 struct in_ifaddr **ifap;
653
                                /* inet_del_ifa() needs the link slot that
                                 * points at ifa, so search the device
                                 * list for it.
                                 */
654                                 for (ifap = &ifa->ifa_dev->ifa_list;
655                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
656                                         if (*ifap == ifa) {
657                                                 inet_del_ifa(ifa->ifa_dev,
658                                                              ifap, 1);
659                                                 break;
660                                         }
661                                 }
662                         } else if (ifa->ifa_preferred_lft !=
663                                    INFINITY_LIFE_TIME &&
664                                    age >= ifa->ifa_preferred_lft &&
665                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
666                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
667                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
668                         }
669                 }
670                 rtnl_unlock();
671         }
672
673         next_sec = round_jiffies_up(next);
674         next_sched = next;
675
676         /* If rounded timeout is accurate enough, accept it. */
677         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
678                 next_sched = next_sec;
679
680         now = jiffies;
681         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
682         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
683                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
684
685         schedule_delayed_work(&check_lifetime_work, next_sched - now);
686 }
687
688 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
689                              __u32 prefered_lft)
690 {
691         unsigned long timeout;
692
693         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
694
695         timeout = addrconf_timeout_fixup(valid_lft, HZ);
696         if (addrconf_finite_timeout(timeout))
697                 ifa->ifa_valid_lft = timeout;
698         else
699                 ifa->ifa_flags |= IFA_F_PERMANENT;
700
701         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702         if (addrconf_finite_timeout(timeout)) {
703                 if (timeout == 0)
704                         ifa->ifa_flags |= IFA_F_DEPRECATED;
705                 ifa->ifa_preferred_lft = timeout;
706         }
707         ifa->ifa_tstamp = jiffies;
708         if (!ifa->ifa_cstamp)
709                 ifa->ifa_cstamp = ifa->ifa_tstamp;
710 }
711
712 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
714 {
715         struct nlattr *tb[IFA_MAX+1];
716         struct in_ifaddr *ifa;
717         struct ifaddrmsg *ifm;
718         struct net_device *dev;
719         struct in_device *in_dev;
720         int err;
721
722         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
723         if (err < 0)
724                 goto errout;
725
726         ifm = nlmsg_data(nlh);
727         err = -EINVAL;
728         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
729                 goto errout;
730
731         dev = __dev_get_by_index(net, ifm->ifa_index);
732         err = -ENODEV;
733         if (dev == NULL)
734                 goto errout;
735
736         in_dev = __in_dev_get_rtnl(dev);
737         err = -ENOBUFS;
738         if (in_dev == NULL)
739                 goto errout;
740
741         ifa = inet_alloc_ifa();
742         if (ifa == NULL)
743                 /*
744                  * A potential indev allocation can be left alive, it stays
745                  * assigned to its device and is destroy with it.
746                  */
747                 goto errout;
748
749         ipv4_devconf_setall(in_dev);
750         in_dev_hold(in_dev);
751
752         if (tb[IFA_ADDRESS] == NULL)
753                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
754
755         INIT_HLIST_NODE(&ifa->hash);
756         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
757         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
758         ifa->ifa_flags = ifm->ifa_flags;
759         ifa->ifa_scope = ifm->ifa_scope;
760         ifa->ifa_dev = in_dev;
761
762         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
763         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
764
765         if (tb[IFA_BROADCAST])
766                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
767
768         if (tb[IFA_LABEL])
769                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
770         else
771                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
772
773         if (tb[IFA_CACHEINFO]) {
774                 struct ifa_cacheinfo *ci;
775
776                 ci = nla_data(tb[IFA_CACHEINFO]);
777                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
778                         err = -EINVAL;
779                         goto errout_free;
780                 }
781                 *pvalid_lft = ci->ifa_valid;
782                 *pprefered_lft = ci->ifa_prefered;
783         }
784
785         return ifa;
786
787 errout_free:
788         inet_free_ifa(ifa);
789 errout:
790         return ERR_PTR(err);
791 }
792
793 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
794 {
795         struct in_device *in_dev = ifa->ifa_dev;
796         struct in_ifaddr *ifa1, **ifap;
797
798         if (!ifa->ifa_local)
799                 return NULL;
800
801         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
802              ifap = &ifa1->ifa_next) {
803                 if (ifa1->ifa_mask == ifa->ifa_mask &&
804                     inet_ifa_match(ifa1->ifa_address, ifa) &&
805                     ifa1->ifa_local == ifa->ifa_local)
806                         return ifa1;
807         }
808         return NULL;
809 }
810
811 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
812 {
813         struct net *net = sock_net(skb->sk);
814         struct in_ifaddr *ifa;
815         struct in_ifaddr *ifa_existing;
816         __u32 valid_lft = INFINITY_LIFE_TIME;
817         __u32 prefered_lft = INFINITY_LIFE_TIME;
818
819         ASSERT_RTNL();
820
821         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
822         if (IS_ERR(ifa))
823                 return PTR_ERR(ifa);
824
825         ifa_existing = find_matching_ifa(ifa);
826         if (!ifa_existing) {
827                 /* It would be best to check for !NLM_F_CREATE here but
828                  * userspace alreay relies on not having to provide this.
829                  */
830                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
831                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
832         } else {
833                 inet_free_ifa(ifa);
834
835                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
836                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
837                         return -EEXIST;
838                 ifa = ifa_existing;
839                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
840                 cancel_delayed_work(&check_lifetime_work);
841                 schedule_delayed_work(&check_lifetime_work, 0);
842                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
843                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
844         }
845         return 0;
846 }
847
848 /*
849  *      Determine a default network mask, based on the IP address.
850  */
851
852 static int inet_abc_len(__be32 addr)
853 {
854         int rc = -1;    /* Something else, probably a multicast. */
855
856         if (ipv4_is_zeronet(addr))
857                 rc = 0;
858         else {
859                 __u32 haddr = ntohl(addr);
860
861                 if (IN_CLASSA(haddr))
862                         rc = 8;
863                 else if (IN_CLASSB(haddr))
864                         rc = 16;
865                 else if (IN_CLASSC(haddr))
866                         rc = 24;
867         }
868
869         return rc;
870 }
871
872
/*
 * Handle the IPv4 interface-address ioctls: SIOCGIF{ADDR,BRDADDR,DSTADDR,
 * NETMASK}, SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} and SIOCSIFFLAGS.
 *
 * @net: network namespace used for the device lookup and capability check
 * @cmd: ioctl command; any command not listed above yields -EINVAL
 * @arg: user pointer to a struct ifreq; it is read on entry and written
 *       back by the "get" commands
 *
 * Returns 0 on success or a negative errno, among them: -EFAULT if the
 * ifreq cannot be copied, -EPERM if a "set" command is issued without
 * CAP_NET_ADMIN, -EINVAL for a bad family, address or mask, -ENODEV if the
 * named device does not exist, -EADDRNOTAVAIL if the command needs an
 * existing address and none matches, and -ENOBUFS if a new address cannot
 * be allocated.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
        struct ifreq ifr;
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
        struct in_device *in_dev;
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /*
         *      Fetch the caller's info block into kernel space
         */

        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                goto out;
        /* Force NUL termination of the user-supplied name */
        ifr.ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /* Temporarily cut the name at ':' so that only the part before it
         * is used for dev_load() and the device lookup below.
         */
        colon = strchr(ifr.ifr_name, ':');
        if (colon)
                *colon = 0;

        dev_load(net, ifr.ifr_name);

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)
                 */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* The reply is built in place; the caller's address was
                 * saved in sin_orig above.
                 */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr.ifr_name);
        if (!dev)
                goto done;

        /* Put the ':' back so the label comparisons below see the full
         * name the caller passed in.
         */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /* Only SIOCSIFADDR and SIOCSIFFLAGS may continue without an
         * existing address; every other command dereferences ifa below.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                sin->sin_addr.s_addr = ifa->ifa_local;
                goto rarok;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                goto rarok;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                sin->sin_addr.s_addr = ifa->ifa_address;
                goto rarok;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                sin->sin_addr.s_addr = ifa->ifa_mask;
                goto rarok;

        case SIOCSIFFLAGS:
                /* With a ':' in the name the request targets one labelled
                 * address: clearing IFF_UP deletes that address, anything
                 * else is a no-op. Without ':' the device flags change.
                 */
                if (colon) {
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr.ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr.ifr_flags);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* No address under this label yet: create one */
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /* NOTE(review): the last argument is 0 here and 1
                         * in the SIOCSIFFLAGS delete above; presumably 0
                         * unlinks without freeing so ifa can be reused
                         * below - confirm against inet_del_ifa().
                         */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                /* Derive prefix length, mask and broadcast from the address
                 * class, or use a /32 on point-to-point devices.
                 */
                if (!(dev->flags & IFF_POINTOPOINT)) {
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Remove, modify, then insert the same ifa again */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
rarok:
        /* "Get" commands: release RTNL first, then copy the reply out */
        rtnl_unlock();
        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
        goto out;
}
1114
1115 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1116 {
1117         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1118         struct in_ifaddr *ifa;
1119         struct ifreq ifr;
1120         int done = 0;
1121
1122         if (!in_dev)
1123                 goto out;
1124
1125         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1126                 if (!buf) {
1127                         done += sizeof(ifr);
1128                         continue;
1129                 }
1130                 if (len < (int) sizeof(ifr))
1131                         break;
1132                 memset(&ifr, 0, sizeof(struct ifreq));
1133                 strcpy(ifr.ifr_name, ifa->ifa_label);
1134
1135                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1136                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1137                                                                 ifa->ifa_local;
1138
1139                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1140                         done = -EFAULT;
1141                         break;
1142                 }
1143                 buf  += sizeof(struct ifreq);
1144                 len  -= sizeof(struct ifreq);
1145                 done += sizeof(struct ifreq);
1146         }
1147 out:
1148         return done;
1149 }
1150
1151 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1152 {
1153         __be32 addr = 0;
1154         struct in_device *in_dev;
1155         struct net *net = dev_net(dev);
1156
1157         rcu_read_lock();
1158         in_dev = __in_dev_get_rcu(dev);
1159         if (!in_dev)
1160                 goto no_in_dev;
1161
1162         for_primary_ifa(in_dev) {
1163                 if (ifa->ifa_scope > scope)
1164                         continue;
1165                 if (!dst || inet_ifa_match(dst, ifa)) {
1166                         addr = ifa->ifa_local;
1167                         break;
1168                 }
1169                 if (!addr)
1170                         addr = ifa->ifa_local;
1171         } endfor_ifa(in_dev);
1172
1173         if (addr)
1174                 goto out_unlock;
1175 no_in_dev:
1176
1177         /* Not loopback addresses on loopback should be preferred
1178            in this case. It is importnat that lo is the first interface
1179            in dev_base list.
1180          */
1181         for_each_netdev_rcu(net, dev) {
1182                 in_dev = __in_dev_get_rcu(dev);
1183                 if (!in_dev)
1184                         continue;
1185
1186                 for_primary_ifa(in_dev) {
1187                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1188                             ifa->ifa_scope <= scope) {
1189                                 addr = ifa->ifa_local;
1190                                 goto out_unlock;
1191                         }
1192                 } endfor_ifa(in_dev);
1193         }
1194 out_unlock:
1195         rcu_read_unlock();
1196         return addr;
1197 }
1198 EXPORT_SYMBOL(inet_select_addr);
1199
/*
 * Search the addresses of @in_dev for one that satisfies the wildcard
 * request (@dst, @local, @scope); a value of 0 for @dst or @local means
 * "any".
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - candidate local address: the first ifa_local that equals @local
 *          (or any, if @local is 0) with ifa_scope <= @scope;
 *   same - set once an address is seen whose subnet contains both @local
 *          and @dst (each test is skipped when the value is 0).
 *
 * Returns the candidate address if a subnet match was also found,
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Pick the first acceptable candidate address */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet already confirmed earlier: done */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* Nothing more to refine unless the caller
                                 * asked for auto-selection within dst's
                                 * subnet (local == 0 and dst != 0).
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1236
1237 /*
1238  * Confirm that local IP address exists using wildcards:
1239  * - in_dev: only on this interface, 0=any interface
1240  * - dst: only in the same subnet as dst, 0=any dst
1241  * - local: address, 0=autoselect the local address
1242  * - scope: maximum allowed scope value for the local address
1243  */
1244 __be32 inet_confirm_addr(struct in_device *in_dev,
1245                          __be32 dst, __be32 local, int scope)
1246 {
1247         __be32 addr = 0;
1248         struct net_device *dev;
1249         struct net *net;
1250
1251         if (scope != RT_SCOPE_LINK)
1252                 return confirm_addr_indev(in_dev, dst, local, scope);
1253
1254         net = dev_net(in_dev->dev);
1255         rcu_read_lock();
1256         for_each_netdev_rcu(net, dev) {
1257                 in_dev = __in_dev_get_rcu(dev);
1258                 if (in_dev) {
1259                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1260                         if (addr)
1261                                 break;
1262                 }
1263         }
1264         rcu_read_unlock();
1265
1266         return addr;
1267 }
1268 EXPORT_SYMBOL(inet_confirm_addr);
1269
1270 /*
1271  *      Device notifier
1272  */
1273
1274 int register_inetaddr_notifier(struct notifier_block *nb)
1275 {
1276         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1277 }
1278 EXPORT_SYMBOL(register_inetaddr_notifier);
1279
1280 int unregister_inetaddr_notifier(struct notifier_block *nb)
1281 {
1282         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1283 }
1284 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1285
1286 /* Rename ifa_labels for a device name change. Make some effort to preserve
1287  * existing alias numbering and to create unique labels if possible.
1288 */
1289 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1290 {
1291         struct in_ifaddr *ifa;
1292         int named = 0;
1293
1294         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1295                 char old[IFNAMSIZ], *dot;
1296
1297                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1298                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1299                 if (named++ == 0)
1300                         goto skip;
1301                 dot = strchr(old, ':');
1302                 if (dot == NULL) {
1303                         sprintf(old, ":%d", named);
1304                         dot = old;
1305                 }
1306                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1307                         strcat(ifa->ifa_label, dot);
1308                 else
1309                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1310 skip:
1311                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1312         }
1313 }
1314
/* True if @mtu is large enough for IPv4 to run on the device, i.e. at
 * least 68 bytes (presumably the RFC 791 minimum - confirm).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        const unsigned int min_mtu = 68;

        return !(mtu < min_mtu);
}
1319
1320 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1321                                         struct in_device *in_dev)
1322
1323 {
1324         struct in_ifaddr *ifa;
1325
1326         for (ifa = in_dev->ifa_list; ifa;
1327              ifa = ifa->ifa_next) {
1328                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1329                          ifa->ifa_local, dev,
1330                          ifa->ifa_local, NULL,
1331                          dev->dev_addr, NULL);
1332         }
1333 }
1334
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback that keeps the per-device IPv4 state
 * (struct in_device) in step with device events.
 *
 * @this:  the registered notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the affected net_device is extracted
 *
 * Returns NOTIFY_DONE, except that a failed inetdev_init() on
 * NETDEV_REGISTER is reported as notifier_from_errno(-ENOMEM).
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state yet: only REGISTER and CHANGEMTU can create it */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* REGISTER for a device that already has an in_device is
                 * unexpected; log it and clear the pointer.
                 */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* Loopback devices get 127.0.0.1/8 configured on UP */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
                                                 INFINITY_LIFE_TIME);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR send gratuitous ARP only when the
                 * arp_notify setting is enabled for this device.
                 */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() in this file does
                 * call rtmsg_ifa(RTM_NEWADDR, ...) for each address, so
                 * the statement above looks stale - confirm and update.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register the sysctl entries for the in_device after
                 * the rename.
                 */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1427
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1431
1432 static size_t inet_nlmsg_size(void)
1433 {
1434         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1435                + nla_total_size(4) /* IFA_ADDRESS */
1436                + nla_total_size(4) /* IFA_LOCAL */
1437                + nla_total_size(4) /* IFA_BROADCAST */
1438                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1439 }
1440
1441 static inline u32 cstamp_delta(unsigned long cstamp)
1442 {
1443         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1444 }
1445
1446 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1447                          unsigned long tstamp, u32 preferred, u32 valid)
1448 {
1449         struct ifa_cacheinfo ci;
1450
1451         ci.cstamp = cstamp_delta(cstamp);
1452         ci.tstamp = cstamp_delta(tstamp);
1453         ci.ifa_prefered = preferred;
1454         ci.ifa_valid = valid;
1455
1456         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1457 }
1458
1459 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1460                             u32 portid, u32 seq, int event, unsigned int flags)
1461 {
1462         struct ifaddrmsg *ifm;
1463         struct nlmsghdr  *nlh;
1464         u32 preferred, valid;
1465
1466         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1467         if (nlh == NULL)
1468                 return -EMSGSIZE;
1469
1470         ifm = nlmsg_data(nlh);
1471         ifm->ifa_family = AF_INET;
1472         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1473         ifm->ifa_flags = ifa->ifa_flags;
1474         ifm->ifa_scope = ifa->ifa_scope;
1475         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1476
1477         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1478                 preferred = ifa->ifa_preferred_lft;
1479                 valid = ifa->ifa_valid_lft;
1480                 if (preferred != INFINITY_LIFE_TIME) {
1481                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1482
1483                         if (preferred > tval)
1484                                 preferred -= tval;
1485                         else
1486                                 preferred = 0;
1487                         if (valid != INFINITY_LIFE_TIME) {
1488                                 if (valid > tval)
1489                                         valid -= tval;
1490                                 else
1491                                         valid = 0;
1492                         }
1493                 }
1494         } else {
1495                 preferred = INFINITY_LIFE_TIME;
1496                 valid = INFINITY_LIFE_TIME;
1497         }
1498         if ((ifa->ifa_address &&
1499              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1500             (ifa->ifa_local &&
1501              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1502             (ifa->ifa_broadcast &&
1503              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1504             (ifa->ifa_label[0] &&
1505              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1506             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1507                           preferred, valid))
1508                 goto nla_put_failure;
1509
1510         return nlmsg_end(skb, nlh);
1511
1512 nla_put_failure:
1513         nlmsg_cancel(skb, nlh);
1514         return -EMSGSIZE;
1515 }
1516
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) for
 * every IPv4 address on every device in the requesting socket's namespace.
 *
 * The dump is resumable. cb->args[] carries the position between calls:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - device index within that bucket
 *   args[2] - address index within that device
 * When inet_fill_ifaddr() reports failure or an empty result (<= 0) the
 * current position is saved and the function returns so it can be called
 * again.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;

        /* Restore the position saved by the previous call */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx only applies to the first bucket visited */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                /* Sequence value checked by nl_dump_check_consistent() */
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        if (idx < s_idx)
                                goto cont;
                        /* Past the resume device: start at its first
                         * address.
                         */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).portid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        rcu_read_unlock();
                                        goto done;
                                }
                                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Save the position for the next call */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1573
1574 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1575                       u32 portid)
1576 {
1577         struct sk_buff *skb;
1578         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1579         int err = -ENOBUFS;
1580         struct net *net;
1581
1582         net = dev_net(ifa->ifa_dev->dev);
1583         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1584         if (skb == NULL)
1585                 goto errout;
1586
1587         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1588         if (err < 0) {
1589                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1590                 WARN_ON(err == -EMSGSIZE);
1591                 kfree_skb(skb);
1592                 goto errout;
1593         }
1594         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1595         return;
1596 errout:
1597         if (err < 0)
1598                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1599 }
1600
1601 static size_t inet_get_link_af_size(const struct net_device *dev)
1602 {
1603         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1604
1605         if (!in_dev)
1606                 return 0;
1607
1608         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1609 }
1610
1611 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1612 {
1613         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1614         struct nlattr *nla;
1615         int i;
1616
1617         if (!in_dev)
1618                 return -ENODATA;
1619
1620         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1621         if (nla == NULL)
1622                 return -EMSGSIZE;
1623
1624         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1625                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1626
1627         return 0;
1628 }
1629
/* Policy used by inet_validate_link_af() when parsing the nested
 * attributes: IFLA_INET_CONF must itself be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1633
1634 static int inet_validate_link_af(const struct net_device *dev,
1635                                  const struct nlattr *nla)
1636 {
1637         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1638         int err, rem;
1639
1640         if (dev && !__in_dev_get_rtnl(dev))
1641                 return -EAFNOSUPPORT;
1642
1643         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1644         if (err < 0)
1645                 return err;
1646
1647         if (tb[IFLA_INET_CONF]) {
1648                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1649                         int cfgid = nla_type(a);
1650
1651                         if (nla_len(a) < 4)
1652                                 return -EINVAL;
1653
1654                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1655                                 return -EINVAL;
1656                 }
1657         }
1658
1659         return 0;
1660 }
1661
1662 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1663 {
1664         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1665         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1666         int rem;
1667
1668         if (!in_dev)
1669                 return -EAFNOSUPPORT;
1670
1671         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1672                 BUG();
1673
1674         if (tb[IFLA_INET_CONF]) {
1675                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1676                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1677         }
1678
1679         return 0;
1680 }
1681
1682 static int inet_netconf_msgsize_devconf(int type)
1683 {
1684         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1685                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1686
1687         /* type -1 is used for ALL */
1688         if (type == -1 || type == NETCONFA_FORWARDING)
1689                 size += nla_total_size(4);
1690         if (type == -1 || type == NETCONFA_RP_FILTER)
1691                 size += nla_total_size(4);
1692         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1693                 size += nla_total_size(4);
1694
1695         return size;
1696 }
1697
1698 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1699                                      struct ipv4_devconf *devconf, u32 portid,
1700                                      u32 seq, int event, unsigned int flags,
1701                                      int type)
1702 {
1703         struct nlmsghdr  *nlh;
1704         struct netconfmsg *ncm;
1705
1706         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1707                         flags);
1708         if (nlh == NULL)
1709                 return -EMSGSIZE;
1710
1711         ncm = nlmsg_data(nlh);
1712         ncm->ncm_family = AF_INET;
1713
1714         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1715                 goto nla_put_failure;
1716
1717         /* type -1 is used for ALL */
1718         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1719             nla_put_s32(skb, NETCONFA_FORWARDING,
1720                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1721                 goto nla_put_failure;
1722         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1723             nla_put_s32(skb, NETCONFA_RP_FILTER,
1724                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1725                 goto nla_put_failure;
1726         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1727             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1728                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1729                 goto nla_put_failure;
1730
1731         return nlmsg_end(skb, nlh);
1732
1733 nla_put_failure:
1734         nlmsg_cancel(skb, nlh);
1735         return -EMSGSIZE;
1736 }
1737
1738 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1739                                  struct ipv4_devconf *devconf)
1740 {
1741         struct sk_buff *skb;
1742         int err = -ENOBUFS;
1743
1744         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1745         if (skb == NULL)
1746                 goto errout;
1747
1748         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1749                                         RTM_NEWNETCONF, 0, type);
1750         if (err < 0) {
1751                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1752                 WARN_ON(err == -EMSGSIZE);
1753                 kfree_skb(skb);
1754                 goto errout;
1755         }
1756         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1757         return;
1758 errout:
1759         if (err < 0)
1760                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1761 }
1762
1763 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1764         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1765         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1766         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1767 };
1768
1769 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1770                                     struct nlmsghdr *nlh)
1771 {
1772         struct net *net = sock_net(in_skb->sk);
1773         struct nlattr *tb[NETCONFA_MAX+1];
1774         struct netconfmsg *ncm;
1775         struct sk_buff *skb;
1776         struct ipv4_devconf *devconf;
1777         struct in_device *in_dev;
1778         struct net_device *dev;
1779         int ifindex;
1780         int err;
1781
1782         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1783                           devconf_ipv4_policy);
1784         if (err < 0)
1785                 goto errout;
1786
1787         err = EINVAL;
1788         if (!tb[NETCONFA_IFINDEX])
1789                 goto errout;
1790
1791         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1792         switch (ifindex) {
1793         case NETCONFA_IFINDEX_ALL:
1794                 devconf = net->ipv4.devconf_all;
1795                 break;
1796         case NETCONFA_IFINDEX_DEFAULT:
1797                 devconf = net->ipv4.devconf_dflt;
1798                 break;
1799         default:
1800                 dev = __dev_get_by_index(net, ifindex);
1801                 if (dev == NULL)
1802                         goto errout;
1803                 in_dev = __in_dev_get_rtnl(dev);
1804                 if (in_dev == NULL)
1805                         goto errout;
1806                 devconf = &in_dev->cnf;
1807                 break;
1808         }
1809
1810         err = -ENOBUFS;
1811         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1812         if (skb == NULL)
1813                 goto errout;
1814
1815         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1816                                         NETLINK_CB(in_skb).portid,
1817                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1818                                         -1);
1819         if (err < 0) {
1820                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1821                 WARN_ON(err == -EMSGSIZE);
1822                 kfree_skb(skb);
1823                 goto errout;
1824         }
1825         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1826 errout:
1827         return err;
1828 }
1829
1830 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1831                                      struct netlink_callback *cb)
1832 {
1833         struct net *net = sock_net(skb->sk);
1834         int h, s_h;
1835         int idx, s_idx;
1836         struct net_device *dev;
1837         struct in_device *in_dev;
1838         struct hlist_head *head;
1839
1840         s_h = cb->args[0];
1841         s_idx = idx = cb->args[1];
1842
1843         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1844                 idx = 0;
1845                 head = &net->dev_index_head[h];
1846                 rcu_read_lock();
1847                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1848                           net->dev_base_seq;
1849                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1850                         if (idx < s_idx)
1851                                 goto cont;
1852                         in_dev = __in_dev_get_rcu(dev);
1853                         if (!in_dev)
1854                                 goto cont;
1855
1856                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1857                                                       &in_dev->cnf,
1858                                                       NETLINK_CB(cb->skb).portid,
1859                                                       cb->nlh->nlmsg_seq,
1860                                                       RTM_NEWNETCONF,
1861                                                       NLM_F_MULTI,
1862                                                       -1) <= 0) {
1863                                 rcu_read_unlock();
1864                                 goto done;
1865                         }
1866                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1867 cont:
1868                         idx++;
1869                 }
1870                 rcu_read_unlock();
1871         }
1872         if (h == NETDEV_HASHENTRIES) {
1873                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1874                                               net->ipv4.devconf_all,
1875                                               NETLINK_CB(cb->skb).portid,
1876                                               cb->nlh->nlmsg_seq,
1877                                               RTM_NEWNETCONF, NLM_F_MULTI,
1878                                               -1) <= 0)
1879                         goto done;
1880                 else
1881                         h++;
1882         }
1883         if (h == NETDEV_HASHENTRIES + 1) {
1884                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1885                                               net->ipv4.devconf_dflt,
1886                                               NETLINK_CB(cb->skb).portid,
1887                                               cb->nlh->nlmsg_seq,
1888                                               RTM_NEWNETCONF, NLM_F_MULTI,
1889                                               -1) <= 0)
1890                         goto done;
1891                 else
1892                         h++;
1893         }
1894 done:
1895         cb->args[0] = h;
1896         cb->args[1] = idx;
1897
1898         return skb->len;
1899 }
1900
1901 #ifdef CONFIG_SYSCTL
1902
1903 static void devinet_copy_dflt_conf(struct net *net, int i)
1904 {
1905         struct net_device *dev;
1906
1907         rcu_read_lock();
1908         for_each_netdev_rcu(net, dev) {
1909                 struct in_device *in_dev;
1910
1911                 in_dev = __in_dev_get_rcu(dev);
1912                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1913                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1914         }
1915         rcu_read_unlock();
1916 }
1917
1918 /* called with RTNL locked */
1919 static void inet_forward_change(struct net *net)
1920 {
1921         struct net_device *dev;
1922         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1923
1924         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1925         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1926         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1927                                     NETCONFA_IFINDEX_ALL,
1928                                     net->ipv4.devconf_all);
1929         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1930                                     NETCONFA_IFINDEX_DEFAULT,
1931                                     net->ipv4.devconf_dflt);
1932
1933         for_each_netdev(net, dev) {
1934                 struct in_device *in_dev;
1935                 if (on)
1936                         dev_disable_lro(dev);
1937                 rcu_read_lock();
1938                 in_dev = __in_dev_get_rcu(dev);
1939                 if (in_dev) {
1940                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1941                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1942                                                     dev->ifindex, &in_dev->cnf);
1943                 }
1944                 rcu_read_unlock();
1945         }
1946 }
1947
1948 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1949                              void __user *buffer,
1950                              size_t *lenp, loff_t *ppos)
1951 {
1952         int old_value = *(int *)ctl->data;
1953         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1954         int new_value = *(int *)ctl->data;
1955
1956         if (write) {
1957                 struct ipv4_devconf *cnf = ctl->extra1;
1958                 struct net *net = ctl->extra2;
1959                 int i = (int *)ctl->data - cnf->data;
1960
1961                 set_bit(i, cnf->state);
1962
1963                 if (cnf == net->ipv4.devconf_dflt)
1964                         devinet_copy_dflt_conf(net, i);
1965                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1966                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1967                         if ((new_value == 0) && (old_value != 0))
1968                                 rt_cache_flush(net);
1969                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1970                     new_value != old_value) {
1971                         int ifindex;
1972
1973                         if (cnf == net->ipv4.devconf_dflt)
1974                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1975                         else if (cnf == net->ipv4.devconf_all)
1976                                 ifindex = NETCONFA_IFINDEX_ALL;
1977                         else {
1978                                 struct in_device *idev =
1979                                         container_of(cnf, struct in_device,
1980                                                      cnf);
1981                                 ifindex = idev->dev->ifindex;
1982                         }
1983                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1984                                                     ifindex, cnf);
1985                 }
1986         }
1987
1988         return ret;
1989 }
1990
1991 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1992                                   void __user *buffer,
1993                                   size_t *lenp, loff_t *ppos)
1994 {
1995         int *valp = ctl->data;
1996         int val = *valp;
1997         loff_t pos = *ppos;
1998         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1999
2000         if (write && *valp != val) {
2001                 struct net *net = ctl->extra2;
2002
2003                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2004                         if (!rtnl_trylock()) {
2005                                 /* Restore the original values before restarting */
2006                                 *valp = val;
2007                                 *ppos = pos;
2008                                 return restart_syscall();
2009                         }
2010                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2011                                 inet_forward_change(net);
2012                         } else {
2013                                 struct ipv4_devconf *cnf = ctl->extra1;
2014                                 struct in_device *idev =
2015                                         container_of(cnf, struct in_device, cnf);
2016                                 if (*valp)
2017                                         dev_disable_lro(idev->dev);
2018                                 inet_netconf_notify_devconf(net,
2019                                                             NETCONFA_FORWARDING,
2020                                                             idev->dev->ifindex,
2021                                                             cnf);
2022                         }
2023                         rtnl_unlock();
2024                         rt_cache_flush(net);
2025                 } else
2026                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2027                                                     NETCONFA_IFINDEX_DEFAULT,
2028                                                     net->ipv4.devconf_dflt);
2029         }
2030
2031         return ret;
2032 }
2033
2034 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2035                                 void __user *buffer,
2036                                 size_t *lenp, loff_t *ppos)
2037 {
2038         int *valp = ctl->data;
2039         int val = *valp;
2040         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2041         struct net *net = ctl->extra2;
2042
2043         if (write && *valp != val)
2044                 rt_cache_flush(net);
2045
2046         return ret;
2047 }
2048
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at the attribute's slot in the template ipv4_devconf
 * (attribute ids are 1-based, hence the "- 1") and .extra1 at the template
 * itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2071
2072 static struct devinet_sysctl_table {
2073         struct ctl_table_header *sysctl_header;
2074         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2075 } devinet_sysctl = {
2076         .devinet_vars = {
2077                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2078                                              devinet_sysctl_forward),
2079                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2080
2081                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2082                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2083                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2084                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2085                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2086                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2087                                         "accept_source_route"),
2088                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2089                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2090                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2091                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2092                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2093                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2094                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2095                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2096                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2097                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2098                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2099                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2100                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2101                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2102                                         "force_igmp_version"),
2103                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2104                                         "igmpv2_unsolicited_report_interval"),
2105                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2106                                         "igmpv3_unsolicited_report_interval"),
2107
2108                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2109                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2110                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2111                                               "promote_secondaries"),
2112                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2113                                               "route_localnet"),
2114         },
2115 };
2116
2117 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2118                                         struct ipv4_devconf *p)
2119 {
2120         int i;
2121         struct devinet_sysctl_table *t;
2122         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2123
2124         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2125         if (!t)
2126                 goto out;
2127
2128         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2129                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2130                 t->devinet_vars[i].extra1 = p;
2131                 t->devinet_vars[i].extra2 = net;
2132         }
2133
2134         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2135
2136         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2137         if (!t->sysctl_header)
2138                 goto free;
2139
2140         p->sysctl = t;
2141         return 0;
2142
2143 free:
2144         kfree(t);
2145 out:
2146         return -ENOBUFS;
2147 }
2148
2149 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2150 {
2151         struct devinet_sysctl_table *t = cnf->sysctl;
2152
2153         if (t == NULL)
2154                 return;
2155
2156         cnf->sysctl = NULL;
2157         unregister_net_sysctl_table(t->sysctl_header);
2158         kfree(t);
2159 }
2160
2161 static void devinet_sysctl_register(struct in_device *idev)
2162 {
2163         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2164         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2165                                         &idev->cnf);
2166 }
2167
2168 static void devinet_sysctl_unregister(struct in_device *idev)
2169 {
2170         __devinet_sysctl_unregister(&idev->cnf);
2171         neigh_sysctl_unregister(idev->arp_parms);
2172 }
2173
2174 static struct ctl_table ctl_forward_entry[] = {
2175         {
2176                 .procname       = "ip_forward",
2177                 .data           = &ipv4_devconf.data[
2178                                         IPV4_DEVCONF_FORWARDING - 1],
2179                 .maxlen         = sizeof(int),
2180                 .mode           = 0644,
2181                 .proc_handler   = devinet_sysctl_forward,
2182                 .extra1         = &ipv4_devconf,
2183                 .extra2         = &init_net,
2184         },
2185         { },
2186 };
2187 #endif
2188
2189 static __net_init int devinet_init_net(struct net *net)
2190 {
2191         int err;
2192         struct ipv4_devconf *all, *dflt;
2193 #ifdef CONFIG_SYSCTL
2194         struct ctl_table *tbl = ctl_forward_entry;
2195         struct ctl_table_header *forw_hdr;
2196 #endif
2197
2198         err = -ENOMEM;
2199         all = &ipv4_devconf;
2200         dflt = &ipv4_devconf_dflt;
2201
2202         if (!net_eq(net, &init_net)) {
2203                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2204                 if (all == NULL)
2205                         goto err_alloc_all;
2206
2207                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2208                 if (dflt == NULL)
2209                         goto err_alloc_dflt;
2210
2211 #ifdef CONFIG_SYSCTL
2212                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2213                 if (tbl == NULL)
2214                         goto err_alloc_ctl;
2215
2216                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2217                 tbl[0].extra1 = all;
2218                 tbl[0].extra2 = net;
2219 #endif
2220         }
2221
2222 #ifdef CONFIG_SYSCTL
2223         err = __devinet_sysctl_register(net, "all", all);
2224         if (err < 0)
2225                 goto err_reg_all;
2226
2227         err = __devinet_sysctl_register(net, "default", dflt);
2228         if (err < 0)
2229                 goto err_reg_dflt;
2230
2231         err = -ENOMEM;
2232         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2233         if (forw_hdr == NULL)
2234                 goto err_reg_ctl;
2235         net->ipv4.forw_hdr = forw_hdr;
2236 #endif
2237
2238         net->ipv4.devconf_all = all;
2239         net->ipv4.devconf_dflt = dflt;
2240         return 0;
2241
2242 #ifdef CONFIG_SYSCTL
2243 err_reg_ctl:
2244         __devinet_sysctl_unregister(dflt);
2245 err_reg_dflt:
2246         __devinet_sysctl_unregister(all);
2247 err_reg_all:
2248         if (tbl != ctl_forward_entry)
2249                 kfree(tbl);
2250 err_alloc_ctl:
2251 #endif
2252         if (dflt != &ipv4_devconf_dflt)
2253                 kfree(dflt);
2254 err_alloc_dflt:
2255         if (all != &ipv4_devconf)
2256                 kfree(all);
2257 err_alloc_all:
2258         return err;
2259 }
2260
2261 static __net_exit void devinet_exit_net(struct net *net)
2262 {
2263 #ifdef CONFIG_SYSCTL
2264         struct ctl_table *tbl;
2265
2266         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2267         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2268         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2269         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2270         kfree(tbl);
2271 #endif
2272         kfree(net->ipv4.devconf_dflt);
2273         kfree(net->ipv4.devconf_all);
2274 }
2275
2276 static __net_initdata struct pernet_operations devinet_ops = {
2277         .init = devinet_init_net,
2278         .exit = devinet_exit_net,
2279 };
2280
2281 static struct rtnl_af_ops inet_af_ops = {
2282         .family           = AF_INET,
2283         .fill_link_af     = inet_fill_link_af,
2284         .get_link_af_size = inet_get_link_af_size,
2285         .validate_link_af = inet_validate_link_af,
2286         .set_link_af      = inet_set_link_af,
2287 };
2288
2289 void __init devinet_init(void)
2290 {
2291         int i;
2292
2293         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2294                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2295
2296         register_pernet_subsys(&devinet_ops);
2297
2298         register_gifconf(PF_INET, inet_gifconf);
2299         register_netdevice_notifier(&ip_netdev_notifier);
2300
2301         schedule_delayed_work(&check_lifetime_work, 0);
2302
2303         rtnl_af_register(&inet_af_ops);
2304
2305         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2306         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2307         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2308         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2309                       inet_netconf_dump_devconf, NULL);
2310 }
2311