]> git.karo-electronics.de Git - karo-tx-linux.git/blob - net/ipv4/devinet.c
net: Kill register_sysctl_rotable
[karo-tx-linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 #include "fib_lookup.h"
67
68 static struct ipv4_devconf ipv4_devconf = {
69         .data = {
70                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74         },
75 };
76
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78         .data = {
79                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84         },
85 };
86
/*
 * Expands to IPV4_DEVCONF() applied to the structure that @net's
 * ipv4.devconf_dflt pointer refers to.  @net is parenthesized so that any
 * pointer-valued expression (not only a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91         [IFA_LOCAL]             = { .type = NLA_U32 },
92         [IFA_ADDRESS]           = { .type = NLA_U32 },
93         [IFA_BROADCAST]         = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
98  * value.  So if you change this define, make appropriate changes to
99  * inet_addr_hash as well.
100  */
101 #define IN4_ADDR_HSIZE  256
102 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
103 static DEFINE_SPINLOCK(inet_addr_hash_lock);
104
105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
106 {
107         u32 val = (__force u32) addr ^ hash_ptr(net, 8);
108
109         return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
110                 (IN4_ADDR_HSIZE - 1));
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         unsigned int hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142         struct hlist_node *node;
143
144         rcu_read_lock();
145         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146                 struct net_device *dev = ifa->ifa_dev->dev;
147
148                 if (!net_eq(dev_net(dev), net))
149                         continue;
150                 if (ifa->ifa_local == addr) {
151                         result = dev;
152                         break;
153                 }
154         }
155         if (!result) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         }
169         if (result && devref)
170                 dev_hold(result);
171         rcu_read_unlock();
172         return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180                          int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static inline void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static inline void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192
193 /* Locks all the inet devices. */
194
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203         if (ifa->ifa_dev)
204                 in_dev_put(ifa->ifa_dev);
205         kfree(ifa);
206 }
207
208 static inline void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215         struct net_device *dev = idev->dev;
216
217         WARN_ON(idev->ifa_list);
218         WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
221                idev, dev ? dev->name : "NIL");
222 #endif
223         dev_put(dev);
224         if (!idev->dead)
225                 pr_err("Freeing alive in_device %p\n", idev);
226         else
227                 kfree(idev);
228 }
229 EXPORT_SYMBOL(in_dev_finish_destroy);
230
231 static struct in_device *inetdev_init(struct net_device *dev)
232 {
233         struct in_device *in_dev;
234
235         ASSERT_RTNL();
236
237         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238         if (!in_dev)
239                 goto out;
240         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
241                         sizeof(in_dev->cnf));
242         in_dev->cnf.sysctl = NULL;
243         in_dev->dev = dev;
244         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
245         if (!in_dev->arp_parms)
246                 goto out_kfree;
247         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
248                 dev_disable_lro(dev);
249         /* Reference in_dev->dev */
250         dev_hold(dev);
251         /* Account for reference dev->ip_ptr (below) */
252         in_dev_hold(in_dev);
253
254         devinet_sysctl_register(in_dev);
255         ip_mc_init_dev(in_dev);
256         if (dev->flags & IFF_UP)
257                 ip_mc_up(in_dev);
258
259         /* we can receive as soon as ip_ptr is set -- do this last */
260         rcu_assign_pointer(dev->ip_ptr, in_dev);
261 out:
262         return in_dev;
263 out_kfree:
264         kfree(in_dev);
265         in_dev = NULL;
266         goto out;
267 }
268
269 static void in_dev_rcu_put(struct rcu_head *head)
270 {
271         struct in_device *idev = container_of(head, struct in_device, rcu_head);
272         in_dev_put(idev);
273 }
274
275 static void inetdev_destroy(struct in_device *in_dev)
276 {
277         struct in_ifaddr *ifa;
278         struct net_device *dev;
279
280         ASSERT_RTNL();
281
282         dev = in_dev->dev;
283
284         in_dev->dead = 1;
285
286         ip_mc_destroy_dev(in_dev);
287
288         while ((ifa = in_dev->ifa_list) != NULL) {
289                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
290                 inet_free_ifa(ifa);
291         }
292
293         RCU_INIT_POINTER(dev->ip_ptr, NULL);
294
295         devinet_sysctl_unregister(in_dev);
296         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297         arp_ifdown(dev);
298
299         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
300 }
301
302 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
303 {
304         rcu_read_lock();
305         for_primary_ifa(in_dev) {
306                 if (inet_ifa_match(a, ifa)) {
307                         if (!b || inet_ifa_match(b, ifa)) {
308                                 rcu_read_unlock();
309                                 return 1;
310                         }
311                 }
312         } endfor_ifa(in_dev);
313         rcu_read_unlock();
314         return 0;
315 }
316
/*
 * Remove the address *@ifap from @in_dev's address list and from the address
 * hash, then announce the removal with an RTM_DELADDR message and a
 * NETDEV_DOWN notifier call.
 *
 * If the removed address is a primary one, secondaries with the same mask on
 * the same subnet are handled as well: without IN_DEV_PROMOTE_SECONDARIES
 * each of them is deleted and announced too; with it, the first such
 * secondary is promoted to primary and announced as a new address.
 *
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise it is left to the caller.
 * @nlh, @pid: passed through to rtmsg_ifa().
 *
 * Must be called with RTNL held.
 */
317 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
318                          int destroy, struct nlmsghdr *nlh, u32 pid)
319 {
320         struct in_ifaddr *promote = NULL;
321         struct in_ifaddr *ifa, *ifa1 = *ifap;
322         struct in_ifaddr *last_prim = in_dev->ifa_list;
323         struct in_ifaddr *prev_prom = NULL;
324         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
325
326         ASSERT_RTNL();
327
328         /* 1. Deleting primary ifaddr forces deletion all secondaries
329          * unless alias promotion is set
330          **/
331
332         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
333                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
334
335                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary with ifa1->ifa_scope <= its
                         * scope; a promoted address is re-linked after it.
                         */
336                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
337                             ifa1->ifa_scope <= ifa->ifa_scope)
338                                 last_prim = ifa;
339
                        /* Step over primaries and addresses outside ifa1's
                         * subnet; prev_prom trails as the predecessor of a
                         * possible promotion candidate.
                         */
340                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
341                             ifa1->ifa_mask != ifa->ifa_mask ||
342                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
343                                 ifap1 = &ifa->ifa_next;
344                                 prev_prom = ifa;
345                                 continue;
346                         }
347
348                         if (!do_promote) {
349                                 inet_hash_remove(ifa);
350                                 *ifap1 = ifa->ifa_next;
351
352                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
353                                 blocking_notifier_call_chain(&inetaddr_chain,
354                                                 NETDEV_DOWN, ifa);
355                                 inet_free_ifa(ifa);
356                         } else {
357                                 promote = ifa;
358                                 break;
359                         }
360                 }
361         }
362
363         /* On promotion all secondaries from subnet are changing
364          * the primary IP, we must remove all their routes silently
365          * and later to add them back with new prefsrc. Do this
366          * while all addresses are on the device list.
367          */
368         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
369                 if (ifa1->ifa_mask == ifa->ifa_mask &&
370                     inet_ifa_match(ifa1->ifa_address, ifa))
371                         fib_del_ifaddr(ifa, ifa1);
372         }
373
374         /* 2. Unlink it */
375
376         *ifap = ifa1->ifa_next;
377         inet_hash_remove(ifa1);
378
379         /* 3. Announce address deletion */
380
381         /* Send message first, then call notifier.
382            At first sight, FIB update triggered by notifier
383            will refer to already deleted ifaddr, that could confuse
384            netlink listeners. It is not true: look, gated sees
385            that route deleted and if it still thinks that ifaddr
386            is valid, it will try to restore deleted routes... Grr.
387            So that, this order is correct.
388          */
389         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
390         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
391
392         if (promote) {
                /* Saved before the re-link below changes promote->ifa_next. */
393                 struct in_ifaddr *next_sec = promote->ifa_next;
394
                /* Move the promoted address from behind prev_prom to just
                 * after last_prim.
                 */
395                 if (prev_prom) {
396                         prev_prom->ifa_next = promote->ifa_next;
397                         promote->ifa_next = last_prim->ifa_next;
398                         last_prim->ifa_next = promote;
399                 }
400
401                 promote->ifa_flags &= ~IFA_F_SECONDARY;
402                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
403                 blocking_notifier_call_chain(&inetaddr_chain,
404                                 NETDEV_UP, promote);
405                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
406                         if (ifa1->ifa_mask != ifa->ifa_mask ||
407                             !inet_ifa_match(ifa1->ifa_address, ifa))
408                                         continue;
409                         fib_add_ifaddr(ifa);
410                 }
411
412         }
413         if (destroy)
414                 inet_free_ifa(ifa1);
415 }
416
417 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
418                          int destroy)
419 {
420         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
421 }
422
/*
 * Link @ifa into the address list of ifa->ifa_dev, add it to the address
 * hash and announce it with an RTM_NEWADDR message followed by a NETDEV_UP
 * notifier call.
 *
 * An address whose ifa_local is zero is released and 0 is returned.
 * If an existing address has the same mask and matches the same subnet:
 *   - an identical ifa_local yields -EEXIST,
 *   - a different ifa_scope yields -EINVAL,
 *   - otherwise @ifa is flagged IFA_F_SECONDARY.
 * On both error returns @ifa has been handed to inet_free_ifa().
 *
 * @nlh, @pid: passed through to rtmsg_ifa().
 *
 * Must be called with RTNL held.  Returns 0 or a negative errno.
 */
423 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
424                              u32 pid)
425 {
426         struct in_device *in_dev = ifa->ifa_dev;
427         struct in_ifaddr *ifa1, **ifap, **last_primary;
428
429         ASSERT_RTNL();
430
431         if (!ifa->ifa_local) {
432                 inet_free_ifa(ifa);
433                 return 0;
434         }
435
            /* Start out as primary; the scan below may flag it secondary. */
436         ifa->ifa_flags &= ~IFA_F_SECONDARY;
437         last_primary = &in_dev->ifa_list;
438
439         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
440              ifap = &ifa1->ifa_next) {
441                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
442                     ifa->ifa_scope <= ifa1->ifa_scope)
443                         last_primary = &ifa1->ifa_next;
444                 if (ifa1->ifa_mask == ifa->ifa_mask &&
445                     inet_ifa_match(ifa1->ifa_address, ifa)) {
446                         if (ifa1->ifa_local == ifa->ifa_local) {
447                                 inet_free_ifa(ifa);
448                                 return -EEXIST;
449                         }
450                         if (ifa1->ifa_scope != ifa->ifa_scope) {
451                                 inet_free_ifa(ifa);
452                                 return -EINVAL;
453                         }
454                         ifa->ifa_flags |= IFA_F_SECONDARY;
455                 }
456         }
457
            /* After the scan ifap refers to the terminating NULL link, so a
             * secondary is appended at the tail; a primary is linked at the
             * last_primary position instead.
             */
458         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
459                 net_srandom(ifa->ifa_local);
460                 ifap = last_primary;
461         }
462
463         ifa->ifa_next = *ifap;
464         *ifap = ifa;
465
466         inet_hash_insert(dev_net(in_dev->dev), ifa);
467
468         /* Send message first, then call notifier.
469            Notifier will trigger FIB update, so that
470            listeners of netlink will know about new ifaddr */
471         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
472         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
473
474         return 0;
475 }
476
477 static int inet_insert_ifa(struct in_ifaddr *ifa)
478 {
479         return __inet_insert_ifa(ifa, NULL, 0);
480 }
481
482 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
483 {
484         struct in_device *in_dev = __in_dev_get_rtnl(dev);
485
486         ASSERT_RTNL();
487
488         if (!in_dev) {
489                 inet_free_ifa(ifa);
490                 return -ENOBUFS;
491         }
492         ipv4_devconf_setall(in_dev);
493         if (ifa->ifa_dev != in_dev) {
494                 WARN_ON(ifa->ifa_dev);
495                 in_dev_hold(in_dev);
496                 ifa->ifa_dev = in_dev;
497         }
498         if (ipv4_is_loopback(ifa->ifa_local))
499                 ifa->ifa_scope = RT_SCOPE_HOST;
500         return inet_insert_ifa(ifa);
501 }
502
503 /* Caller must hold RCU or RTNL :
504  * We dont take a reference on found in_device
505  */
506 struct in_device *inetdev_by_index(struct net *net, int ifindex)
507 {
508         struct net_device *dev;
509         struct in_device *in_dev = NULL;
510
511         rcu_read_lock();
512         dev = dev_get_by_index_rcu(net, ifindex);
513         if (dev)
514                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
515         rcu_read_unlock();
516         return in_dev;
517 }
518 EXPORT_SYMBOL(inetdev_by_index);
519
520 /* Called only from RTNL semaphored context. No locks. */
521
522 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
523                                     __be32 mask)
524 {
525         ASSERT_RTNL();
526
527         for_primary_ifa(in_dev) {
528                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
529                         return ifa;
530         } endfor_ifa(in_dev);
531         return NULL;
532 }
533
534 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
535 {
536         struct net *net = sock_net(skb->sk);
537         struct nlattr *tb[IFA_MAX+1];
538         struct in_device *in_dev;
539         struct ifaddrmsg *ifm;
540         struct in_ifaddr *ifa, **ifap;
541         int err = -EINVAL;
542
543         ASSERT_RTNL();
544
545         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
546         if (err < 0)
547                 goto errout;
548
549         ifm = nlmsg_data(nlh);
550         in_dev = inetdev_by_index(net, ifm->ifa_index);
551         if (in_dev == NULL) {
552                 err = -ENODEV;
553                 goto errout;
554         }
555
556         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
557              ifap = &ifa->ifa_next) {
558                 if (tb[IFA_LOCAL] &&
559                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
560                         continue;
561
562                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
563                         continue;
564
565                 if (tb[IFA_ADDRESS] &&
566                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
567                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
568                         continue;
569
570                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
571                 return 0;
572         }
573
574         err = -EADDRNOTAVAIL;
575 errout:
576         return err;
577 }
578
579 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
580 {
581         struct nlattr *tb[IFA_MAX+1];
582         struct in_ifaddr *ifa;
583         struct ifaddrmsg *ifm;
584         struct net_device *dev;
585         struct in_device *in_dev;
586         int err;
587
588         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
589         if (err < 0)
590                 goto errout;
591
592         ifm = nlmsg_data(nlh);
593         err = -EINVAL;
594         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
595                 goto errout;
596
597         dev = __dev_get_by_index(net, ifm->ifa_index);
598         err = -ENODEV;
599         if (dev == NULL)
600                 goto errout;
601
602         in_dev = __in_dev_get_rtnl(dev);
603         err = -ENOBUFS;
604         if (in_dev == NULL)
605                 goto errout;
606
607         ifa = inet_alloc_ifa();
608         if (ifa == NULL)
609                 /*
610                  * A potential indev allocation can be left alive, it stays
611                  * assigned to its device and is destroy with it.
612                  */
613                 goto errout;
614
615         ipv4_devconf_setall(in_dev);
616         in_dev_hold(in_dev);
617
618         if (tb[IFA_ADDRESS] == NULL)
619                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
620
621         INIT_HLIST_NODE(&ifa->hash);
622         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
623         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
624         ifa->ifa_flags = ifm->ifa_flags;
625         ifa->ifa_scope = ifm->ifa_scope;
626         ifa->ifa_dev = in_dev;
627
628         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
629         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
630
631         if (tb[IFA_BROADCAST])
632                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
633
634         if (tb[IFA_LABEL])
635                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
636         else
637                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
638
639         return ifa;
640
641 errout:
642         return ERR_PTR(err);
643 }
644
645 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
646 {
647         struct net *net = sock_net(skb->sk);
648         struct in_ifaddr *ifa;
649
650         ASSERT_RTNL();
651
652         ifa = rtm_to_ifaddr(net, nlh);
653         if (IS_ERR(ifa))
654                 return PTR_ERR(ifa);
655
656         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
657 }
658
659 /*
660  *      Determine a default network mask, based on the IP address.
661  */
662
663 static inline int inet_abc_len(__be32 addr)
664 {
665         int rc = -1;    /* Something else, probably a multicast. */
666
667         if (ipv4_is_zeronet(addr))
668                 rc = 0;
669         else {
670                 __u32 haddr = ntohl(addr);
671
672                 if (IN_CLASSA(haddr))
673                         rc = 8;
674                 else if (IN_CLASSB(haddr))
675                         rc = 16;
676                 else if (IN_CLASSC(haddr))
677                         rc = 24;
678         }
679
680         return rc;
681 }
682
683
684 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
685 {
686         struct ifreq ifr;
687         struct sockaddr_in sin_orig;
688         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
689         struct in_device *in_dev;
690         struct in_ifaddr **ifap = NULL;
691         struct in_ifaddr *ifa = NULL;
692         struct net_device *dev;
693         char *colon;
694         int ret = -EFAULT;
695         int tryaddrmatch = 0;
696
697         /*
698          *      Fetch the caller's info block into kernel space
699          */
700
701         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
702                 goto out;
703         ifr.ifr_name[IFNAMSIZ - 1] = 0;
704
705         /* save original address for comparison */
706         memcpy(&sin_orig, sin, sizeof(*sin));
707
708         colon = strchr(ifr.ifr_name, ':');
709         if (colon)
710                 *colon = 0;
711
712         dev_load(net, ifr.ifr_name);
713
714         switch (cmd) {
715         case SIOCGIFADDR:       /* Get interface address */
716         case SIOCGIFBRDADDR:    /* Get the broadcast address */
717         case SIOCGIFDSTADDR:    /* Get the destination address */
718         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
719                 /* Note that these ioctls will not sleep,
720                    so that we do not impose a lock.
721                    One day we will be forced to put shlock here (I mean SMP)
722                  */
723                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
724                 memset(sin, 0, sizeof(*sin));
725                 sin->sin_family = AF_INET;
726                 break;
727
728         case SIOCSIFFLAGS:
729                 ret = -EACCES;
730                 if (!capable(CAP_NET_ADMIN))
731                         goto out;
732                 break;
733         case SIOCSIFADDR:       /* Set interface address (and family) */
734         case SIOCSIFBRDADDR:    /* Set the broadcast address */
735         case SIOCSIFDSTADDR:    /* Set the destination address */
736         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
737                 ret = -EACCES;
738                 if (!capable(CAP_NET_ADMIN))
739                         goto out;
740                 ret = -EINVAL;
741                 if (sin->sin_family != AF_INET)
742                         goto out;
743                 break;
744         default:
745                 ret = -EINVAL;
746                 goto out;
747         }
748
749         rtnl_lock();
750
751         ret = -ENODEV;
752         dev = __dev_get_by_name(net, ifr.ifr_name);
753         if (!dev)
754                 goto done;
755
756         if (colon)
757                 *colon = ':';
758
759         in_dev = __in_dev_get_rtnl(dev);
760         if (in_dev) {
761                 if (tryaddrmatch) {
762                         /* Matthias Andree */
763                         /* compare label and address (4.4BSD style) */
764                         /* note: we only do this for a limited set of ioctls
765                            and only if the original address family was AF_INET.
766                            This is checked above. */
767                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
768                              ifap = &ifa->ifa_next) {
769                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
770                                     sin_orig.sin_addr.s_addr ==
771                                                         ifa->ifa_local) {
772                                         break; /* found */
773                                 }
774                         }
775                 }
776                 /* we didn't get a match, maybe the application is
777                    4.3BSD-style and passed in junk so we fall back to
778                    comparing just the label */
779                 if (!ifa) {
780                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
781                              ifap = &ifa->ifa_next)
782                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
783                                         break;
784                 }
785         }
786
787         ret = -EADDRNOTAVAIL;
788         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
789                 goto done;
790
791         switch (cmd) {
792         case SIOCGIFADDR:       /* Get interface address */
793                 sin->sin_addr.s_addr = ifa->ifa_local;
794                 goto rarok;
795
796         case SIOCGIFBRDADDR:    /* Get the broadcast address */
797                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
798                 goto rarok;
799
800         case SIOCGIFDSTADDR:    /* Get the destination address */
801                 sin->sin_addr.s_addr = ifa->ifa_address;
802                 goto rarok;
803
804         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
805                 sin->sin_addr.s_addr = ifa->ifa_mask;
806                 goto rarok;
807
808         case SIOCSIFFLAGS:
809                 if (colon) {
810                         ret = -EADDRNOTAVAIL;
811                         if (!ifa)
812                                 break;
813                         ret = 0;
814                         if (!(ifr.ifr_flags & IFF_UP))
815                                 inet_del_ifa(in_dev, ifap, 1);
816                         break;
817                 }
818                 ret = dev_change_flags(dev, ifr.ifr_flags);
819                 break;
820
821         case SIOCSIFADDR:       /* Set interface address (and family) */
822                 ret = -EINVAL;
823                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
824                         break;
825
826                 if (!ifa) {
827                         ret = -ENOBUFS;
828                         ifa = inet_alloc_ifa();
829                         INIT_HLIST_NODE(&ifa->hash);
830                         if (!ifa)
831                                 break;
832                         if (colon)
833                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
834                         else
835                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
836                 } else {
837                         ret = 0;
838                         if (ifa->ifa_local == sin->sin_addr.s_addr)
839                                 break;
840                         inet_del_ifa(in_dev, ifap, 0);
841                         ifa->ifa_broadcast = 0;
842                         ifa->ifa_scope = 0;
843                 }
844
845                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
846
847                 if (!(dev->flags & IFF_POINTOPOINT)) {
848                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
849                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
850                         if ((dev->flags & IFF_BROADCAST) &&
851                             ifa->ifa_prefixlen < 31)
852                                 ifa->ifa_broadcast = ifa->ifa_address |
853                                                      ~ifa->ifa_mask;
854                 } else {
855                         ifa->ifa_prefixlen = 32;
856                         ifa->ifa_mask = inet_make_mask(32);
857                 }
858                 ret = inet_set_ifa(dev, ifa);
859                 break;
860
861         case SIOCSIFBRDADDR:    /* Set the broadcast address */
862                 ret = 0;
863                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
864                         inet_del_ifa(in_dev, ifap, 0);
865                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
866                         inet_insert_ifa(ifa);
867                 }
868                 break;
869
870         case SIOCSIFDSTADDR:    /* Set the destination address */
871                 ret = 0;
872                 if (ifa->ifa_address == sin->sin_addr.s_addr)
873                         break;
874                 ret = -EINVAL;
875                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
876                         break;
877                 ret = 0;
878                 inet_del_ifa(in_dev, ifap, 0);
879                 ifa->ifa_address = sin->sin_addr.s_addr;
880                 inet_insert_ifa(ifa);
881                 break;
882
883         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
884
885                 /*
886                  *      The mask we set must be legal.
887                  */
888                 ret = -EINVAL;
889                 if (bad_mask(sin->sin_addr.s_addr, 0))
890                         break;
891                 ret = 0;
892                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
893                         __be32 old_mask = ifa->ifa_mask;
894                         inet_del_ifa(in_dev, ifap, 0);
895                         ifa->ifa_mask = sin->sin_addr.s_addr;
896                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
897
898                         /* See if current broadcast address matches
899                          * with current netmask, then recalculate
900                          * the broadcast address. Otherwise it's a
901                          * funny address, so don't touch it since
902                          * the user seems to know what (s)he's doing...
903                          */
904                         if ((dev->flags & IFF_BROADCAST) &&
905                             (ifa->ifa_prefixlen < 31) &&
906                             (ifa->ifa_broadcast ==
907                              (ifa->ifa_local|~old_mask))) {
908                                 ifa->ifa_broadcast = (ifa->ifa_local |
909                                                       ~sin->sin_addr.s_addr);
910                         }
911                         inet_insert_ifa(ifa);
912                 }
913                 break;
914         }
915 done:
916         rtnl_unlock();
917 out:
918         return ret;
919 rarok:
920         rtnl_unlock();
921         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
922         goto out;
923 }
924
925 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
926 {
927         struct in_device *in_dev = __in_dev_get_rtnl(dev);
928         struct in_ifaddr *ifa;
929         struct ifreq ifr;
930         int done = 0;
931
932         if (!in_dev)
933                 goto out;
934
935         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
936                 if (!buf) {
937                         done += sizeof(ifr);
938                         continue;
939                 }
940                 if (len < (int) sizeof(ifr))
941                         break;
942                 memset(&ifr, 0, sizeof(struct ifreq));
943                 if (ifa->ifa_label)
944                         strcpy(ifr.ifr_name, ifa->ifa_label);
945                 else
946                         strcpy(ifr.ifr_name, dev->name);
947
948                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
949                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
950                                                                 ifa->ifa_local;
951
952                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
953                         done = -EFAULT;
954                         break;
955                 }
956                 buf  += sizeof(struct ifreq);
957                 len  -= sizeof(struct ifreq);
958                 done += sizeof(struct ifreq);
959         }
960 out:
961         return done;
962 }
963
964 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
965 {
966         __be32 addr = 0;
967         struct in_device *in_dev;
968         struct net *net = dev_net(dev);
969
970         rcu_read_lock();
971         in_dev = __in_dev_get_rcu(dev);
972         if (!in_dev)
973                 goto no_in_dev;
974
975         for_primary_ifa(in_dev) {
976                 if (ifa->ifa_scope > scope)
977                         continue;
978                 if (!dst || inet_ifa_match(dst, ifa)) {
979                         addr = ifa->ifa_local;
980                         break;
981                 }
982                 if (!addr)
983                         addr = ifa->ifa_local;
984         } endfor_ifa(in_dev);
985
986         if (addr)
987                 goto out_unlock;
988 no_in_dev:
989
990         /* Not loopback addresses on loopback should be preferred
991            in this case. It is importnat that lo is the first interface
992            in dev_base list.
993          */
994         for_each_netdev_rcu(net, dev) {
995                 in_dev = __in_dev_get_rcu(dev);
996                 if (!in_dev)
997                         continue;
998
999                 for_primary_ifa(in_dev) {
1000                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1001                             ifa->ifa_scope <= scope) {
1002                                 addr = ifa->ifa_local;
1003                                 goto out_unlock;
1004                         }
1005                 } endfor_ifa(in_dev);
1006         }
1007 out_unlock:
1008         rcu_read_unlock();
1009         return addr;
1010 }
1011 EXPORT_SYMBOL(inet_select_addr);
1012
1013 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1014                               __be32 local, int scope)
1015 {
1016         int same = 0;
1017         __be32 addr = 0;
1018
1019         for_ifa(in_dev) {
1020                 if (!addr &&
1021                     (local == ifa->ifa_local || !local) &&
1022                     ifa->ifa_scope <= scope) {
1023                         addr = ifa->ifa_local;
1024                         if (same)
1025                                 break;
1026                 }
1027                 if (!same) {
1028                         same = (!local || inet_ifa_match(local, ifa)) &&
1029                                 (!dst || inet_ifa_match(dst, ifa));
1030                         if (same && addr) {
1031                                 if (local || !dst)
1032                                         break;
1033                                 /* Is the selected addr into dst subnet? */
1034                                 if (inet_ifa_match(addr, ifa))
1035                                         break;
1036                                 /* No, then can we use new local src? */
1037                                 if (ifa->ifa_scope <= scope) {
1038                                         addr = ifa->ifa_local;
1039                                         break;
1040                                 }
1041                                 /* search for large dst subnet for addr */
1042                                 same = 0;
1043                         }
1044                 }
1045         } endfor_ifa(in_dev);
1046
1047         return same ? addr : 0;
1048 }
1049
1050 /*
1051  * Confirm that local IP address exists using wildcards:
1052  * - in_dev: only on this interface, 0=any interface
1053  * - dst: only in the same subnet as dst, 0=any dst
1054  * - local: address, 0=autoselect the local address
1055  * - scope: maximum allowed scope value for the local address
1056  */
1057 __be32 inet_confirm_addr(struct in_device *in_dev,
1058                          __be32 dst, __be32 local, int scope)
1059 {
1060         __be32 addr = 0;
1061         struct net_device *dev;
1062         struct net *net;
1063
1064         if (scope != RT_SCOPE_LINK)
1065                 return confirm_addr_indev(in_dev, dst, local, scope);
1066
1067         net = dev_net(in_dev->dev);
1068         rcu_read_lock();
1069         for_each_netdev_rcu(net, dev) {
1070                 in_dev = __in_dev_get_rcu(dev);
1071                 if (in_dev) {
1072                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1073                         if (addr)
1074                                 break;
1075                 }
1076         }
1077         rcu_read_unlock();
1078
1079         return addr;
1080 }
1081 EXPORT_SYMBOL(inet_confirm_addr);
1082
1083 /*
1084  *      Device notifier
1085  */
1086
1087 int register_inetaddr_notifier(struct notifier_block *nb)
1088 {
1089         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1090 }
1091 EXPORT_SYMBOL(register_inetaddr_notifier);
1092
1093 int unregister_inetaddr_notifier(struct notifier_block *nb)
1094 {
1095         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1096 }
1097 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099 /* Rename ifa_labels for a device name change. Make some effort to preserve
1100  * existing alias numbering and to create unique labels if possible.
1101 */
1102 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103 {
1104         struct in_ifaddr *ifa;
1105         int named = 0;
1106
1107         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108                 char old[IFNAMSIZ], *dot;
1109
1110                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112                 if (named++ == 0)
1113                         goto skip;
1114                 dot = strchr(old, ':');
1115                 if (dot == NULL) {
1116                         sprintf(old, ":%d", named);
1117                         dot = old;
1118                 }
1119                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120                         strcat(ifa->ifa_label, dot);
1121                 else
1122                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123 skip:
1124                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125         }
1126 }
1127
/* An MTU below 68 bytes is treated as too small to run IPv4 on a device. */
static inline bool inetdev_valid_mtu(unsigned int mtu)
{
	return !(mtu < 68);
}
1132
1133 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134                                         struct in_device *in_dev)
1135
1136 {
1137         struct in_ifaddr *ifa;
1138
1139         for (ifa = in_dev->ifa_list; ifa;
1140              ifa = ifa->ifa_next) {
1141                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142                          ifa->ifa_local, dev,
1143                          ifa->ifa_local, NULL,
1144                          dev->dev_addr, NULL);
1145         }
1146 }
1147
1148 /* Called only under RTNL semaphore */
1149
1150 static int inetdev_event(struct notifier_block *this, unsigned long event,
1151                          void *ptr)
1152 {
1153         struct net_device *dev = ptr;
1154         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1155
1156         ASSERT_RTNL();
1157
1158         if (!in_dev) {
1159                 if (event == NETDEV_REGISTER) {
1160                         in_dev = inetdev_init(dev);
1161                         if (!in_dev)
1162                                 return notifier_from_errno(-ENOMEM);
1163                         if (dev->flags & IFF_LOOPBACK) {
1164                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1165                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1166                         }
1167                 } else if (event == NETDEV_CHANGEMTU) {
1168                         /* Re-enabling IP */
1169                         if (inetdev_valid_mtu(dev->mtu))
1170                                 in_dev = inetdev_init(dev);
1171                 }
1172                 goto out;
1173         }
1174
1175         switch (event) {
1176         case NETDEV_REGISTER:
1177                 printk(KERN_DEBUG "inetdev_event: bug\n");
1178                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1179                 break;
1180         case NETDEV_UP:
1181                 if (!inetdev_valid_mtu(dev->mtu))
1182                         break;
1183                 if (dev->flags & IFF_LOOPBACK) {
1184                         struct in_ifaddr *ifa = inet_alloc_ifa();
1185
1186                         if (ifa) {
1187                                 INIT_HLIST_NODE(&ifa->hash);
1188                                 ifa->ifa_local =
1189                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1190                                 ifa->ifa_prefixlen = 8;
1191                                 ifa->ifa_mask = inet_make_mask(8);
1192                                 in_dev_hold(in_dev);
1193                                 ifa->ifa_dev = in_dev;
1194                                 ifa->ifa_scope = RT_SCOPE_HOST;
1195                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1196                                 inet_insert_ifa(ifa);
1197                         }
1198                 }
1199                 ip_mc_up(in_dev);
1200                 /* fall through */
1201         case NETDEV_CHANGEADDR:
1202                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1203                         break;
1204                 /* fall through */
1205         case NETDEV_NOTIFY_PEERS:
1206                 /* Send gratuitous ARP to notify of link change */
1207                 inetdev_send_gratuitous_arp(dev, in_dev);
1208                 break;
1209         case NETDEV_DOWN:
1210                 ip_mc_down(in_dev);
1211                 break;
1212         case NETDEV_PRE_TYPE_CHANGE:
1213                 ip_mc_unmap(in_dev);
1214                 break;
1215         case NETDEV_POST_TYPE_CHANGE:
1216                 ip_mc_remap(in_dev);
1217                 break;
1218         case NETDEV_CHANGEMTU:
1219                 if (inetdev_valid_mtu(dev->mtu))
1220                         break;
1221                 /* disable IP when MTU is not enough */
1222         case NETDEV_UNREGISTER:
1223                 inetdev_destroy(in_dev);
1224                 break;
1225         case NETDEV_CHANGENAME:
1226                 /* Do not notify about label change, this event is
1227                  * not interesting to applications using netlink.
1228                  */
1229                 inetdev_changename(dev, in_dev);
1230
1231                 devinet_sysctl_unregister(in_dev);
1232                 devinet_sysctl_register(in_dev);
1233                 break;
1234         }
1235 out:
1236         return NOTIFY_DONE;
1237 }
1238
1239 static struct notifier_block ip_netdev_notifier = {
1240         .notifier_call = inetdev_event,
1241 };
1242
1243 static inline size_t inet_nlmsg_size(void)
1244 {
1245         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246                + nla_total_size(4) /* IFA_ADDRESS */
1247                + nla_total_size(4) /* IFA_LOCAL */
1248                + nla_total_size(4) /* IFA_BROADCAST */
1249                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250 }
1251
1252 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253                             u32 pid, u32 seq, int event, unsigned int flags)
1254 {
1255         struct ifaddrmsg *ifm;
1256         struct nlmsghdr  *nlh;
1257
1258         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259         if (nlh == NULL)
1260                 return -EMSGSIZE;
1261
1262         ifm = nlmsg_data(nlh);
1263         ifm->ifa_family = AF_INET;
1264         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266         ifm->ifa_scope = ifa->ifa_scope;
1267         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1268
1269         if ((ifa->ifa_address &&
1270              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1271             (ifa->ifa_local &&
1272              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1273             (ifa->ifa_broadcast &&
1274              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1275             (ifa->ifa_label[0] &&
1276              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1277                 goto nla_put_failure;
1278
1279         return nlmsg_end(skb, nlh);
1280
1281 nla_put_failure:
1282         nlmsg_cancel(skb, nlh);
1283         return -EMSGSIZE;
1284 }
1285
1286 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1287 {
1288         struct net *net = sock_net(skb->sk);
1289         int h, s_h;
1290         int idx, s_idx;
1291         int ip_idx, s_ip_idx;
1292         struct net_device *dev;
1293         struct in_device *in_dev;
1294         struct in_ifaddr *ifa;
1295         struct hlist_head *head;
1296         struct hlist_node *node;
1297
1298         s_h = cb->args[0];
1299         s_idx = idx = cb->args[1];
1300         s_ip_idx = ip_idx = cb->args[2];
1301
1302         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1303                 idx = 0;
1304                 head = &net->dev_index_head[h];
1305                 rcu_read_lock();
1306                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1307                         if (idx < s_idx)
1308                                 goto cont;
1309                         if (h > s_h || idx > s_idx)
1310                                 s_ip_idx = 0;
1311                         in_dev = __in_dev_get_rcu(dev);
1312                         if (!in_dev)
1313                                 goto cont;
1314
1315                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1316                              ifa = ifa->ifa_next, ip_idx++) {
1317                                 if (ip_idx < s_ip_idx)
1318                                         continue;
1319                                 if (inet_fill_ifaddr(skb, ifa,
1320                                              NETLINK_CB(cb->skb).pid,
1321                                              cb->nlh->nlmsg_seq,
1322                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1323                                         rcu_read_unlock();
1324                                         goto done;
1325                                 }
1326                         }
1327 cont:
1328                         idx++;
1329                 }
1330                 rcu_read_unlock();
1331         }
1332
1333 done:
1334         cb->args[0] = h;
1335         cb->args[1] = idx;
1336         cb->args[2] = ip_idx;
1337
1338         return skb->len;
1339 }
1340
1341 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1342                       u32 pid)
1343 {
1344         struct sk_buff *skb;
1345         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1346         int err = -ENOBUFS;
1347         struct net *net;
1348
1349         net = dev_net(ifa->ifa_dev->dev);
1350         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1351         if (skb == NULL)
1352                 goto errout;
1353
1354         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1355         if (err < 0) {
1356                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1357                 WARN_ON(err == -EMSGSIZE);
1358                 kfree_skb(skb);
1359                 goto errout;
1360         }
1361         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1362         return;
1363 errout:
1364         if (err < 0)
1365                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1366 }
1367
1368 static size_t inet_get_link_af_size(const struct net_device *dev)
1369 {
1370         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1371
1372         if (!in_dev)
1373                 return 0;
1374
1375         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1376 }
1377
1378 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1379 {
1380         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1381         struct nlattr *nla;
1382         int i;
1383
1384         if (!in_dev)
1385                 return -ENODATA;
1386
1387         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1388         if (nla == NULL)
1389                 return -EMSGSIZE;
1390
1391         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1392                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1393
1394         return 0;
1395 }
1396
1397 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1398         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1399 };
1400
1401 static int inet_validate_link_af(const struct net_device *dev,
1402                                  const struct nlattr *nla)
1403 {
1404         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1405         int err, rem;
1406
1407         if (dev && !__in_dev_get_rtnl(dev))
1408                 return -EAFNOSUPPORT;
1409
1410         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1411         if (err < 0)
1412                 return err;
1413
1414         if (tb[IFLA_INET_CONF]) {
1415                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1416                         int cfgid = nla_type(a);
1417
1418                         if (nla_len(a) < 4)
1419                                 return -EINVAL;
1420
1421                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1422                                 return -EINVAL;
1423                 }
1424         }
1425
1426         return 0;
1427 }
1428
1429 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1430 {
1431         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1432         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1433         int rem;
1434
1435         if (!in_dev)
1436                 return -EAFNOSUPPORT;
1437
1438         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1439                 BUG();
1440
1441         if (tb[IFLA_INET_CONF]) {
1442                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1443                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1444         }
1445
1446         return 0;
1447 }
1448
1449 #ifdef CONFIG_SYSCTL
1450
1451 static void devinet_copy_dflt_conf(struct net *net, int i)
1452 {
1453         struct net_device *dev;
1454
1455         rcu_read_lock();
1456         for_each_netdev_rcu(net, dev) {
1457                 struct in_device *in_dev;
1458
1459                 in_dev = __in_dev_get_rcu(dev);
1460                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1461                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1462         }
1463         rcu_read_unlock();
1464 }
1465
1466 /* called with RTNL locked */
1467 static void inet_forward_change(struct net *net)
1468 {
1469         struct net_device *dev;
1470         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1471
1472         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1473         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1474
1475         for_each_netdev(net, dev) {
1476                 struct in_device *in_dev;
1477                 if (on)
1478                         dev_disable_lro(dev);
1479                 rcu_read_lock();
1480                 in_dev = __in_dev_get_rcu(dev);
1481                 if (in_dev)
1482                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1483                 rcu_read_unlock();
1484         }
1485 }
1486
1487 static int devinet_conf_proc(ctl_table *ctl, int write,
1488                              void __user *buffer,
1489                              size_t *lenp, loff_t *ppos)
1490 {
1491         int old_value = *(int *)ctl->data;
1492         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1493         int new_value = *(int *)ctl->data;
1494
1495         if (write) {
1496                 struct ipv4_devconf *cnf = ctl->extra1;
1497                 struct net *net = ctl->extra2;
1498                 int i = (int *)ctl->data - cnf->data;
1499
1500                 set_bit(i, cnf->state);
1501
1502                 if (cnf == net->ipv4.devconf_dflt)
1503                         devinet_copy_dflt_conf(net, i);
1504                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1505                         if ((new_value == 0) && (old_value != 0))
1506                                 rt_cache_flush(net, 0);
1507         }
1508
1509         return ret;
1510 }
1511
1512 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1513                                   void __user *buffer,
1514                                   size_t *lenp, loff_t *ppos)
1515 {
1516         int *valp = ctl->data;
1517         int val = *valp;
1518         loff_t pos = *ppos;
1519         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1520
1521         if (write && *valp != val) {
1522                 struct net *net = ctl->extra2;
1523
1524                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1525                         if (!rtnl_trylock()) {
1526                                 /* Restore the original values before restarting */
1527                                 *valp = val;
1528                                 *ppos = pos;
1529                                 return restart_syscall();
1530                         }
1531                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1532                                 inet_forward_change(net);
1533                         } else if (*valp) {
1534                                 struct ipv4_devconf *cnf = ctl->extra1;
1535                                 struct in_device *idev =
1536                                         container_of(cnf, struct in_device, cnf);
1537                                 dev_disable_lro(idev->dev);
1538                         }
1539                         rtnl_unlock();
1540                         rt_cache_flush(net, 0);
1541                 }
1542         }
1543
1544         return ret;
1545 }
1546
1547 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1548                                 void __user *buffer,
1549                                 size_t *lenp, loff_t *ppos)
1550 {
1551         int *valp = ctl->data;
1552         int val = *valp;
1553         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1554         struct net *net = ctl->extra2;
1555
1556         if (write && *valp != val)
1557                 rt_cache_flush(net, 0);
1558
1559         return ret;
1560 }
1561
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 * @attr selects the IPV4_DEVCONF_<attr> slot of ipv4_devconf.data (slots
 * are numbered from 1, hence the "- 1"), @name is the file name, @mval
 * the file mode and @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush() */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1584
1585 static struct devinet_sysctl_table {
1586         struct ctl_table_header *sysctl_header;
1587         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1588         char *dev_name;
1589 } devinet_sysctl = {
1590         .devinet_vars = {
1591                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1592                                              devinet_sysctl_forward),
1593                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1594
1595                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1596                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1597                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1598                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1599                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1600                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1601                                         "accept_source_route"),
1602                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1603                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1604                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1605                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1606                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1607                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1608                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1609                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1610                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1611                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1612                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1613                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1614                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1615
1616                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1617                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1618                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1619                                               "force_igmp_version"),
1620                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1621                                               "promote_secondaries"),
1622         },
1623 };
1624
1625 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1626                                         struct ipv4_devconf *p)
1627 {
1628         int i;
1629         struct devinet_sysctl_table *t;
1630
1631 #define DEVINET_CTL_PATH_DEV    3
1632
1633         struct ctl_path devinet_ctl_path[] = {
1634                 { .procname = "net",  },
1635                 { .procname = "ipv4", },
1636                 { .procname = "conf", },
1637                 { /* to be set */ },
1638                 { },
1639         };
1640
1641         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1642         if (!t)
1643                 goto out;
1644
1645         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1646                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1647                 t->devinet_vars[i].extra1 = p;
1648                 t->devinet_vars[i].extra2 = net;
1649         }
1650
1651         /*
1652          * Make a copy of dev_name, because '.procname' is regarded as const
1653          * by sysctl and we wouldn't want anyone to change it under our feet
1654          * (see SIOCSIFNAME).
1655          */
1656         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1657         if (!t->dev_name)
1658                 goto free;
1659
1660         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1661
1662         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1663                         t->devinet_vars);
1664         if (!t->sysctl_header)
1665                 goto free_procname;
1666
1667         p->sysctl = t;
1668         return 0;
1669
1670 free_procname:
1671         kfree(t->dev_name);
1672 free:
1673         kfree(t);
1674 out:
1675         return -ENOBUFS;
1676 }
1677
1678 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1679 {
1680         struct devinet_sysctl_table *t = cnf->sysctl;
1681
1682         if (t == NULL)
1683                 return;
1684
1685         cnf->sysctl = NULL;
1686         unregister_net_sysctl_table(t->sysctl_header);
1687         kfree(t->dev_name);
1688         kfree(t);
1689 }
1690
1691 static void devinet_sysctl_register(struct in_device *idev)
1692 {
1693         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1694         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1695                                         &idev->cnf);
1696 }
1697
1698 static void devinet_sysctl_unregister(struct in_device *idev)
1699 {
1700         __devinet_sysctl_unregister(&idev->cnf);
1701         neigh_sysctl_unregister(idev->arp_parms);
1702 }
1703
/*
 * Table holding the single "ip_forward" entry.  As written it is bound to
 * the global ipv4_devconf and to init_net; devinet_init_net() duplicates
 * it for other namespaces and repoints .data/.extra1/.extra2 at that
 * namespace's own configuration.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                /* the data[] slot is indexed by IPV4_DEVCONF_FORWARDING - 1 */
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },    /* empty terminating entry */
};
1717
/*
 * sysctl path "net/ipv4"; devinet_init_net() registers the ip_forward
 * table under it.
 */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", },
        { .procname = "ipv4", },
        { },    /* empty terminating entry */
};
1723 #endif
1724
1725 static __net_init int devinet_init_net(struct net *net)
1726 {
1727         int err;
1728         struct ipv4_devconf *all, *dflt;
1729 #ifdef CONFIG_SYSCTL
1730         struct ctl_table *tbl = ctl_forward_entry;
1731         struct ctl_table_header *forw_hdr;
1732 #endif
1733
1734         err = -ENOMEM;
1735         all = &ipv4_devconf;
1736         dflt = &ipv4_devconf_dflt;
1737
1738         if (!net_eq(net, &init_net)) {
1739                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1740                 if (all == NULL)
1741                         goto err_alloc_all;
1742
1743                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1744                 if (dflt == NULL)
1745                         goto err_alloc_dflt;
1746
1747 #ifdef CONFIG_SYSCTL
1748                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1749                 if (tbl == NULL)
1750                         goto err_alloc_ctl;
1751
1752                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1753                 tbl[0].extra1 = all;
1754                 tbl[0].extra2 = net;
1755 #endif
1756         }
1757
1758 #ifdef CONFIG_SYSCTL
1759         err = __devinet_sysctl_register(net, "all", all);
1760         if (err < 0)
1761                 goto err_reg_all;
1762
1763         err = __devinet_sysctl_register(net, "default", dflt);
1764         if (err < 0)
1765                 goto err_reg_dflt;
1766
1767         err = -ENOMEM;
1768         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1769         if (forw_hdr == NULL)
1770                 goto err_reg_ctl;
1771         net->ipv4.forw_hdr = forw_hdr;
1772 #endif
1773
1774         net->ipv4.devconf_all = all;
1775         net->ipv4.devconf_dflt = dflt;
1776         return 0;
1777
1778 #ifdef CONFIG_SYSCTL
1779 err_reg_ctl:
1780         __devinet_sysctl_unregister(dflt);
1781 err_reg_dflt:
1782         __devinet_sysctl_unregister(all);
1783 err_reg_all:
1784         if (tbl != ctl_forward_entry)
1785                 kfree(tbl);
1786 err_alloc_ctl:
1787 #endif
1788         if (dflt != &ipv4_devconf_dflt)
1789                 kfree(dflt);
1790 err_alloc_dflt:
1791         if (all != &ipv4_devconf)
1792                 kfree(all);
1793 err_alloc_all:
1794         return err;
1795 }
1796
1797 static __net_exit void devinet_exit_net(struct net *net)
1798 {
1799 #ifdef CONFIG_SYSCTL
1800         struct ctl_table *tbl;
1801
1802         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1803         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1804         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1805         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1806         kfree(tbl);
1807 #endif
1808         kfree(net->ipv4.devconf_dflt);
1809         kfree(net->ipv4.devconf_all);
1810 }
1811
/*
 * Per-namespace init/exit hooks; handed to register_pernet_subsys() by
 * devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
1816
/*
 * AF_INET link-attribute callbacks; handed to rtnl_af_register() by
 * devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
1824
1825 void __init devinet_init(void)
1826 {
1827         int i;
1828
1829         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1830                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1831
1832         register_pernet_subsys(&devinet_ops);
1833
1834         register_gifconf(PF_INET, inet_gifconf);
1835         register_netdevice_notifier(&ip_netdev_notifier);
1836
1837         rtnl_af_register(&inet_af_ops);
1838
1839         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1840         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1841         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1842 }
1843