2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
79 static struct ipv4_devconf ipv4_devconf_dflt = {
81 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 #define IPV4_DEVCONF_DFLT(net, attr) \
90 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 [IFA_LOCAL] = { .type = NLA_U32 },
94 [IFA_ADDRESS] = { .type = NLA_U32 },
95 [IFA_BROADCAST] = { .type = NLA_U32 },
96 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
100 #define IN4_ADDR_HSIZE_SHIFT 8
101 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
/*
 * inet_addr_hash - compute the inet_addr_lst bucket index for an address.
 * The raw address bits are XORed with net_hash_mix(net) and the result is
 * reduced to IN4_ADDR_HSIZE_SHIFT bits with hash_32().
 */
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
108 u32 val = (__force u32) addr ^ net_hash_mix(net);
110 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/*
 * inet_hash_insert - link @ifa into the global address hash.
 * The bucket is derived from ifa->ifa_local (and @net); the entry is added
 * at the head of that bucket with hlist_add_head_rcu() while
 * inet_addr_hash_lock is held.
 */
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 u32 hash = inet_addr_hash(net, ifa->ifa_local);
117 spin_lock(&inet_addr_hash_lock);
118 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 spin_unlock(&inet_addr_hash_lock);
/*
 * inet_hash_remove - unlink @ifa from the global address hash.
 * Uses hlist_del_init_rcu() on ifa->hash under inet_addr_hash_lock, the
 * same lock taken by inet_hash_insert().
 */
122 static void inet_hash_remove(struct in_ifaddr *ifa)
124 spin_lock(&inet_addr_hash_lock);
125 hlist_del_init_rcu(&ifa->hash);
126 spin_unlock(&inet_addr_hash_lock);
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 u32 hash = inet_addr_hash(net, addr);
140 struct net_device *result = NULL;
141 struct in_ifaddr *ifa;
144 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 if (ifa->ifa_local == addr) {
146 struct net_device *dev = ifa->ifa_dev->dev;
148 if (!net_eq(dev_net(dev), net))
155 struct flowi4 fl4 = { .daddr = addr };
156 struct fib_result res = { 0 };
157 struct fib_table *local;
159 /* Fallback to FIB local table so that communication
160 * over loopback subnets works.
162 local = fib_get_table(net, RT_TABLE_LOCAL);
164 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 res.type == RTN_LOCAL)
166 result = FIB_RES_DEV(res);
168 if (result && devref)
173 EXPORT_SYMBOL(__ip_dev_find);
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
184 static void devinet_sysctl_register(struct in_device *idev)
187 static void devinet_sysctl_unregister(struct in_device *idev)
192 /* Locks all the inet devices. */
/*
 * inet_alloc_ifa - allocate a zero-filled struct in_ifaddr.
 * Returns kzalloc()'s result unchanged; the allocation uses GFP_KERNEL.
 */
194 static struct in_ifaddr *inet_alloc_ifa(void)
196 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * inet_rcu_free_ifa - RCU callback queued by inet_free_ifa().
 * Recovers the in_ifaddr from its embedded rcu_head and calls in_dev_put()
 * on ifa->ifa_dev.  Any further release steps are not visible in this
 * excerpt.
 */
199 static void inet_rcu_free_ifa(struct rcu_head *head)
201 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 in_dev_put(ifa->ifa_dev);
/*
 * inet_free_ifa - defer release of @ifa.
 * Queues inet_rcu_free_ifa() through call_rcu() on the ifa's rcu_head
 * instead of releasing the object immediately.
 */
207 static void inet_free_ifa(struct in_ifaddr *ifa)
209 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 void in_dev_finish_destroy(struct in_device *idev)
214 struct net_device *dev = idev->dev;
216 WARN_ON(idev->ifa_list);
217 WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 pr_err("Freeing alive in_device %p\n", idev);
227 EXPORT_SYMBOL(in_dev_finish_destroy);
229 static struct in_device *inetdev_init(struct net_device *dev)
231 struct in_device *in_dev;
235 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 sizeof(in_dev->cnf));
240 in_dev->cnf.sysctl = NULL;
242 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 if (!in_dev->arp_parms)
245 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 dev_disable_lro(dev);
247 /* Reference in_dev->dev */
249 /* Account for reference dev->ip_ptr (below) */
252 devinet_sysctl_register(in_dev);
253 ip_mc_init_dev(in_dev);
254 if (dev->flags & IFF_UP)
257 /* we can receive as soon as ip_ptr is set -- do this last */
258 rcu_assign_pointer(dev->ip_ptr, in_dev);
267 static void in_dev_rcu_put(struct rcu_head *head)
269 struct in_device *idev = container_of(head, struct in_device, rcu_head);
273 static void inetdev_destroy(struct in_device *in_dev)
275 struct in_ifaddr *ifa;
276 struct net_device *dev;
284 ip_mc_destroy_dev(in_dev);
286 while ((ifa = in_dev->ifa_list) != NULL) {
287 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 devinet_sysctl_unregister(in_dev);
294 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * inet_addr_onlink - look for an address on @in_dev covering @a (and @b).
 * Iterates with for_primary_ifa() and tests each entry with
 * inet_ifa_match(a, ifa); @b is only checked when it is non-zero.
 * The values returned on a hit or a miss are not visible in this excerpt.
 */
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
303 for_primary_ifa(in_dev) {
304 if (inet_ifa_match(a, ifa)) {
305 if (!b || inet_ifa_match(b, ifa)) {
310 } endfor_ifa(in_dev);
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316 int destroy, struct nlmsghdr *nlh, u32 portid)
318 struct in_ifaddr *promote = NULL;
319 struct in_ifaddr *ifa, *ifa1 = *ifap;
320 struct in_ifaddr *last_prim = in_dev->ifa_list;
321 struct in_ifaddr *prev_prom = NULL;
322 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326 /* 1. Deleting primary ifaddr forces deletion of all secondaries
327 * unless alias promotion is set
330 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333 while ((ifa = *ifap1) != NULL) {
334 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335 ifa1->ifa_scope <= ifa->ifa_scope)
338 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339 ifa1->ifa_mask != ifa->ifa_mask ||
340 !inet_ifa_match(ifa1->ifa_address, ifa)) {
341 ifap1 = &ifa->ifa_next;
347 inet_hash_remove(ifa);
348 *ifap1 = ifa->ifa_next;
350 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351 blocking_notifier_call_chain(&inetaddr_chain,
361 /* On promotion all secondaries from subnet are changing
362 * the primary IP, we must remove all their routes silently
363 * and later add them back with the new prefsrc. Do this
364 * while all addresses are on the device list.
366 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367 if (ifa1->ifa_mask == ifa->ifa_mask &&
368 inet_ifa_match(ifa1->ifa_address, ifa))
369 fib_del_ifaddr(ifa, ifa1);
374 *ifap = ifa1->ifa_next;
375 inet_hash_remove(ifa1);
377 /* 3. Announce address deletion */
379 /* Send message first, then call notifier.
380 At first sight, FIB update triggered by notifier
381 will refer to already deleted ifaddr, that could confuse
382 netlink listeners. It is not true: look, gated sees
383 that route deleted and if it still thinks that ifaddr
384 is valid, it will try to restore deleted routes... Grr.
385 So that, this order is correct.
387 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
391 struct in_ifaddr *next_sec = promote->ifa_next;
394 prev_prom->ifa_next = promote->ifa_next;
395 promote->ifa_next = last_prim->ifa_next;
396 last_prim->ifa_next = promote;
399 promote->ifa_flags &= ~IFA_F_SECONDARY;
400 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401 blocking_notifier_call_chain(&inetaddr_chain,
403 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404 if (ifa1->ifa_mask != ifa->ifa_mask ||
405 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * inet_del_ifa - delete an address on behalf of a caller that has no
 * netlink request: forwards to __inet_del_ifa() with a NULL nlmsghdr and
 * portid 0.
 */
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
418 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
421 static void check_lifetime(struct work_struct *work);
423 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
428 struct in_device *in_dev = ifa->ifa_dev;
429 struct in_ifaddr *ifa1, **ifap, **last_primary;
433 if (!ifa->ifa_local) {
438 ifa->ifa_flags &= ~IFA_F_SECONDARY;
439 last_primary = &in_dev->ifa_list;
441 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442 ifap = &ifa1->ifa_next) {
443 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444 ifa->ifa_scope <= ifa1->ifa_scope)
445 last_primary = &ifa1->ifa_next;
446 if (ifa1->ifa_mask == ifa->ifa_mask &&
447 inet_ifa_match(ifa1->ifa_address, ifa)) {
448 if (ifa1->ifa_local == ifa->ifa_local) {
452 if (ifa1->ifa_scope != ifa->ifa_scope) {
456 ifa->ifa_flags |= IFA_F_SECONDARY;
460 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461 net_srandom(ifa->ifa_local);
465 ifa->ifa_next = *ifap;
468 inet_hash_insert(dev_net(in_dev->dev), ifa);
470 cancel_delayed_work(&check_lifetime_work);
471 schedule_delayed_work(&check_lifetime_work, 0);
473 /* Send message first, then call notifier.
474 Notifier will trigger FIB update, so that
475 listeners of netlink will know about new ifaddr */
476 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * inet_insert_ifa - insert an address on behalf of a caller that has no
 * netlink request: forwards to __inet_insert_ifa() with a NULL nlmsghdr and
 * portid 0, and returns its result.
 */
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 return __inet_insert_ifa(ifa, NULL, 0);
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 struct in_device *in_dev = __in_dev_get_rtnl(dev);
497 ipv4_devconf_setall(in_dev);
498 if (ifa->ifa_dev != in_dev) {
499 WARN_ON(ifa->ifa_dev);
501 ifa->ifa_dev = in_dev;
503 if (ipv4_is_loopback(ifa->ifa_local))
504 ifa->ifa_scope = RT_SCOPE_HOST;
505 return inet_insert_ifa(ifa);
508 /* Caller must hold RCU or RTNL :
509 * We don't take a reference on the found in_device
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 struct net_device *dev;
514 struct in_device *in_dev = NULL;
517 dev = dev_get_by_index_rcu(net, ifindex);
519 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
523 EXPORT_SYMBOL(inetdev_by_index);
525 /* Called only from RTNL semaphored context. No locks. */
/*
 * inet_ifa_byprefix - search @in_dev for an address with a given prefix.
 * Iterates with for_primary_ifa() and tests for an entry whose ifa_mask
 * equals mask and for which inet_ifa_match(prefix, ifa) holds.
 * The statements executed on a hit or a miss are not visible in this
 * excerpt.
 */
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
532 for_primary_ifa(in_dev) {
533 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535 } endfor_ifa(in_dev);
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
541 struct net *net = sock_net(skb->sk);
542 struct nlattr *tb[IFA_MAX+1];
543 struct in_device *in_dev;
544 struct ifaddrmsg *ifm;
545 struct in_ifaddr *ifa, **ifap;
550 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
554 ifm = nlmsg_data(nlh);
555 in_dev = inetdev_by_index(net, ifm->ifa_index);
556 if (in_dev == NULL) {
561 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 ifap = &ifa->ifa_next) {
564 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
567 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
570 if (tb[IFA_ADDRESS] &&
571 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
575 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
579 err = -EADDRNOTAVAIL;
584 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/*
 * check_lifetime - delayed-work handler that ages the addresses in the
 * global hash (inet_addr_lst).
 *
 * For each entry the age in seconds is derived from ifa_tstamp.  An entry
 * whose valid lifetime has elapsed is handed to inet_del_ifa(); one whose
 * preferred lifetime has elapsed gets IFA_F_DEPRECATED and an RTM_NEWADDR
 * announcement.  'next' is pulled earlier whenever an entry has a sooner
 * upcoming expiry, and the handler finally re-arms check_lifetime_work for
 * that time, but no sooner than ADDRCONF_TIMER_FUZZ_MAX from now.
 *
 * Entries with IFA_F_PERMANENT are tested first; the action taken for them
 * is not visible in this excerpt (presumably they are skipped).
 */
586 static void check_lifetime(struct work_struct *work)
588 unsigned long now, next, next_sec, next_sched;
589 struct in_ifaddr *ifa;
593 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
600 if (ifa->ifa_flags & IFA_F_PERMANENT)
603 /* We try to batch several events at once. */
604 age = (now - ifa->ifa_tstamp +
605 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
607 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608 age >= ifa->ifa_valid_lft) {
609 struct in_ifaddr **ifap ;
/*
 * NOTE(review): the step expression below takes &ifa->ifa_next, i.e. it
 * advances from the expiring entry itself rather than from the entry ifap
 * currently points at, so the walk does not move through the device list
 * one node at a time.  &(*ifap)->ifa_next looks like the intent - confirm.
 *
 * NOTE(review): inet_del_ifa() reaches blocking_notifier_call_chain() and
 * a GFP_KERNEL allocation (via rtmsg_ifa()); confirm this traversal does
 * not run inside an RCU read-side critical section or other atomic context.
 */
612 for (ifap = &ifa->ifa_dev->ifa_list;
613 *ifap != NULL; ifap = &ifa->ifa_next) {
615 inet_del_ifa(ifa->ifa_dev,
619 } else if (ifa->ifa_preferred_lft ==
620 INFINITY_LIFE_TIME) {
622 } else if (age >= ifa->ifa_preferred_lft) {
/* Already past preferred: the next event for this entry is its valid expiry. */
623 if (time_before(ifa->ifa_tstamp +
624 ifa->ifa_valid_lft * HZ, next))
625 next = ifa->ifa_tstamp +
626 ifa->ifa_valid_lft * HZ;
/* Announce the transition only once, when the flag is first set. */
628 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629 ifa->ifa_flags |= IFA_F_DEPRECATED;
630 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
632 } else if (time_before(ifa->ifa_tstamp +
633 ifa->ifa_preferred_lft * HZ,
635 next = ifa->ifa_tstamp +
636 ifa->ifa_preferred_lft * HZ;
642 next_sec = round_jiffies_up(next);
645 /* If rounded timeout is accurate enough, accept it. */
646 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647 next_sched = next_sec;
650 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
654 schedule_delayed_work(&check_lifetime_work, next_sched - now);
/*
 * set_ifa_lifetime - store lifetimes on @ifa and refresh its timestamps.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are cleared up front and then
 * re-derived.  Both lifetimes go through addrconf_timeout_fixup(..., HZ);
 * a finite valid lifetime is stored in ifa_valid_lft, and IFA_F_PERMANENT
 * is set on the alternative path (the branch keyword is not visible in this
 * excerpt - presumably an else).  A finite preferred lifetime is stored in
 * ifa_preferred_lft; IFA_F_DEPRECATED is set under a condition that is not
 * visible here.
 *
 * ifa_tstamp is set to the current jiffies; ifa_cstamp is only initialised
 * when it is still zero, so it keeps the first value it was given.
 */
657 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
660 unsigned long timeout;
662 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
664 timeout = addrconf_timeout_fixup(valid_lft, HZ);
665 if (addrconf_finite_timeout(timeout))
666 ifa->ifa_valid_lft = timeout;
668 ifa->ifa_flags |= IFA_F_PERMANENT;
670 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671 if (addrconf_finite_timeout(timeout)) {
673 ifa->ifa_flags |= IFA_F_DEPRECATED;
674 ifa->ifa_preferred_lft = timeout;
676 ifa->ifa_tstamp = jiffies;
677 if (!ifa->ifa_cstamp)
678 ifa->ifa_cstamp = ifa->ifa_tstamp;
681 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682 __u32 *pvalid_lft, __u32 *pprefered_lft)
684 struct nlattr *tb[IFA_MAX+1];
685 struct in_ifaddr *ifa;
686 struct ifaddrmsg *ifm;
687 struct net_device *dev;
688 struct in_device *in_dev;
691 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
695 ifm = nlmsg_data(nlh);
697 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
700 dev = __dev_get_by_index(net, ifm->ifa_index);
705 in_dev = __in_dev_get_rtnl(dev);
710 ifa = inet_alloc_ifa();
713 * A potential indev allocation can be left alive, it stays
714 * assigned to its device and is destroyed with it.
718 ipv4_devconf_setall(in_dev);
721 if (tb[IFA_ADDRESS] == NULL)
722 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
724 INIT_HLIST_NODE(&ifa->hash);
725 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727 ifa->ifa_flags = ifm->ifa_flags;
728 ifa->ifa_scope = ifm->ifa_scope;
729 ifa->ifa_dev = in_dev;
731 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
734 if (tb[IFA_BROADCAST])
735 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
738 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
740 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
742 if (tb[IFA_CACHEINFO]) {
743 struct ifa_cacheinfo *ci;
745 ci = nla_data(tb[IFA_CACHEINFO]);
746 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
750 *pvalid_lft = ci->ifa_valid;
751 *pprefered_lft = ci->ifa_prefered;
/*
 * find_matching_ifa - look on ifa->ifa_dev for an existing entry equal to
 * @ifa.
 * Walks in_dev->ifa_list and tests each entry for the same ifa_mask, an
 * inet_ifa_match() between the entry's ifa_address and @ifa, and the same
 * ifa_local.  The return statements are not visible in this excerpt.
 */
760 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
762 struct in_device *in_dev = ifa->ifa_dev;
763 struct in_ifaddr *ifa1, **ifap;
768 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769 ifap = &ifa1->ifa_next) {
770 if (ifa1->ifa_mask == ifa->ifa_mask &&
771 inet_ifa_match(ifa1->ifa_address, ifa) &&
772 ifa1->ifa_local == ifa->ifa_local)
778 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
780 struct net *net = sock_net(skb->sk);
781 struct in_ifaddr *ifa;
782 struct in_ifaddr *ifa_existing;
783 __u32 valid_lft = INFINITY_LIFE_TIME;
784 __u32 prefered_lft = INFINITY_LIFE_TIME;
788 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
792 ifa_existing = find_matching_ifa(ifa);
794 /* It would be best to check for !NLM_F_CREATE here but
795 * userspace already relies on not having to provide this.
797 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
802 if (nlh->nlmsg_flags & NLM_F_EXCL ||
803 !(nlh->nlmsg_flags & NLM_F_REPLACE))
806 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
807 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
808 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
814 * Determine a default network mask, based on the IP address.
817 static int inet_abc_len(__be32 addr)
819 int rc = -1; /* Something else, probably a multicast. */
821 if (ipv4_is_zeronet(addr))
824 __u32 haddr = ntohl(addr);
826 if (IN_CLASSA(haddr))
828 else if (IN_CLASSB(haddr))
830 else if (IN_CLASSC(haddr))
838 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
841 struct sockaddr_in sin_orig;
842 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
843 struct in_device *in_dev;
844 struct in_ifaddr **ifap = NULL;
845 struct in_ifaddr *ifa = NULL;
846 struct net_device *dev;
849 int tryaddrmatch = 0;
852 * Fetch the caller's info block into kernel space
855 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
857 ifr.ifr_name[IFNAMSIZ - 1] = 0;
859 /* save original address for comparison */
860 memcpy(&sin_orig, sin, sizeof(*sin));
862 colon = strchr(ifr.ifr_name, ':');
866 dev_load(net, ifr.ifr_name);
869 case SIOCGIFADDR: /* Get interface address */
870 case SIOCGIFBRDADDR: /* Get the broadcast address */
871 case SIOCGIFDSTADDR: /* Get the destination address */
872 case SIOCGIFNETMASK: /* Get the netmask for the interface */
873 /* Note that these ioctls will not sleep,
874 so that we do not impose a lock.
875 One day we will be forced to put shlock here (I mean SMP)
877 tryaddrmatch = (sin_orig.sin_family == AF_INET);
878 memset(sin, 0, sizeof(*sin));
879 sin->sin_family = AF_INET;
884 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
887 case SIOCSIFADDR: /* Set interface address (and family) */
888 case SIOCSIFBRDADDR: /* Set the broadcast address */
889 case SIOCSIFDSTADDR: /* Set the destination address */
890 case SIOCSIFNETMASK: /* Set the netmask for the interface */
892 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
895 if (sin->sin_family != AF_INET)
906 dev = __dev_get_by_name(net, ifr.ifr_name);
913 in_dev = __in_dev_get_rtnl(dev);
916 /* Matthias Andree */
917 /* compare label and address (4.4BSD style) */
918 /* note: we only do this for a limited set of ioctls
919 and only if the original address family was AF_INET.
920 This is checked above. */
921 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
922 ifap = &ifa->ifa_next) {
923 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
924 sin_orig.sin_addr.s_addr ==
930 /* we didn't get a match, maybe the application is
931 4.3BSD-style and passed in junk so we fall back to
932 comparing just the label */
934 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
935 ifap = &ifa->ifa_next)
936 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
941 ret = -EADDRNOTAVAIL;
942 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
946 case SIOCGIFADDR: /* Get interface address */
947 sin->sin_addr.s_addr = ifa->ifa_local;
950 case SIOCGIFBRDADDR: /* Get the broadcast address */
951 sin->sin_addr.s_addr = ifa->ifa_broadcast;
954 case SIOCGIFDSTADDR: /* Get the destination address */
955 sin->sin_addr.s_addr = ifa->ifa_address;
958 case SIOCGIFNETMASK: /* Get the netmask for the interface */
959 sin->sin_addr.s_addr = ifa->ifa_mask;
964 ret = -EADDRNOTAVAIL;
968 if (!(ifr.ifr_flags & IFF_UP))
969 inet_del_ifa(in_dev, ifap, 1);
972 ret = dev_change_flags(dev, ifr.ifr_flags);
975 case SIOCSIFADDR: /* Set interface address (and family) */
977 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
982 ifa = inet_alloc_ifa();
985 INIT_HLIST_NODE(&ifa->hash);
987 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
989 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
992 if (ifa->ifa_local == sin->sin_addr.s_addr)
994 inet_del_ifa(in_dev, ifap, 0);
995 ifa->ifa_broadcast = 0;
999 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1001 if (!(dev->flags & IFF_POINTOPOINT)) {
1002 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1003 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1004 if ((dev->flags & IFF_BROADCAST) &&
1005 ifa->ifa_prefixlen < 31)
1006 ifa->ifa_broadcast = ifa->ifa_address |
1009 ifa->ifa_prefixlen = 32;
1010 ifa->ifa_mask = inet_make_mask(32);
1012 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1013 ret = inet_set_ifa(dev, ifa);
1016 case SIOCSIFBRDADDR: /* Set the broadcast address */
1018 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1019 inet_del_ifa(in_dev, ifap, 0);
1020 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1021 inet_insert_ifa(ifa);
1025 case SIOCSIFDSTADDR: /* Set the destination address */
1027 if (ifa->ifa_address == sin->sin_addr.s_addr)
1030 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1033 inet_del_ifa(in_dev, ifap, 0);
1034 ifa->ifa_address = sin->sin_addr.s_addr;
1035 inet_insert_ifa(ifa);
1038 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1041 * The mask we set must be legal.
1044 if (bad_mask(sin->sin_addr.s_addr, 0))
1047 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1048 __be32 old_mask = ifa->ifa_mask;
1049 inet_del_ifa(in_dev, ifap, 0);
1050 ifa->ifa_mask = sin->sin_addr.s_addr;
1051 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1053 /* See if current broadcast address matches
1054 * with current netmask, then recalculate
1055 * the broadcast address. Otherwise it's a
1056 * funny address, so don't touch it since
1057 * the user seems to know what (s)he's doing...
1059 if ((dev->flags & IFF_BROADCAST) &&
1060 (ifa->ifa_prefixlen < 31) &&
1061 (ifa->ifa_broadcast ==
1062 (ifa->ifa_local|~old_mask))) {
1063 ifa->ifa_broadcast = (ifa->ifa_local |
1064 ~sin->sin_addr.s_addr);
1066 inet_insert_ifa(ifa);
1076 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1080 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1082 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1083 struct in_ifaddr *ifa;
1090 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092 done += sizeof(ifr);
1095 if (len < (int) sizeof(ifr))
1097 memset(&ifr, 0, sizeof(struct ifreq));
1099 strcpy(ifr.ifr_name, ifa->ifa_label);
1101 strcpy(ifr.ifr_name, dev->name);
1103 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1104 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1107 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1111 buf += sizeof(struct ifreq);
1112 len -= sizeof(struct ifreq);
1113 done += sizeof(struct ifreq);
1119 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1122 struct in_device *in_dev;
1123 struct net *net = dev_net(dev);
1126 in_dev = __in_dev_get_rcu(dev);
1130 for_primary_ifa(in_dev) {
1131 if (ifa->ifa_scope > scope)
1133 if (!dst || inet_ifa_match(dst, ifa)) {
1134 addr = ifa->ifa_local;
1138 addr = ifa->ifa_local;
1139 } endfor_ifa(in_dev);
1145 /* Not loopback addresses on loopback should be preferred
1146 in this case. It is important that lo is the first interface
1149 for_each_netdev_rcu(net, dev) {
1150 in_dev = __in_dev_get_rcu(dev);
1154 for_primary_ifa(in_dev) {
1155 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1156 ifa->ifa_scope <= scope) {
1157 addr = ifa->ifa_local;
1160 } endfor_ifa(in_dev);
1166 EXPORT_SYMBOL(inet_select_addr);
1168 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1169 __be32 local, int scope)
1176 (local == ifa->ifa_local || !local) &&
1177 ifa->ifa_scope <= scope) {
1178 addr = ifa->ifa_local;
1183 same = (!local || inet_ifa_match(local, ifa)) &&
1184 (!dst || inet_ifa_match(dst, ifa));
1188 /* Is the selected addr in the dst subnet? */
1189 if (inet_ifa_match(addr, ifa))
1191 /* No, then can we use new local src? */
1192 if (ifa->ifa_scope <= scope) {
1193 addr = ifa->ifa_local;
1196 /* search for large dst subnet for addr */
1200 } endfor_ifa(in_dev);
1202 return same ? addr : 0;
1206 * Confirm that local IP address exists using wildcards:
1207 * - in_dev: only on this interface, 0=any interface
1208 * - dst: only in the same subnet as dst, 0=any dst
1209 * - local: address, 0=autoselect the local address
1210 * - scope: maximum allowed scope value for the local address
1212 __be32 inet_confirm_addr(struct in_device *in_dev,
1213 __be32 dst, __be32 local, int scope)
1216 struct net_device *dev;
1219 if (scope != RT_SCOPE_LINK)
1220 return confirm_addr_indev(in_dev, dst, local, scope);
1222 net = dev_net(in_dev->dev);
1224 for_each_netdev_rcu(net, dev) {
1225 in_dev = __in_dev_get_rcu(dev);
1227 addr = confirm_addr_indev(in_dev, dst, local, scope);
1236 EXPORT_SYMBOL(inet_confirm_addr);
/*
 * register_inetaddr_notifier - add @nb to inetaddr_chain, the blocking
 * notifier chain this file calls with NETDEV_UP / NETDEV_DOWN when
 * addresses are inserted or deleted.
 * Returns the result of blocking_notifier_chain_register().
 */
1242 int register_inetaddr_notifier(struct notifier_block *nb)
1244 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1246 EXPORT_SYMBOL(register_inetaddr_notifier);
/*
 * unregister_inetaddr_notifier - remove @nb from inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
1248 int unregister_inetaddr_notifier(struct notifier_block *nb)
1250 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1252 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1254 /* Rename ifa_labels for a device name change. Make some effort to preserve
1255 * existing alias numbering and to create unique labels if possible.
1257 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1259 struct in_ifaddr *ifa;
1262 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1263 char old[IFNAMSIZ], *dot;
1265 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1266 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1269 dot = strchr(old, ':');
1271 sprintf(old, ":%d", named);
1274 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1275 strcat(ifa->ifa_label, dot);
1277 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1279 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1283 static bool inetdev_valid_mtu(unsigned int mtu)
/*
 * inetdev_send_gratuitous_arp - send one ARP request per configured address.
 * Walks in_dev->ifa_list and calls arp_send(ARPOP_REQUEST, ETH_P_ARP, ...)
 * for every entry, passing ifa_local for both IP address arguments and
 * dev->dev_addr as the only non-NULL hardware address.
 */
1288 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1289 struct in_device *in_dev)
1292 struct in_ifaddr *ifa;
1294 for (ifa = in_dev->ifa_list; ifa;
1295 ifa = ifa->ifa_next) {
1296 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1297 ifa->ifa_local, dev,
1298 ifa->ifa_local, NULL,
1299 dev->dev_addr, NULL);
1303 /* Called only under RTNL semaphore */
1305 static int inetdev_event(struct notifier_block *this, unsigned long event,
1308 struct net_device *dev = ptr;
1309 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1314 if (event == NETDEV_REGISTER) {
1315 in_dev = inetdev_init(dev);
1317 return notifier_from_errno(-ENOMEM);
1318 if (dev->flags & IFF_LOOPBACK) {
1319 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1320 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1322 } else if (event == NETDEV_CHANGEMTU) {
1323 /* Re-enabling IP */
1324 if (inetdev_valid_mtu(dev->mtu))
1325 in_dev = inetdev_init(dev);
1331 case NETDEV_REGISTER:
1332 pr_debug("%s: bug\n", __func__);
1333 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1336 if (!inetdev_valid_mtu(dev->mtu))
1338 if (dev->flags & IFF_LOOPBACK) {
1339 struct in_ifaddr *ifa = inet_alloc_ifa();
1342 INIT_HLIST_NODE(&ifa->hash);
1344 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1345 ifa->ifa_prefixlen = 8;
1346 ifa->ifa_mask = inet_make_mask(8);
1347 in_dev_hold(in_dev);
1348 ifa->ifa_dev = in_dev;
1349 ifa->ifa_scope = RT_SCOPE_HOST;
1350 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1351 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1352 INFINITY_LIFE_TIME);
1353 inet_insert_ifa(ifa);
1358 case NETDEV_CHANGEADDR:
1359 if (!IN_DEV_ARP_NOTIFY(in_dev))
1362 case NETDEV_NOTIFY_PEERS:
1363 /* Send gratuitous ARP to notify of link change */
1364 inetdev_send_gratuitous_arp(dev, in_dev);
1369 case NETDEV_PRE_TYPE_CHANGE:
1370 ip_mc_unmap(in_dev);
1372 case NETDEV_POST_TYPE_CHANGE:
1373 ip_mc_remap(in_dev);
1375 case NETDEV_CHANGEMTU:
1376 if (inetdev_valid_mtu(dev->mtu))
1378 /* disable IP when MTU is not enough */
1379 case NETDEV_UNREGISTER:
1380 inetdev_destroy(in_dev);
1382 case NETDEV_CHANGENAME:
1383 /* Do not notify about label change, this event is
1384 * not interesting to applications using netlink.
1386 inetdev_changename(dev, in_dev);
1388 devinet_sysctl_unregister(in_dev);
1389 devinet_sysctl_register(in_dev);
1396 static struct notifier_block ip_netdev_notifier = {
1397 .notifier_call = inetdev_event,
/*
 * inet_nlmsg_size - size estimate for one address netlink message.
 * Sums the aligned ifaddrmsg header and the attributes listed below;
 * rtmsg_ifa() passes the result to nlmsg_new().
 *
 * NOTE(review): inet_fill_ifaddr() also emits IFA_CACHEINFO through
 * put_cacheinfo(), and that attribute is not counted here.  rtmsg_ifa()
 * treats -EMSGSIZE as a bug in this function, so confirm whether
 * nla_total_size(sizeof(struct ifa_cacheinfo)) should be added.
 */
1400 static size_t inet_nlmsg_size(void)
1402 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1403 + nla_total_size(4) /* IFA_ADDRESS */
1404 + nla_total_size(4) /* IFA_LOCAL */
1405 + nla_total_size(4) /* IFA_BROADCAST */
1406 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * cstamp_delta - convert a jiffies timestamp for export in ifa_cacheinfo.
 * Takes the offset from INITIAL_JIFFIES and scales it by 100 / HZ; the
 * result is truncated to u32 by the return type.
 */
1409 static inline u32 cstamp_delta(unsigned long cstamp)
1411 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/*
 * put_cacheinfo - append an IFA_CACHEINFO attribute to @skb.
 * @cstamp and @tstamp are converted with cstamp_delta(); @preferred and
 * @valid are copied as given.  Returns the result of nla_put().
 */
1414 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1415 unsigned long tstamp, u32 preferred, u32 valid)
1417 struct ifa_cacheinfo ci;
1419 ci.cstamp = cstamp_delta(cstamp);
1420 ci.tstamp = cstamp_delta(tstamp);
1421 ci.ifa_prefered = preferred;
1422 ci.ifa_valid = valid;
1424 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1427 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1428 u32 portid, u32 seq, int event, unsigned int flags)
1430 struct ifaddrmsg *ifm;
1431 struct nlmsghdr *nlh;
1432 u32 preferred, valid;
1434 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1438 ifm = nlmsg_data(nlh);
1439 ifm->ifa_family = AF_INET;
1440 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1441 ifm->ifa_flags = ifa->ifa_flags;
1442 ifm->ifa_scope = ifa->ifa_scope;
1443 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1445 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1446 preferred = ifa->ifa_preferred_lft;
1447 valid = ifa->ifa_valid_lft;
1448 if (preferred != INFINITY_LIFE_TIME) {
1449 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1451 if (preferred > tval)
1455 if (valid != INFINITY_LIFE_TIME) {
1463 preferred = INFINITY_LIFE_TIME;
1464 valid = INFINITY_LIFE_TIME;
1466 if ((ifa->ifa_address &&
1467 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1469 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1470 (ifa->ifa_broadcast &&
1471 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1472 (ifa->ifa_label[0] &&
1473 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1474 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1476 goto nla_put_failure;
1478 return nlmsg_end(skb, nlh);
1481 nlmsg_cancel(skb, nlh);
/*
 * Netlink dump callback that emits RTM_NEWADDR messages (NLM_F_MULTI) for
 * the IPv4 addresses of devices in the requesting socket's namespace.
 *
 * The position is resumable: cb->args[1] and cb->args[2] supply the device
 * index and address index to restart from, and cb->args[2] is written back
 * with the current address index.  Devices are visited bucket by bucket of
 * net->dev_index_head using the RCU list iterator; s_idx is reset to 0 once
 * the first bucket has been processed.  Each address index is compared
 * against the saved s_ip_idx (NOTE(review): presumably to skip entries that
 * were already dumped -- confirm).  A result <= 0 from inet_fill_ifaddr()
 * is handled specially (NOTE(review): presumably ends this pass -- confirm).
 */
1485 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1487 struct net *net = sock_net(skb->sk);
1490 int ip_idx, s_ip_idx;
1491 struct net_device *dev;
1492 struct in_device *in_dev;
1493 struct in_ifaddr *ifa;
1494 struct hlist_head *head;
1497 s_idx = idx = cb->args[1];
1498 s_ip_idx = ip_idx = cb->args[2];
1500 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1502 head = &net->dev_index_head[h];
1504 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1507 if (h > s_h || idx > s_idx)
1509 in_dev = __in_dev_get_rcu(dev);
1513 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1514 ifa = ifa->ifa_next, ip_idx++) {
1515 if (ip_idx < s_ip_idx)
1517 if (inet_fill_ifaddr(skb, ifa,
1518 NETLINK_CB(cb->skb).portid,
1520 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1534 cb->args[2] = ip_idx;
/*
 * Notify RTNLGRP_IPV4_IFADDR listeners about @event on @ifa.
 *
 * A message sized by inet_nlmsg_size() is allocated with GFP_KERNEL and
 * filled by inet_fill_ifaddr() with flags 0, using the sequence number of
 * @nlh when one is given and 0 otherwise.  The message is handed to
 * rtnl_notify(); rtnl_set_sk_err() reports err to the group
 * (NOTE(review): presumably only on the failure path -- confirm).
 */
1539 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1542 struct sk_buff *skb;
1543 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1547 net = dev_net(ifa->ifa_dev->dev);
1548 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1552 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1554 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1555 WARN_ON(err == -EMSGSIZE);
1559 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1563 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * rtnl_af_ops callback: size of the AF_INET part of a link message.
 * Reads the device's IPv4 state through dev->ip_ptr and accounts for one
 * IFLA_INET_CONF attribute holding IPV4_DEVCONF_MAX 4-byte values.
 */
1566 static size_t inet_get_link_af_size(const struct net_device *dev)
1568 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1573 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * rtnl_af_ops callback: write the device's IPv4 configuration into @skb.
 * Reserves an IFLA_INET_CONF attribute of IPV4_DEVCONF_MAX * 4 bytes and
 * copies each of the first IPV4_DEVCONF_MAX in_dev->cnf.data[] entries into
 * it as a u32.
 */
1576 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1578 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1585 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1589 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1590 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/* Policy for AF_INET link attributes: IFLA_INET_CONF is a nested attribute. */
1595 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1596 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * rtnl_af_ops callback: validate the AF_INET attributes in @nla.
 *
 * Returns -EAFNOSUPPORT when @dev is given but has no IPv4 state.  The
 * attributes are parsed against inet_af_policy, and every entry nested in
 * IFLA_INET_CONF has its type checked against the range
 * 1..IPV4_DEVCONF_MAX.
 */
1599 static int inet_validate_link_af(const struct net_device *dev,
1600 const struct nlattr *nla)
1602 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1605 if (dev && !__in_dev_get_rtnl(dev))
1606 return -EAFNOSUPPORT;
1608 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1612 if (tb[IFLA_INET_CONF]) {
1613 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1614 int cfgid = nla_type(a);
1619 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * rtnl_af_ops callback: apply the IFLA_INET_CONF settings in @nla to @dev.
 *
 * Each nested attribute's type selects the devconf entry and its u32 payload
 * is the new value, stored with ipv4_devconf_set().  The nested parse is
 * done without a policy (NULL).  -EAFNOSUPPORT is one of the error returns
 * (NOTE(review): presumably when the device has no IPv4 state -- confirm).
 */
1627 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1629 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1630 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1634 return -EAFNOSUPPORT;
1636 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1639 if (tb[IFLA_INET_CONF]) {
1640 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1641 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Compute the buffer size needed for a netconf message of @type.
 *
 * The base covers the aligned struct netconfmsg plus NETCONFA_IFINDEX.  One
 * further 4-byte attribute is added for each of NETCONFA_FORWARDING,
 * NETCONFA_RP_FILTER and NETCONFA_MC_FORWARDING that @type selects; a @type
 * of -1 selects all of them.
 */
1647 static int inet_netconf_msgsize_devconf(int type)
1649 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1650 + nla_total_size(4); /* NETCONFA_IFINDEX */
1652 /* type -1 is used for ALL */
1653 if (type == -1 || type == NETCONFA_FORWARDING)
1654 size += nla_total_size(4);
1655 if (type == -1 || type == NETCONFA_RP_FILTER)
1656 size += nla_total_size(4);
1657 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1658 size += nla_total_size(4);
/*
 * Fill @skb with a netconf message describing @devconf for @ifindex.
 *
 * The payload header is a struct netconfmsg with family AF_INET, followed by
 * NETCONFA_IFINDEX and then those of the FORWARDING, RP_FILTER and
 * MC_FORWARDING values of @devconf that the requested type selects (-1
 * means all three).
 *
 * Returns the value of nlmsg_end() on success.  A failed nla_put_s32()
 * jumps to nla_put_failure; nlmsg_cancel() is called on the partial message.
 */
1663 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1664 struct ipv4_devconf *devconf, u32 portid,
1665 u32 seq, int event, unsigned int flags,
1668 struct nlmsghdr *nlh;
1669 struct netconfmsg *ncm;
1671 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1676 ncm = nlmsg_data(nlh);
1677 ncm->ncm_family = AF_INET;
1679 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1680 goto nla_put_failure;
1682 /* type -1 is used for ALL */
1683 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1684 nla_put_s32(skb, NETCONFA_FORWARDING,
1685 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1686 goto nla_put_failure;
1687 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1688 nla_put_s32(skb, NETCONFA_RP_FILTER,
1689 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1690 goto nla_put_failure;
1691 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1692 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1693 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1694 goto nla_put_failure;
1696 return nlmsg_end(skb, nlh);
1699 nlmsg_cancel(skb, nlh);
/*
 * Send an RTM_NEWNETCONF notification about @devconf / @ifindex to the
 * RTNLGRP_IPV4_NETCONF group of @net.
 *
 * @type is passed to inet_netconf_msgsize_devconf() and
 * inet_netconf_fill_devconf() to choose the attribute(s) included.
 * Allocation and notification use GFP_ATOMIC; the message is built with
 * portid 0, sequence number 0 and flags 0.  rtnl_set_sk_err() reports err
 * to the group (NOTE(review): presumably only on failure -- confirm).
 */
1703 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1704 struct ipv4_devconf *devconf)
1706 struct sk_buff *skb;
1709 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1713 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1714 RTM_NEWNETCONF, 0, type);
1716 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1717 WARN_ON(err == -EMSGSIZE);
1721 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1725 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Policy for netconf requests: NETCONFA_IFINDEX, NETCONFA_FORWARDING and
 * NETCONFA_RP_FILTER are each given a length of sizeof(int).
 */
1728 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1729 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1730 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1731 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/*
 * Handler registered for RTM_GETNETCONF: reply to the requester with the
 * netconf values of one configuration set.
 *
 * The request is parsed against devconf_ipv4_policy and checked for
 * NETCONFA_IFINDEX.  NETCONFA_IFINDEX_ALL and NETCONFA_IFINDEX_DEFAULT pick
 * net->ipv4.devconf_all and net->ipv4.devconf_dflt; otherwise the value is
 * looked up as a device index and that device's in_dev->cnf is used.  The
 * reply buffer is sized for every attribute (type -1, GFP_ATOMIC), built as
 * RTM_NEWNETCONF with the request's sequence number, and unicast to the
 * sender's portid.
 */
1734 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1735 struct nlmsghdr *nlh,
1738 struct net *net = sock_net(in_skb->sk);
1739 struct nlattr *tb[NETCONFA_MAX+1];
1740 struct netconfmsg *ncm;
1741 struct sk_buff *skb;
1742 struct ipv4_devconf *devconf;
1743 struct in_device *in_dev;
1744 struct net_device *dev;
1748 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1749 devconf_ipv4_policy);
1754 if (!tb[NETCONFA_IFINDEX])
1757 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1759 case NETCONFA_IFINDEX_ALL:
1760 devconf = net->ipv4.devconf_all;
1762 case NETCONFA_IFINDEX_DEFAULT:
1763 devconf = net->ipv4.devconf_dflt;
1766 dev = __dev_get_by_index(net, ifindex);
1769 in_dev = __in_dev_get_rtnl(dev);
1772 devconf = &in_dev->cnf;
1777 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1781 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1782 NETLINK_CB(in_skb).portid,
1783 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1786 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1787 WARN_ON(err == -EMSGSIZE);
1791 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1796 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry @i of the namespace's default devconf to every device in
 * @net that has IPv4 state and whose bit @i in cnf.state is clear.
 * Devices are walked with for_each_netdev_rcu().
 */
1798 static void devinet_copy_dflt_conf(struct net *net, int i)
1800 struct net_device *dev;
1803 for_each_netdev_rcu(net, dev) {
1804 struct in_device *in_dev;
1806 in_dev = __in_dev_get_rcu(dev);
1807 if (in_dev && !test_bit(i, in_dev->cnf.state))
1808 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Apply the current value of the namespace-wide "all" FORWARDING setting.
 *
 * ACCEPT_REDIRECTS in the "all" set becomes the logical inverse of the
 * forwarding value and the "default" set's FORWARDING is made equal to it.
 * NETCONFA_FORWARDING notifications are sent for NETCONFA_IFINDEX_ALL and
 * NETCONFA_IFINDEX_DEFAULT.  The device loop then sets FORWARDING on the
 * devices' in_device and sends a NETCONFA_FORWARDING notification carrying
 * the device's ifindex.  dev_disable_lro() is called inside the loop
 * (NOTE(review): presumably only when forwarding is on -- confirm).
 */
1813 /* called with RTNL locked */
1814 static void inet_forward_change(struct net *net)
1816 struct net_device *dev;
1817 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1819 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1820 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1821 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1822 NETCONFA_IFINDEX_ALL,
1823 net->ipv4.devconf_all);
1824 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1825 NETCONFA_IFINDEX_DEFAULT,
1826 net->ipv4.devconf_dflt);
1828 for_each_netdev(net, dev) {
1829 struct in_device *in_dev;
1831 dev_disable_lro(dev);
1833 in_dev = __in_dev_get_rcu(dev);
1835 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1836 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1837 dev->ifindex, &in_dev->cnf);
/*
 * sysctl handler used for the ordinary devconf entries.
 *
 * The integer is read/written through proc_dointvec(); the value before and
 * after the call is sampled from ctl->data.  ctl->extra1 is the devconf set
 * and ctl->extra2 the owning namespace.  The entry index is the offset of
 * ctl->data within cnf->data, and that bit is set in cnf->state.
 *
 * Follow-up actions visible here:
 *  - for the namespace's default set, devinet_copy_dflt_conf() is called;
 *  - ACCEPT_LOCAL or ROUTE_LOCALNET going from non-zero to zero flushes
 *    the route cache;
 *  - a changed RP_FILTER sends a NETCONFA_RP_FILTER notification whose
 *    ifindex is NETCONFA_IFINDEX_DEFAULT, NETCONFA_IFINDEX_ALL or the
 *    ifindex of the device embedding @cnf.
 */
1843 static int devinet_conf_proc(ctl_table *ctl, int write,
1844 void __user *buffer,
1845 size_t *lenp, loff_t *ppos)
1847 int old_value = *(int *)ctl->data;
1848 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1849 int new_value = *(int *)ctl->data;
1852 struct ipv4_devconf *cnf = ctl->extra1;
1853 struct net *net = ctl->extra2;
1854 int i = (int *)ctl->data - cnf->data;
1856 set_bit(i, cnf->state);
1858 if (cnf == net->ipv4.devconf_dflt)
1859 devinet_copy_dflt_conf(net, i);
1860 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1861 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1862 if ((new_value == 0) && (old_value != 0))
1863 rt_cache_flush(net);
1864 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1865 new_value != old_value) {
1868 if (cnf == net->ipv4.devconf_dflt)
1869 ifindex = NETCONFA_IFINDEX_DEFAULT;
1870 else if (cnf == net->ipv4.devconf_all)
1871 ifindex = NETCONFA_IFINDEX_ALL;
1873 struct in_device *idev =
1874 container_of(cnf, struct in_device,
1876 ifindex = idev->dev->ifindex;
1878 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/*
 * sysctl handler for the "forwarding" entries and for "ip_forward".
 *
 * After proc_dointvec(), a write that changed the value triggers follow-up
 * work.  For any entry other than the namespace's "default" FORWARDING slot
 * the RTNL lock is attempted with rtnl_trylock(); on failure the syscall is
 * restarted through restart_syscall().  A change of the "all" slot is
 * handled by inet_forward_change(); otherwise @ctl->extra1 is treated as
 * the cnf embedded in an in_device, for which dev_disable_lro() and a
 * NETCONFA_FORWARDING notification appear.  rt_cache_flush() is called for
 * the namespace.  A NETCONFA_FORWARDING notification for
 * NETCONFA_IFINDEX_DEFAULT is also sent (NOTE(review): presumably only for
 * the "default" slot -- confirm).
 */
1886 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1887 void __user *buffer,
1888 size_t *lenp, loff_t *ppos)
1890 int *valp = ctl->data;
1893 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1895 if (write && *valp != val) {
1896 struct net *net = ctl->extra2;
1898 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1899 if (!rtnl_trylock()) {
1900 /* Restore the original values before restarting */
1903 return restart_syscall();
1905 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1906 inet_forward_change(net);
1908 struct ipv4_devconf *cnf = ctl->extra1;
1909 struct in_device *idev =
1910 container_of(cnf, struct in_device, cnf);
1912 dev_disable_lro(idev->dev);
1913 inet_netconf_notify_devconf(net,
1914 NETCONFA_FORWARDING,
1919 rt_cache_flush(net);
1921 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1922 NETCONFA_IFINDEX_DEFAULT,
1923 net->ipv4.devconf_dflt);
/*
 * sysctl handler: update an integer through proc_dointvec() and flush the
 * route cache of the namespace in ctl->extra2 when a write left the value
 * different from val.
 */
1929 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1930 void __user *buffer,
1931 size_t *lenp, loff_t *ppos)
1933 int *valp = ctl->data;
1935 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1936 struct net *net = ctl->extra2;
1938 if (write && *valp != val)
1939 rt_cache_flush(net);
/*
 * Helpers that build the ctl_table entries of devinet_sysctl.  Each entry
 * points .data at slot (IPV4_DEVCONF_ ## attr - 1) of the global
 * ipv4_devconf, has .maxlen = sizeof(int), uses proc as its handler and
 * sets .extra1 = &ipv4_devconf.
 *
 * RW and RO entries use devinet_conf_proc and pass 0644 and 0444 as mval
 * (NOTE(review): presumably the file mode -- confirm).  COMPLEX entries
 * take a caller-supplied handler with 0644, and FLUSHING entries are
 * COMPLEX entries handled by ipv4_doint_and_flush.
 */
1944 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1947 .data = ipv4_devconf.data + \
1948 IPV4_DEVCONF_ ## attr - 1, \
1949 .maxlen = sizeof(int), \
1951 .proc_handler = proc, \
1952 .extra1 = &ipv4_devconf, \
1955 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1956 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1958 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1959 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1961 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1962 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1964 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1965 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template for a devconf sysctl table: a header pointer plus an array of
 * __IPV4_DEVCONF_MAX ctl_table slots.  The entries below refer to the
 * global ipv4_devconf; __devinet_sysctl_register() duplicates this template
 * and rebases the entries onto the set being registered.
 *
 * "forwarding" has its own handler (devinet_sysctl_forward),
 * "mc_forwarding" is the only RO entry, and the FLUSHING entries go through
 * ipv4_doint_and_flush.
 */
1967 static struct devinet_sysctl_table {
1968 struct ctl_table_header *sysctl_header;
1969 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1970 } devinet_sysctl = {
1972 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1973 devinet_sysctl_forward),
1974 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1976 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1977 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1978 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1979 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1980 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1981 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1982 "accept_source_route"),
1983 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1984 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1985 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1986 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1987 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1988 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1989 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1990 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1991 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1992 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1993 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1994 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1995 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1997 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1998 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1999 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2000 "force_igmp_version"),
2001 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2002 "promote_secondaries"),
2003 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the sysctl directory "net/ipv4/conf/" + @dev_name for the
 * devconf set @p in @net.
 *
 * The devinet_sysctl template is duplicated with kmemdup().  For every
 * array slot except the last one, .data is shifted by the byte distance
 * between @p and the global ipv4_devconf so that it points into @p, .extra1
 * is set to @p and .extra2 to @net.  The path buffer is sized for the
 * prefix plus IFNAMSIZ.  The table is registered with register_net_sysctl()
 * and the returned header is kept in t->sysctl_header; a NULL header is
 * checked for.
 */
2008 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2009 struct ipv4_devconf *p)
2012 struct devinet_sysctl_table *t;
2013 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2015 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2019 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2020 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2021 t->devinet_vars[i].extra1 = p;
2022 t->devinet_vars[i].extra2 = net;
2025 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2027 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2028 if (!t->sysctl_header)
/*
 * Unregister the sysctl table attached to @cnf: the table is taken from
 * cnf->sysctl and its header is passed to unregister_net_sysctl_table().
 */
2040 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2042 struct devinet_sysctl_table *t = cnf->sysctl;
2048 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries of @idev: the neighbour parameters (under
 * "ipv4") through neigh_sysctl_register(), then the devconf table under the
 * device's name through __devinet_sysctl_register().
 */
2052 static void devinet_sysctl_register(struct in_device *idev)
2054 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2055 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Remove the sysctl entries of @idev: first the devconf table of
 * idev->cnf, then the neighbour parameters in idev->arp_parms.
 */
2059 static void devinet_sysctl_unregister(struct in_device *idev)
2061 __devinet_sysctl_unregister(&idev->cnf);
2062 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Table for the "ip_forward" sysctl, registered under "net/ipv4" by
 * devinet_init_net().  It exposes the FORWARDING slot of the global
 * ipv4_devconf through devinet_sysctl_forward; .extra1 / .extra2 start out
 * as &ipv4_devconf and &init_net.
 */
2065 static struct ctl_table ctl_forward_entry[] = {
2067 .procname = "ip_forward",
2068 .data = &ipv4_devconf.data[
2069 IPV4_DEVCONF_FORWARDING - 1],
2070 .maxlen = sizeof(int),
2072 .proc_handler = devinet_sysctl_forward,
2073 .extra1 = &ipv4_devconf,
2074 .extra2 = &init_net,
/*
 * Per-namespace setup of the IPv4 "all" and "default" devconf sets.
 *
 * The starting point is the static ipv4_devconf / ipv4_devconf_dflt (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry).  For a namespace other
 * than init_net these are replaced by kmemdup() copies, and the copied
 * forward table is re-pointed at the namespace's own "all" set and at @net.
 * With CONFIG_SYSCTL the "all" and "default" tables and the "net/ipv4"
 * forward table are registered, and the forward header is stored in
 * net->ipv4.forw_hdr.  The sets are then published in
 * net->ipv4.devconf_all / net->ipv4.devconf_dflt.
 *
 * The tail of the function is the error unwinding: sysctl tables are
 * unregistered and the pointers are compared against the static objects
 * (NOTE(review): presumably so only the kmemdup() copies are freed --
 * confirm).
 */
2080 static __net_init int devinet_init_net(struct net *net)
2083 struct ipv4_devconf *all, *dflt;
2084 #ifdef CONFIG_SYSCTL
2085 struct ctl_table *tbl = ctl_forward_entry;
2086 struct ctl_table_header *forw_hdr;
2090 all = &ipv4_devconf;
2091 dflt = &ipv4_devconf_dflt;
2093 if (!net_eq(net, &init_net)) {
2094 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2098 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2100 goto err_alloc_dflt;
2102 #ifdef CONFIG_SYSCTL
2103 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2107 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2108 tbl[0].extra1 = all;
2109 tbl[0].extra2 = net;
2113 #ifdef CONFIG_SYSCTL
2114 err = __devinet_sysctl_register(net, "all", all);
2118 err = __devinet_sysctl_register(net, "default", dflt);
2123 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2124 if (forw_hdr == NULL)
2126 net->ipv4.forw_hdr = forw_hdr;
2129 net->ipv4.devconf_all = all;
2130 net->ipv4.devconf_dflt = dflt;
2133 #ifdef CONFIG_SYSCTL
2135 __devinet_sysctl_unregister(dflt);
2137 __devinet_sysctl_unregister(all);
2139 if (tbl != ctl_forward_entry)
2143 if (dflt != &ipv4_devconf_dflt)
2146 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown.
 *
 * With CONFIG_SYSCTL the ctl_table behind net->ipv4.forw_hdr is fetched
 * before the header is unregistered; the tables of devconf_dflt and
 * devconf_all are then unregistered.  The namespace's devconf_dflt and
 * devconf_all are passed to kfree().
 */
2152 static __net_exit void devinet_exit_net(struct net *net)
2154 #ifdef CONFIG_SYSCTL
2155 struct ctl_table *tbl;
2157 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2158 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2159 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2160 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2163 kfree(net->ipv4.devconf_dflt);
2164 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace operations, registered with register_pernet_subsys() in
 * devinet_init(): devinet_init_net() as .init and devinet_exit_net() as
 * .exit.
 */
2167 static __net_initdata struct pernet_operations devinet_ops = {
2168 .init = devinet_init_net,
2169 .exit = devinet_exit_net,
/*
 * AF_INET link-attribute callbacks (fill, size, validate, set), registered
 * through rtnl_af_register() in devinet_init().
 */
2172 static struct rtnl_af_ops inet_af_ops = {
2174 .fill_link_af = inet_fill_link_af,
2175 .get_link_af_size = inet_get_link_af_size,
2176 .validate_link_af = inet_validate_link_af,
2177 .set_link_af = inet_set_link_af,
2180 void __init devinet_init(void)
2184 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2185 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2187 register_pernet_subsys(&devinet_ops);
2189 register_gifconf(PF_INET, inet_gifconf);
2190 register_netdevice_notifier(&ip_netdev_notifier);
2192 schedule_delayed_work(&check_lifetime_work, 0);
2194 rtnl_af_register(&inet_af_ops);
2196 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2197 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2198 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2199 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,