2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
/* File-local ipv4_devconf instance. The visible initialisers enable accept,
 * send and secure redirects plus shared media; each slot is indexed by the
 * IPV4_DEVCONF_* id minus one.
 */
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
/* Default ipv4_devconf values. Same settings as ipv4_devconf above, with
 * accept-source-route additionally enabled.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt that
 * inetdev_init() copies into each new in_device - confirm.
 */
79 static struct ipv4_devconf ipv4_devconf_dflt = {
81 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/* Read devconf attribute attr from the per-namespace default configuration
 * (net->ipv4.devconf_dflt).
 */
89 #define IPV4_DEVCONF_DFLT(net, attr) \
90 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/* Netlink attribute policy for IPv4 address messages, passed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr(): the three address
 * attributes are u32, the label is a string of at most IFNAMSIZ - 1 bytes,
 * and IFA_CACHEINFO carries a struct ifa_cacheinfo.
 */
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 [IFA_LOCAL] = { .type = NLA_U32 },
94 [IFA_ADDRESS] = { .type = NLA_U32 },
95 [IFA_BROADCAST] = { .type = NLA_U32 },
96 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
/* Hash table of configured addresses: 1 << 8 = 256 buckets, keyed by
 * inet_addr_hash(). inet_addr_hash_lock is taken by inet_hash_insert() and
 * inet_hash_remove() around list modification; lookups in this file walk the
 * buckets with hlist_for_each_entry_rcu().
 */
100 #define IN4_ADDR_HSIZE_SHIFT 8
101 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
/* Compute the inet_addr_lst bucket index for addr in namespace net: the
 * address is XORed with net_hash_mix(net) and reduced to
 * IN4_ADDR_HSIZE_SHIFT bits with hash_32().
 */
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
108 u32 val = (__force u32) addr ^ net_hash_mix(net);
110 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/* Add ifa to the address hash, bucketed by its ifa_local address, using the
 * RCU-aware list insert under inet_addr_hash_lock.
 */
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 u32 hash = inet_addr_hash(net, ifa->ifa_local);
117 spin_lock(&inet_addr_hash_lock);
118 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 spin_unlock(&inet_addr_hash_lock);
/* Unlink ifa from the address hash under inet_addr_hash_lock. The "init"
 * variant of the delete is used, which leaves the node re-initialised.
 */
122 static void inet_hash_remove(struct in_ifaddr *ifa)
124 spin_lock(&inet_addr_hash_lock);
125 hlist_del_init_rcu(&ifa->hash);
126 spin_unlock(&inet_addr_hash_lock);
/* Visible logic: scan the inet_addr_lst bucket for an entry whose ifa_local
 * equals addr and whose device belongs to net; a fallback consults the
 * RT_TABLE_LOCAL FIB table and takes the device of an RTN_LOCAL result.
 * NOTE(review): this listing omits lines (the embedded numbers have gaps),
 * so the RCU locking, the loop exits, the condition guarding the fallback
 * and the action taken when "result && devref" holds are not shown here.
 */
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 u32 hash = inet_addr_hash(net, addr);
140 struct net_device *result = NULL;
141 struct in_ifaddr *ifa;
144 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 if (ifa->ifa_local == addr) {
146 struct net_device *dev = ifa->ifa_dev->dev;
148 if (!net_eq(dev_net(dev), net))
155 struct flowi4 fl4 = { .daddr = addr };
156 struct fib_result res = { 0 };
157 struct fib_table *local;
159 /* Fallback to FIB local table so that communication
160 * over loopback subnets work.
162 local = fib_get_table(net, RT_TABLE_LOCAL);
164 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 res.type == RTN_LOCAL)
166 result = FIB_RES_DEV(res);
168 if (result && devref)
173 EXPORT_SYMBOL(__ip_dev_find);
/* Forward declarations for helpers defined later in the file, the head of
 * the inetaddr notifier chain, and the devinet sysctl register/unregister
 * hooks.
 * NOTE(review): the bodies of the two devinet_sysctl_* definitions and any
 * preprocessor guards around them are not visible in this listing.
 */
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
184 static void devinet_sysctl_register(struct in_device *idev)
187 static void devinet_sysctl_unregister(struct in_device *idev)
192 /* Locks all the inet devices. */
/* Allocate a zero-filled struct in_ifaddr with GFP_KERNEL. The result of
 * kzalloc() is returned unchanged, so callers must handle NULL.
 */
194 static struct in_ifaddr *inet_alloc_ifa(void)
196 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/* RCU callback queued by inet_free_ifa(): recovers the in_ifaddr from its
 * embedded rcu_head and drops the reference held on ifa->ifa_dev.
 * NOTE(review): the release of the ifa memory itself is not visible in this
 * listing.
 */
199 static void inet_rcu_free_ifa(struct rcu_head *head)
201 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 in_dev_put(ifa->ifa_dev);
/* Schedule ifa for release after an RCU grace period; the actual work is
 * done by inet_rcu_free_ifa().
 */
207 static void inet_free_ifa(struct in_ifaddr *ifa)
209 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/* Final teardown of an in_device. Warns if the address list or multicast
 * list is still populated, emits a debug line when NET_REFCNT_DEBUG is
 * defined, and contains a "Freeing alive in_device" error message.
 * NOTE(review): the condition selecting the error path and the actual
 * release statements are not visible in this listing.
 */
212 void in_dev_finish_destroy(struct in_device *idev)
214 struct net_device *dev = idev->dev;
216 WARN_ON(idev->ifa_list);
217 WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 pr_err("Freeing alive in_device %p\n", idev);
227 EXPORT_SYMBOL(in_dev_finish_destroy);
/* Create the IPv4 state (in_device) for dev. Visible steps: allocate a
 * zeroed in_device, copy the namespace default devconf into it and clear its
 * sysctl pointer, allocate ARP neighbour parameters, disable LRO on the
 * device when forwarding is enabled, register sysctls, initialise multicast
 * state, and finally publish the in_device through dev->ip_ptr with
 * rcu_assign_pointer().
 * NOTE(review): error paths, reference counting and the return statement
 * are not visible in this listing.
 */
229 static struct in_device *inetdev_init(struct net_device *dev)
231 struct in_device *in_dev;
235 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 sizeof(in_dev->cnf));
240 in_dev->cnf.sysctl = NULL;
242 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 if (!in_dev->arp_parms)
245 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 dev_disable_lro(dev);
247 /* Reference in_dev->dev */
249 /* Account for reference dev->ip_ptr (below) */
252 devinet_sysctl_register(in_dev);
253 ip_mc_init_dev(in_dev);
254 if (dev->flags & IFF_UP)
257 /* we can receive as soon as ip_ptr is set -- do this last */
258 rcu_assign_pointer(dev->ip_ptr, in_dev);
/* RCU callback queued by inetdev_destroy(): recovers the in_device from its
 * embedded rcu_head.
 * NOTE(review): the statement acting on idev is not visible in this listing;
 * the name suggests a reference drop - confirm.
 */
267 static void in_dev_rcu_put(struct rcu_head *head)
269 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/* Tear down the IPv4 state of a device. Visible steps: destroy multicast
 * state, delete addresses from the head of ifa_list until it is empty
 * (inet_del_ifa with destroy == 0), clear dev->ip_ptr, unregister sysctls,
 * release the ARP parameters and queue in_dev_rcu_put() via call_rcu().
 * NOTE(review): the assignment of the local dev and the remainder of the
 * loop body are not visible in this listing.
 */
273 static void inetdev_destroy(struct in_device *in_dev)
275 struct in_ifaddr *ifa;
276 struct net_device *dev;
284 ip_mc_destroy_dev(in_dev);
286 while ((ifa = in_dev->ifa_list) != NULL) {
287 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 devinet_sysctl_unregister(in_dev);
294 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/* Test addresses a and b against the primary addresses of in_dev: the
 * visible condition is met when a lies in a primary address's subnet and b
 * is either zero or lies in that same subnet.
 * NOTE(review): the locking and the return statements are not visible in
 * this listing.
 */
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
303 for_primary_ifa(in_dev) {
304 if (inet_ifa_match(a, ifa)) {
305 if (!b || inet_ifa_match(b, ifa)) {
310 } endfor_ifa(in_dev);
/* Remove the address *ifap from in_dev's list and announce the removal.
 * nlh and portid are forwarded to rtmsg_ifa(); the use of destroy is not
 * visible in this listing.
 * Visible steps:
 *  1. When a primary address is deleted, the entries after it are walked;
 *     secondaries with the same mask and subnet are unhashed, unlinked and
 *     announced with RTM_DELADDR (the promotion-related branch conditions
 *     are only partly visible).
 *  2. On promotion, fib_del_ifaddr() is called for same-subnet entries while
 *     they are still on the list; then the address itself is unlinked and
 *     unhashed.
 *  3. RTM_DELADDR and the NETDEV_DOWN notifier are sent for the deleted
 *     address; a promoted secondary is relinked after last_prim, has
 *     IFA_F_SECONDARY cleared and is announced with RTM_NEWADDR.
 * NOTE(review): several conditions, braces and loop exits are missing from
 * this listing; confirm details against the complete source.
 */
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316 int destroy, struct nlmsghdr *nlh, u32 portid)
318 struct in_ifaddr *promote = NULL;
319 struct in_ifaddr *ifa, *ifa1 = *ifap;
320 struct in_ifaddr *last_prim = in_dev->ifa_list;
321 struct in_ifaddr *prev_prom = NULL;
322 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326 /* 1. Deleting primary ifaddr forces deletion all secondaries
327 * unless alias promotion is set
330 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333 while ((ifa = *ifap1) != NULL) {
334 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335 ifa1->ifa_scope <= ifa->ifa_scope)
338 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339 ifa1->ifa_mask != ifa->ifa_mask ||
340 !inet_ifa_match(ifa1->ifa_address, ifa)) {
341 ifap1 = &ifa->ifa_next;
347 inet_hash_remove(ifa);
348 *ifap1 = ifa->ifa_next;
350 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351 blocking_notifier_call_chain(&inetaddr_chain,
361 /* On promotion all secondaries from subnet are changing
362 * the primary IP, we must remove all their routes silently
363 * and later to add them back with new prefsrc. Do this
364 * while all addresses are on the device list.
366 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367 if (ifa1->ifa_mask == ifa->ifa_mask &&
368 inet_ifa_match(ifa1->ifa_address, ifa))
369 fib_del_ifaddr(ifa, ifa1);
374 *ifap = ifa1->ifa_next;
375 inet_hash_remove(ifa1);
377 /* 3. Announce address deletion */
379 /* Send message first, then call notifier.
380 At first sight, FIB update triggered by notifier
381 will refer to already deleted ifaddr, that could confuse
382 netlink listeners. It is not true: look, gated sees
383 that route deleted and if it still thinks that ifaddr
384 is valid, it will try to restore deleted routes... Grr.
385 So that, this order is correct.
387 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
391 struct in_ifaddr *next_sec = promote->ifa_next;
394 prev_prom->ifa_next = promote->ifa_next;
395 promote->ifa_next = last_prim->ifa_next;
396 last_prim->ifa_next = promote;
399 promote->ifa_flags &= ~IFA_F_SECONDARY;
400 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401 blocking_notifier_call_chain(&inetaddr_chain,
403 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404 if (ifa1->ifa_mask != ifa->ifa_mask ||
405 !inet_ifa_match(ifa1->ifa_address, ifa))
/* Convenience wrapper around __inet_del_ifa() for callers without a netlink
 * request: passes a NULL nlmsghdr and portid 0.
 */
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
418 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/* Delayed work item that runs check_lifetime(); it is kicked from
 * __inet_insert_ifa() and re-armed by check_lifetime() itself.
 */
421 static void check_lifetime(struct work_struct *work);
423 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/* Insert ifa into the address list of ifa->ifa_dev and announce it.
 * Visible steps: start by treating the address as primary; walk the list
 * tracking the slot after the last primary of suitable scope; when an
 * existing entry shares mask and subnet, compare local address and scope
 * (the outcomes of those comparisons are not visible) and mark the new
 * address IFA_F_SECONDARY; for a primary address seed the random generator
 * from ifa_local; link the entry, add it to the address hash, restart the
 * lifetime work immediately, then send RTM_NEWADDR followed by the NETDEV_UP
 * notifier.
 * NOTE(review): the handling of a zero ifa_local, the duplicate and scope
 * mismatch outcomes and the return value are not visible in this listing.
 */
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
428 struct in_device *in_dev = ifa->ifa_dev;
429 struct in_ifaddr *ifa1, **ifap, **last_primary;
433 if (!ifa->ifa_local) {
438 ifa->ifa_flags &= ~IFA_F_SECONDARY;
439 last_primary = &in_dev->ifa_list;
441 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442 ifap = &ifa1->ifa_next) {
443 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444 ifa->ifa_scope <= ifa1->ifa_scope)
445 last_primary = &ifa1->ifa_next;
446 if (ifa1->ifa_mask == ifa->ifa_mask &&
447 inet_ifa_match(ifa1->ifa_address, ifa)) {
448 if (ifa1->ifa_local == ifa->ifa_local) {
452 if (ifa1->ifa_scope != ifa->ifa_scope) {
456 ifa->ifa_flags |= IFA_F_SECONDARY;
460 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461 net_srandom(ifa->ifa_local);
465 ifa->ifa_next = *ifap;
468 inet_hash_insert(dev_net(in_dev->dev), ifa);
470 cancel_delayed_work(&check_lifetime_work);
471 schedule_delayed_work(&check_lifetime_work, 0);
473 /* Send message first, then call notifier.
474 Notifier will trigger FIB update, so that
475 listeners of netlink will know about new ifaddr */
476 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/* Convenience wrapper around __inet_insert_ifa() for callers without a
 * netlink request: passes a NULL nlmsghdr and portid 0 and returns its
 * result.
 */
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 return __inet_insert_ifa(ifa, NULL, 0);
/* Attach ifa to dev's in_device and insert it. Visible steps: mark all
 * devconf entries as set, bind ifa->ifa_dev to the in_device (warning if it
 * was already bound to a different one), force host scope for loopback
 * addresses, then return the result of inet_insert_ifa().
 * NOTE(review): the handling of a missing in_device and any reference
 * counting are not visible in this listing.
 */
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 struct in_device *in_dev = __in_dev_get_rtnl(dev);
497 ipv4_devconf_setall(in_dev);
498 if (ifa->ifa_dev != in_dev) {
499 WARN_ON(ifa->ifa_dev);
501 ifa->ifa_dev = in_dev;
503 if (ipv4_is_loopback(ifa->ifa_local))
504 ifa->ifa_scope = RT_SCOPE_HOST;
505 return inet_insert_ifa(ifa);
508 /* Caller must hold RCU or RTNL :
509 * We don't take a reference on found in_device
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 struct net_device *dev;
514 struct in_device *in_dev = NULL;
517 dev = dev_get_by_index_rcu(net, ifindex);
519 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
523 EXPORT_SYMBOL(inetdev_by_index);
/* Search the primary addresses of in_dev for one whose mask equals mask and
 * whose subnet contains prefix.
 * NOTE(review): the return statements are not visible in this listing.
 */
525 /* Called only from RTNL semaphored context. No locks. */
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
532 for_primary_ifa(in_dev) {
533 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535 } endfor_ifa(in_dev);
/* Netlink handler for address deletion. Visible steps: parse the message
 * with ifa_ipv4_policy, look up the in_device for ifm->ifa_index, then walk
 * its address list comparing the optional IFA_LOCAL, IFA_LABEL and
 * IFA_ADDRESS (with prefix length) attributes; an entry passing the visible
 * filters is removed with __inet_del_ifa(..., destroy = 1, ...). A result of
 * -EADDRNOTAVAIL is assigned after the loop.
 * NOTE(review): the skip statements after each filter and the error and
 * return paths are not visible in this listing.
 */
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 struct net *net = sock_net(skb->sk);
542 struct nlattr *tb[IFA_MAX+1];
543 struct in_device *in_dev;
544 struct ifaddrmsg *ifm;
545 struct in_ifaddr *ifa, **ifap;
550 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
554 ifm = nlmsg_data(nlh);
555 in_dev = inetdev_by_index(net, ifm->ifa_index);
556 if (in_dev == NULL) {
561 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 ifap = &ifa->ifa_next) {
564 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
567 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
570 if (tb[IFA_ADDRESS] &&
571 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
575 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
579 err = -EADDRNOTAVAIL;
/* Sentinel lifetime value: check_lifetime() and inet_fill_ifaddr() treat a
 * lifetime equal to this as exempt from expiry handling.
 */
584 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/* Delayed-work handler that ages addresses. Visible behaviour: scan every
 * inet_addr_lst bucket, skip IFA_F_PERMANENT entries, delete entries whose
 * valid lifetime has elapsed, mark entries past their preferred lifetime
 * IFA_F_DEPRECATED (announcing RTM_NEWADDR), track the earliest upcoming
 * expiry in next, and re-arm check_lifetime_work no sooner than
 * now + ADDRCONF_TIMER_FUZZ_MAX.
 * NOTE(review): the inner deletion loop advances with "&ifa->ifa_next" (the
 * entry being expired) instead of following *ifap, so it does not walk the
 * device list; it looks like it should be "&(*ifap)->ifa_next" - confirm.
 * NOTE(review): locking and several statements are missing from this
 * listing.
 */
586 static void check_lifetime(struct work_struct *work)
588 unsigned long now, next, next_sec, next_sched;
589 struct in_ifaddr *ifa;
593 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
600 if (ifa->ifa_flags & IFA_F_PERMANENT)
603 /* We try to batch several events at once. */
604 age = (now - ifa->ifa_tstamp +
605 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
607 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608 age >= ifa->ifa_valid_lft) {
609 struct in_ifaddr **ifap ;
612 for (ifap = &ifa->ifa_dev->ifa_list;
613 *ifap != NULL; ifap = &ifa->ifa_next) {
615 inet_del_ifa(ifa->ifa_dev,
619 } else if (ifa->ifa_preferred_lft ==
620 INFINITY_LIFE_TIME) {
622 } else if (age >= ifa->ifa_preferred_lft) {
623 if (time_before(ifa->ifa_tstamp +
624 ifa->ifa_valid_lft * HZ, next))
625 next = ifa->ifa_tstamp +
626 ifa->ifa_valid_lft * HZ;
628 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629 ifa->ifa_flags |= IFA_F_DEPRECATED;
630 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
632 } else if (time_before(ifa->ifa_tstamp +
633 ifa->ifa_preferred_lft * HZ,
635 next = ifa->ifa_tstamp +
636 ifa->ifa_preferred_lft * HZ;
642 next_sec = round_jiffies_up(next);
645 /* If rounded timeout is accurate enough, accept it. */
646 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647 next_sched = next_sec;
650 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
654 schedule_delayed_work(&check_lifetime_work, next_sched - now);
/* Apply valid and preferred lifetimes to ifa. Visible steps: clear
 * IFA_F_PERMANENT and IFA_F_DEPRECATED; store the fixed-up valid lifetime
 * when it is finite, with IFA_F_PERMANENT set on the alternative path; for a
 * finite preferred lifetime store it (IFA_F_DEPRECATED is set under a
 * condition not visible here); stamp ifa_tstamp with jiffies and initialise
 * ifa_cstamp on first use.
 */
657 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
660 unsigned long timeout;
662 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
664 timeout = addrconf_timeout_fixup(valid_lft, HZ);
665 if (addrconf_finite_timeout(timeout))
666 ifa->ifa_valid_lft = timeout;
668 ifa->ifa_flags |= IFA_F_PERMANENT;
670 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671 if (addrconf_finite_timeout(timeout)) {
673 ifa->ifa_flags |= IFA_F_DEPRECATED;
674 ifa->ifa_preferred_lft = timeout;
676 ifa->ifa_tstamp = jiffies;
677 if (!ifa->ifa_cstamp)
678 ifa->ifa_cstamp = ifa->ifa_tstamp;
/* Build a new in_ifaddr from an RTM_NEWADDR request. Visible steps: parse
 * with ifa_ipv4_policy; check prefix length (> 32) and presence of
 * IFA_LOCAL; resolve the device and its in_device; allocate the ifa; default
 * IFA_ADDRESS to IFA_LOCAL; fill prefix, mask, flags, scope, device,
 * addresses, optional broadcast and the label (from IFA_LABEL or the device
 * name); when IFA_CACHEINFO is present, test it (zero valid lifetime, or
 * preferred > valid) and hand the lifetimes back through pvalid_lft and
 * pprefered_lft.
 * NOTE(review): error codes, cleanup and the return statements are not
 * visible in this listing.
 */
681 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682 __u32 *pvalid_lft, __u32 *pprefered_lft)
684 struct nlattr *tb[IFA_MAX+1];
685 struct in_ifaddr *ifa;
686 struct ifaddrmsg *ifm;
687 struct net_device *dev;
688 struct in_device *in_dev;
691 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
695 ifm = nlmsg_data(nlh);
697 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
700 dev = __dev_get_by_index(net, ifm->ifa_index);
705 in_dev = __in_dev_get_rtnl(dev);
710 ifa = inet_alloc_ifa();
713 * A potential indev allocation can be left alive, it stays
714 * assigned to its device and is destroy with it.
718 ipv4_devconf_setall(in_dev);
721 if (tb[IFA_ADDRESS] == NULL)
722 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
724 INIT_HLIST_NODE(&ifa->hash);
725 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727 ifa->ifa_flags = ifm->ifa_flags;
728 ifa->ifa_scope = ifm->ifa_scope;
729 ifa->ifa_dev = in_dev;
731 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
734 if (tb[IFA_BROADCAST])
735 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
738 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
740 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
742 if (tb[IFA_CACHEINFO]) {
743 struct ifa_cacheinfo *ci;
745 ci = nla_data(tb[IFA_CACHEINFO]);
746 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
750 *pvalid_lft = ci->ifa_valid;
751 *pprefered_lft = ci->ifa_prefered;
/* Look on ifa's own device for an existing entry with the same mask, the
 * same subnet and the same local address as ifa.
 * NOTE(review): the return statements are not visible in this listing.
 */
760 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
762 struct in_device *in_dev = ifa->ifa_dev;
763 struct in_ifaddr *ifa1, **ifap;
768 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769 ifap = &ifa1->ifa_next) {
770 if (ifa1->ifa_mask == ifa->ifa_mask &&
771 inet_ifa_match(ifa1->ifa_address, ifa) &&
772 ifa1->ifa_local == ifa->ifa_local)
/* Netlink handler for address creation or replacement. Visible steps: build
 * an ifa from the request with rtm_to_ifaddr() (lifetimes default to
 * INFINITY_LIFE_TIME), look for a matching existing entry, and either set
 * lifetimes and insert via __inet_insert_ifa(), or test the NLM_F_EXCL and
 * NLM_F_REPLACE flags, refresh lifetimes and re-announce with RTM_NEWADDR
 * plus the NETDEV_UP notifier.
 * NOTE(review): the branch conditions, the error returns and which ifa
 * object the second path operates on are not visible in this listing.
 */
778 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
780 struct net *net = sock_net(skb->sk);
781 struct in_ifaddr *ifa;
782 struct in_ifaddr *ifa_existing;
783 __u32 valid_lft = INFINITY_LIFE_TIME;
784 __u32 prefered_lft = INFINITY_LIFE_TIME;
788 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
792 ifa_existing = find_matching_ifa(ifa);
794 /* It would be best to check for !NLM_F_CREATE here but
795 * userspace already relies on not having to provide this.
797 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
802 if (nlh->nlmsg_flags & NLM_F_EXCL ||
803 !(nlh->nlmsg_flags & NLM_F_REPLACE))
806 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
807 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
808 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/* Classful prefix-length helper: rc starts at -1 ("something else, probably
 * a multicast"); the zero network and class A, B and C addresses are tested
 * in turn. Callers in this file treat a negative result as invalid.
 * NOTE(review): the value assigned in each branch and the return statement
 * are not visible in this listing.
 */
814 * Determine a default network mask, based on the IP address.
817 static int inet_abc_len(__be32 addr)
819 int rc = -1; /* Something else, probably a multicast. */
821 if (ipv4_is_zeronet(addr))
824 __u32 haddr = ntohl(addr);
826 if (IN_CLASSA(haddr))
828 else if (IN_CLASSB(haddr))
830 else if (IN_CLASSC(haddr))
/* Handle the IPv4 interface address ioctls (SIOCGIF*ADDR, SIOCGIFNETMASK,
 * their SIOCSIF* setters, and a flags path).
 * net: namespace used for module load, device lookup and the CAP_NET_ADMIN
 *      capability check
 * cmd: ioctl number
 * arg: user pointer to a struct ifreq; it is copied in at entry and a
 *      copy_to_user() of the same struct is visible near the end
 * Visible flow: copy the ifreq in and NUL-terminate the name; save the
 * caller's address; locate a ':' alias separator; load the device module;
 * validate per command (getters reset the sockaddr to AF_INET, setters need
 * CAP_NET_ADMIN and an AF_INET address); resolve the device and in_device;
 * select the address by label plus address, falling back to label only; then
 * run the per-command get or set. Setters that modify an existing address
 * remove it with inet_del_ifa(..., 0), update the field and re-insert it.
 * NOTE(review): locking, the switch scaffolding, error codes and the return
 * are not visible in this listing.
 */
838 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
841 struct sockaddr_in sin_orig;
842 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
843 struct in_device *in_dev;
844 struct in_ifaddr **ifap = NULL;
845 struct in_ifaddr *ifa = NULL;
846 struct net_device *dev;
849 int tryaddrmatch = 0;
852 * Fetch the caller's info block into kernel space
855 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
857 ifr.ifr_name[IFNAMSIZ - 1] = 0;
859 /* save original address for comparison */
860 memcpy(&sin_orig, sin, sizeof(*sin));
862 colon = strchr(ifr.ifr_name, ':');
866 dev_load(net, ifr.ifr_name);
869 case SIOCGIFADDR: /* Get interface address */
870 case SIOCGIFBRDADDR: /* Get the broadcast address */
871 case SIOCGIFDSTADDR: /* Get the destination address */
872 case SIOCGIFNETMASK: /* Get the netmask for the interface */
873 /* Note that these ioctls will not sleep,
874 so that we do not impose a lock.
875 One day we will be forced to put shlock here (I mean SMP)
877 tryaddrmatch = (sin_orig.sin_family == AF_INET);
878 memset(sin, 0, sizeof(*sin));
879 sin->sin_family = AF_INET;
884 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
887 case SIOCSIFADDR: /* Set interface address (and family) */
888 case SIOCSIFBRDADDR: /* Set the broadcast address */
889 case SIOCSIFDSTADDR: /* Set the destination address */
890 case SIOCSIFNETMASK: /* Set the netmask for the interface */
892 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
895 if (sin->sin_family != AF_INET)
906 dev = __dev_get_by_name(net, ifr.ifr_name);
913 in_dev = __in_dev_get_rtnl(dev);
916 /* Matthias Andree */
917 /* compare label and address (4.4BSD style) */
918 /* note: we only do this for a limited set of ioctls
919 and only if the original address family was AF_INET.
920 This is checked above. */
921 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
922 ifap = &ifa->ifa_next) {
923 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
924 sin_orig.sin_addr.s_addr ==
930 /* we didn't get a match, maybe the application is
931 4.3BSD-style and passed in junk so we fall back to
932 comparing just the label */
934 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
935 ifap = &ifa->ifa_next)
936 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
941 ret = -EADDRNOTAVAIL;
942 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
946 case SIOCGIFADDR: /* Get interface address */
947 sin->sin_addr.s_addr = ifa->ifa_local;
950 case SIOCGIFBRDADDR: /* Get the broadcast address */
951 sin->sin_addr.s_addr = ifa->ifa_broadcast;
954 case SIOCGIFDSTADDR: /* Get the destination address */
955 sin->sin_addr.s_addr = ifa->ifa_address;
958 case SIOCGIFNETMASK: /* Get the netmask for the interface */
959 sin->sin_addr.s_addr = ifa->ifa_mask;
964 ret = -EADDRNOTAVAIL;
968 if (!(ifr.ifr_flags & IFF_UP))
969 inet_del_ifa(in_dev, ifap, 1);
972 ret = dev_change_flags(dev, ifr.ifr_flags);
975 case SIOCSIFADDR: /* Set interface address (and family) */
977 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
982 ifa = inet_alloc_ifa();
985 INIT_HLIST_NODE(&ifa->hash);
987 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
989 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
992 if (ifa->ifa_local == sin->sin_addr.s_addr)
994 inet_del_ifa(in_dev, ifap, 0);
995 ifa->ifa_broadcast = 0;
999 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1001 if (!(dev->flags & IFF_POINTOPOINT)) {
1002 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1003 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1004 if ((dev->flags & IFF_BROADCAST) &&
1005 ifa->ifa_prefixlen < 31)
1006 ifa->ifa_broadcast = ifa->ifa_address |
1009 ifa->ifa_prefixlen = 32;
1010 ifa->ifa_mask = inet_make_mask(32);
1012 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1013 ret = inet_set_ifa(dev, ifa);
1016 case SIOCSIFBRDADDR: /* Set the broadcast address */
1018 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1019 inet_del_ifa(in_dev, ifap, 0);
1020 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1021 inet_insert_ifa(ifa);
1025 case SIOCSIFDSTADDR: /* Set the destination address */
1027 if (ifa->ifa_address == sin->sin_addr.s_addr)
1030 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1033 inet_del_ifa(in_dev, ifap, 0);
1034 ifa->ifa_address = sin->sin_addr.s_addr;
1035 inet_insert_ifa(ifa);
1038 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1041 * The mask we set must be legal.
1044 if (bad_mask(sin->sin_addr.s_addr, 0))
1047 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1048 __be32 old_mask = ifa->ifa_mask;
1049 inet_del_ifa(in_dev, ifap, 0);
1050 ifa->ifa_mask = sin->sin_addr.s_addr;
1051 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1053 /* See if current broadcast address matches
1054 * with current netmask, then recalculate
1055 * the broadcast address. Otherwise it's a
1056 * funny address, so don't touch it since
1057 * the user seems to know what (s)he's doing...
1059 if ((dev->flags & IFF_BROADCAST) &&
1060 (ifa->ifa_prefixlen < 31) &&
1061 (ifa->ifa_broadcast ==
1062 (ifa->ifa_local|~old_mask))) {
1063 ifa->ifa_broadcast = (ifa->ifa_local |
1064 ~sin->sin_addr.s_addr);
1066 inet_insert_ifa(ifa);
1076 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/* Emit one struct ifreq per address of dev into the user buffer buf of size
 * len. Visible behaviour: each record is zeroed, named with the address
 * label or the device name, tagged AF_INET with an address, copied out with
 * copy_to_user(), and buf, len and done advance by sizeof(struct ifreq). A
 * separate visible path only adds sizeof(ifr) to done.
 * NOTE(review): the condition selecting that path, the handling of a short
 * buffer or failed copy, and the return value are not visible in this
 * listing.
 */
1080 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1082 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1083 struct in_ifaddr *ifa;
1090 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092 done += sizeof(ifr);
1095 if (len < (int) sizeof(ifr))
1097 memset(&ifr, 0, sizeof(struct ifreq));
1099 strcpy(ifr.ifr_name, ifa->ifa_label);
1101 strcpy(ifr.ifr_name, dev->name);
1103 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1104 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1107 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1111 buf += sizeof(struct ifreq);
1112 len -= sizeof(struct ifreq);
1113 done += sizeof(struct ifreq);
/* Choose a local source address for traffic leaving dev towards dst, with
 * scope as the largest acceptable scope value. Visible behaviour: scan dev's
 * primary addresses, skipping those whose scope exceeds scope, and take
 * ifa_local from one matching dst's subnet (or when dst is zero); a second
 * pass walks every device in the namespace and takes a primary address whose
 * scope is not RT_SCOPE_LINK and is within scope.
 * NOTE(review): locking, the loop exits and the return are not visible in
 * this listing.
 */
1119 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1122 struct in_device *in_dev;
1123 struct net *net = dev_net(dev);
1126 in_dev = __in_dev_get_rcu(dev);
1130 for_primary_ifa(in_dev) {
1131 if (ifa->ifa_scope > scope)
1133 if (!dst || inet_ifa_match(dst, ifa)) {
1134 addr = ifa->ifa_local;
1138 addr = ifa->ifa_local;
1139 } endfor_ifa(in_dev);
1145 /* Not loopback addresses on loopback should be preferred
1146 in this case. It is important that lo is the first interface
1149 for_each_netdev_rcu(net, dev) {
1150 in_dev = __in_dev_get_rcu(dev);
1154 for_primary_ifa(in_dev) {
1155 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1156 ifa->ifa_scope <= scope) {
1157 addr = ifa->ifa_local;
1160 } endfor_ifa(in_dev);
1166 EXPORT_SYMBOL(inet_select_addr);
/* Per-device helper for inet_confirm_addr(): scans the addresses of in_dev
 * for a candidate (local equal to ifa_local, or local zero, with scope
 * within the limit) and tracks in "same" whether local and dst fall into an
 * address's subnet. Returns addr when same is set, otherwise 0.
 * NOTE(review): the loop header and several branch bodies are not visible in
 * this listing.
 */
1168 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1169 __be32 local, int scope)
1176 (local == ifa->ifa_local || !local) &&
1177 ifa->ifa_scope <= scope) {
1178 addr = ifa->ifa_local;
1183 same = (!local || inet_ifa_match(local, ifa)) &&
1184 (!dst || inet_ifa_match(dst, ifa));
1188 /* Is the selected addr into dst subnet? */
1189 if (inet_ifa_match(addr, ifa))
1191 /* No, then can we use new local src? */
1192 if (ifa->ifa_scope <= scope) {
1193 addr = ifa->ifa_local;
1196 /* search for large dst subnet for addr */
1200 } endfor_ifa(in_dev);
1202 return same ? addr : 0;
/* Visible behaviour: for any scope other than RT_SCOPE_LINK the check is
 * delegated to confirm_addr_indev() on the given in_dev; otherwise every
 * device in in_dev's namespace is tried with confirm_addr_indev().
 * NOTE(review): locking, the loop exit and the final return are not visible
 * in this listing.
 */
1206 * Confirm that local IP address exists using wildcards:
1207 * - in_dev: only on this interface, 0=any interface
1208 * - dst: only in the same subnet as dst, 0=any dst
1209 * - local: address, 0=autoselect the local address
1210 * - scope: maximum allowed scope value for the local address
1212 __be32 inet_confirm_addr(struct in_device *in_dev,
1213 __be32 dst, __be32 local, int scope)
1216 struct net_device *dev;
1219 if (scope != RT_SCOPE_LINK)
1220 return confirm_addr_indev(in_dev, dst, local, scope);
1222 net = dev_net(in_dev->dev);
1224 for_each_netdev_rcu(net, dev) {
1225 in_dev = __in_dev_get_rcu(dev);
1227 addr = confirm_addr_indev(in_dev, dst, local, scope);
1236 EXPORT_SYMBOL(inet_confirm_addr);
/* Subscribe nb to the inetaddr notifier chain, which this file fires with
 * NETDEV_UP and NETDEV_DOWN for address changes. Returns the result of
 * blocking_notifier_chain_register().
 */
1242 int register_inetaddr_notifier(struct notifier_block *nb)
1244 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1246 EXPORT_SYMBOL(register_inetaddr_notifier);
/* Remove nb from the inetaddr notifier chain. Returns the result of
 * blocking_notifier_chain_unregister().
 */
1248 int unregister_inetaddr_notifier(struct notifier_block *nb)
1250 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1252 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1254 /* Rename ifa_labels for a device name change. Make some effort to preserve
1255 * existing alias numbering and to create unique labels if possible.
1257 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1259 struct in_ifaddr *ifa;
1262 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1263 char old[IFNAMSIZ], *dot;
1265 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1266 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1269 dot = strchr(old, ':');
1271 sprintf(old, ":%d", named);
1274 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1275 strcat(ifa->ifa_label, dot);
1277 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1279 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/* Predicate used by inetdev_event() to decide whether a device MTU is large
 * enough for IPv4.
 * NOTE(review): the body (and therefore the threshold) is not visible in
 * this listing.
 */
1283 static bool inetdev_valid_mtu(unsigned int mtu)
/* Send one ARP request per address on in_dev in which the address's
 * ifa_local is passed for both IP arguments of arp_send() and the hardware
 * address argument is dev->dev_addr, i.e. a gratuitous ARP for each address.
 */
1288 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1289 struct in_device *in_dev)
1292 struct in_ifaddr *ifa;
1294 for (ifa = in_dev->ifa_list; ifa;
1295 ifa = ifa->ifa_next) {
1296 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1297 ifa->ifa_local, dev,
1298 ifa->ifa_local, NULL,
1299 dev->dev_addr, NULL);
/* Netdevice notifier callback (installed through ip_netdev_notifier).
 * Visible behaviour: for NETDEV_REGISTER, and for NETDEV_CHANGEMTU with a
 * valid MTU, an in_device is created with inetdev_init() (loopback gets
 * NOXFRM and NOPOLICY set); a loopback device is given 127.0.0.1/8 with host
 * scope and infinite lifetimes; NETDEV_CHANGEADDR (subject to
 * IN_DEV_ARP_NOTIFY) and NETDEV_NOTIFY_PEERS send gratuitous ARP; type
 * changes unmap and remap multicast state; an invalid MTU or
 * NETDEV_UNREGISTER destroys the in_device; NETDEV_CHANGENAME relabels
 * addresses and re-registers sysctls.
 * NOTE(review): the switch scaffolding, several case labels and the return
 * values are not visible in this listing.
 */
1303 /* Called only under RTNL semaphore */
1305 static int inetdev_event(struct notifier_block *this, unsigned long event,
1308 struct net_device *dev = ptr;
1309 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1314 if (event == NETDEV_REGISTER) {
1315 in_dev = inetdev_init(dev);
1317 return notifier_from_errno(-ENOMEM);
1318 if (dev->flags & IFF_LOOPBACK) {
1319 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1320 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1322 } else if (event == NETDEV_CHANGEMTU) {
1323 /* Re-enabling IP */
1324 if (inetdev_valid_mtu(dev->mtu))
1325 in_dev = inetdev_init(dev);
1331 case NETDEV_REGISTER:
1332 pr_debug("%s: bug\n", __func__);
1333 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1336 if (!inetdev_valid_mtu(dev->mtu))
1338 if (dev->flags & IFF_LOOPBACK) {
1339 struct in_ifaddr *ifa = inet_alloc_ifa();
1342 INIT_HLIST_NODE(&ifa->hash);
1344 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1345 ifa->ifa_prefixlen = 8;
1346 ifa->ifa_mask = inet_make_mask(8);
1347 in_dev_hold(in_dev);
1348 ifa->ifa_dev = in_dev;
1349 ifa->ifa_scope = RT_SCOPE_HOST;
1350 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1351 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1352 INFINITY_LIFE_TIME);
1353 inet_insert_ifa(ifa);
1358 case NETDEV_CHANGEADDR:
1359 if (!IN_DEV_ARP_NOTIFY(in_dev))
1362 case NETDEV_NOTIFY_PEERS:
1363 /* Send gratuitous ARP to notify of link change */
1364 inetdev_send_gratuitous_arp(dev, in_dev);
1369 case NETDEV_PRE_TYPE_CHANGE:
1370 ip_mc_unmap(in_dev);
1372 case NETDEV_POST_TYPE_CHANGE:
1373 ip_mc_remap(in_dev);
1375 case NETDEV_CHANGEMTU:
1376 if (inetdev_valid_mtu(dev->mtu))
1378 /* disable IP when MTU is not enough */
1379 case NETDEV_UNREGISTER:
1380 inetdev_destroy(in_dev);
1382 case NETDEV_CHANGENAME:
1383 /* Do not notify about label change, this event is
1384 * not interesting to applications using netlink.
1386 inetdev_changename(dev, in_dev);
1388 devinet_sysctl_unregister(in_dev);
1389 devinet_sysctl_register(in_dev);
/* Notifier block whose callback is inetdev_event().
 * NOTE(review): where it is registered is not visible in this listing.
 */
1396 static struct notifier_block ip_netdev_notifier = {
1397 .notifier_call = inetdev_event,
/* Size estimate used by rtmsg_ifa() when allocating the skb for an address
 * notification: the aligned ifaddrmsg header plus the ADDRESS, LOCAL,
 * BROADCAST and LABEL attributes.
 * NOTE(review): inet_fill_ifaddr() also emits IFA_CACHEINFO through
 * put_cacheinfo(), but this sum has no term for it (for example
 * nla_total_size(sizeof(struct ifa_cacheinfo))); confirm whether the
 * estimate is sufficient, since rtmsg_ifa() warns on -EMSGSIZE.
 */
1400 static size_t inet_nlmsg_size(void)
1402 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1403 + nla_total_size(4) /* IFA_ADDRESS */
1404 + nla_total_size(4) /* IFA_LOCAL */
1405 + nla_total_size(4) /* IFA_BROADCAST */
1406 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/* Convert a jiffies timestamp to hundredths of a second elapsed since
 * INITIAL_JIFFIES (scaled by 100 / HZ), truncated to u32.
 */
1409 static inline u32 cstamp_delta(unsigned long cstamp)
1411 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/* Append an IFA_CACHEINFO attribute to skb: creation and update stamps are
 * converted with cstamp_delta(), the lifetimes are stored as given. Returns
 * the result of nla_put().
 */
1414 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1415 unsigned long tstamp, u32 preferred, u32 valid)
1417 struct ifa_cacheinfo ci;
1419 ci.cstamp = cstamp_delta(cstamp);
1420 ci.tstamp = cstamp_delta(tstamp);
1421 ci.ifa_prefered = preferred;
1422 ci.ifa_valid = valid;
1424 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Serialise ifa into skb as one netlink address message. Visible steps:
 * start the message with nlmsg_put(); fill the ifaddrmsg header (family
 * AF_INET, prefix length, flags, scope, interface index); for non-permanent
 * addresses derive the lifetimes from the stored values and the seconds
 * elapsed since ifa_tstamp, with INFINITY_LIFE_TIME assigned on the
 * alternative path; emit ADDRESS, BROADCAST and LABEL only when non-zero or
 * non-empty, LOCAL under a guard that is not visible here, and the cache
 * info; finish with nlmsg_end(), or cancel the message on failure.
 * NOTE(review): the arithmetic adjusting the lifetimes and the failure
 * return value are not visible in this listing.
 */
1427 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1428 u32 portid, u32 seq, int event, unsigned int flags)
1430 struct ifaddrmsg *ifm;
1431 struct nlmsghdr *nlh;
1432 u32 preferred, valid;
1434 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1438 ifm = nlmsg_data(nlh);
1439 ifm->ifa_family = AF_INET;
1440 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1441 ifm->ifa_flags = ifa->ifa_flags;
1442 ifm->ifa_scope = ifa->ifa_scope;
1443 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1445 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1446 preferred = ifa->ifa_preferred_lft;
1447 valid = ifa->ifa_valid_lft;
1448 if (preferred != INFINITY_LIFE_TIME) {
1449 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1451 if (preferred > tval)
1455 if (valid != INFINITY_LIFE_TIME) {
1463 preferred = INFINITY_LIFE_TIME;
1464 valid = INFINITY_LIFE_TIME;
1466 if ((ifa->ifa_address &&
1467 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1469 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1470 (ifa->ifa_broadcast &&
1471 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1472 (ifa->ifa_label[0] &&
1473 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1474 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1476 goto nla_put_failure;
1478 return nlmsg_end(skb, nlh);
1481 nlmsg_cancel(skb, nlh);
/* Netlink dump callback for IPv4 addresses. Visible behaviour: resume state
 * is read from cb->args[1] (device index) and cb->args[2] (address index);
 * the namespace's dev_index_head buckets are walked and, for each device
 * with an in_device, every address at or past the resume index is written
 * with inet_fill_ifaddr() as RTM_NEWADDR with NLM_F_MULTI, stopping when a
 * fill returns <= 0; cb->seq is derived from dev_addr_genid for consistency
 * checking; the address index is saved back to cb->args[2].
 * NOTE(review): locking, the remaining resume bookkeeping and the return
 * value are not visible in this listing.
 */
1485 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1487 struct net *net = sock_net(skb->sk);
1490 int ip_idx, s_ip_idx;
1491 struct net_device *dev;
1492 struct in_device *in_dev;
1493 struct in_ifaddr *ifa;
1494 struct hlist_head *head;
1497 s_idx = idx = cb->args[1];
1498 s_ip_idx = ip_idx = cb->args[2];
1500 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1502 head = &net->dev_index_head[h];
1504 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1506 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1509 if (h > s_h || idx > s_idx)
1511 in_dev = __in_dev_get_rcu(dev);
1515 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1516 ifa = ifa->ifa_next, ip_idx++) {
1517 if (ip_idx < s_ip_idx)
1519 if (inet_fill_ifaddr(skb, ifa,
1520 NETLINK_CB(cb->skb).portid,
1522 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1526 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1537 cb->args[2] = ip_idx;
/*
 * Notify RTNLGRP_IPV4_IFADDR listeners about @event on address @ifa.
 *
 * An skb sized by inet_nlmsg_size() is allocated with GFP_KERNEL and
 * filled by inet_fill_ifaddr(); the sequence number comes from @nlh when
 * it is non-NULL and is 0 otherwise.  The message is sent with
 * rtnl_notify(); rtnl_set_sk_err() reports err on the same group
 * (presumably on the failure path - the label is not visible here).
 */
1542 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1545 struct sk_buff *skb;
1546 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1550 net = dev_net(ifa->ifa_dev->dev);
1551 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1555 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1557 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1558 WARN_ON(err == -EMSGSIZE);
1562 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1566 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * Report the attribute space needed for IFLA_INET_CONF on @dev:
 * IPV4_DEVCONF_MAX values of 4 bytes each, wrapped by nla_total_size().
 * The in_device is looked up from dev->ip_ptr; how a missing in_device is
 * handled is on a line not visible in this listing.
 */
1569 static size_t inet_get_link_af_size(const struct net_device *dev)
1571 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1576 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * Emit the IFLA_INET_CONF attribute for @dev into @skb.
 *
 * Room for IPV4_DEVCONF_MAX 4-byte values is reserved with nla_reserve()
 * and each in_dev->cnf.data[] slot is copied into it as a u32.
 * NOTE(review): the NULL checks and return statements are not visible in
 * this listing.
 */
1579 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1581 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1588 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1592 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1593 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/*
 * Netlink policy for the AF_INET link attributes: IFLA_INET_CONF must be
 * a nested attribute.  Used by inet_validate_link_af().
 */
1598 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1599 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * Validate the AF_INET link attributes in @nla before they are applied.
 *
 * Returns -EAFNOSUPPORT when @dev is given but has no in_device.  The
 * nested attributes are parsed against inet_af_policy, and every entry
 * inside IFLA_INET_CONF has its attribute type checked as a configuration
 * id in the range 1..IPV4_DEVCONF_MAX.
 * NOTE(review): the error returns for the parse and range checks are on
 * lines not visible in this listing.
 */
1602 static int inet_validate_link_af(const struct net_device *dev,
1603 const struct nlattr *nla)
1605 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1608 if (dev && !__in_dev_get_rtnl(dev))
1609 return -EAFNOSUPPORT;
1611 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1615 if (tb[IFLA_INET_CONF]) {
1616 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
/* The attribute type doubles as the devconf id. */
1617 int cfgid = nla_type(a);
1622 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * Apply the AF_INET link attributes in @nla to @dev.
 *
 * Each nested entry of IFLA_INET_CONF is applied with ipv4_devconf_set(),
 * using the attribute type as the configuration id and its u32 payload as
 * the value.  The attributes are parsed without a policy (NULL) here.
 * -EAFNOSUPPORT is returned under a condition not visible in this listing
 * (presumably when @dev has no in_device - confirm).
 */
1630 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1632 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1633 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1637 return -EAFNOSUPPORT;
1639 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1642 if (tb[IFLA_INET_CONF]) {
1643 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1644 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Compute the buffer size for a netconf message carrying attribute @type.
 *
 * The base is the aligned struct netconfmsg plus a 4-byte NETCONFA_IFINDEX
 * attribute.  One further 4-byte attribute is added for each of
 * NETCONFA_FORWARDING, NETCONFA_RP_FILTER and NETCONFA_MC_FORWARDING that
 * @type selects; @type == -1 selects all three.
 */
1650 static int inet_netconf_msgsize_devconf(int type)
1652 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1653 + nla_total_size(4); /* NETCONFA_IFINDEX */
1655 /* type -1 is used for ALL */
1656 if (type == -1 || type == NETCONFA_FORWARDING)
1657 size += nla_total_size(4);
1658 if (type == -1 || type == NETCONFA_RP_FILTER)
1659 size += nla_total_size(4);
1660 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1661 size += nla_total_size(4);
/*
 * Build one netconf message in @skb for the configuration @devconf.
 *
 * The message carries a struct netconfmsg with family AF_INET, a
 * NETCONFA_IFINDEX attribute holding @ifindex, and the FORWARDING,
 * RP_FILTER and MC_FORWARDING values selected by the type argument
 * (-1 selects all three).  Any failing attribute put jumps to
 * nla_put_failure, where the message is cancelled.  On success the result
 * of nlmsg_end() is returned.
 * NOTE(review): the tail of the parameter list and the error return
 * statements are on lines not visible in this listing.
 */
1666 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1667 struct ipv4_devconf *devconf, u32 portid,
1668 u32 seq, int event, unsigned int flags,
1671 struct nlmsghdr *nlh;
1672 struct netconfmsg *ncm;
1674 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1679 ncm = nlmsg_data(nlh);
1680 ncm->ncm_family = AF_INET;
1682 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1683 goto nla_put_failure;
1685 /* type -1 is used for ALL */
1686 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1687 nla_put_s32(skb, NETCONFA_FORWARDING,
1688 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1689 goto nla_put_failure;
1690 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1691 nla_put_s32(skb, NETCONFA_RP_FILTER,
1692 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1693 goto nla_put_failure;
1694 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1695 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1696 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1697 goto nla_put_failure;
1699 return nlmsg_end(skb, nlh);
1702 nlmsg_cancel(skb, nlh);
/*
 * Send an RTM_NEWNETCONF notification for attribute @type of @devconf to
 * the RTNLGRP_IPV4_NETCONF group in @net.
 *
 * The skb is sized by inet_netconf_msgsize_devconf(@type), allocated with
 * GFP_ATOMIC and filled with portid 0 and sequence 0.
 * rtnl_set_sk_err() reports err on the same group (presumably on the
 * failure path - the label is not visible here).
 */
1706 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1707 struct ipv4_devconf *devconf)
1709 struct sk_buff *skb;
1712 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1716 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1717 RTM_NEWNETCONF, 0, type);
1719 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1720 WARN_ON(err == -EMSGSIZE);
1724 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1728 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Netlink policy used when parsing netconf requests: the IFINDEX,
 * FORWARDING and RP_FILTER attributes are each declared with a length of
 * sizeof(int).
 * NOTE(review): no NETCONFA_MC_FORWARDING entry appears among the visible
 * lines although inet_netconf_fill_devconf() emits that attribute -
 * confirm whether the omission is intentional.
 */
1731 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1732 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1733 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1734 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/*
 * Handler for a netconf "get" request (registered for RTM_GETNETCONF in
 * devinet_init()).
 *
 * The request is parsed against devconf_ipv4_policy and NETCONFA_IFINDEX
 * is tested for presence.  The ifindex selects the configuration to report:
 *   NETCONFA_IFINDEX_ALL     -> net->ipv4.devconf_all
 *   NETCONFA_IFINDEX_DEFAULT -> net->ipv4.devconf_dflt
 *   otherwise                -> the cnf of the in_device of the device
 *                               found by __dev_get_by_index()
 * The reply buffer is sized for all attributes (type -1), filled as an
 * RTM_NEWNETCONF message and unicast back to the requester's portid.
 * NOTE(review): the error returns for the individual checks are on lines
 * not visible in this listing.
 */
1737 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1738 struct nlmsghdr *nlh)
1740 struct net *net = sock_net(in_skb->sk);
1741 struct nlattr *tb[NETCONFA_MAX+1];
1742 struct netconfmsg *ncm;
1743 struct sk_buff *skb;
1744 struct ipv4_devconf *devconf;
1745 struct in_device *in_dev;
1746 struct net_device *dev;
1750 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1751 devconf_ipv4_policy);
1756 if (!tb[NETCONFA_IFINDEX])
1759 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1761 case NETCONFA_IFINDEX_ALL:
1762 devconf = net->ipv4.devconf_all;
1764 case NETCONFA_IFINDEX_DEFAULT:
1765 devconf = net->ipv4.devconf_dflt;
1768 dev = __dev_get_by_index(net, ifindex);
1771 in_dev = __in_dev_get_rtnl(dev);
1774 devconf = &in_dev->cnf;
1779 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1783 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1784 NETLINK_CB(in_skb).portid,
1785 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1788 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1789 WARN_ON(err == -EMSGSIZE);
1793 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * Netlink dump callback for netconf (registered for RTM_GETNETCONF in
 * devinet_init()).
 *
 * Walks the net->dev_index_head hash buckets and emits one message per
 * device via inet_netconf_fill_devconf().  Two extra positions past the
 * hash table are used for the non-device entries:
 *   h == NETDEV_HASHENTRIES     -> NETCONFA_IFINDEX_ALL (devconf_all)
 *   h == NETDEV_HASHENTRIES + 1 -> NETCONFA_IFINDEX_DEFAULT (devconf_dflt)
 * both sent as RTM_NEWNETCONF with NLM_F_MULTI.
 * NOTE(review): cursor initialisation, loop exits, cursor save and the
 * return value are on lines not visible in this listing.
 */
1798 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1799 struct netlink_callback *cb)
1801 struct net *net = sock_net(skb->sk);
1804 struct net_device *dev;
1805 struct in_device *in_dev;
1806 struct hlist_head *head;
1809 s_idx = idx = cb->args[1];
/* The device start index only applies to the first bucket; it is reset to 0 afterwards. */
1811 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1813 head = &net->dev_index_head[h];
/* cb->seq mixes dev_addr_genid with a second operand that is not visible here. */
1815 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1817 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1820 in_dev = __in_dev_get_rcu(dev);
1824 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1826 NETLINK_CB(cb->skb).portid,
1834 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1840 if (h == NETDEV_HASHENTRIES) {
1841 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1842 net->ipv4.devconf_all,
1843 NETLINK_CB(cb->skb).portid,
1845 RTM_NEWNETCONF, NLM_F_MULTI,
1851 if (h == NETDEV_HASHENTRIES + 1) {
1852 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1853 net->ipv4.devconf_dflt,
1854 NETLINK_CB(cb->skb).portid,
1856 RTM_NEWNETCONF, NLM_F_MULTI,
1869 #ifdef CONFIG_SYSCTL
/*
 * Propagate the "default" value of configuration slot @i to every device
 * in @net whose in_device has not had bit @i set in cnf.state (the bit
 * that devinet_conf_proc() sets when a value is written).
 */
1871 static void devinet_copy_dflt_conf(struct net *net, int i)
1873 struct net_device *dev;
1876 for_each_netdev_rcu(net, dev) {
1877 struct in_device *in_dev;
1879 in_dev = __in_dev_get_rcu(dev);
1880 if (in_dev && !test_bit(i, in_dev->cnf.state))
1881 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Propagate the "all" FORWARDING setting of @net.
 *
 * The "all" ACCEPT_REDIRECTS value is set to the inverse of the forwarding
 * flag and the "default" FORWARDING value is set to match it; a
 * NETCONFA_FORWARDING notification is sent for both pseudo-interfaces.
 * Every device is then visited: dev_disable_lro() is called, the device's
 * FORWARDING value is updated and a per-device notification is sent.
 * NOTE(review): the conditions guarding the per-device calls are on lines
 * not visible in this listing.
 */
1886 /* called with RTNL locked */
1887 static void inet_forward_change(struct net *net)
1889 struct net_device *dev;
1890 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1892 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1893 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1894 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1895 NETCONFA_IFINDEX_ALL,
1896 net->ipv4.devconf_all);
1897 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1898 NETCONFA_IFINDEX_DEFAULT,
1899 net->ipv4.devconf_dflt);
1901 for_each_netdev(net, dev) {
1902 struct in_device *in_dev;
1904 dev_disable_lro(dev);
1906 in_dev = __in_dev_get_rcu(dev);
1908 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1909 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1910 dev->ifindex, &in_dev->cnf);
/*
 * Generic sysctl handler for the per-device IPv4 configuration integers.
 *
 * The value is read or written with proc_dointvec(); the values before
 * and after that call are captured so changes can be detected.
 * ctl->extra1 is the owning struct ipv4_devconf and ctl->extra2 the
 * network namespace.  The follow-up actions visible here are:
 *  - the slot's bit is set in cnf->state;
 *  - for the "default" configuration the value is copied to devices via
 *    devinet_copy_dflt_conf();
 *  - for ACCEPT_LOCAL and ROUTE_LOCALNET, a change from non-zero to zero
 *    flushes the route cache;
 *  - for RP_FILTER, a changed value triggers a NETCONFA_RP_FILTER
 *    notification for the matching ifindex.
 * NOTE(review): the condition enclosing these actions (presumably a write)
 * and the return statement are on lines not visible in this listing.
 */
1916 static int devinet_conf_proc(ctl_table *ctl, int write,
1917 void __user *buffer,
1918 size_t *lenp, loff_t *ppos)
1920 int old_value = *(int *)ctl->data;
1921 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1922 int new_value = *(int *)ctl->data;
1925 struct ipv4_devconf *cnf = ctl->extra1;
1926 struct net *net = ctl->extra2;
/* Index of this entry within cnf->data[], derived by pointer difference. */
1927 int i = (int *)ctl->data - cnf->data;
1929 set_bit(i, cnf->state);
1931 if (cnf == net->ipv4.devconf_dflt)
1932 devinet_copy_dflt_conf(net, i);
1933 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1934 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1935 if ((new_value == 0) && (old_value != 0))
1936 rt_cache_flush(net);
1937 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1938 new_value != old_value) {
/* Pick the ifindex to report: a pseudo-index for default/all, else the real device. */
1941 if (cnf == net->ipv4.devconf_dflt)
1942 ifindex = NETCONFA_IFINDEX_DEFAULT;
1943 else if (cnf == net->ipv4.devconf_all)
1944 ifindex = NETCONFA_IFINDEX_ALL;
1946 struct in_device *idev =
1947 container_of(cnf, struct in_device,
1949 ifindex = idev->dev->ifindex;
1951 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/*
 * Sysctl handler for the "forwarding" entries and for ip_forward.
 *
 * After proc_dointvec(), a write that changed the value is acted upon:
 *  - for anything other than the "default" FORWARDING slot, the RTNL lock
 *    is taken with rtnl_trylock(); if that fails the syscall is restarted
 *    via restart_syscall();
 *  - for the "all" slot the change is propagated with
 *    inet_forward_change();
 *  - for a per-device slot, dev_disable_lro() is called on the device and
 *    a NETCONFA_FORWARDING notification is sent;
 *  - the route cache is flushed;
 *  - a NETCONFA_FORWARDING notification for NETCONFA_IFINDEX_DEFAULT is
 *    sent (presumably on the "default" slot branch).
 * NOTE(review): the saved old value, some conditions, the unlock and the
 * return statement are on lines not visible in this listing.
 */
1959 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1960 void __user *buffer,
1961 size_t *lenp, loff_t *ppos)
1963 int *valp = ctl->data;
1966 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1968 if (write && *valp != val) {
1969 struct net *net = ctl->extra2;
1971 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1972 if (!rtnl_trylock()) {
1973 /* Restore the original values before restarting */
1976 return restart_syscall();
1978 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1979 inet_forward_change(net);
1981 struct ipv4_devconf *cnf = ctl->extra1;
1982 struct in_device *idev =
1983 container_of(cnf, struct in_device, cnf);
1985 dev_disable_lro(idev->dev);
1986 inet_netconf_notify_devconf(net,
1987 NETCONFA_FORWARDING,
1992 rt_cache_flush(net);
1994 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1995 NETCONFA_IFINDEX_DEFAULT,
1996 net->ipv4.devconf_dflt);
/*
 * Sysctl handler that stores an integer with proc_dointvec() and flushes
 * the route cache of the namespace in ctl->extra2 when a write changed the
 * value.  (The declaration of the saved previous value "val" and the
 * return statement are on lines not visible in this listing.)
 */
2002 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2003 void __user *buffer,
2004 size_t *lenp, loff_t *ppos)
2006 int *valp = ctl->data;
2008 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2009 struct net *net = ctl->extra2;
2011 if (write && *valp != val)
2012 rt_cache_flush(net);
/*
 * Build one ctl_table initialiser for configuration attribute @attr.
 * The entry's .data points at slot IPV4_DEVCONF_<attr> - 1 of the template
 * ipv4_devconf.data[], .maxlen is sizeof(int), .proc_handler is @proc and
 * .extra1 is the template ipv4_devconf.  (How @name and @mval are used is
 * on macro lines not visible in this listing.)
 */
2017 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2020 .data = ipv4_devconf.data + \
2021 IPV4_DEVCONF_ ## attr - 1, \
2022 .maxlen = sizeof(int), \
2024 .proc_handler = proc, \
2025 .extra1 = &ipv4_devconf, \
/* Entry with mode value 0644 handled by devinet_conf_proc(). */
2028 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2029 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
/* Entry with mode value 0444 handled by devinet_conf_proc(). */
2031 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2032 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
/* Entry with mode value 0644 and a caller-supplied handler @proc. */
2034 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2035 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
/* Entry handled by ipv4_doint_and_flush(), via DEVINET_SYSCTL_COMPLEX_ENTRY. */
2037 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2038 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template sysctl table for the per-configuration IPv4 entries.
 *
 * The structure pairs a registration header with an array of
 * __IPV4_DEVCONF_MAX ctl_table entries.  The static instance below is
 * duplicated and rebased by __devinet_sysctl_register().  "forwarding"
 * uses devinet_sysctl_forward(), "mc_forwarding" is declared with the RO
 * macro, and the FLUSHING entries use ipv4_doint_and_flush().
 */
2040 static struct devinet_sysctl_table {
2041 struct ctl_table_header *sysctl_header;
2042 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2043 } devinet_sysctl = {
2045 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2046 devinet_sysctl_forward),
2047 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2049 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2050 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2051 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2052 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2053 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2054 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2055 "accept_source_route"),
2056 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2057 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2058 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2059 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2060 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2061 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2062 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2063 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2064 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2065 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2066 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2067 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2068 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2070 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2071 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2072 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2073 "force_igmp_version"),
2074 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2075 "promote_secondaries"),
2076 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the sysctl directory "net/ipv4/conf/<dev_name>" for the
 * configuration block @p in namespace @net.
 *
 * The template devinet_sysctl is duplicated with kmemdup(); each entry
 * except the last array slot is rebased from the template ipv4_devconf
 * onto @p and gets extra1 = @p and extra2 = @net.  The table is then
 * registered with register_net_sysctl().
 * NOTE(review): the allocation-failure and registration-failure handling,
 * and the return values, are on lines not visible in this listing.
 */
2081 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2082 struct ipv4_devconf *p)
2085 struct devinet_sysctl_table *t;
/* Sized for the fixed prefix (including its NUL) plus IFNAMSIZ name bytes. */
2086 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2088 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2092 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
/* Shift .data by the byte offset between @p and the template structure. */
2093 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2094 t->devinet_vars[i].extra1 = p;
2095 t->devinet_vars[i].extra2 = net;
2098 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2100 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2101 if (!t->sysctl_header)
/*
 * Unregister the sysctl table attached to @cnf (found via cnf->sysctl).
 * Any NULL check or freeing of the table is on lines not visible in this
 * listing.
 */
2113 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2115 struct devinet_sysctl_table *t = cnf->sysctl;
2121 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries of one in_device: first the neighbour
 * parameters (under "ipv4") via neigh_sysctl_register(), then the device's
 * own entries via __devinet_sysctl_register(), using the device's
 * namespace and name.
 */
2125 static void devinet_sysctl_register(struct in_device *idev)
2127 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2128 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Undo devinet_sysctl_register(): drop the device's configuration entries,
 * then its neighbour-parameter entries.
 */
2132 static void devinet_sysctl_unregister(struct in_device *idev)
2134 __devinet_sysctl_unregister(&idev->cnf);
2135 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Template table for the "ip_forward" sysctl.  It is backed by the
 * FORWARDING slot of the template ipv4_devconf, handled by
 * devinet_sysctl_forward(), and bound to init_net through extra2.
 * devinet_init_net() duplicates and rebinds it for other namespaces.
 */
2138 static struct ctl_table ctl_forward_entry[] = {
2140 .procname = "ip_forward",
2141 .data = &ipv4_devconf.data[
2142 IPV4_DEVCONF_FORWARDING - 1],
2143 .maxlen = sizeof(int),
2145 .proc_handler = devinet_sysctl_forward,
2146 .extra1 = &ipv4_devconf,
2147 .extra2 = &init_net,
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt structures
 * directly.  Any other namespace gets kmemdup() copies of both and, with
 * CONFIG_SYSCTL, a private copy of ctl_forward_entry rebound to its own
 * "all" configuration and namespace.  With CONFIG_SYSCTL the "all" and
 * "default" sysctl directories and the "net/ipv4" forwarding table are
 * registered.  Finally the configurations are stored in
 * net->ipv4.devconf_all / devconf_dflt.
 *
 * The tail of the function is the unwind path: it unregisters the sysctl
 * tables and releases only the objects that are not the static templates.
 * NOTE(review): the error labels, the kfree() calls and the return
 * statements are on lines not visible in this listing.
 */
2153 static __net_init int devinet_init_net(struct net *net)
2156 struct ipv4_devconf *all, *dflt;
2157 #ifdef CONFIG_SYSCTL
2158 struct ctl_table *tbl = ctl_forward_entry;
2159 struct ctl_table_header *forw_hdr;
2163 all = &ipv4_devconf;
2164 dflt = &ipv4_devconf_dflt;
2166 if (!net_eq(net, &init_net)) {
2167 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2171 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2173 goto err_alloc_dflt;
2175 #ifdef CONFIG_SYSCTL
2176 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Point the private ip_forward entry at this namespace's "all" config. */
2180 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2181 tbl[0].extra1 = all;
2182 tbl[0].extra2 = net;
2186 #ifdef CONFIG_SYSCTL
2187 err = __devinet_sysctl_register(net, "all", all);
2191 err = __devinet_sysctl_register(net, "default", dflt);
2196 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2197 if (forw_hdr == NULL)
2199 net->ipv4.forw_hdr = forw_hdr;
2202 net->ipv4.devconf_all = all;
2203 net->ipv4.devconf_dflt = dflt;
2206 #ifdef CONFIG_SYSCTL
2208 __devinet_sysctl_unregister(dflt);
2210 __devinet_sysctl_unregister(all);
/* Only dynamically duplicated objects are released, never the templates. */
2212 if (tbl != ctl_forward_entry)
2216 if (dflt != &ipv4_devconf_dflt)
2219 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown.  With CONFIG_SYSCTL the forwarding table and
 * the "default" and "all" sysctl directories are unregistered; the table
 * pointer is read from the header before the header is unregistered.
 * The "default" and "all" configuration blocks are then freed.
 * (What is done with tbl afterwards is on a line not visible in this
 * listing.)
 */
2225 static __net_exit void devinet_exit_net(struct net *net)
2227 #ifdef CONFIG_SYSCTL
2228 struct ctl_table *tbl;
2230 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2231 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2232 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2233 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2236 kfree(net->ipv4.devconf_dflt);
2237 kfree(net->ipv4.devconf_all);
/* Per-namespace hooks: devinet_init_net() on creation, devinet_exit_net() on teardown. */
2240 static __net_initdata struct pernet_operations devinet_ops = {
2241 .init = devinet_init_net,
2242 .exit = devinet_exit_net,
/*
 * rtnetlink address-family operations: the size, fill, validate and set
 * callbacks for the per-link AF_INET attributes (IFLA_INET_CONF).
 * Registered in devinet_init() via rtnl_af_register().
 */
2245 static struct rtnl_af_ops inet_af_ops = {
2247 .fill_link_af = inet_fill_link_af,
2248 .get_link_af_size = inet_get_link_af_size,
2249 .validate_link_af = inet_validate_link_af,
2250 .set_link_af = inet_set_link_af,
/*
 * Boot-time initialisation of the IPv4 device layer.  The visible steps:
 * initialise every bucket of inet_addr_lst, register the per-namespace
 * operations, the PF_INET gifconf handler and the netdevice notifier,
 * schedule check_lifetime_work with zero delay, register the AF_INET link
 * operations, and register the rtnetlink handlers for RTM_NEWADDR,
 * RTM_DELADDR, RTM_GETADDR (dump only) and RTM_GETNETCONF (get and dump).
 */
2253 void __init devinet_init(void)
2257 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2258 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2260 register_pernet_subsys(&devinet_ops);
2262 register_gifconf(PF_INET, inet_gifconf);
2263 register_netdevice_notifier(&ip_netdev_notifier);
2265 schedule_delayed_work(&check_lifetime_work, 0);
2267 rtnl_af_register(&inet_af_ops);
2269 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2270 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2271 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2272 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2273 inet_netconf_dump_devconf, NULL);