2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
81 static struct ipv4_devconf ipv4_devconf_dflt = {
83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
/* Access devconf attribute @attr in @net's default table (net->ipv4.devconf_dflt) through IPV4_DEVCONF(). */
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Attribute policy handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr().  Local/peer/broadcast addresses and the flags are
 * NLA_U32, the label is a string limited to IFNAMSIZ - 1, and
 * IFA_CACHEINFO has .len set to sizeof(struct ifa_cacheinfo).
 */
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 [IFA_LOCAL] = { .type = NLA_U32 },
98 [IFA_ADDRESS] = { .type = NLA_U32 },
99 [IFA_BROADCAST] = { .type = NLA_U32 },
100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
102 [IFA_FLAGS] = { .type = NLA_U32 },
/*
 * Global hash of in_ifaddr entries: 1 << IN4_ADDR_HSIZE_SHIFT (256)
 * buckets, indexed by inet_addr_hash().  inet_hash_insert() and
 * inet_hash_remove() modify the lists under inet_addr_hash_lock; lookups
 * in this file walk them with the _rcu list iterators.
 */
105 #define IN4_ADDR_HSIZE_SHIFT 8
106 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static DEFINE_SPINLOCK(inet_addr_hash_lock);
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
113 u32 val = (__force u32) addr ^ net_hash_mix(net);
115 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
120 u32 hash = inet_addr_hash(net, ifa->ifa_local);
122 spin_lock(&inet_addr_hash_lock);
123 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 spin_unlock(&inet_addr_hash_lock);
127 static void inet_hash_remove(struct in_ifaddr *ifa)
129 spin_lock(&inet_addr_hash_lock);
130 hlist_del_init_rcu(&ifa->hash);
131 spin_unlock(&inet_addr_hash_lock);
135 * __ip_dev_find - find the first device with a given source address.
136 * @net: the net namespace
137 * @addr: the source address
138 * @devref: if true, take a reference on the found device
140 * If a caller uses devref=false, it should be protected by RCU, or RTNL
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
144 u32 hash = inet_addr_hash(net, addr);
145 struct net_device *result = NULL;
146 struct in_ifaddr *ifa;
149 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150 if (ifa->ifa_local == addr) {
151 struct net_device *dev = ifa->ifa_dev->dev;
153 if (!net_eq(dev_net(dev), net))
160 struct flowi4 fl4 = { .daddr = addr };
161 struct fib_result res = { 0 };
162 struct fib_table *local;
164 /* Fallback to FIB local table so that communication
165 * over loopback subnets works.
167 local = fib_get_table(net, RT_TABLE_LOCAL);
169 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170 res.type == RTN_LOCAL)
171 result = FIB_RES_DEV(res);
173 if (result && devref)
178 EXPORT_SYMBOL(__ip_dev_find);
180 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
182 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
186 static void devinet_sysctl_register(struct in_device *idev);
187 static void devinet_sysctl_unregister(struct in_device *idev);
189 static void devinet_sysctl_register(struct in_device *idev)
192 static void devinet_sysctl_unregister(struct in_device *idev)
197 /* Locks all the inet devices. */
199 static struct in_ifaddr *inet_alloc_ifa(void)
201 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
204 static void inet_rcu_free_ifa(struct rcu_head *head)
206 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
208 in_dev_put(ifa->ifa_dev);
212 static void inet_free_ifa(struct in_ifaddr *ifa)
214 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
217 void in_dev_finish_destroy(struct in_device *idev)
219 struct net_device *dev = idev->dev;
221 WARN_ON(idev->ifa_list);
222 WARN_ON(idev->mc_list);
223 kfree(rcu_dereference_protected(idev->mc_hash, 1));
224 #ifdef NET_REFCNT_DEBUG
225 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
229 pr_err("Freeing alive in_device %p\n", idev);
233 EXPORT_SYMBOL(in_dev_finish_destroy);
235 static struct in_device *inetdev_init(struct net_device *dev)
237 struct in_device *in_dev;
241 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
244 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245 sizeof(in_dev->cnf));
246 in_dev->cnf.sysctl = NULL;
248 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249 if (!in_dev->arp_parms)
251 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252 dev_disable_lro(dev);
253 /* Reference in_dev->dev */
255 /* Account for reference dev->ip_ptr (below) */
258 devinet_sysctl_register(in_dev);
259 ip_mc_init_dev(in_dev);
260 if (dev->flags & IFF_UP)
263 /* we can receive as soon as ip_ptr is set -- do this last */
264 rcu_assign_pointer(dev->ip_ptr, in_dev);
273 static void in_dev_rcu_put(struct rcu_head *head)
275 struct in_device *idev = container_of(head, struct in_device, rcu_head);
279 static void inetdev_destroy(struct in_device *in_dev)
281 struct in_ifaddr *ifa;
282 struct net_device *dev;
290 ip_mc_destroy_dev(in_dev);
292 while ((ifa = in_dev->ifa_list) != NULL) {
293 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
297 RCU_INIT_POINTER(dev->ip_ptr, NULL);
299 devinet_sysctl_unregister(in_dev);
300 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
303 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
309 for_primary_ifa(in_dev) {
310 if (inet_ifa_match(a, ifa)) {
311 if (!b || inet_ifa_match(b, ifa)) {
316 } endfor_ifa(in_dev);
321 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322 int destroy, struct nlmsghdr *nlh, u32 portid)
324 struct in_ifaddr *promote = NULL;
325 struct in_ifaddr *ifa, *ifa1 = *ifap;
326 struct in_ifaddr *last_prim = in_dev->ifa_list;
327 struct in_ifaddr *prev_prom = NULL;
328 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
332 /* 1. Deleting primary ifaddr forces deletion of all secondaries
333 * unless alias promotion is set
336 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
339 while ((ifa = *ifap1) != NULL) {
340 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341 ifa1->ifa_scope <= ifa->ifa_scope)
344 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345 ifa1->ifa_mask != ifa->ifa_mask ||
346 !inet_ifa_match(ifa1->ifa_address, ifa)) {
347 ifap1 = &ifa->ifa_next;
353 inet_hash_remove(ifa);
354 *ifap1 = ifa->ifa_next;
356 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
357 blocking_notifier_call_chain(&inetaddr_chain,
367 /* On promotion all secondaries from subnet are changing
368 * the primary IP, we must remove all their routes silently
369 * and later add them back with new prefsrc. Do this
370 * while all addresses are on the device list.
372 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373 if (ifa1->ifa_mask == ifa->ifa_mask &&
374 inet_ifa_match(ifa1->ifa_address, ifa))
375 fib_del_ifaddr(ifa, ifa1);
380 *ifap = ifa1->ifa_next;
381 inet_hash_remove(ifa1);
383 /* 3. Announce address deletion */
385 /* Send message first, then call notifier.
386 At first sight, FIB update triggered by notifier
387 will refer to already deleted ifaddr, that could confuse
388 netlink listeners. It is not true: look, gated sees
389 that route deleted and if it still thinks that ifaddr
390 is valid, it will try to restore deleted routes... Grr.
391 So that, this order is correct.
393 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
394 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
397 struct in_ifaddr *next_sec = promote->ifa_next;
400 prev_prom->ifa_next = promote->ifa_next;
401 promote->ifa_next = last_prim->ifa_next;
402 last_prim->ifa_next = promote;
405 promote->ifa_flags &= ~IFA_F_SECONDARY;
406 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
407 blocking_notifier_call_chain(&inetaddr_chain,
409 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
410 if (ifa1->ifa_mask != ifa->ifa_mask ||
411 !inet_ifa_match(ifa1->ifa_address, ifa))
421 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
424 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
427 static void check_lifetime(struct work_struct *work);
429 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
431 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
434 struct in_device *in_dev = ifa->ifa_dev;
435 struct in_ifaddr *ifa1, **ifap, **last_primary;
439 if (!ifa->ifa_local) {
444 ifa->ifa_flags &= ~IFA_F_SECONDARY;
445 last_primary = &in_dev->ifa_list;
447 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
448 ifap = &ifa1->ifa_next) {
449 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
450 ifa->ifa_scope <= ifa1->ifa_scope)
451 last_primary = &ifa1->ifa_next;
452 if (ifa1->ifa_mask == ifa->ifa_mask &&
453 inet_ifa_match(ifa1->ifa_address, ifa)) {
454 if (ifa1->ifa_local == ifa->ifa_local) {
458 if (ifa1->ifa_scope != ifa->ifa_scope) {
462 ifa->ifa_flags |= IFA_F_SECONDARY;
466 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
467 net_srandom(ifa->ifa_local);
471 ifa->ifa_next = *ifap;
474 inet_hash_insert(dev_net(in_dev->dev), ifa);
476 cancel_delayed_work(&check_lifetime_work);
477 schedule_delayed_work(&check_lifetime_work, 0);
479 /* Send message first, then call notifier.
480 Notifier will trigger FIB update, so that
481 listeners of netlink will know about new ifaddr */
482 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
483 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
488 static int inet_insert_ifa(struct in_ifaddr *ifa)
490 return __inet_insert_ifa(ifa, NULL, 0);
493 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
495 struct in_device *in_dev = __in_dev_get_rtnl(dev);
503 ipv4_devconf_setall(in_dev);
504 neigh_parms_data_state_setall(in_dev->arp_parms);
505 if (ifa->ifa_dev != in_dev) {
506 WARN_ON(ifa->ifa_dev);
508 ifa->ifa_dev = in_dev;
510 if (ipv4_is_loopback(ifa->ifa_local))
511 ifa->ifa_scope = RT_SCOPE_HOST;
512 return inet_insert_ifa(ifa);
515 /* Caller must hold RCU or RTNL :
516 * We don't take a reference on the found in_device
518 struct in_device *inetdev_by_index(struct net *net, int ifindex)
520 struct net_device *dev;
521 struct in_device *in_dev = NULL;
524 dev = dev_get_by_index_rcu(net, ifindex);
526 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
530 EXPORT_SYMBOL(inetdev_by_index);
532 /* Called only from RTNL semaphored context. No locks. */
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
539 for_primary_ifa(in_dev) {
540 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
542 } endfor_ifa(in_dev);
546 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
548 struct net *net = sock_net(skb->sk);
549 struct nlattr *tb[IFA_MAX+1];
550 struct in_device *in_dev;
551 struct ifaddrmsg *ifm;
552 struct in_ifaddr *ifa, **ifap;
557 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
561 ifm = nlmsg_data(nlh);
562 in_dev = inetdev_by_index(net, ifm->ifa_index);
563 if (in_dev == NULL) {
568 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
569 ifap = &ifa->ifa_next) {
571 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
574 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
577 if (tb[IFA_ADDRESS] &&
578 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
579 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
582 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
586 err = -EADDRNOTAVAIL;
591 #define INFINITY_LIFE_TIME 0xFFFFFFFF
593 static void check_lifetime(struct work_struct *work)
595 unsigned long now, next, next_sec, next_sched;
596 struct in_ifaddr *ifa;
597 struct hlist_node *n;
601 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
603 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
604 bool change_needed = false;
607 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
610 if (ifa->ifa_flags & IFA_F_PERMANENT)
613 /* We try to batch several events at once. */
614 age = (now - ifa->ifa_tstamp +
615 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
617 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
618 age >= ifa->ifa_valid_lft) {
619 change_needed = true;
620 } else if (ifa->ifa_preferred_lft ==
621 INFINITY_LIFE_TIME) {
623 } else if (age >= ifa->ifa_preferred_lft) {
624 if (time_before(ifa->ifa_tstamp +
625 ifa->ifa_valid_lft * HZ, next))
626 next = ifa->ifa_tstamp +
627 ifa->ifa_valid_lft * HZ;
629 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
630 change_needed = true;
631 } else if (time_before(ifa->ifa_tstamp +
632 ifa->ifa_preferred_lft * HZ,
634 next = ifa->ifa_tstamp +
635 ifa->ifa_preferred_lft * HZ;
642 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
645 if (ifa->ifa_flags & IFA_F_PERMANENT)
648 /* We try to batch several events at once. */
649 age = (now - ifa->ifa_tstamp +
650 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
652 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
653 age >= ifa->ifa_valid_lft) {
654 struct in_ifaddr **ifap;
656 for (ifap = &ifa->ifa_dev->ifa_list;
657 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
659 inet_del_ifa(ifa->ifa_dev,
664 } else if (ifa->ifa_preferred_lft !=
665 INFINITY_LIFE_TIME &&
666 age >= ifa->ifa_preferred_lft &&
667 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
668 ifa->ifa_flags |= IFA_F_DEPRECATED;
669 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
675 next_sec = round_jiffies_up(next);
678 /* If rounded timeout is accurate enough, accept it. */
679 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
680 next_sched = next_sec;
683 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
684 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
685 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
687 schedule_delayed_work(&check_lifetime_work, next_sched - now);
690 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
693 unsigned long timeout;
695 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
697 timeout = addrconf_timeout_fixup(valid_lft, HZ);
698 if (addrconf_finite_timeout(timeout))
699 ifa->ifa_valid_lft = timeout;
701 ifa->ifa_flags |= IFA_F_PERMANENT;
703 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
704 if (addrconf_finite_timeout(timeout)) {
706 ifa->ifa_flags |= IFA_F_DEPRECATED;
707 ifa->ifa_preferred_lft = timeout;
709 ifa->ifa_tstamp = jiffies;
710 if (!ifa->ifa_cstamp)
711 ifa->ifa_cstamp = ifa->ifa_tstamp;
714 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
715 __u32 *pvalid_lft, __u32 *pprefered_lft)
717 struct nlattr *tb[IFA_MAX+1];
718 struct in_ifaddr *ifa;
719 struct ifaddrmsg *ifm;
720 struct net_device *dev;
721 struct in_device *in_dev;
724 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
728 ifm = nlmsg_data(nlh);
730 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
733 dev = __dev_get_by_index(net, ifm->ifa_index);
738 in_dev = __in_dev_get_rtnl(dev);
743 ifa = inet_alloc_ifa();
746 * A potential indev allocation can be left alive, it stays
747 * assigned to its device and is destroyed with it.
751 ipv4_devconf_setall(in_dev);
752 neigh_parms_data_state_setall(in_dev->arp_parms);
755 if (tb[IFA_ADDRESS] == NULL)
756 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
758 INIT_HLIST_NODE(&ifa->hash);
759 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
760 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
761 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
763 ifa->ifa_scope = ifm->ifa_scope;
764 ifa->ifa_dev = in_dev;
766 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
767 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
769 if (tb[IFA_BROADCAST])
770 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
773 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
775 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
777 if (tb[IFA_CACHEINFO]) {
778 struct ifa_cacheinfo *ci;
780 ci = nla_data(tb[IFA_CACHEINFO]);
781 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
785 *pvalid_lft = ci->ifa_valid;
786 *pprefered_lft = ci->ifa_prefered;
797 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
799 struct in_device *in_dev = ifa->ifa_dev;
800 struct in_ifaddr *ifa1, **ifap;
805 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
806 ifap = &ifa1->ifa_next) {
807 if (ifa1->ifa_mask == ifa->ifa_mask &&
808 inet_ifa_match(ifa1->ifa_address, ifa) &&
809 ifa1->ifa_local == ifa->ifa_local)
815 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
817 struct net *net = sock_net(skb->sk);
818 struct in_ifaddr *ifa;
819 struct in_ifaddr *ifa_existing;
820 __u32 valid_lft = INFINITY_LIFE_TIME;
821 __u32 prefered_lft = INFINITY_LIFE_TIME;
825 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
829 ifa_existing = find_matching_ifa(ifa);
831 /* It would be best to check for !NLM_F_CREATE here but
832 * userspace already relies on not having to provide this.
834 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
835 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
839 if (nlh->nlmsg_flags & NLM_F_EXCL ||
840 !(nlh->nlmsg_flags & NLM_F_REPLACE))
843 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
844 cancel_delayed_work(&check_lifetime_work);
845 schedule_delayed_work(&check_lifetime_work, 0);
846 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
847 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
853 * Determine a default network mask, based on the IP address.
856 static int inet_abc_len(__be32 addr)
858 int rc = -1; /* Something else, probably a multicast. */
860 if (ipv4_is_zeronet(addr))
863 __u32 haddr = ntohl(addr);
865 if (IN_CLASSA(haddr))
867 else if (IN_CLASSB(haddr))
869 else if (IN_CLASSC(haddr))
877 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
880 struct sockaddr_in sin_orig;
881 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
882 struct in_device *in_dev;
883 struct in_ifaddr **ifap = NULL;
884 struct in_ifaddr *ifa = NULL;
885 struct net_device *dev;
888 int tryaddrmatch = 0;
891 * Fetch the caller's info block into kernel space
894 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
896 ifr.ifr_name[IFNAMSIZ - 1] = 0;
898 /* save original address for comparison */
899 memcpy(&sin_orig, sin, sizeof(*sin));
901 colon = strchr(ifr.ifr_name, ':');
905 dev_load(net, ifr.ifr_name);
908 case SIOCGIFADDR: /* Get interface address */
909 case SIOCGIFBRDADDR: /* Get the broadcast address */
910 case SIOCGIFDSTADDR: /* Get the destination address */
911 case SIOCGIFNETMASK: /* Get the netmask for the interface */
912 /* Note that these ioctls will not sleep,
913 so that we do not impose a lock.
914 One day we will be forced to put shlock here (I mean SMP)
916 tryaddrmatch = (sin_orig.sin_family == AF_INET);
917 memset(sin, 0, sizeof(*sin));
918 sin->sin_family = AF_INET;
923 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
926 case SIOCSIFADDR: /* Set interface address (and family) */
927 case SIOCSIFBRDADDR: /* Set the broadcast address */
928 case SIOCSIFDSTADDR: /* Set the destination address */
929 case SIOCSIFNETMASK: /* Set the netmask for the interface */
931 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
934 if (sin->sin_family != AF_INET)
945 dev = __dev_get_by_name(net, ifr.ifr_name);
952 in_dev = __in_dev_get_rtnl(dev);
955 /* Matthias Andree */
956 /* compare label and address (4.4BSD style) */
957 /* note: we only do this for a limited set of ioctls
958 and only if the original address family was AF_INET.
959 This is checked above. */
960 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
961 ifap = &ifa->ifa_next) {
962 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
963 sin_orig.sin_addr.s_addr ==
969 /* we didn't get a match, maybe the application is
970 4.3BSD-style and passed in junk so we fall back to
971 comparing just the label */
973 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
974 ifap = &ifa->ifa_next)
975 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
980 ret = -EADDRNOTAVAIL;
981 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
985 case SIOCGIFADDR: /* Get interface address */
986 sin->sin_addr.s_addr = ifa->ifa_local;
989 case SIOCGIFBRDADDR: /* Get the broadcast address */
990 sin->sin_addr.s_addr = ifa->ifa_broadcast;
993 case SIOCGIFDSTADDR: /* Get the destination address */
994 sin->sin_addr.s_addr = ifa->ifa_address;
997 case SIOCGIFNETMASK: /* Get the netmask for the interface */
998 sin->sin_addr.s_addr = ifa->ifa_mask;
1003 ret = -EADDRNOTAVAIL;
1007 if (!(ifr.ifr_flags & IFF_UP))
1008 inet_del_ifa(in_dev, ifap, 1);
1011 ret = dev_change_flags(dev, ifr.ifr_flags);
1014 case SIOCSIFADDR: /* Set interface address (and family) */
1016 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1021 ifa = inet_alloc_ifa();
1024 INIT_HLIST_NODE(&ifa->hash);
1026 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1028 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031 if (ifa->ifa_local == sin->sin_addr.s_addr)
1033 inet_del_ifa(in_dev, ifap, 0);
1034 ifa->ifa_broadcast = 0;
1038 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1040 if (!(dev->flags & IFF_POINTOPOINT)) {
1041 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1042 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1043 if ((dev->flags & IFF_BROADCAST) &&
1044 ifa->ifa_prefixlen < 31)
1045 ifa->ifa_broadcast = ifa->ifa_address |
1048 ifa->ifa_prefixlen = 32;
1049 ifa->ifa_mask = inet_make_mask(32);
1051 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1052 ret = inet_set_ifa(dev, ifa);
1055 case SIOCSIFBRDADDR: /* Set the broadcast address */
1057 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1058 inet_del_ifa(in_dev, ifap, 0);
1059 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1060 inet_insert_ifa(ifa);
1064 case SIOCSIFDSTADDR: /* Set the destination address */
1066 if (ifa->ifa_address == sin->sin_addr.s_addr)
1069 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1072 inet_del_ifa(in_dev, ifap, 0);
1073 ifa->ifa_address = sin->sin_addr.s_addr;
1074 inet_insert_ifa(ifa);
1077 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1080 * The mask we set must be legal.
1083 if (bad_mask(sin->sin_addr.s_addr, 0))
1086 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1087 __be32 old_mask = ifa->ifa_mask;
1088 inet_del_ifa(in_dev, ifap, 0);
1089 ifa->ifa_mask = sin->sin_addr.s_addr;
1090 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1092 /* See if current broadcast address matches
1093 * with current netmask, then recalculate
1094 * the broadcast address. Otherwise it's a
1095 * funny address, so don't touch it since
1096 * the user seems to know what (s)he's doing...
1098 if ((dev->flags & IFF_BROADCAST) &&
1099 (ifa->ifa_prefixlen < 31) &&
1100 (ifa->ifa_broadcast ==
1101 (ifa->ifa_local|~old_mask))) {
1102 ifa->ifa_broadcast = (ifa->ifa_local |
1103 ~sin->sin_addr.s_addr);
1105 inet_insert_ifa(ifa);
1115 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1119 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1121 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1122 struct in_ifaddr *ifa;
1129 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1131 done += sizeof(ifr);
1134 if (len < (int) sizeof(ifr))
1136 memset(&ifr, 0, sizeof(struct ifreq));
1137 strcpy(ifr.ifr_name, ifa->ifa_label);
1139 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1140 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1143 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1147 buf += sizeof(struct ifreq);
1148 len -= sizeof(struct ifreq);
1149 done += sizeof(struct ifreq);
1155 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1158 struct in_device *in_dev;
1159 struct net *net = dev_net(dev);
1162 in_dev = __in_dev_get_rcu(dev);
1166 for_primary_ifa(in_dev) {
1167 if (ifa->ifa_scope > scope)
1169 if (!dst || inet_ifa_match(dst, ifa)) {
1170 addr = ifa->ifa_local;
1174 addr = ifa->ifa_local;
1175 } endfor_ifa(in_dev);
1181 /* Non-loopback addresses on loopback should be preferred
1182 in this case. It is important that lo is the first interface
1185 for_each_netdev_rcu(net, dev) {
1186 in_dev = __in_dev_get_rcu(dev);
1190 for_primary_ifa(in_dev) {
1191 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1192 ifa->ifa_scope <= scope) {
1193 addr = ifa->ifa_local;
1196 } endfor_ifa(in_dev);
1202 EXPORT_SYMBOL(inet_select_addr);
1204 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1205 __be32 local, int scope)
1212 (local == ifa->ifa_local || !local) &&
1213 ifa->ifa_scope <= scope) {
1214 addr = ifa->ifa_local;
1219 same = (!local || inet_ifa_match(local, ifa)) &&
1220 (!dst || inet_ifa_match(dst, ifa));
1224 /* Is the selected addr into dst subnet? */
1225 if (inet_ifa_match(addr, ifa))
1227 /* No, then can we use new local src? */
1228 if (ifa->ifa_scope <= scope) {
1229 addr = ifa->ifa_local;
1232 /* search for large dst subnet for addr */
1236 } endfor_ifa(in_dev);
1238 return same ? addr : 0;
1242 * Confirm that local IP address exists using wildcards:
1243 * - in_dev: only on this interface, 0=any interface
1244 * - dst: only in the same subnet as dst, 0=any dst
1245 * - local: address, 0=autoselect the local address
1246 * - scope: maximum allowed scope value for the local address
1248 __be32 inet_confirm_addr(struct in_device *in_dev,
1249 __be32 dst, __be32 local, int scope)
1252 struct net_device *dev;
1255 if (scope != RT_SCOPE_LINK)
1256 return confirm_addr_indev(in_dev, dst, local, scope);
1258 net = dev_net(in_dev->dev);
1260 for_each_netdev_rcu(net, dev) {
1261 in_dev = __in_dev_get_rcu(dev);
1263 addr = confirm_addr_indev(in_dev, dst, local, scope);
1272 EXPORT_SYMBOL(inet_confirm_addr);
1278 int register_inetaddr_notifier(struct notifier_block *nb)
1280 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1282 EXPORT_SYMBOL(register_inetaddr_notifier);
1284 int unregister_inetaddr_notifier(struct notifier_block *nb)
1286 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1288 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1290 /* Rename ifa_labels for a device name change. Make some effort to preserve
1291 * existing alias numbering and to create unique labels if possible.
1293 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1295 struct in_ifaddr *ifa;
1298 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1299 char old[IFNAMSIZ], *dot;
1301 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1302 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1305 dot = strchr(old, ':');
1307 sprintf(old, ":%d", named);
1310 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1311 strcat(ifa->ifa_label, dot);
1313 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1315 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1319 static bool inetdev_valid_mtu(unsigned int mtu)
1324 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1325 struct in_device *in_dev)
1328 struct in_ifaddr *ifa;
1330 for (ifa = in_dev->ifa_list; ifa;
1331 ifa = ifa->ifa_next) {
1332 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1333 ifa->ifa_local, dev,
1334 ifa->ifa_local, NULL,
1335 dev->dev_addr, NULL);
1339 /* Called only under RTNL semaphore */
1341 static int inetdev_event(struct notifier_block *this, unsigned long event,
1344 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1345 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1350 if (event == NETDEV_REGISTER) {
1351 in_dev = inetdev_init(dev);
1353 return notifier_from_errno(-ENOMEM);
1354 if (dev->flags & IFF_LOOPBACK) {
1355 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1356 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1358 } else if (event == NETDEV_CHANGEMTU) {
1359 /* Re-enabling IP */
1360 if (inetdev_valid_mtu(dev->mtu))
1361 in_dev = inetdev_init(dev);
1367 case NETDEV_REGISTER:
1368 pr_debug("%s: bug\n", __func__);
1369 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1372 if (!inetdev_valid_mtu(dev->mtu))
1374 if (dev->flags & IFF_LOOPBACK) {
1375 struct in_ifaddr *ifa = inet_alloc_ifa();
1378 INIT_HLIST_NODE(&ifa->hash);
1380 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1381 ifa->ifa_prefixlen = 8;
1382 ifa->ifa_mask = inet_make_mask(8);
1383 in_dev_hold(in_dev);
1384 ifa->ifa_dev = in_dev;
1385 ifa->ifa_scope = RT_SCOPE_HOST;
1386 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1387 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1388 INFINITY_LIFE_TIME);
1389 inet_insert_ifa(ifa);
1394 case NETDEV_CHANGEADDR:
1395 if (!IN_DEV_ARP_NOTIFY(in_dev))
1398 case NETDEV_NOTIFY_PEERS:
1399 /* Send gratuitous ARP to notify of link change */
1400 inetdev_send_gratuitous_arp(dev, in_dev);
1405 case NETDEV_PRE_TYPE_CHANGE:
1406 ip_mc_unmap(in_dev);
1408 case NETDEV_POST_TYPE_CHANGE:
1409 ip_mc_remap(in_dev);
1411 case NETDEV_CHANGEMTU:
1412 if (inetdev_valid_mtu(dev->mtu))
1414 /* disable IP when MTU is not enough */
1415 case NETDEV_UNREGISTER:
1416 inetdev_destroy(in_dev);
1418 case NETDEV_CHANGENAME:
1419 /* Do not notify about label change, this event is
1420 * not interesting to applications using netlink.
1422 inetdev_changename(dev, in_dev);
1424 devinet_sysctl_unregister(in_dev);
1425 devinet_sysctl_register(in_dev);
/* Notifier block whose callback is inetdev_event(); where it gets registered is not visible in this part of the file. */
1432 static struct notifier_block ip_netdev_notifier = {
1433 .notifier_call = inetdev_event,
1436 static size_t inet_nlmsg_size(void)
1438 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1439 + nla_total_size(4) /* IFA_ADDRESS */
1440 + nla_total_size(4) /* IFA_LOCAL */
1441 + nla_total_size(4) /* IFA_BROADCAST */
1442 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1443 + nla_total_size(4); /* IFA_FLAGS */
1446 static inline u32 cstamp_delta(unsigned long cstamp)
1448 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1451 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1452 unsigned long tstamp, u32 preferred, u32 valid)
1454 struct ifa_cacheinfo ci;
1456 ci.cstamp = cstamp_delta(cstamp);
1457 ci.tstamp = cstamp_delta(tstamp);
1458 ci.ifa_prefered = preferred;
1459 ci.ifa_valid = valid;
1461 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Build one netlink address message describing ifa at the tail of skb.
 * The message carries an ifaddrmsg header (family AF_INET, prefix length,
 * flags, scope, interface index) followed by IFA_ADDRESS, IFA_LOCAL,
 * IFA_BROADCAST, IFA_LABEL, IFA_FLAGS and the cache info written by
 * put_cacheinfo().
 * portid, seq, event and flags are passed straight to nlmsg_put().
 * The success path returns nlmsg_end(); a failed attribute put jumps to
 * nla_put_failure, and the visible failure handling drops the partial
 * message with nlmsg_cancel(). */
1464 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1465 u32 portid, u32 seq, int event, unsigned int flags)
1467 struct ifaddrmsg *ifm;
1468 struct nlmsghdr *nlh;
1469 u32 preferred, valid;
1471 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
/* Fixed-size header copied from the in_ifaddr. */
1475 ifm = nlmsg_data(nlh);
1476 ifm->ifa_family = AF_INET;
1477 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1478 ifm->ifa_flags = ifa->ifa_flags;
1479 ifm->ifa_scope = ifa->ifa_scope;
1480 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
/* Addresses without IFA_F_PERMANENT start from their stored lifetimes; tval
 * is the number of seconds elapsed since ifa_tstamp.
 * NOTE(review): the statements that adjust preferred/valid by tval are not
 * visible in this excerpt. */
1482 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1483 preferred = ifa->ifa_preferred_lft;
1484 valid = ifa->ifa_valid_lft;
1485 if (preferred != INFINITY_LIFE_TIME) {
1486 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488 if (preferred > tval)
1492 if (valid != INFINITY_LIFE_TIME) {
/* NOTE(review): presumably the else-branch for permanent addresses, which
 * reports both lifetimes as infinite - confirm against the full source. */
1500 preferred = INFINITY_LIFE_TIME;
1501 valid = INFINITY_LIFE_TIME;
/* IFA_ADDRESS, IFA_BROADCAST and IFA_LABEL are emitted only when the
 * corresponding field is non-zero / non-empty; IFA_FLAGS and the cache info
 * have no such guard.  Any failing put aborts the whole message. */
1503 if ((ifa->ifa_address &&
1504 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1507 (ifa->ifa_broadcast &&
1508 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1509 (ifa->ifa_label[0] &&
1510 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1511 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1512 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514 goto nla_put_failure;
1516 return nlmsg_end(skb, nlh);
/* Failure handling: undo the partially built message. */
1519 nlmsg_cancel(skb, nlh);
/* Netlink dump callback that emits one RTM_NEWADDR message (NLM_F_MULTI) per
 * IPv4 address of the devices in the requesting socket's namespace.
 * It walks the net->dev_index_head buckets and, per device, the in_device's
 * ifa_list.  The resume position is read from cb->args[] on entry and the
 * address index is written back to cb->args[2] before returning. */
1523 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1525 struct net *net = sock_net(skb->sk);
1528 int ip_idx, s_ip_idx;
1529 struct net_device *dev;
1530 struct in_device *in_dev;
1531 struct in_ifaddr *ifa;
1532 struct hlist_head *head;
/* Resume from the device and address indices saved by the previous call. */
1535 s_idx = idx = cb->args[1];
1536 s_ip_idx = ip_idx = cb->args[2];
/* s_idx only applies to the first bucket visited; it is reset to 0 for every
 * later bucket. */
1538 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540 head = &net->dev_index_head[h];
/* cb->seq is derived from the namespace's dev_addr_genid; the second XOR
 * operand is not visible in this excerpt. */
1542 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544 hlist_for_each_entry_rcu(dev, head, index_hlist) {
/* NOTE(review): the statement guarded by this test is not shown; presumably
 * it clears the address resume index once past the saved device. */
1547 if (h > s_h || idx > s_idx)
1549 in_dev = __in_dev_get_rcu(dev);
/* Skip addresses already delivered (ip_idx < s_ip_idx), then try to add the
 * next one; a result <= 0 from inet_fill_ifaddr() ends this pass. */
1553 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1554 ifa = ifa->ifa_next, ip_idx++) {
1555 if (ip_idx < s_ip_idx)
1557 if (inet_fill_ifaddr(skb, ifa,
1558 NETLINK_CB(cb->skb).portid,
1560 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1564 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Remember how far into the current device's address list we got. */
1575 cb->args[2] = ip_idx;
/* Send an address notification of type `event` for ifa to the
 * RTNLGRP_IPV4_IFADDR netlink group.
 * The sequence number is taken from nlh when one is supplied, otherwise 0.
 * The skb is sized with inet_nlmsg_size() and allocated with GFP_KERNEL. */
1580 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1583 struct sk_buff *skb;
1584 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1588 net = dev_net(ifa->ifa_dev->dev);
1589 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1593 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1596 WARN_ON(err == -EMSGSIZE);
1600 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
/* NOTE(review): presumably the error exit (its guard/label is not visible);
 * err is handed to rtnl_set_sk_err() for the same group. */
1604 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/* rtnl_af_ops .get_link_af_size hook (see inet_af_ops): size of the
 * IFLA_INET_CONF attribute, which holds IPV4_DEVCONF_MAX 4-byte values.
 * NOTE(review): in_dev is looked up via dev->ip_ptr, but the check that uses
 * it is not visible in this excerpt. */
1607 static size_t inet_get_link_af_size(const struct net_device *dev)
1609 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1614 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/* rtnl_af_ops .fill_link_af hook (see inet_af_ops): reserve an
 * IFLA_INET_CONF attribute of IPV4_DEVCONF_MAX * 4 bytes in skb and copy the
 * device's cnf.data[] values into it as u32s. */
1617 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1619 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1626 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
/* One u32 slot per devconf entry, in cnf.data[] order. */
1630 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1631 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/* Parse policy for the AF_INET link attributes: IFLA_INET_CONF must be a
 * nested attribute.  Used by inet_validate_link_af(). */
1636 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1637 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/* rtnl_af_ops .validate_link_af hook (see inet_af_ops).
 * Returns -EAFNOSUPPORT when a device is given but has no in_device.
 * Otherwise nla is parsed against inet_af_policy and every attribute nested
 * in IFLA_INET_CONF is range-checked by type. */
1640 static int inet_validate_link_af(const struct net_device *dev,
1641 const struct nlattr *nla)
1643 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1646 if (dev && !__in_dev_get_rtnl(dev))
1647 return -EAFNOSUPPORT;
1649 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1653 if (tb[IFLA_INET_CONF]) {
1654 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
/* The attribute type is the devconf id; valid ids are 1..IPV4_DEVCONF_MAX.
 * NOTE(review): the action taken on an out-of-range id is not visible. */
1655 int cfgid = nla_type(a);
1660 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/* rtnl_af_ops .set_link_af hook (see inet_af_ops): apply the devconf values
 * nested in IFLA_INET_CONF to the device's in_device.
 * The attributes are parsed without a policy (NULL) here. */
1668 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1670 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1671 struct nlattr *a, *tb[IFLA_INET_MAX+1];
/* NOTE(review): the condition guarding this return is not visible;
 * presumably it is the "no in_device" case. */
1675 return -EAFNOSUPPORT;
1677 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
/* Each nested attribute's type selects the devconf entry and its u32 payload
 * is the new value. */
1680 if (tb[IFLA_INET_CONF]) {
1681 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1682 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/* Compute the message size needed for a netconf message of the given type.
 * The base is the netconfmsg header plus NETCONFA_IFINDEX; one 4-byte
 * attribute is added for each of FORWARDING, RP_FILTER and MC_FORWARDING
 * that is selected.  type == -1 selects all of them. */
1688 static int inet_netconf_msgsize_devconf(int type)
1690 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1691 + nla_total_size(4); /* NETCONFA_IFINDEX */
1693 /* type -1 is used for ALL */
1694 if (type == -1 || type == NETCONFA_FORWARDING)
1695 size += nla_total_size(4);
1696 if (type == -1 || type == NETCONFA_RP_FILTER)
1697 size += nla_total_size(4);
1698 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1699 size += nla_total_size(4);
/* Build one netconf message in skb for the configuration block devconf.
 * The message has a netconfmsg header (family AF_INET), a NETCONFA_IFINDEX
 * attribute carrying ifindex, and then the attribute(s) selected by type:
 * FORWARDING, RP_FILTER, MC_FORWARDING, or all three when type == -1.
 * The success path returns nlmsg_end(); a failed put jumps to
 * nla_put_failure, and the visible failure handling cancels the message. */
1704 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1705 struct ipv4_devconf *devconf, u32 portid,
1706 u32 seq, int event, unsigned int flags,
1709 struct nlmsghdr *nlh;
1710 struct netconfmsg *ncm;
1712 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1717 ncm = nlmsg_data(nlh);
1718 ncm->ncm_family = AF_INET;
1720 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1721 goto nla_put_failure;
1723 /* type -1 is used for ALL */
1724 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1725 nla_put_s32(skb, NETCONFA_FORWARDING,
1726 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1727 goto nla_put_failure;
1728 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1729 nla_put_s32(skb, NETCONFA_RP_FILTER,
1730 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1731 goto nla_put_failure;
1732 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1733 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1734 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1735 goto nla_put_failure;
1737 return nlmsg_end(skb, nlh);
/* Failure handling: undo the partially built message. */
1740 nlmsg_cancel(skb, nlh);
/* Broadcast an RTM_NEWNETCONF message for one attribute type of devconf to
 * the RTNLGRP_IPV4_NETCONF group in namespace net.
 * ifindex identifies the configuration block in the message.  The skb is
 * allocated with GFP_ATOMIC and the message uses portid 0 and sequence 0. */
1744 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1745 struct ipv4_devconf *devconf)
1747 struct sk_buff *skb;
1750 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1754 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1755 RTM_NEWNETCONF, 0, type);
1757 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1758 WARN_ON(err == -EMSGSIZE);
1762 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
/* NOTE(review): presumably the error exit (its guard/label is not visible);
 * err is handed to rtnl_set_sk_err() for the same group. */
1766 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/* Parse policy for RTM_GETNETCONF requests, used by
 * inet_netconf_get_devconf(): each listed attribute has .len set to
 * sizeof(int). */
1769 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1770 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1771 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1772 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/* Handler for a single RTM_GETNETCONF request (registered in devinet_init()).
 * The request is parsed with devconf_ipv4_policy and NETCONFA_IFINDEX is
 * looked up to choose the configuration block.  The reply is a full
 * RTM_NEWNETCONF message (sized for type -1) unicast back to the sender. */
1775 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1776 struct nlmsghdr *nlh)
1778 struct net *net = sock_net(in_skb->sk);
1779 struct nlattr *tb[NETCONFA_MAX+1];
1780 struct netconfmsg *ncm;
1781 struct sk_buff *skb;
1782 struct ipv4_devconf *devconf;
1783 struct in_device *in_dev;
1784 struct net_device *dev;
1788 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1789 devconf_ipv4_policy);
/* NOTE(review): the action taken when the attribute is absent is not
 * visible in this excerpt. */
1794 if (!tb[NETCONFA_IFINDEX])
/* NETCONFA_IFINDEX_ALL and NETCONFA_IFINDEX_DEFAULT select the namespace-wide
 * blocks; the remaining visible code resolves ifindex to a device and uses
 * that device's in_device configuration. */
1797 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1799 case NETCONFA_IFINDEX_ALL:
1800 devconf = net->ipv4.devconf_all;
1802 case NETCONFA_IFINDEX_DEFAULT:
1803 devconf = net->ipv4.devconf_dflt;
1806 dev = __dev_get_by_index(net, ifindex);
1809 in_dev = __in_dev_get_rtnl(dev);
1812 devconf = &in_dev->cnf;
1817 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1821 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1822 NETLINK_CB(in_skb).portid,
1823 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1826 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1827 WARN_ON(err == -EMSGSIZE);
1831 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/* Netlink dump callback for RTM_GETNETCONF (registered in devinet_init()).
 * It walks the net->dev_index_head buckets and emits a netconf message per
 * device.  Two extra pseudo-bucket steps follow: h == NETDEV_HASHENTRIES
 * emits the "all" block and h == NETDEV_HASHENTRIES + 1 emits the "default"
 * block. */
1836 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1837 struct netlink_callback *cb)
1839 struct net *net = sock_net(skb->sk);
1842 struct net_device *dev;
1843 struct in_device *in_dev;
1844 struct hlist_head *head;
/* Resume from the device index saved by the previous call. */
1847 s_idx = idx = cb->args[1];
/* s_idx only applies to the first bucket visited; it is reset to 0 for every
 * later bucket. */
1849 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1851 head = &net->dev_index_head[h];
/* cb->seq is derived from the namespace's dev_addr_genid; the second XOR
 * operand is not visible in this excerpt. */
1853 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1855 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1858 in_dev = __in_dev_get_rcu(dev);
1862 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1864 NETLINK_CB(cb->skb).portid,
1872 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* After the real buckets: the namespace-wide "all" configuration. */
1878 if (h == NETDEV_HASHENTRIES) {
1879 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1880 net->ipv4.devconf_all,
1881 NETLINK_CB(cb->skb).portid,
1883 RTM_NEWNETCONF, NLM_F_MULTI,
/* Then the namespace "default" configuration. */
1889 if (h == NETDEV_HASHENTRIES + 1) {
1890 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1891 net->ipv4.devconf_dflt,
1892 NETLINK_CB(cb->skb).portid,
1894 RTM_NEWNETCONF, NLM_F_MULTI,
1907 #ifdef CONFIG_SYSCTL
/* Propagate entry i of the namespace's default configuration to every device
 * in net whose own state bit i is clear.  Devices whose bit is set (see
 * set_bit() in devinet_conf_proc()) keep their current value. */
1909 static void devinet_copy_dflt_conf(struct net *net, int i)
1911 struct net_device *dev;
1914 for_each_netdev_rcu(net, dev) {
1915 struct in_device *in_dev;
1917 in_dev = __in_dev_get_rcu(dev);
1918 if (in_dev && !test_bit(i, in_dev->cnf.state))
1919 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/* Fan the namespace-wide "all" FORWARDING value out to the default block and
 * to every device in net, sending a NETCONFA_FORWARDING notification for
 * each block that is touched.  ACCEPT_REDIRECTS in the "all" block is set to
 * the inverse of the forwarding value. */
1924 /* called with RTNL locked */
1925 static void inet_forward_change(struct net *net)
1927 struct net_device *dev;
1928 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1930 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1931 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1932 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1933 NETCONFA_IFINDEX_ALL,
1934 net->ipv4.devconf_all);
1935 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1936 NETCONFA_IFINDEX_DEFAULT,
1937 net->ipv4.devconf_dflt);
/* Per-device pass.  NOTE(review): any condition guarding dev_disable_lro()
 * and the in_dev NULL check are not visible in this excerpt. */
1939 for_each_netdev(net, dev) {
1940 struct in_device *in_dev;
1942 dev_disable_lro(dev);
1944 in_dev = __in_dev_get_rcu(dev);
1946 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1947 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1948 dev->ifindex, &in_dev->cnf);
/* Generic sysctl handler for the per-block IPv4 devconf integers (used by
 * the DEVINET_SYSCTL_RW_ENTRY / DEVINET_SYSCTL_RO_ENTRY table entries).
 * The value is read/written with proc_dointvec(); the entry's value is
 * sampled before and after so that changes can be detected.
 * ctl->extra1 is the owning ipv4_devconf, ctl->extra2 the namespace. */
1954 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1955 void __user *buffer,
1956 size_t *lenp, loff_t *ppos)
1958 int old_value = *(int *)ctl->data;
1959 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1960 int new_value = *(int *)ctl->data;
1963 struct ipv4_devconf *cnf = ctl->extra1;
1964 struct net *net = ctl->extra2;
/* Index of this entry inside cnf->data[]. */
1965 int i = (int *)ctl->data - cnf->data;
/* Mark the entry in the state bitmap; devinet_copy_dflt_conf() skips entries
 * whose bit is set. */
1967 set_bit(i, cnf->state);
/* A change to the namespace default is pushed to the devices. */
1969 if (cnf == net->ipv4.devconf_dflt)
1970 devinet_copy_dflt_conf(net, i);
/* Turning accept_local or route_localnet off (non-zero -> 0) flushes the
 * route cache. */
1971 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1972 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1973 if ((new_value == 0) && (old_value != 0))
1974 rt_cache_flush(net);
/* An rp_filter change is announced over netconf; the ifindex reported is the
 * DEFAULT/ALL pseudo-index or, for a per-device block, the index of the
 * device that embeds cnf. */
1975 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1976 new_value != old_value) {
1979 if (cnf == net->ipv4.devconf_dflt)
1980 ifindex = NETCONFA_IFINDEX_DEFAULT;
1981 else if (cnf == net->ipv4.devconf_all)
1982 ifindex = NETCONFA_IFINDEX_ALL;
1984 struct in_device *idev =
1985 container_of(cnf, struct in_device,
1987 ifindex = idev->dev->ifindex;
1989 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/* sysctl handler for the forwarding knobs: the "forwarding" table entry and
 * the "ip_forward" entry in ctl_forward_entry both use it.
 * After proc_dointvec(), a write that changed the value is acted on:
 *  - for any block other than the namespace default, RTNL is taken with
 *    rtnl_trylock(); if that fails the syscall is restarted;
 *  - the "all" knob is fanned out through inet_forward_change();
 *  - otherwise the block belongs to a device, which gets dev_disable_lro()
 *    and a NETCONFA_FORWARDING notification;
 *  - the route cache is flushed.
 * The visible tail also sends a NETCONFA_FORWARDING notification for
 * NETCONFA_IFINDEX_DEFAULT (presumably the branch for the default block). */
1997 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1998 void __user *buffer,
1999 size_t *lenp, loff_t *ppos)
2001 int *valp = ctl->data;
2004 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
/* NOTE(review): val is declared on a line not shown; presumably it holds the
 * value from before proc_dointvec(). */
2006 if (write && *valp != val) {
2007 struct net *net = ctl->extra2;
2009 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2010 if (!rtnl_trylock()) {
2011 /* Restore the original values before restarting */
2014 return restart_syscall();
2016 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2017 inet_forward_change(net);
/* Per-device block: recover the in_device that embeds cnf.
 * NOTE(review): any condition guarding dev_disable_lro() is not visible. */
2019 struct ipv4_devconf *cnf = ctl->extra1;
2020 struct in_device *idev =
2021 container_of(cnf, struct in_device, cnf);
2023 dev_disable_lro(idev->dev);
2024 inet_netconf_notify_devconf(net,
2025 NETCONFA_FORWARDING,
2030 rt_cache_flush(net);
2032 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2033 NETCONFA_IFINDEX_DEFAULT,
2034 net->ipv4.devconf_dflt);
/* sysctl handler used by the DEVINET_SYSCTL_FLUSHING_ENTRY table entries:
 * read/write the integer with proc_dointvec() and flush the namespace's
 * route cache when a write changed the value.
 * ctl->extra2 is the namespace. */
2040 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2041 void __user *buffer,
2042 size_t *lenp, loff_t *ppos)
2044 int *valp = ctl->data;
2046 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2047 struct net *net = ctl->extra2;
/* NOTE(review): val is declared on a line not shown; presumably it holds the
 * value from before proc_dointvec(). */
2049 if (write && *valp != val)
2050 rt_cache_flush(net);
/* Build one ctl_table initializer for devconf attribute `attr`.
 * .data points at the attribute's slot in the global ipv4_devconf (index
 * IPV4_DEVCONF_<attr> - 1), .maxlen is sizeof(int), .proc_handler is `proc`
 * and .extra1 is &ipv4_devconf.  __devinet_sysctl_register() later rebases
 * .data/.extra1 onto the real configuration block.
 * NOTE(review): `name` and `mval` are consumed by lines not visible in this
 * excerpt (presumably the entry's procname and mode). */
2055 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2058 .data = ipv4_devconf.data + \
2059 IPV4_DEVCONF_ ## attr - 1, \
2060 .maxlen = sizeof(int), \
2062 .proc_handler = proc, \
2063 .extra1 = &ipv4_devconf, \
/* Entry handled by devinet_conf_proc() with mode argument 0644. */
2066 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2067 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
/* Entry handled by devinet_conf_proc() with mode argument 0444. */
2069 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2070 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
/* Entry with mode argument 0644 and a caller-supplied handler. */
2072 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2073 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
/* Complex entry whose handler is ipv4_doint_and_flush(), i.e. a changed
 * value flushes the route cache. */
2075 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2076 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/* Template sysctl table for one IPv4 configuration block.
 * It holds a ctl_table_header pointer plus a ctl_table array sized
 * __IPV4_DEVCONF_MAX.  __devinet_sysctl_register() duplicates this template
 * with kmemdup() and repoints every entry at a concrete ipv4_devconf. */
2078 static struct devinet_sysctl_table {
2079 struct ctl_table_header *sysctl_header;
2080 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2081 } devinet_sysctl = {
/* forwarding has its own handler; mc_forwarding uses the RO entry macro. */
2083 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2084 devinet_sysctl_forward),
2085 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
/* Plain read/write integers handled by devinet_conf_proc(). */
2087 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2088 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2089 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2090 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2091 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2092 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2093 "accept_source_route"),
2094 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2095 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2096 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2097 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2098 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2099 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2100 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2101 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2102 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2103 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2104 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2105 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2106 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2107 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2108 "force_igmp_version"),
2109 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2110 "igmpv2_unsolicited_report_interval"),
2111 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2112 "igmpv3_unsolicited_report_interval"),
/* Entries whose change flushes the route cache (ipv4_doint_and_flush()). */
2114 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2115 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2116 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2117 "promote_secondaries"),
2118 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/* Register the sysctl directory "net/ipv4/conf/<dev_name>" in namespace net
 * for configuration block p.
 * The devinet_sysctl template is duplicated with kmemdup(), every entry is
 * redirected to p, and the copy is registered with register_net_sysctl().
 * NOTE(review): the error paths and the bookkeeping that stores the table
 * for later unregistration are not visible in this excerpt. */
2123 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2124 struct ipv4_devconf *p)
2127 struct devinet_sysctl_table *t;
/* Room for the fixed prefix (including its NUL) plus an interface name. */
2128 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2130 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
/* The template's .data pointers reference the global ipv4_devconf; shifting
 * them by the distance between p and ipv4_devconf makes them reference the
 * same slot inside p.  The last array slot is left untouched (presumably
 * the empty terminator entry). */
2134 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2135 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2136 t->devinet_vars[i].extra1 = p;
2137 t->devinet_vars[i].extra2 = net;
2140 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2142 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2143 if (!t->sysctl_header)
/* Unregister the sysctl table attached to cnf (cnf->sysctl).
 * NOTE(review): the NULL check and the release of the table itself are not
 * visible in this excerpt. */
2155 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2157 struct devinet_sysctl_table *t = cnf->sysctl;
2163 unregister_net_sysctl_table(t->sysctl_header);
/* Register the sysctl entries of one in_device: first the neighbour (ARP)
 * parameters via neigh_sysctl_register(), then the IPv4 devconf directory
 * named after the device. */
2167 static void devinet_sysctl_register(struct in_device *idev)
2169 neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2170 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/* Undo devinet_sysctl_register() in reverse order: drop the IPv4 devconf
 * sysctls first, then the neighbour-parameter sysctls. */
2174 static void devinet_sysctl_unregister(struct in_device *idev)
2176 __devinet_sysctl_unregister(&idev->cnf);
2177 neigh_sysctl_unregister(idev->arp_parms);
/* Table for the "ip_forward" sysctl, registered under "net/ipv4" by
 * devinet_init_net().  As written it targets the FORWARDING slot of the
 * global ipv4_devconf and init_net; devinet_init_net() duplicates and
 * repoints it for other namespaces.  Handled by devinet_sysctl_forward(). */
2180 static struct ctl_table ctl_forward_entry[] = {
2182 .procname = "ip_forward",
2183 .data = &ipv4_devconf.data[
2184 IPV4_DEVCONF_FORWARDING - 1],
2185 .maxlen = sizeof(int),
2187 .proc_handler = devinet_sysctl_forward,
2188 .extra1 = &ipv4_devconf,
2189 .extra2 = &init_net,
/* Per-namespace init (pernet .init, see devinet_ops).
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt blocks and the
 * static ctl_forward_entry table; every other namespace gets kmemdup()'d
 * copies.  With CONFIG_SYSCTL the "all" and "default" conf directories and
 * the "net/ipv4" ip_forward table are registered.  On success the blocks
 * are published in net->ipv4.devconf_all / devconf_dflt.
 * NOTE(review): allocation checks, labels and return statements are partly
 * missing from this excerpt. */
2195 static __net_init int devinet_init_net(struct net *net)
2198 struct ipv4_devconf *all, *dflt;
2199 #ifdef CONFIG_SYSCTL
2200 struct ctl_table *tbl = ctl_forward_entry;
2201 struct ctl_table_header *forw_hdr;
2205 all = &ipv4_devconf;
2206 dflt = &ipv4_devconf_dflt;
/* Non-initial namespaces work on private copies of both blocks. */
2208 if (!net_eq(net, &init_net)) {
2209 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2213 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2215 goto err_alloc_dflt;
2217 #ifdef CONFIG_SYSCTL
/* Private copy of the ip_forward table, pointed at this namespace's "all"
 * block. */
2218 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2222 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2223 tbl[0].extra1 = all;
2224 tbl[0].extra2 = net;
2228 #ifdef CONFIG_SYSCTL
2229 err = __devinet_sysctl_register(net, "all", all);
2233 err = __devinet_sysctl_register(net, "default", dflt);
2238 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2239 if (forw_hdr == NULL)
2241 net->ipv4.forw_hdr = forw_hdr;
2244 net->ipv4.devconf_all = all;
2245 net->ipv4.devconf_dflt = dflt;
2248 #ifdef CONFIG_SYSCTL
/* Unwind: the visible lines unregister "default" then "all", and test
 * whether tbl/dflt/all are private copies rather than the static objects
 * (the statements under those tests are not visible). */
2250 __devinet_sysctl_unregister(dflt);
2252 __devinet_sysctl_unregister(all);
2254 if (tbl != ctl_forward_entry)
2258 if (dflt != &ipv4_devconf_dflt)
2261 if (all != &ipv4_devconf)
/* Per-namespace teardown (pernet .exit, see devinet_ops): unregister the
 * ip_forward table and the "default"/"all" sysctl directories, then free the
 * namespace's devconf blocks. */
2267 static __net_exit void devinet_exit_net(struct net *net)
2269 #ifdef CONFIG_SYSCTL
2270 struct ctl_table *tbl;
/* Fetch the table pointer before the header is unregistered.
 * NOTE(review): what is done with tbl afterwards is not visible here. */
2272 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2273 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2274 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2275 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2278 kfree(net->ipv4.devconf_dflt);
2279 kfree(net->ipv4.devconf_all);
/* Per-namespace hooks for this file; registered with
 * register_pernet_subsys() in devinet_init(). */
2282 static __net_initdata struct pernet_operations devinet_ops = {
2283 .init = devinet_init_net,
2284 .exit = devinet_exit_net,
/* Address-family link attribute hooks for rtnetlink; registered with
 * rtnl_af_register() in devinet_init().
 * NOTE(review): the member on the line not shown is presumably the address
 * family. */
2287 static struct rtnl_af_ops inet_af_ops = {
2289 .fill_link_af = inet_fill_link_af,
2290 .get_link_af_size = inet_get_link_af_size,
2291 .validate_link_af = inet_validate_link_af,
2292 .set_link_af = inet_set_link_af,
/* One-time initialisation of the IPv4 device layer. */
2295 void __init devinet_init(void)
/* Empty every bucket of the inet_addr_lst hash table. */
2299 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2300 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2302 register_pernet_subsys(&devinet_ops);
2304 register_gifconf(PF_INET, inet_gifconf);
2305 register_netdevice_notifier(&ip_netdev_notifier);
/* Queue check_lifetime_work with a delay of 0. */
2307 schedule_delayed_work(&check_lifetime_work, 0);
2309 rtnl_af_register(&inet_af_ops);
/* rtnetlink handlers: NEWADDR/DELADDR get a single handler each, GETADDR only
 * a dump callback, GETNETCONF both a request handler and a dump callback. */
2311 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2312 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2313 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2314 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2315 inet_netconf_dump_devconf, NULL);