2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
/*
 * Static ipv4_devconf instance. The visible entries switch on redirect
 * acceptance, redirect sending, secure redirects and shared media, and set
 * the IGMPv2 / IGMPv3 unsolicited report intervals to 10000 ms and 1000 ms.
 * Entries are indexed by "IPV4_DEVCONF_xxx - 1".
 * NOTE(review): the enclosing member initializer and the closing braces are
 * not visible in this listing.
 */
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
/*
 * Static ipv4_devconf instance holding default values. It carries the same
 * visible entries as ipv4_devconf plus ACCEPT_SOURCE_ROUTE = 1.
 * NOTE(review): presumably this is what net->ipv4.devconf_dflt starts from;
 * the wiring is not visible here, confirm against the namespace init code.
 * The enclosing member initializer and closing braces are also not visible.
 */
81 static struct ipv4_devconf ipv4_devconf_dflt = {
83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
/*
 * Read devconf attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer expression may be passed.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 [IFA_LOCAL] = { .type = NLA_U32 },
98 [IFA_ADDRESS] = { .type = NLA_U32 },
99 [IFA_BROADCAST] = { .type = NLA_U32 },
100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
102 [IFA_FLAGS] = { .type = NLA_U32 },
105 #define IN4_ADDR_HSIZE_SHIFT 8
106 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 u32 val = (__force u32) addr ^ net_hash_mix(net);
114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/*
 * Add @ifa to the address hash: the bucket is chosen by hashing
 * ifa->ifa_local within @net, and the entry is linked at the bucket head
 * with hlist_add_head_rcu().
 * NOTE(review): the listing skips lines between the two statements below.
 */
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 u32 hash = inet_addr_hash(net, ifa->ifa_local);
122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
/*
 * Unlink @ifa from the address hash with hlist_del_init_rcu().
 * NOTE(review): the listing skips lines before the unlink call.
 */
125 static void inet_hash_remove(struct in_ifaddr *ifa)
128 hlist_del_init_rcu(&ifa->hash);
/*
 * __ip_dev_find: look up the net_device that owns local IPv4 address @addr.
 *
 * Visible logic: @addr is hashed with inet_addr_hash() and that bucket of
 * inet_addr_lst is walked with the RCU list iterator, looking for an entry
 * whose ifa_local equals @addr and checking the owning device's namespace
 * against @net. A lookup of @addr in the RT_TABLE_LOCAL FIB table is the
 * fallback; a hit of type RTN_LOCAL yields FIB_RES_DEV(res). A final branch
 * tests "result && devref".
 *
 * NOTE(review): this listing skips several original lines (see the gaps in
 * the embedded numbering); the loop exits, the condition guarding the
 * fallback, the reference-taking call and the return are not shown.
 */
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141 u32 hash = inet_addr_hash(net, addr);
142 struct net_device *result = NULL;
143 struct in_ifaddr *ifa;
146 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 if (ifa->ifa_local == addr) {
148 struct net_device *dev = ifa->ifa_dev->dev;
150 if (!net_eq(dev_net(dev), net))
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets work.
164 local = fib_get_table(net, RT_TABLE_LOCAL);
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
170 if (result && devref)
175 EXPORT_SYMBOL(__ip_dev_find);
/*
 * Forward declarations for helpers defined later in the file, plus the
 * blocking notifier chain (inetaddr_chain) on which this file raises
 * NETDEV_UP / NETDEV_DOWN events for individual addresses.
 * NOTE(review): the inet_del_ifa() prototype is cut off in this listing
 * (its last parameter line is missing).
 */
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
/*
 * Definition of devinet_sysctl_register() following the prototype above.
 * NOTE(review): the body is not visible in this listing; presumably this is
 * the stub used when sysctl support is compiled out. Confirm against the
 * original file.
 */
186 static int devinet_sysctl_register(struct in_device *idev)
/*
 * Definition of devinet_sysctl_unregister() following the prototype above.
 * NOTE(review): the body is not visible in this listing; presumably this is
 * the stub used when sysctl support is compiled out. Confirm against the
 * original file.
 */
190 static void devinet_sysctl_unregister(struct in_device *idev)
195 /* Locks all the inet devices. */
197 static struct in_ifaddr *inet_alloc_ifa(void)
199 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * RCU callback for an in_ifaddr: recovers the ifaddr from its embedded
 * rcu_head and drops the reference it holds on ifa->ifa_dev.
 * NOTE(review): the listing skips lines around in_dev_put(); any guard on
 * ifa_dev and the release of the ifaddr itself are not shown.
 */
202 static void inet_rcu_free_ifa(struct rcu_head *head)
204 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206 in_dev_put(ifa->ifa_dev);
210 static void inet_free_ifa(struct in_ifaddr *ifa)
212 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown of an in_device (exported).
 *
 * Visible logic: warns if the address list or the multicast list is still
 * populated, frees the multicast hash, emits a debug line when
 * NET_REFCNT_DEBUG is defined, and logs "Freeing alive in_device" on one
 * branch.
 * NOTE(review): the condition selecting that branch, the release of the
 * device reference and the freeing of @idev are not visible in this listing.
 */
215 void in_dev_finish_destroy(struct in_device *idev)
217 struct net_device *dev = idev->dev;
219 WARN_ON(idev->ifa_list);
220 WARN_ON(idev->mc_list);
221 kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
227 pr_err("Freeing alive in_device %p\n", idev);
231 EXPORT_SYMBOL(in_dev_finish_destroy);
/*
 * Create the IPv4 per-device state (in_device) for @dev.
 *
 * Visible steps: allocate a zeroed in_device, copy the namespace's default
 * devconf into in_dev->cnf and clear cnf.sysctl, allocate ARP neighbour
 * parameters, disable LRO on the device when FORWARDING is set, register the
 * sysctl entries, initialise multicast state, and only then publish the
 * object through dev->ip_ptr with rcu_assign_pointer().
 *
 * Returns the in_device, or ERR_PTR(err) when in_dev is NULL at the end.
 * NOTE(review): error branches, the reference-count setup and the action
 * taken for IFF_UP devices are not visible in this listing.
 */
233 static struct in_device *inetdev_init(struct net_device *dev)
235 struct in_device *in_dev;
240 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
243 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 sizeof(in_dev->cnf));
245 in_dev->cnf.sysctl = NULL;
247 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 if (!in_dev->arp_parms)
250 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 dev_disable_lro(dev);
252 /* Reference in_dev->dev */
254 /* Account for reference dev->ip_ptr (below) */
257 err = devinet_sysctl_register(in_dev);
264 ip_mc_init_dev(in_dev);
265 if (dev->flags & IFF_UP)
268 /* we can receive as soon as ip_ptr is set -- do this last */
269 rcu_assign_pointer(dev->ip_ptr, in_dev);
271 return in_dev ?: ERR_PTR(err);
/*
 * RCU callback queued by inetdev_destroy(): recovers the in_device from its
 * embedded rcu_head.
 * NOTE(review): the statement acting on idev is not visible in this listing.
 */
278 static void in_dev_rcu_put(struct rcu_head *head)
280 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device.
 *
 * Visible steps: destroy the multicast state, delete addresses from the head
 * of in_dev->ifa_list until the list is empty, clear dev->ip_ptr, unregister
 * the sysctl entries, release the ARP neighbour parameters, and queue
 * in_dev_rcu_put() through call_rcu().
 * NOTE(review): the listing skips lines here, including how "dev" is
 * assigned and the rest of the deletion loop body.
 */
284 static void inetdev_destroy(struct in_device *in_dev)
286 struct in_ifaddr *ifa;
287 struct net_device *dev;
295 ip_mc_destroy_dev(in_dev);
297 while ((ifa = in_dev->ifa_list) != NULL) {
298 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
302 RCU_INIT_POINTER(dev->ip_ptr, NULL);
304 devinet_sysctl_unregister(in_dev);
305 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
308 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Test addresses @a and @b against the primary addresses of @in_dev.
 *
 * For each primary ifaddr, @a must match it (inet_ifa_match); @b must match
 * the same ifaddr too unless @b is zero.
 * NOTE(review): the statements producing the return value are not visible in
 * this listing.
 */
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
314 for_primary_ifa(in_dev) {
315 if (inet_ifa_match(a, ifa)) {
316 if (!b || inet_ifa_match(b, ifa)) {
321 } endfor_ifa(in_dev);
/*
 * Remove the address *@ifap from @in_dev, announcing the change to netlink
 * listeners (using @nlh / @portid) and to the inetaddr notifier chain.
 *
 * Visible outline:
 *  - When the address is primary, the following list entries are scanned.
 *    Entries that are secondary, share its mask and match its subnet are
 *    unhashed, unlinked and announced with RTM_DELADDR; do_promote is read
 *    from IN_DEV_PROMOTE_SECONDARIES().
 *  - For a promotion, fib_del_ifaddr() is called for every address from
 *    "promote" onwards that shares the mask and subnet.
 *  - The address itself is unlinked and unhashed, then RTM_DELADDR is sent
 *    before the NETDEV_DOWN notifier runs.
 *  - The promoted address is relinked after last_prim, loses
 *    IFA_F_SECONDARY and is announced with RTM_NEWADDR; the remaining
 *    entries from next_sec are then revisited.
 *
 * NOTE(review): many original lines are skipped in this listing (see the
 * numbering gaps), including several conditions, loop exits and the use of
 * @destroy. Consult the complete source before changing this function.
 */
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327 int destroy, struct nlmsghdr *nlh, u32 portid)
329 struct in_ifaddr *promote = NULL;
330 struct in_ifaddr *ifa, *ifa1 = *ifap;
331 struct in_ifaddr *last_prim = in_dev->ifa_list;
332 struct in_ifaddr *prev_prom = NULL;
333 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
337 /* 1. Deleting primary ifaddr forces deletion all secondaries
338 * unless alias promotion is set
341 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
344 while ((ifa = *ifap1) != NULL) {
345 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346 ifa1->ifa_scope <= ifa->ifa_scope)
349 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350 ifa1->ifa_mask != ifa->ifa_mask ||
351 !inet_ifa_match(ifa1->ifa_address, ifa)) {
352 ifap1 = &ifa->ifa_next;
358 inet_hash_remove(ifa);
359 *ifap1 = ifa->ifa_next;
361 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362 blocking_notifier_call_chain(&inetaddr_chain,
372 /* On promotion all secondaries from subnet are changing
373 * the primary IP, we must remove all their routes silently
374 * and later to add them back with new prefsrc. Do this
375 * while all addresses are on the device list.
377 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378 if (ifa1->ifa_mask == ifa->ifa_mask &&
379 inet_ifa_match(ifa1->ifa_address, ifa))
380 fib_del_ifaddr(ifa, ifa1);
385 *ifap = ifa1->ifa_next;
386 inet_hash_remove(ifa1);
388 /* 3. Announce address deletion */
390 /* Send message first, then call notifier.
391 At first sight, FIB update triggered by notifier
392 will refer to already deleted ifaddr, that could confuse
393 netlink listeners. It is not true: look, gated sees
394 that route deleted and if it still thinks that ifaddr
395 is valid, it will try to restore deleted routes... Grr.
396 So that, this order is correct.
398 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
402 struct in_ifaddr *next_sec = promote->ifa_next;
405 prev_prom->ifa_next = promote->ifa_next;
406 promote->ifa_next = last_prim->ifa_next;
407 last_prim->ifa_next = promote;
410 promote->ifa_flags &= ~IFA_F_SECONDARY;
411 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412 blocking_notifier_call_chain(&inetaddr_chain,
414 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415 if (ifa1->ifa_mask != ifa->ifa_mask ||
416 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Delete *@ifap from @in_dev without a netlink request context: forwards to
 * __inet_del_ifa() with a NULL nlmsghdr and portid 0.
 * NOTE(review): the line declaring the last parameter ("destroy", passed
 * through below) is missing from this listing.
 */
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
429 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/* Work handler that ages addresses with finite lifetimes (defined below). */
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work item running check_lifetime(). It is re-queued with zero
 * delay whenever an address is inserted or its lifetime is updated.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * Visible outline:
 *  - An address with a zero ifa_local takes an early branch.
 *  - IFA_F_SECONDARY is cleared, then the list is scanned. last_primary
 *    tracks the slot after the last primary whose scope is not below the new
 *    address's scope. An existing entry with the same mask and subnet either
 *    has the same local address or a different scope (both handled in
 *    branches not shown), or makes the new address secondary.
 *  - A primary address feeds prandom_seed() with its local address.
 *  - The address is linked in, added to the hash, check_lifetime_work is
 *    re-queued immediately, RTM_NEWADDR is sent and then the NETDEV_UP
 *    notifier runs.
 *
 * NOTE(review): the remaining signature line, the error returns and part of
 * the list splice are not visible in this listing.
 */
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
439 struct in_device *in_dev = ifa->ifa_dev;
440 struct in_ifaddr *ifa1, **ifap, **last_primary;
444 if (!ifa->ifa_local) {
449 ifa->ifa_flags &= ~IFA_F_SECONDARY;
450 last_primary = &in_dev->ifa_list;
452 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453 ifap = &ifa1->ifa_next) {
454 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455 ifa->ifa_scope <= ifa1->ifa_scope)
456 last_primary = &ifa1->ifa_next;
457 if (ifa1->ifa_mask == ifa->ifa_mask &&
458 inet_ifa_match(ifa1->ifa_address, ifa)) {
459 if (ifa1->ifa_local == ifa->ifa_local) {
463 if (ifa1->ifa_scope != ifa->ifa_scope) {
467 ifa->ifa_flags |= IFA_F_SECONDARY;
471 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472 prandom_seed((__force u32) ifa->ifa_local);
476 ifa->ifa_next = *ifap;
479 inet_hash_insert(dev_net(in_dev->dev), ifa);
481 cancel_delayed_work(&check_lifetime_work);
482 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
484 /* Send message first, then call notifier.
485 Notifier will trigger FIB update, so that
486 listeners of netlink will know about new ifaddr */
487 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
495 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach @ifa to @dev's in_device and insert it.
 *
 * Visible steps: fetch the in_device under RTNL, mark all devconf and
 * neighbour-parameter state as set, bind ifa->ifa_dev to the in_device
 * (warning if the ifaddr was already bound to another one), force host scope
 * for loopback addresses, and return the result of inet_insert_ifa().
 * NOTE(review): the handling of a missing in_device and the reference taken
 * when rebinding are not visible in this listing.
 */
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
500 struct in_device *in_dev = __in_dev_get_rtnl(dev);
508 ipv4_devconf_setall(in_dev);
509 neigh_parms_data_state_setall(in_dev->arp_parms);
510 if (ifa->ifa_dev != in_dev) {
511 WARN_ON(ifa->ifa_dev);
513 ifa->ifa_dev = in_dev;
515 if (ipv4_is_loopback(ifa->ifa_local))
516 ifa->ifa_scope = RT_SCOPE_HOST;
517 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index @ifindex in @net
 * (exported). The device is found with dev_get_by_index_rcu() and its ip_ptr
 * is read with rcu_dereference_rtnl(); in_dev starts out as NULL.
 * NOTE(review): the locking calls, the NULL check on dev and the return
 * statement are not visible in this listing.
 */
520 /* Caller must hold RCU or RTNL :
521 * We dont take a reference on found in_device
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
525 struct net_device *dev;
526 struct in_device *in_dev = NULL;
529 dev = dev_get_by_index_rcu(net, ifindex);
531 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
535 EXPORT_SYMBOL(inetdev_by_index);
/*
 * Search the primary addresses of @in_dev for one whose mask equals the
 * requested mask and whose subnet contains @prefix.
 * NOTE(review): the second signature line and the return statements are not
 * visible in this listing.
 */
537 /* Called only from RTNL semaphored context. No locks. */
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
544 for_primary_ifa(in_dev) {
545 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
547 } endfor_ifa(in_dev);
/*
 * Netlink handler that deletes an IPv4 address.
 *
 * Visible outline: the message is parsed against ifa_ipv4_policy and the
 * in_device is resolved from ifm->ifa_index. The address list is then
 * walked, filtering on IFA_LOCAL (compared with ifa_local), on IFA_LABEL
 * (nla_strcmp against ifa_label) and on IFA_ADDRESS (prefix length and
 * subnet match), each only when the attribute is present. The first
 * surviving entry is removed with __inet_del_ifa() using the sender's
 * portid. err is set to -EADDRNOTAVAIL after the loop.
 * NOTE(review): the error checks, "continue" statements and returns are not
 * visible in this listing.
 */
551 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
553 struct net *net = sock_net(skb->sk);
554 struct nlattr *tb[IFA_MAX+1];
555 struct in_device *in_dev;
556 struct ifaddrmsg *ifm;
557 struct in_ifaddr *ifa, **ifap;
562 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
566 ifm = nlmsg_data(nlh);
567 in_dev = inetdev_by_index(net, ifm->ifa_index);
568 if (in_dev == NULL) {
573 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
574 ifap = &ifa->ifa_next) {
576 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
579 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
582 if (tb[IFA_ADDRESS] &&
583 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
584 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
587 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
591 err = -EADDRNOTAVAIL;
/* Lifetime value meaning "never expires" for valid/preferred lifetimes. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
/*
 * Delayed-work handler that ages addresses with finite lifetimes.
 *
 * Visible outline, per hash bucket of inet_addr_lst:
 *  - A first pass with the RCU iterator skips IFA_F_PERMANENT entries,
 *    computes each address's age in seconds, and sets change_needed when the
 *    valid lifetime has expired or the preferred lifetime has expired on an
 *    address not yet flagged IFA_F_DEPRECATED. It also pulls "next" forward
 *    to the earliest upcoming expiry.
 *  - A second pass with the _safe iterator deletes expired addresses through
 *    inet_del_ifa(), or sets IFA_F_DEPRECATED and sends RTM_NEWADDR.
 * Afterwards the next run time is rounded, kept at least
 * ADDRCONF_TIMER_FUZZ_MAX away from "now", and the work is re-queued on
 * system_power_efficient_wq.
 *
 * NOTE(review): locking calls, "continue" statements, the test on
 * change_needed and parts of the deletion loop are not visible in this
 * listing.
 */
598 static void check_lifetime(struct work_struct *work)
600 unsigned long now, next, next_sec, next_sched;
601 struct in_ifaddr *ifa;
602 struct hlist_node *n;
606 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
608 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
609 bool change_needed = false;
612 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
615 if (ifa->ifa_flags & IFA_F_PERMANENT)
618 /* We try to batch several events at once. */
619 age = (now - ifa->ifa_tstamp +
620 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
622 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
623 age >= ifa->ifa_valid_lft) {
624 change_needed = true;
625 } else if (ifa->ifa_preferred_lft ==
626 INFINITY_LIFE_TIME) {
628 } else if (age >= ifa->ifa_preferred_lft) {
629 if (time_before(ifa->ifa_tstamp +
630 ifa->ifa_valid_lft * HZ, next))
631 next = ifa->ifa_tstamp +
632 ifa->ifa_valid_lft * HZ;
634 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
635 change_needed = true;
636 } else if (time_before(ifa->ifa_tstamp +
637 ifa->ifa_preferred_lft * HZ,
639 next = ifa->ifa_tstamp +
640 ifa->ifa_preferred_lft * HZ;
647 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
650 if (ifa->ifa_flags & IFA_F_PERMANENT)
653 /* We try to batch several events at once. */
654 age = (now - ifa->ifa_tstamp +
655 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
657 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
658 age >= ifa->ifa_valid_lft) {
659 struct in_ifaddr **ifap;
661 for (ifap = &ifa->ifa_dev->ifa_list;
662 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
664 inet_del_ifa(ifa->ifa_dev,
669 } else if (ifa->ifa_preferred_lft !=
670 INFINITY_LIFE_TIME &&
671 age >= ifa->ifa_preferred_lft &&
672 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
673 ifa->ifa_flags |= IFA_F_DEPRECATED;
674 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
680 next_sec = round_jiffies_up(next);
683 /* If rounded timeout is accurate enough, accept it. */
684 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
685 next_sched = next_sec;
688 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
689 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
690 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
692 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
/*
 * Store the valid and preferred lifetimes on @ifa.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are cleared first. A finite valid
 * lifetime (after addrconf_timeout_fixup) is stored in ifa_valid_lft; the
 * other branch sets IFA_F_PERMANENT. A finite preferred lifetime is stored
 * in ifa_preferred_lft, and one branch sets IFA_F_DEPRECATED. ifa_tstamp is
 * set to the current jiffies, and ifa_cstamp is initialised from it when it
 * was still zero.
 * NOTE(review): the second signature line, the "else" keywords and the
 * condition guarding IFA_F_DEPRECATED are not visible in this listing.
 */
696 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
699 unsigned long timeout;
701 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
703 timeout = addrconf_timeout_fixup(valid_lft, HZ);
704 if (addrconf_finite_timeout(timeout))
705 ifa->ifa_valid_lft = timeout;
707 ifa->ifa_flags |= IFA_F_PERMANENT;
709 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
710 if (addrconf_finite_timeout(timeout)) {
712 ifa->ifa_flags |= IFA_F_DEPRECATED;
713 ifa->ifa_preferred_lft = timeout;
715 ifa->ifa_tstamp = jiffies;
716 if (!ifa->ifa_cstamp)
717 ifa->ifa_cstamp = ifa->ifa_tstamp;
/*
 * Build a new in_ifaddr from a netlink address message.
 *
 * Visible outline: the message is parsed against ifa_ipv4_policy; a prefix
 * length above 32 or a missing IFA_LOCAL takes the rejection branch; the
 * device is resolved from ifm->ifa_index and its in_device fetched; a fresh
 * ifaddr is allocated. IFA_ADDRESS defaults to IFA_LOCAL when absent. The
 * ifaddr then receives prefix length, mask, flags (from IFA_FLAGS when
 * present), scope, owning in_device, local/peer address, optional broadcast
 * address and a label (IFA_LABEL, or the device name in the other branch).
 * When IFA_CACHEINFO is present, a zero valid lifetime or a preferred
 * lifetime above the valid one takes an error branch; otherwise both values
 * are written through @pvalid_lft and @pprefered_lft.
 *
 * NOTE(review): error codes, the fallback value for the flags, reference
 * handling and the return statements are not visible in this listing.
 */
720 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
721 __u32 *pvalid_lft, __u32 *pprefered_lft)
723 struct nlattr *tb[IFA_MAX+1];
724 struct in_ifaddr *ifa;
725 struct ifaddrmsg *ifm;
726 struct net_device *dev;
727 struct in_device *in_dev;
730 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
734 ifm = nlmsg_data(nlh);
736 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
739 dev = __dev_get_by_index(net, ifm->ifa_index);
744 in_dev = __in_dev_get_rtnl(dev);
749 ifa = inet_alloc_ifa();
752 * A potential indev allocation can be left alive, it stays
753 * assigned to its device and is destroy with it.
757 ipv4_devconf_setall(in_dev);
758 neigh_parms_data_state_setall(in_dev->arp_parms);
761 if (tb[IFA_ADDRESS] == NULL)
762 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
764 INIT_HLIST_NODE(&ifa->hash);
765 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
766 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
767 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
769 ifa->ifa_scope = ifm->ifa_scope;
770 ifa->ifa_dev = in_dev;
772 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
773 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
775 if (tb[IFA_BROADCAST])
776 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
779 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
781 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
783 if (tb[IFA_CACHEINFO]) {
784 struct ifa_cacheinfo *ci;
786 ci = nla_data(tb[IFA_CACHEINFO]);
787 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
791 *pvalid_lft = ci->ifa_valid;
792 *pprefered_lft = ci->ifa_prefered;
/*
 * Search the address list of ifa->ifa_dev for an existing entry equivalent
 * to @ifa: same mask, same subnet (inet_ifa_match) and same local address.
 * NOTE(review): the return statements are not visible in this listing.
 */
803 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
805 struct in_device *in_dev = ifa->ifa_dev;
806 struct in_ifaddr *ifa1, **ifap;
811 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
812 ifap = &ifa1->ifa_next) {
813 if (ifa1->ifa_mask == ifa->ifa_mask &&
814 inet_ifa_match(ifa1->ifa_address, ifa) &&
815 ifa1->ifa_local == ifa->ifa_local)
/*
 * Netlink handler that adds an IPv4 address or refreshes an existing one.
 *
 * Visible outline: the request is converted with rtm_to_ifaddr(), with both
 * lifetimes defaulting to INFINITY_LIFE_TIME, and find_matching_ifa() looks
 * for an equivalent address. On the insert path the lifetimes are applied
 * and the result of __inet_insert_ifa() is returned. On the other path,
 * NLM_F_EXCL or a missing NLM_F_REPLACE takes a rejection branch; otherwise
 * the lifetimes are applied, check_lifetime_work is re-queued immediately,
 * RTM_NEWADDR is sent and the NETDEV_UP notifier runs.
 *
 * NOTE(review): the branch conditions around ifa_existing, the error code
 * and what happens to the freshly built ifaddr on the update path are not
 * visible in this listing.
 */
821 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
823 struct net *net = sock_net(skb->sk);
824 struct in_ifaddr *ifa;
825 struct in_ifaddr *ifa_existing;
826 __u32 valid_lft = INFINITY_LIFE_TIME;
827 __u32 prefered_lft = INFINITY_LIFE_TIME;
831 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
835 ifa_existing = find_matching_ifa(ifa);
837 /* It would be best to check for !NLM_F_CREATE here but
838 * userspace already relies on not having to provide this.
840 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
841 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
845 if (nlh->nlmsg_flags & NLM_F_EXCL ||
846 !(nlh->nlmsg_flags & NLM_F_REPLACE))
849 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
850 cancel_delayed_work(&check_lifetime_work);
851 queue_delayed_work(system_power_efficient_wq,
852 &check_lifetime_work, 0);
853 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
854 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Classful prefix-length helper: rc starts at -1 ("something else, probably
 * a multicast"); the zero network is handled separately, and otherwise the
 * host-order address is tested against IN_CLASSA / IN_CLASSB / IN_CLASSC.
 * NOTE(review): the values assigned in each branch and the return statement
 * are not visible in this listing.
 */
860 * Determine a default network mask, based on the IP address.
863 static int inet_abc_len(__be32 addr)
865 int rc = -1; /* Something else, probably a multicast. */
867 if (ipv4_is_zeronet(addr))
870 __u32 haddr = ntohl(addr);
872 if (IN_CLASSA(haddr))
874 else if (IN_CLASSB(haddr))
876 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls (SIOCGIF* / SIOCSIF*).
 *
 * Visible outline:
 *  - The caller's struct ifreq is copied in, the interface name is
 *    NUL-terminated, the original sockaddr is saved in sin_orig, a ':' alias
 *    separator is searched for in the name and dev_load() is called.
 *  - Get requests enable address matching when the caller supplied AF_INET
 *    and clear the output sockaddr. Set requests require CAP_NET_ADMIN in
 *    the namespace's user namespace and, for the address-setting ones, an
 *    AF_INET sockaddr.
 *  - The device is found by name. The target address is located by label
 *    plus address (4.4BSD style), then by label only. Without a match every
 *    command except SIOCSIFADDR and SIOCSIFFLAGS stops with -EADDRNOTAVAIL.
 *  - Get requests copy local / broadcast / peer address or netmask into the
 *    sockaddr and the ifreq is copied back to user space.
 *  - SIOCSIFFLAGS deletes an alias that is being brought down, or calls
 *    dev_change_flags(). SIOCSIFADDR builds (or reuses) an ifaddr with a
 *    classful prefix (/32 for point-to-point devices) and inserts it via
 *    inet_set_ifa(). SIOCSIFBRDADDR, SIOCSIFDSTADDR and SIOCSIFNETMASK
 *    delete the address, modify it and re-insert it; the netmask case
 *    recomputes the broadcast address only if it matched the old mask.
 *
 * NOTE(review): many original lines are skipped in this listing (see the
 * numbering gaps), including declarations, locking, most error returns and
 * "break" statements. Consult the complete source before changing this
 * function.
 */
884 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
887 struct sockaddr_in sin_orig;
888 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
889 struct in_device *in_dev;
890 struct in_ifaddr **ifap = NULL;
891 struct in_ifaddr *ifa = NULL;
892 struct net_device *dev;
895 int tryaddrmatch = 0;
898 * Fetch the caller's info block into kernel space
901 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
903 ifr.ifr_name[IFNAMSIZ - 1] = 0;
905 /* save original address for comparison */
906 memcpy(&sin_orig, sin, sizeof(*sin));
908 colon = strchr(ifr.ifr_name, ':');
912 dev_load(net, ifr.ifr_name);
915 case SIOCGIFADDR: /* Get interface address */
916 case SIOCGIFBRDADDR: /* Get the broadcast address */
917 case SIOCGIFDSTADDR: /* Get the destination address */
918 case SIOCGIFNETMASK: /* Get the netmask for the interface */
919 /* Note that these ioctls will not sleep,
920 so that we do not impose a lock.
921 One day we will be forced to put shlock here (I mean SMP)
923 tryaddrmatch = (sin_orig.sin_family == AF_INET);
924 memset(sin, 0, sizeof(*sin));
925 sin->sin_family = AF_INET;
930 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
933 case SIOCSIFADDR: /* Set interface address (and family) */
934 case SIOCSIFBRDADDR: /* Set the broadcast address */
935 case SIOCSIFDSTADDR: /* Set the destination address */
936 case SIOCSIFNETMASK: /* Set the netmask for the interface */
938 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
941 if (sin->sin_family != AF_INET)
952 dev = __dev_get_by_name(net, ifr.ifr_name);
959 in_dev = __in_dev_get_rtnl(dev);
962 /* Matthias Andree */
963 /* compare label and address (4.4BSD style) */
964 /* note: we only do this for a limited set of ioctls
965 and only if the original address family was AF_INET.
966 This is checked above. */
967 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
968 ifap = &ifa->ifa_next) {
969 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
970 sin_orig.sin_addr.s_addr ==
976 /* we didn't get a match, maybe the application is
977 4.3BSD-style and passed in junk so we fall back to
978 comparing just the label */
980 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
981 ifap = &ifa->ifa_next)
982 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
987 ret = -EADDRNOTAVAIL;
988 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
992 case SIOCGIFADDR: /* Get interface address */
993 sin->sin_addr.s_addr = ifa->ifa_local;
996 case SIOCGIFBRDADDR: /* Get the broadcast address */
997 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1000 case SIOCGIFDSTADDR: /* Get the destination address */
1001 sin->sin_addr.s_addr = ifa->ifa_address;
1004 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1005 sin->sin_addr.s_addr = ifa->ifa_mask;
1010 ret = -EADDRNOTAVAIL;
1014 if (!(ifr.ifr_flags & IFF_UP))
1015 inet_del_ifa(in_dev, ifap, 1);
1018 ret = dev_change_flags(dev, ifr.ifr_flags);
1021 case SIOCSIFADDR: /* Set interface address (and family) */
1023 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1028 ifa = inet_alloc_ifa();
1031 INIT_HLIST_NODE(&ifa->hash);
1033 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1035 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1038 if (ifa->ifa_local == sin->sin_addr.s_addr)
1040 inet_del_ifa(in_dev, ifap, 0);
1041 ifa->ifa_broadcast = 0;
1045 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1047 if (!(dev->flags & IFF_POINTOPOINT)) {
1048 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1049 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1050 if ((dev->flags & IFF_BROADCAST) &&
1051 ifa->ifa_prefixlen < 31)
1052 ifa->ifa_broadcast = ifa->ifa_address |
1055 ifa->ifa_prefixlen = 32;
1056 ifa->ifa_mask = inet_make_mask(32);
1058 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1059 ret = inet_set_ifa(dev, ifa);
1062 case SIOCSIFBRDADDR: /* Set the broadcast address */
1064 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1065 inet_del_ifa(in_dev, ifap, 0);
1066 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1067 inet_insert_ifa(ifa);
1071 case SIOCSIFDSTADDR: /* Set the destination address */
1073 if (ifa->ifa_address == sin->sin_addr.s_addr)
1076 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1079 inet_del_ifa(in_dev, ifap, 0);
1080 ifa->ifa_address = sin->sin_addr.s_addr;
1081 inet_insert_ifa(ifa);
1084 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1087 * The mask we set must be legal.
1090 if (bad_mask(sin->sin_addr.s_addr, 0))
1093 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1094 __be32 old_mask = ifa->ifa_mask;
1095 inet_del_ifa(in_dev, ifap, 0);
1096 ifa->ifa_mask = sin->sin_addr.s_addr;
1097 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1099 /* See if current broadcast address matches
1100 * with current netmask, then recalculate
1101 * the broadcast address. Otherwise it's a
1102 * funny address, so don't touch it since
1103 * the user seems to know what (s)he's doing...
1105 if ((dev->flags & IFF_BROADCAST) &&
1106 (ifa->ifa_prefixlen < 31) &&
1107 (ifa->ifa_broadcast ==
1108 (ifa->ifa_local|~old_mask))) {
1109 ifa->ifa_broadcast = (ifa->ifa_local |
1110 ~sin->sin_addr.s_addr);
1112 inet_insert_ifa(ifa);
1122 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Emit one struct ifreq per IPv4 address of @dev into the user buffer @buf
 * of @len bytes.
 *
 * Visible outline: one branch only adds sizeof(ifr) to "done" (a pure size
 * count), while the copying path stops once fewer than sizeof(ifr) bytes
 * remain. Each record is zeroed, filled with the address label and an
 * AF_INET sockaddr, and copied out with copy_to_user(); buf, len and done
 * then advance by one record.
 * NOTE(review): the in_device NULL check, the condition selecting the
 * size-only branch, the copied address value and the return statement are
 * not visible in this listing.
 */
1126 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1128 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1129 struct in_ifaddr *ifa;
1136 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1138 done += sizeof(ifr);
1141 if (len < (int) sizeof(ifr))
1143 memset(&ifr, 0, sizeof(struct ifreq));
1144 strcpy(ifr.ifr_name, ifa->ifa_label);
1146 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1147 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1150 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1154 buf += sizeof(struct ifreq);
1155 len -= sizeof(struct ifreq);
1156 done += sizeof(struct ifreq);
/*
 * Choose a local source address on @dev for destination @dst, limited to
 * addresses whose scope value does not exceed @scope (exported).
 *
 * Visible outline: primary addresses of @dev with a larger scope value are
 * filtered out; an address is taken when @dst is zero or lies in its subnet,
 * and ifa_local is also recorded on a second, non-matching path. A later
 * pass walks every device of the namespace and takes a primary address that
 * is not link scope and whose scope value is at most @scope.
 * NOTE(review): locking, loop exits, the conditions between the two passes
 * and the return statement are not visible in this listing.
 */
1162 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1165 struct in_device *in_dev;
1166 struct net *net = dev_net(dev);
1169 in_dev = __in_dev_get_rcu(dev);
1173 for_primary_ifa(in_dev) {
1174 if (ifa->ifa_scope > scope)
1176 if (!dst || inet_ifa_match(dst, ifa)) {
1177 addr = ifa->ifa_local;
1181 addr = ifa->ifa_local;
1182 } endfor_ifa(in_dev);
1188 /* Not loopback addresses on loopback should be preferred
1189 in this case. It is important that lo is the first interface
1192 for_each_netdev_rcu(net, dev) {
1193 in_dev = __in_dev_get_rcu(dev);
1197 for_primary_ifa(in_dev) {
1198 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1199 ifa->ifa_scope <= scope) {
1200 addr = ifa->ifa_local;
1203 } endfor_ifa(in_dev);
1209 EXPORT_SYMBOL(inet_select_addr);
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Visible outline: while walking the addresses of @in_dev, a candidate
 * "addr" is taken from an ifaddr whose local address equals @local (or
 * @local is zero) and whose scope value is at most @scope. "same" records
 * whether @local and @dst (each treated as a wildcard when zero) fall in one
 * ifaddr's subnet. Later iterations may replace the candidate when it is not
 * in the current ifaddr's subnet and that ifaddr's scope is acceptable.
 *
 * Returns the candidate when "same" is set, otherwise 0.
 * NOTE(review): declarations, the loop header and several conditions are not
 * visible in this listing.
 */
1211 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1212 __be32 local, int scope)
1219 (local == ifa->ifa_local || !local) &&
1220 ifa->ifa_scope <= scope) {
1221 addr = ifa->ifa_local;
1226 same = (!local || inet_ifa_match(local, ifa)) &&
1227 (!dst || inet_ifa_match(dst, ifa));
1231 /* Is the selected addr into dst subnet? */
1232 if (inet_ifa_match(addr, ifa))
1234 /* No, then can we use new local src? */
1235 if (ifa->ifa_scope <= scope) {
1236 addr = ifa->ifa_local;
1239 /* search for large dst subnet for addr */
1243 } endfor_ifa(in_dev);
1245 return same ? addr : 0;
/*
 * Exported entry point for confirming a local address with wildcards (see
 * the parameter list in the original comment lines below).
 *
 * Visible outline: one path delegates directly to confirm_addr_indev() for
 * the given @in_dev; the other walks every device in @net under the RCU
 * iterator and calls confirm_addr_indev() on each device's in_device.
 * NOTE(review): the test choosing between the two paths, locking, loop exit
 * and the final return are not visible in this listing.
 */
1249 * Confirm that local IP address exists using wildcards:
1250 * - net: netns to check, cannot be NULL
1251 * - in_dev: only on this interface, NULL=any interface
1252 * - dst: only in the same subnet as dst, 0=any dst
1253 * - local: address, 0=autoselect the local address
1254 * - scope: maximum allowed scope value for the local address
1256 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1257 __be32 dst, __be32 local, int scope)
1260 struct net_device *dev;
1263 return confirm_addr_indev(in_dev, dst, local, scope);
1266 for_each_netdev_rcu(net, dev) {
1267 in_dev = __in_dev_get_rcu(dev);
1269 addr = confirm_addr_indev(in_dev, dst, local, scope);
1278 EXPORT_SYMBOL(inet_confirm_addr);
1284 int register_inetaddr_notifier(struct notifier_block *nb)
1286 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1288 EXPORT_SYMBOL(register_inetaddr_notifier);
1290 int unregister_inetaddr_notifier(struct notifier_block *nb)
1292 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1294 EXPORT_SYMBOL(unregister_inetaddr_notifier);
/*
 * Relabel every address of @in_dev after the device was renamed.
 *
 * Visible outline: for each address the old label is saved, the label is
 * overwritten with dev->name, and a ':' alias suffix is looked up in the old
 * label; one branch synthesises a ":<named>" suffix with sprintf(). The
 * suffix is appended with strcat() when it fits within IFNAMSIZ, otherwise
 * it is copied over the tail of the label. RTM_NEWADDR is then sent for the
 * address.
 * NOTE(review): the declaration and update of "named", the branch
 * conditions around the suffix handling and any "skip" label are not visible
 * in this listing.
 */
1296 /* Rename ifa_labels for a device name change. Make some effort to preserve
1297 * existing alias numbering and to create unique labels if possible.
1299 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1301 struct in_ifaddr *ifa;
1304 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1305 char old[IFNAMSIZ], *dot;
1307 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1308 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1311 dot = strchr(old, ':');
1313 sprintf(old, ":%d", named);
1316 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1317 strcat(ifa->ifa_label, dot);
1319 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1321 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/*
 * Predicate on an MTU value; inetdev_event() uses it to decide whether IPv4
 * stays enabled on a device after an MTU change.
 * NOTE(review): the body (and therefore the threshold) is not visible in
 * this listing.
 */
1325 static bool inetdev_valid_mtu(unsigned int mtu)
/*
 * Send one ARP request per address of @in_dev in which both the source and
 * the target IP are the address itself (a gratuitous ARP), using the
 * device's own hardware address as source.
 */
1330 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1331 struct in_device *in_dev)
1334 struct in_ifaddr *ifa;
1336 for (ifa = in_dev->ifa_list; ifa;
1337 ifa = ifa->ifa_next) {
1338 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1339 ifa->ifa_local, dev,
1340 ifa->ifa_local, NULL,
1341 dev->dev_addr, NULL);
/*
 * Netdevice notifier callback that keeps IPv4 per-device state in step with
 * device events.
 *
 * Visible outline:
 *  - On one early path, NETDEV_REGISTER creates the in_device with
 *    inetdev_init() (an error is converted with notifier_from_errno) and
 *    sets NOXFRM / NOPOLICY on loopback devices; NETDEV_CHANGEMTU re-creates
 *    it once the MTU is valid again.
 *  - In the main switch, NETDEV_REGISTER is reported as a bug and clears
 *    dev->ip_ptr. One case checks the MTU and, for loopback devices,
 *    installs INADDR_LOOPBACK with prefix length 8, host scope and infinite
 *    lifetime. NETDEV_CHANGEADDR is gated on IN_DEV_ARP_NOTIFY and
 *    NETDEV_NOTIFY_PEERS sends gratuitous ARPs. The TYPE_CHANGE events unmap
 *    and remap multicast state. NETDEV_CHANGEMTU with an invalid MTU shares
 *    NETDEV_UNREGISTER's inetdev_destroy(). NETDEV_CHANGENAME relabels
 *    addresses and re-registers the sysctl entries.
 *
 * NOTE(review): the remaining signature line, several case labels, "break"
 * statements and the return value are not visible in this listing.
 */
1345 /* Called only under RTNL semaphore */
1347 static int inetdev_event(struct notifier_block *this, unsigned long event,
1350 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1351 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1356 if (event == NETDEV_REGISTER) {
1357 in_dev = inetdev_init(dev);
1359 return notifier_from_errno(PTR_ERR(in_dev));
1360 if (dev->flags & IFF_LOOPBACK) {
1361 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1362 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1364 } else if (event == NETDEV_CHANGEMTU) {
1365 /* Re-enabling IP */
1366 if (inetdev_valid_mtu(dev->mtu))
1367 in_dev = inetdev_init(dev);
1373 case NETDEV_REGISTER:
1374 pr_debug("%s: bug\n", __func__);
1375 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1378 if (!inetdev_valid_mtu(dev->mtu))
1380 if (dev->flags & IFF_LOOPBACK) {
1381 struct in_ifaddr *ifa = inet_alloc_ifa();
1384 INIT_HLIST_NODE(&ifa->hash);
1386 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1387 ifa->ifa_prefixlen = 8;
1388 ifa->ifa_mask = inet_make_mask(8);
1389 in_dev_hold(in_dev);
1390 ifa->ifa_dev = in_dev;
1391 ifa->ifa_scope = RT_SCOPE_HOST;
1392 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1393 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1394 INFINITY_LIFE_TIME);
1395 ipv4_devconf_setall(in_dev);
1396 neigh_parms_data_state_setall(in_dev->arp_parms);
1397 inet_insert_ifa(ifa);
1402 case NETDEV_CHANGEADDR:
1403 if (!IN_DEV_ARP_NOTIFY(in_dev))
1406 case NETDEV_NOTIFY_PEERS:
1407 /* Send gratuitous ARP to notify of link change */
1408 inetdev_send_gratuitous_arp(dev, in_dev);
1413 case NETDEV_PRE_TYPE_CHANGE:
1414 ip_mc_unmap(in_dev);
1416 case NETDEV_POST_TYPE_CHANGE:
1417 ip_mc_remap(in_dev);
1419 case NETDEV_CHANGEMTU:
1420 if (inetdev_valid_mtu(dev->mtu))
1422 /* disable IP when MTU is not enough */
1423 case NETDEV_UNREGISTER:
1424 inetdev_destroy(in_dev);
1426 case NETDEV_CHANGENAME:
1427 /* Do not notify about label change, this event is
1428 * not interesting to applications using netlink.
1430 inetdev_changename(dev, in_dev);
1432 devinet_sysctl_unregister(in_dev);
1433 devinet_sysctl_register(in_dev);
1440 static struct notifier_block ip_netdev_notifier = {
1441 .notifier_call = inetdev_event,
1444 static size_t inet_nlmsg_size(void)
1446 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1447 + nla_total_size(4) /* IFA_ADDRESS */
1448 + nla_total_size(4) /* IFA_LOCAL */
1449 + nla_total_size(4) /* IFA_BROADCAST */
1450 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1451 + nla_total_size(4) /* IFA_FLAGS */
1452 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1455 static inline u32 cstamp_delta(unsigned long cstamp)
1457 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1460 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1461 unsigned long tstamp, u32 preferred, u32 valid)
1463 struct ifa_cacheinfo ci;
1465 ci.cstamp = cstamp_delta(cstamp);
1466 ci.tstamp = cstamp_delta(tstamp);
1467 ci.ifa_prefered = preferred;
1468 ci.ifa_valid = valid;
1470 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/*
 * Serialise @ifa into @skb as one netlink address message.
 *
 * Visible outline: a message header of type @event is started with
 * nlmsg_put(); the ifaddrmsg is filled with AF_INET, prefix length, flags,
 * scope and the owning device's ifindex. For non-permanent addresses the
 * preferred and valid lifetimes are taken from the ifaddr and, when finite,
 * adjusted by the seconds elapsed since ifa_tstamp; another branch reports
 * both as INFINITY_LIFE_TIME. The address, local address, broadcast address
 * and label are emitted when non-zero / non-empty, followed by IFA_FLAGS and
 * the cacheinfo. Any failing put jumps to nla_put_failure, where the message
 * is cancelled; on success the message is finalised with nlmsg_end().
 *
 * NOTE(review): the lifetime arithmetic, the nlmsg_put() failure check and
 * the return statements are not fully visible in this listing.
 */
1473 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1474 u32 portid, u32 seq, int event, unsigned int flags)
1476 struct ifaddrmsg *ifm;
1477 struct nlmsghdr *nlh;
1478 u32 preferred, valid;
1480 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1484 ifm = nlmsg_data(nlh);
1485 ifm->ifa_family = AF_INET;
1486 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1487 ifm->ifa_flags = ifa->ifa_flags;
1488 ifm->ifa_scope = ifa->ifa_scope;
1489 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1491 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1492 preferred = ifa->ifa_preferred_lft;
1493 valid = ifa->ifa_valid_lft;
1494 if (preferred != INFINITY_LIFE_TIME) {
1495 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1497 if (preferred > tval)
1501 if (valid != INFINITY_LIFE_TIME) {
1509 preferred = INFINITY_LIFE_TIME;
1510 valid = INFINITY_LIFE_TIME;
1512 if ((ifa->ifa_address &&
1513 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1515 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1516 (ifa->ifa_broadcast &&
1517 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1518 (ifa->ifa_label[0] &&
1519 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1520 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1521 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1523 goto nla_put_failure;
1525 nlmsg_end(skb, nlh);
1529 nlmsg_cancel(skb, nlh);
/*
 * Netlink dump callback for IPv4 addresses (registered for RTM_GETADDR
 * in devinet_init()).
 *
 * Walks the per-namespace device index hash buckets and, for every
 * device that has an in_device, emits one RTM_NEWADDR/NLM_F_MULTI
 * message per entry on its ifa_list.  The resume position is read from
 * cb->args[] on entry, and cb->args[2] is written back with the
 * per-device address index.
 *
 * NOTE(review): the continue/goto/unlock lines and the handling of the
 * other cb->args[] slots are not visible in this excerpt.
 */
1533 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1535 struct net *net = sock_net(skb->sk);
1538 int ip_idx, s_ip_idx;
1539 struct net_device *dev;
1540 struct in_device *in_dev;
1541 struct in_ifaddr *ifa;
1542 struct hlist_head *head;
1545 s_idx = idx = cb->args[1];
1546 s_ip_idx = ip_idx = cb->args[2];
/* s_idx is reset to 0 after the first bucket has been processed. */
1548 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1550 head = &net->dev_index_head[h];
1552 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1554 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1557 if (h > s_h || idx > s_idx)
1559 in_dev = __in_dev_get_rcu(dev);
1563 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1564 ifa = ifa->ifa_next, ip_idx++) {
1565 if (ip_idx < s_ip_idx)
1567 if (inet_fill_ifaddr(skb, ifa,
1568 NETLINK_CB(cb->skb).portid,
1570 RTM_NEWADDR, NLM_F_MULTI) < 0) {
1574 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1585 cb->args[2] = ip_idx;
/*
 * Send an address notification of type 'event' for 'ifa' to the
 * RTNLGRP_IPV4_IFADDR multicast group.
 *
 * The skb is sized with inet_nlmsg_size() and filled with
 * inet_fill_ifaddr(); the sequence number is taken from nlh when one is
 * supplied, otherwise 0.  rtnl_set_sk_err() passes err to the group.
 *
 * NOTE(review): the remaining parameters, the error checks and the
 * labels are not visible in this excerpt, so which paths lead to
 * rtnl_set_sk_err() cannot be confirmed from here.
 */
1590 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1593 struct sk_buff *skb;
1594 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1598 net = dev_net(ifa->ifa_dev->dev);
1599 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1603 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1605 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1606 WARN_ON(err == -EMSGSIZE);
1610 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1614 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * rtnl_af_ops.get_link_af_size hook (see inet_af_ops): returns the
 * attribute space needed for IFLA_INET_CONF, i.e. IPV4_DEVCONF_MAX
 * 4-byte values.  NOTE(review): the check on in_dev between these
 * lines is not visible in this excerpt.
 */
1617 static size_t inet_get_link_af_size(const struct net_device *dev)
1619 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1624 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * rtnl_af_ops.fill_link_af hook (see inet_af_ops): reserves an
 * IFLA_INET_CONF attribute of IPV4_DEVCONF_MAX * 4 bytes in skb and
 * copies the device's cnf.data[] entries into it as u32 values.
 * NOTE(review): NULL checks and return statements are not visible here.
 */
1627 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1629 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1636 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1640 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1641 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF
 * must be a nested attribute.
 */
1646 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1647 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * rtnl_af_ops.validate_link_af hook (see inet_af_ops).
 *
 * Returns -EAFNOSUPPORT when a device is given but has no in_device.
 * Otherwise parses 'nla' against inet_af_policy and walks the nested
 * IFLA_INET_CONF entries, using each entry's attribute type as a
 * devconf id and range-checking it against 1..IPV4_DEVCONF_MAX.
 *
 * NOTE(review): the statements executed when a check fails, and the
 * final return, are not visible in this excerpt.
 */
1650 static int inet_validate_link_af(const struct net_device *dev,
1651 const struct nlattr *nla)
1653 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1656 if (dev && !__in_dev_get_rtnl(dev))
1657 return -EAFNOSUPPORT;
1659 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1663 if (tb[IFLA_INET_CONF]) {
1664 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1665 int cfgid = nla_type(a);
1670 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * rtnl_af_ops.set_link_af hook (see inet_af_ops).
 *
 * Re-parses 'nla' without a policy and applies every nested
 * IFLA_INET_CONF entry with ipv4_devconf_set(), using the attribute
 * type as the devconf id and its u32 payload as the value.  Returns
 * -EAFNOSUPPORT on a condition that is not visible in this excerpt
 * (presumably a missing in_device - TODO confirm).
 */
1678 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1680 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1681 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1685 return -EAFNOSUPPORT;
1687 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1690 if (tb[IFLA_INET_CONF]) {
1691 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1692 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Size estimate for a netconf message carrying attribute 'type'.
 *
 * Always accounts for the aligned netconfmsg header and
 * NETCONFA_IFINDEX, then adds one 4-byte attribute for the requested
 * type, or for all four known types when type is -1.  The final return
 * statement is not visible in this excerpt.
 */
1698 static int inet_netconf_msgsize_devconf(int type)
1700 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1701 + nla_total_size(4); /* NETCONFA_IFINDEX */
1703 /* type -1 is used for ALL */
1704 if (type == -1 || type == NETCONFA_FORWARDING)
1705 size += nla_total_size(4);
1706 if (type == -1 || type == NETCONFA_RP_FILTER)
1707 size += nla_total_size(4);
1708 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1709 size += nla_total_size(4);
1710 if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1711 size += nla_total_size(4);
/*
 * Build one netconf message of type 'event' in skb for the
 * configuration block 'devconf' identified by 'ifindex'.
 *
 * NETCONFA_IFINDEX is always put.  Each of FORWARDING, RP_FILTER,
 * MC_FORWARDING and PROXY_NEIGH is put when 'type' selects it or when
 * type is -1; NETCONFA_PROXY_NEIGH carries the PROXY_ARP devconf value.
 * A failed put jumps to nla_put_failure.
 *
 * NOTE(review): the last parameter declaration, the nlmsg_put() result
 * check, the return statements and the label are not visible here.
 */
1716 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1717 struct ipv4_devconf *devconf, u32 portid,
1718 u32 seq, int event, unsigned int flags,
1721 struct nlmsghdr *nlh;
1722 struct netconfmsg *ncm;
1724 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1729 ncm = nlmsg_data(nlh);
1730 ncm->ncm_family = AF_INET;
1732 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1733 goto nla_put_failure;
1735 /* type -1 is used for ALL */
1736 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1737 nla_put_s32(skb, NETCONFA_FORWARDING,
1738 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1739 goto nla_put_failure;
1740 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1741 nla_put_s32(skb, NETCONFA_RP_FILTER,
1742 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1743 goto nla_put_failure;
1744 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1745 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1746 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1747 goto nla_put_failure;
1748 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1749 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1750 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1751 goto nla_put_failure;
1753 nlmsg_end(skb, nlh);
/* Presumably reached via the nla_put_failure label (label not visible). */
1757 nlmsg_cancel(skb, nlh);
/*
 * Send a RTM_NEWNETCONF notification for attribute 'type' of 'devconf'
 * (identified by 'ifindex') to the RTNLGRP_IPV4_NETCONF group of 'net'.
 *
 * The skb is allocated with GFP_ATOMIC and sized with
 * inet_netconf_msgsize_devconf(type); the message is built with portid
 * 0 and sequence number 0.  rtnl_set_sk_err() passes err to the group.
 *
 * NOTE(review): the error checks and labels are not visible in this
 * excerpt, so which paths lead to rtnl_set_sk_err() cannot be confirmed.
 */
1761 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1762 struct ipv4_devconf *devconf)
1764 struct sk_buff *skb;
1767 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1771 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1772 RTM_NEWNETCONF, 0, type);
1774 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1775 WARN_ON(err == -EMSGSIZE);
1779 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1783 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Attribute policy passed to nlmsg_parse() in
 * inet_netconf_get_devconf(): each listed attribute is declared with a
 * length of sizeof(int).
 */
1786 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1787 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1788 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1789 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1790 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
/*
 * RTM_GETNETCONF request handler (registered in devinet_init()).
 *
 * Parses the request against devconf_ipv4_policy, reads the s32
 * NETCONFA_IFINDEX attribute and picks the configuration block:
 * the namespace's devconf_all, its devconf_dflt, or the cnf of the
 * device found by index.  The reply is sized for all attributes
 * (type -1), built as RTM_NEWNETCONF and unicast back to the
 * requester's portid.
 *
 * NOTE(review): the switch statement, the error checks/returns and the
 * last argument of the fill call are not visible in this excerpt.
 */
1793 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1794 struct nlmsghdr *nlh)
1796 struct net *net = sock_net(in_skb->sk);
1797 struct nlattr *tb[NETCONFA_MAX+1];
1798 struct netconfmsg *ncm;
1799 struct sk_buff *skb;
1800 struct ipv4_devconf *devconf;
1801 struct in_device *in_dev;
1802 struct net_device *dev;
1806 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1807 devconf_ipv4_policy);
1812 if (!tb[NETCONFA_IFINDEX])
1815 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1817 case NETCONFA_IFINDEX_ALL:
1818 devconf = net->ipv4.devconf_all;
1820 case NETCONFA_IFINDEX_DEFAULT:
1821 devconf = net->ipv4.devconf_dflt;
1824 dev = __dev_get_by_index(net, ifindex);
1827 in_dev = __in_dev_get_rtnl(dev);
1830 devconf = &in_dev->cnf;
1835 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1839 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1840 NETLINK_CB(in_skb).portid,
1841 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1844 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1845 WARN_ON(err == -EMSGSIZE);
1849 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * RTM_GETNETCONF dump callback (registered in devinet_init()).
 *
 * Walks the per-namespace device index hash buckets and emits one
 * netconf message per device that has an in_device.  Two additional
 * pseudo-steps follow the buckets: h == NETDEV_HASHENTRIES emits the
 * NETCONFA_IFINDEX_ALL block and h == NETDEV_HASHENTRIES + 1 emits the
 * NETCONFA_IFINDEX_DEFAULT block.
 *
 * NOTE(review): the resume bookkeeping, the continue/goto lines and
 * several call arguments are not visible in this excerpt.
 */
1854 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1855 struct netlink_callback *cb)
1857 struct net *net = sock_net(skb->sk);
1860 struct net_device *dev;
1861 struct in_device *in_dev;
1862 struct hlist_head *head;
1865 s_idx = idx = cb->args[1];
/* s_idx is reset to 0 after the first bucket has been processed. */
1867 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1869 head = &net->dev_index_head[h];
1871 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1873 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1876 in_dev = __in_dev_get_rcu(dev);
1880 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1882 NETLINK_CB(cb->skb).portid,
1890 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1896 if (h == NETDEV_HASHENTRIES) {
1897 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1898 net->ipv4.devconf_all,
1899 NETLINK_CB(cb->skb).portid,
1901 RTM_NEWNETCONF, NLM_F_MULTI,
1907 if (h == NETDEV_HASHENTRIES + 1) {
1908 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1909 net->ipv4.devconf_dflt,
1910 NETLINK_CB(cb->skb).portid,
1912 RTM_NEWNETCONF, NLM_F_MULTI,
1925 #ifdef CONFIG_SYSCTL
/*
 * Copy entry 'i' of the namespace's default devconf into every device
 * whose cnf.state bit 'i' is clear.  devinet_conf_proc() sets that bit
 * with set_bit(), so devices with the bit set keep their own value.
 * Devices without an in_device are skipped.
 */
1927 static void devinet_copy_dflt_conf(struct net *net, int i)
1929 struct net_device *dev;
1932 for_each_netdev_rcu(net, dev) {
1933 struct in_device *in_dev;
1935 in_dev = __in_dev_get_rcu(dev);
1936 if (in_dev && !test_bit(i, in_dev->cnf.state))
1937 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Propagate the namespace-wide "all" FORWARDING value.
 *
 * Sets "all" ACCEPT_REDIRECTS to the inverse of the forwarding value
 * and copies the forwarding value into the default block, sending a
 * NETCONFA_FORWARDING notification for both blocks.  Then, for every
 * device, writes the value into the device's FORWARDING setting and
 * sends a per-device notification.
 *
 * NOTE(review): the conditions guarding dev_disable_lro() and the
 * per-device update are not visible in this excerpt.
 */
1942 /* called with RTNL locked */
1943 static void inet_forward_change(struct net *net)
1945 struct net_device *dev;
1946 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1948 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1949 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1950 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1951 NETCONFA_IFINDEX_ALL,
1952 net->ipv4.devconf_all);
1953 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1954 NETCONFA_IFINDEX_DEFAULT,
1955 net->ipv4.devconf_dflt);
1957 for_each_netdev(net, dev) {
1958 struct in_device *in_dev;
1960 dev_disable_lro(dev);
1962 in_dev = __in_dev_get_rcu(dev);
1964 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1965 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1966 dev->ifindex, &in_dev->cnf);
/*
 * Map a devconf block to the ifindex used in netconf messages:
 * NETCONFA_IFINDEX_DEFAULT for the namespace default block,
 * NETCONFA_IFINDEX_ALL for the "all" block, otherwise the ifindex of
 * the device whose in_device embeds 'cnf' (found via container_of).
 */
1972 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1974 if (cnf == net->ipv4.devconf_dflt)
1975 return NETCONFA_IFINDEX_DEFAULT;
1976 else if (cnf == net->ipv4.devconf_all)
1977 return NETCONFA_IFINDEX_ALL;
1979 struct in_device *idev
1980 = container_of(cnf, struct in_device, cnf);
1981 return idev->dev->ifindex;
/*
 * Default sysctl handler for devconf entries (used by the RW and RO
 * entry macros).
 *
 * Runs proc_dointvec() and captures the value before and after it.
 * The table entry's extra1 is the owning ipv4_devconf and extra2 the
 * network namespace; 'i' is the entry's index into cnf->data[].
 *
 * NOTE(review): the condition that guards the post-processing below
 * (presumably "on write" - TODO confirm), the notification arguments
 * and the return statement are not visible in this excerpt.
 */
1985 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1986 void __user *buffer,
1987 size_t *lenp, loff_t *ppos)
1989 int old_value = *(int *)ctl->data;
1990 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1991 int new_value = *(int *)ctl->data;
1994 struct ipv4_devconf *cnf = ctl->extra1;
1995 struct net *net = ctl->extra2;
1996 int i = (int *)ctl->data - cnf->data;
/* Mark entry i in this block's state bitmap. */
1999 set_bit(i, cnf->state);
/* A change to the default block is copied to devices (see callee). */
2001 if (cnf == net->ipv4.devconf_dflt)
2002 devinet_copy_dflt_conf(net, i);
/* Flush the route cache when accept_local/route_localnet goes to 0. */
2003 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2004 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2005 if ((new_value == 0) && (old_value != 0))
2006 rt_cache_flush(net);
/* rp_filter and proxy_arp changes are announced over netconf. */
2008 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2009 new_value != old_value) {
2010 ifindex = devinet_conf_ifindex(net, cnf);
2011 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2014 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2015 new_value != old_value) {
2016 ifindex = devinet_conf_ifindex(net, cnf);
2017 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
/*
 * Sysctl handler for the "forwarding" entries and for ip_forward.
 *
 * After proc_dointvec(), a write that changed the value is handled as
 * follows.  For any entry other than the default block's FORWARDING
 * slot, RTNL is taken with rtnl_trylock(); if that fails the syscall
 * is restarted.  The "all" slot goes through inet_forward_change();
 * a per-device slot disables LRO on that device and sends a netconf
 * notification.  The route cache is flushed.  The default block's slot
 * only sends a NETCONFA_IFINDEX_DEFAULT notification.
 *
 * NOTE(review): the declaration of 'val', the value-restore lines, the
 * else branches, the unlock and the return are not visible in this
 * excerpt; the branch assignment above is inferred from the visible
 * conditions - TODO confirm.
 */
2025 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2026 void __user *buffer,
2027 size_t *lenp, loff_t *ppos)
2029 int *valp = ctl->data;
2032 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2034 if (write && *valp != val) {
2035 struct net *net = ctl->extra2;
2037 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2038 if (!rtnl_trylock()) {
2039 /* Restore the original values before restarting */
2042 return restart_syscall();
2044 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2045 inet_forward_change(net);
2047 struct ipv4_devconf *cnf = ctl->extra1;
2048 struct in_device *idev =
2049 container_of(cnf, struct in_device, cnf);
2051 dev_disable_lro(idev->dev);
2052 inet_netconf_notify_devconf(net,
2053 NETCONFA_FORWARDING,
2058 rt_cache_flush(net);
2060 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2061 NETCONFA_IFINDEX_DEFAULT,
2062 net->ipv4.devconf_dflt);
/*
 * Sysctl handler used by DEVINET_SYSCTL_FLUSHING_ENTRY: runs
 * proc_dointvec() and flushes the route cache of the namespace stored
 * in extra2 when a write changed the value.
 * NOTE(review): the declaration of 'val' and the return statement are
 * not visible in this excerpt.
 */
2068 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2069 void __user *buffer,
2070 size_t *lenp, loff_t *ppos)
2072 int *valp = ctl->data;
2074 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2075 struct net *net = ctl->extra2;
2077 if (write && *valp != val)
2078 rt_cache_flush(net);
/*
 * Helpers for building the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY builds one ctl_table entry whose data pointer
 * is the slot IPV4_DEVCONF_<attr> - 1 inside the global ipv4_devconf,
 * with the given handler and extra1 pointing at ipv4_devconf.  The
 * wrappers fix the mode and handler: RW (0644) and RO (0444) use
 * devinet_conf_proc, COMPLEX takes a caller-supplied handler at 0644,
 * and FLUSHING is COMPLEX with ipv4_doint_and_flush.
 * NOTE(review): some lines of the base macro are not visible here.
 */
2083 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2086 .data = ipv4_devconf.data + \
2087 IPV4_DEVCONF_ ## attr - 1, \
2088 .maxlen = sizeof(int), \
2090 .proc_handler = proc, \
2091 .extra1 = &ipv4_devconf, \
2094 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2095 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2097 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2098 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2100 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2101 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2103 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2104 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template sysctl table for one devconf block.  It is never registered
 * directly: __devinet_sysctl_register() duplicates it with kmemdup()
 * and repoints each entry at the target ipv4_devconf.  "forwarding"
 * uses devinet_sysctl_forward, "mc_forwarding" is read-only, and the
 * FLUSHING entries flush the route cache when changed.
 */
2106 static struct devinet_sysctl_table {
2107 struct ctl_table_header *sysctl_header;
2108 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2109 } devinet_sysctl = {
2111 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2112 devinet_sysctl_forward),
2113 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2115 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2116 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2117 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2118 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2119 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2120 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2121 "accept_source_route"),
2122 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2123 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2124 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2125 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2126 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2127 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2128 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2129 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2130 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2131 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2132 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2133 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2134 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2135 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2136 "force_igmp_version"),
2137 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2138 "igmpv2_unsolicited_report_interval"),
2139 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2140 "igmpv3_unsolicited_report_interval"),
2142 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2143 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2144 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2145 "promote_secondaries"),
2146 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the sysctl directory "net/ipv4/conf/<dev_name>" for the
 * devconf block 'p' in namespace 'net'.
 *
 * Duplicates the devinet_sysctl template, then for each entry shifts
 * the data pointer by the byte offset between 'p' and the global
 * ipv4_devconf (so it addresses the same slot inside 'p') and stores
 * 'p' in extra1 and 'net' in extra2.
 *
 * NOTE(review): allocation/registration failure handling, the store
 * of the table and the return statements are not visible in this
 * excerpt.
 */
2151 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2152 struct ipv4_devconf *p)
2155 struct devinet_sysctl_table *t;
2156 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2158 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
/* Last array slot is skipped - presumably the table terminator. */
2162 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2163 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2164 t->devinet_vars[i].extra1 = p;
2165 t->devinet_vars[i].extra2 = net;
2168 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2170 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2171 if (!t->sysctl_header)
/*
 * Unregister the sysctl table attached to 'cnf' (taken from
 * cnf->sysctl).  NOTE(review): the NULL check and any cleanup of the
 * table itself are not visible in this excerpt.
 */
2183 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2185 struct devinet_sysctl_table *t = cnf->sysctl;
2191 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries for one in_device: first the neighbour
 * (ARP) parameters, then the devconf directory named after the device.
 * The device name is first checked with sysctl_dev_name_is_allowed().
 * NOTE(review): error checks and returns are not visible here; the
 * neigh_sysctl_unregister() call presumably undoes the first step when
 * the second one fails - TODO confirm.
 */
2195 static int devinet_sysctl_register(struct in_device *idev)
2199 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2202 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2205 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2208 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Remove the sysctl entries of one in_device: the devconf table first,
 * then the neighbour (ARP) parameter entries.
 */
2212 static void devinet_sysctl_unregister(struct in_device *idev)
2214 __devinet_sysctl_unregister(&idev->cnf);
2215 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Sysctl table holding the "ip_forward" entry.  It points at the
 * FORWARDING slot of the global ipv4_devconf, uses
 * devinet_sysctl_forward as handler and is bound to init_net.
 * devinet_init_net() registers it under "net/ipv4", using a patched
 * copy for namespaces other than init_net.
 */
2218 static struct ctl_table ctl_forward_entry[] = {
2220 .procname = "ip_forward",
2221 .data = &ipv4_devconf.data[
2222 IPV4_DEVCONF_FORWARDING - 1],
2223 .maxlen = sizeof(int),
2225 .proc_handler = devinet_sysctl_forward,
2226 .extra1 = &ipv4_devconf,
2227 .extra2 = &init_net,
/*
 * Per-namespace init (devinet_ops.init).
 *
 * init_net uses the global ipv4_devconf / ipv4_devconf_dflt and the
 * static ctl_forward_entry directly.  Any other namespace gets
 * kmemdup() copies of all three, with the ip_forward entry repointed
 * at the copied "all" block and at the namespace.  With CONFIG_SYSCTL
 * the "all" and "default" conf directories and the "net/ipv4"
 * ip_forward table are registered; finally the blocks are stored in
 * net->ipv4.
 *
 * NOTE(review): allocation checks, labels, kfree() calls, #endif lines
 * and return statements are not visible in this excerpt; the trailing
 * lines appear to be the error-unwind path - TODO confirm.
 */
2233 static __net_init int devinet_init_net(struct net *net)
2236 struct ipv4_devconf *all, *dflt;
2237 #ifdef CONFIG_SYSCTL
2238 struct ctl_table *tbl = ctl_forward_entry;
2239 struct ctl_table_header *forw_hdr;
2243 all = &ipv4_devconf;
2244 dflt = &ipv4_devconf_dflt;
2246 if (!net_eq(net, &init_net)) {
2247 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2251 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2253 goto err_alloc_dflt;
2255 #ifdef CONFIG_SYSCTL
2256 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2260 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2261 tbl[0].extra1 = all;
2262 tbl[0].extra2 = net;
2266 #ifdef CONFIG_SYSCTL
2267 err = __devinet_sysctl_register(net, "all", all);
2271 err = __devinet_sysctl_register(net, "default", dflt);
2276 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2277 if (forw_hdr == NULL)
2279 net->ipv4.forw_hdr = forw_hdr;
2282 net->ipv4.devconf_all = all;
2283 net->ipv4.devconf_dflt = dflt;
2286 #ifdef CONFIG_SYSCTL
2288 __devinet_sysctl_unregister(dflt);
2290 __devinet_sysctl_unregister(all);
/* Only dynamically duplicated objects are candidates for release. */
2292 if (tbl != ctl_forward_entry)
2296 if (dflt != &ipv4_devconf_dflt)
2299 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown (devinet_ops.exit).
 *
 * With CONFIG_SYSCTL: fetches the ip_forward table from the registered
 * header, unregisters that header and then the "default" and "all"
 * conf directories.  The two devconf blocks are freed with kfree().
 * NOTE(review): what happens to 'tbl' afterwards and the #endif line
 * are not visible in this excerpt.
 */
2305 static __net_exit void devinet_exit_net(struct net *net)
2307 #ifdef CONFIG_SYSCTL
2308 struct ctl_table *tbl;
2310 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2311 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2312 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2313 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2316 kfree(net->ipv4.devconf_dflt);
2317 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace operations: devinet_init_net() on creation and
 * devinet_exit_net() on teardown.  Registered from devinet_init() with
 * register_pernet_subsys().
 */
2320 static __net_initdata struct pernet_operations devinet_ops = {
2321 .init = devinet_init_net,
2322 .exit = devinet_exit_net,
/*
 * Address-family link operations wired to the four inet_*_link_af
 * functions above; registered from devinet_init() with
 * rtnl_af_register().  NOTE(review): at least one initializer line is
 * not visible in this excerpt.
 */
2325 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2327 .fill_link_af = inet_fill_link_af,
2328 .get_link_af_size = inet_get_link_af_size,
2329 .validate_link_af = inet_validate_link_af,
2330 .set_link_af = inet_set_link_af,
/*
 * One-time initialisation of IPv4 device support:
 *  - initialise every bucket of the inet_addr_lst hash table;
 *  - register the per-namespace operations, the PF_INET gifconf
 *    handler and the netdevice notifier;
 *  - queue check_lifetime_work with zero delay on the power-efficient
 *    workqueue;
 *  - register the AF link ops and the rtnetlink handlers for
 *    RTM_NEWADDR, RTM_DELADDR, RTM_GETADDR (dump only) and
 *    RTM_GETNETCONF (doit and dump).
 */
2333 void __init devinet_init(void)
2337 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2338 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2340 register_pernet_subsys(&devinet_ops);
2342 register_gifconf(PF_INET, inet_gifconf);
2343 register_netdevice_notifier(&ip_netdev_notifier);
2345 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2347 rtnl_af_register(&inet_af_ops);
2349 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2350 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2351 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2352 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2353 inet_netconf_dump_devconf, NULL);