2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
/*
 * Initial values for the file-scope ipv4_devconf table.  Entries are indexed
 * by the IPV4_DEVCONF_* constant minus one.  Redirect handling (accept, send,
 * secure) and shared-media are switched on, and the unsolicited IGMP report
 * intervals are set to 10000 ms (IGMPv2) and 1000 ms (IGMPv3).
 */
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
/*
 * Initial values for the file-scope ipv4_devconf_dflt table.  Same settings
 * as ipv4_devconf, with ACCEPT_SOURCE_ROUTE additionally switched on.
 * Entries are indexed by the IPV4_DEVCONF_* constant minus one.
 */
81 static struct ipv4_devconf ipv4_devconf_dflt = {
83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
/* Read devconf attribute @attr from @net's default table (net->ipv4.devconf_dflt). */
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr().  The three address attributes are 32-bit values, the
 * label is a string limited to IFNAMSIZ - 1 bytes, and IFA_CACHEINFO is
 * sized by struct ifa_cacheinfo.
 */
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 [IFA_LOCAL] = { .type = NLA_U32 },
98 [IFA_ADDRESS] = { .type = NLA_U32 },
99 [IFA_BROADCAST] = { .type = NLA_U32 },
100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
/*
 * Hash table of configured IPv4 addresses: 1 << IN4_ADDR_HSIZE_SHIFT (256)
 * buckets.  inet_addr_hash_lock is taken around bucket insertion and removal
 * (see inet_hash_insert() and inet_hash_remove()).
 */
104 #define IN4_ADDR_HSIZE_SHIFT 8
105 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 static DEFINE_SPINLOCK(inet_addr_hash_lock);
/*
 * Map (@net, @addr) to a bucket index of inet_addr_lst: the address is XORed
 * with net_hash_mix(net) and the result is reduced to IN4_ADDR_HSIZE_SHIFT
 * bits by hash_32().
 */
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 u32 val = (__force u32) addr ^ net_hash_mix(net);
114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/*
 * Link @ifa into the address hash bucket selected by (@net, ifa->ifa_local).
 * The RCU list insert is done while holding inet_addr_hash_lock.
 */
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 spin_lock(&inet_addr_hash_lock);
122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 spin_unlock(&inet_addr_hash_lock);
/*
 * Unlink @ifa from the address hash table.  The RCU list removal (which also
 * re-initialises the node) is done while holding inet_addr_hash_lock.
 */
126 static void inet_hash_remove(struct in_ifaddr *ifa)
128 spin_lock(&inet_addr_hash_lock);
129 hlist_del_init_rcu(&ifa->hash);
130 spin_unlock(&inet_addr_hash_lock);
134 * __ip_dev_find - find the first device with a given source address.
135 * @net: the net namespace
136 * @addr: the source address
137 * @devref: if true, take a reference on the found device
139 * If a caller uses devref=false, it should be protected by RCU, or RTNL
/*
 * Lookup strategy: the hash bucket for (@net, @addr) is scanned for an
 * entry whose ifa_local equals @addr and whose device belongs to @net.
 * The FIB local table (RT_TABLE_LOCAL) serves as a fallback; a lookup
 * result of type RTN_LOCAL supplies the device through FIB_RES_DEV().
 * NOTE(review): with @devref set and a device found, a reference is
 * presumably taken on it before returning - confirm against the full body.
 */
141 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 u32 hash = inet_addr_hash(net, addr);
144 struct net_device *result = NULL;
145 struct in_ifaddr *ifa;
148 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
149 if (ifa->ifa_local == addr) {
150 struct net_device *dev = ifa->ifa_dev->dev;
152 if (!net_eq(dev_net(dev), net))
159 struct flowi4 fl4 = { .daddr = addr };
160 struct fib_result res = { 0 };
161 struct fib_table *local;
163 /* Fallback to FIB local table so that communication
164 * over loopback subnets work.
166 local = fib_get_table(net, RT_TABLE_LOCAL);
168 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169 res.type == RTN_LOCAL)
170 result = FIB_RES_DEV(res);
172 if (result && devref)
177 EXPORT_SYMBOL(__ip_dev_find);
/*
 * Forward declarations for helpers defined later in this file, plus the
 * notifier chain (inetaddr_chain) that address add/remove events are
 * published on.
 * NOTE(review): devinet_sysctl_register()/devinet_sysctl_unregister() show
 * up both as prototypes and as definitions here; presumably one pair is a
 * stub for builds without sysctl support - confirm the surrounding
 * preprocessor conditions.
 */
179 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
182 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
185 static void devinet_sysctl_register(struct in_device *idev);
186 static void devinet_sysctl_unregister(struct in_device *idev);
188 static void devinet_sysctl_register(struct in_device *idev)
191 static void devinet_sysctl_unregister(struct in_device *idev)
196 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled struct in_ifaddr with GFP_KERNEL and return
 * whatever kzalloc() returned.
 */
198 static struct in_ifaddr *inet_alloc_ifa(void)
200 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * RCU callback queued by inet_free_ifa(): recovers the in_ifaddr that embeds
 * @head and drops its reference on the owning in_device.
 * NOTE(review): presumably the in_ifaddr itself is freed here as well -
 * confirm against the full body.
 */
203 static void inet_rcu_free_ifa(struct rcu_head *head)
205 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207 in_dev_put(ifa->ifa_dev);
/* Hand @ifa to inet_rcu_free_ifa() through call_rcu() instead of releasing it directly. */
211 static void inet_free_ifa(struct in_ifaddr *ifa)
213 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown of @idev.  Warns when an address list or a multicast list
 * is still attached, frees the multicast hash, and emits a debug line when
 * NET_REFCNT_DEBUG is defined.  An error is logged when the in_device being
 * freed is still alive.
 * NOTE(review): the condition guarding the "alive" message and the release
 * of @idev/@dev are not visible from these lines - confirm.
 */
216 void in_dev_finish_destroy(struct in_device *idev)
218 struct net_device *dev = idev->dev;
220 WARN_ON(idev->ifa_list);
221 WARN_ON(idev->mc_list);
222 kfree(rcu_dereference_protected(idev->mc_hash, 1));
223 #ifdef NET_REFCNT_DEBUG
224 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
228 pr_err("Freeing alive in_device %p\n", idev);
232 EXPORT_SYMBOL(in_dev_finish_destroy);
/*
 * Create the IPv4 state for @dev: allocate a zeroed in_device, seed its
 * configuration from the namespace default table (with the sysctl pointer
 * cleared), allocate ARP neighbour parameters, disable LRO on the device
 * when forwarding is enabled, register sysctls and initialise multicast
 * state.  dev->ip_ptr is assigned last because packets can be received as
 * soon as it is set.
 * NOTE(review): error unwinding and the reference counting mentioned by the
 * inline comments are not visible from these lines - confirm.
 */
234 static struct in_device *inetdev_init(struct net_device *dev)
236 struct in_device *in_dev;
240 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
243 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 sizeof(in_dev->cnf));
245 in_dev->cnf.sysctl = NULL;
247 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 if (!in_dev->arp_parms)
250 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 dev_disable_lro(dev);
252 /* Reference in_dev->dev */
254 /* Account for reference dev->ip_ptr (below) */
257 devinet_sysctl_register(in_dev);
258 ip_mc_init_dev(in_dev);
259 if (dev->flags & IFF_UP)
262 /* we can receive as soon as ip_ptr is set -- do this last */
263 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(): recovers the in_device that
 * embeds @head.
 * NOTE(review): going by the name it presumably drops a reference on that
 * in_device - confirm against the full body.
 */
272 static void in_dev_rcu_put(struct rcu_head *head)
274 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down @in_dev: destroy its multicast state, delete every address by
 * repeatedly removing the head of ifa_list (with destroy == 0), clear
 * dev->ip_ptr, unregister sysctls, release the ARP neighbour parameters and
 * queue in_dev_rcu_put() through call_rcu().
 */
278 static void inetdev_destroy(struct in_device *in_dev)
280 struct in_ifaddr *ifa;
281 struct net_device *dev;
289 ip_mc_destroy_dev(in_dev);
291 while ((ifa = in_dev->ifa_list) != NULL) {
292 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
296 RCU_INIT_POINTER(dev->ip_ptr, NULL);
298 devinet_sysctl_unregister(in_dev);
299 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
302 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk @in_dev's primary addresses looking for one whose subnet contains
 * @a and, when @b is non-zero, @b as well.
 * NOTE(review): the return statements are not visible here; presumably
 * non-zero is returned on a match and zero otherwise - confirm.
 */
305 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
308 for_primary_ifa(in_dev) {
309 if (inet_ifa_match(a, ifa)) {
310 if (!b || inet_ifa_match(b, ifa)) {
315 } endfor_ifa(in_dev);
/*
 * Remove the address *@ifap from @in_dev.
 *
 * When the address is primary, the entries after it are scanned: secondaries
 * of the same subnet (same mask, matching prefix) are unhashed, unlinked and
 * announced with RTM_DELADDR plus a notifier call, unless secondary
 * promotion (IN_DEV_PROMOTE_SECONDARIES) is in effect.  With promotion, the
 * routes of the affected secondaries are first dropped via fib_del_ifaddr().
 * The address itself is then unlinked and unhashed, RTM_DELADDR is sent and
 * the NETDEV_DOWN notifier chain runs (message first, notifier second).
 * A promoted address is moved behind the last primary, loses
 * IFA_F_SECONDARY and is announced with RTM_NEWADDR.
 *
 * @nlh and @portid are passed through to rtmsg_ifa().
 * NOTE(review): what @destroy triggers is not visible from these lines -
 * confirm.
 */
320 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321 int destroy, struct nlmsghdr *nlh, u32 portid)
323 struct in_ifaddr *promote = NULL;
324 struct in_ifaddr *ifa, *ifa1 = *ifap;
325 struct in_ifaddr *last_prim = in_dev->ifa_list;
326 struct in_ifaddr *prev_prom = NULL;
327 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
331 /* 1. Deleting primary ifaddr forces deletion all secondaries
332 * unless alias promotion is set
335 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
336 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338 while ((ifa = *ifap1) != NULL) {
339 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
340 ifa1->ifa_scope <= ifa->ifa_scope)
343 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
344 ifa1->ifa_mask != ifa->ifa_mask ||
345 !inet_ifa_match(ifa1->ifa_address, ifa)) {
346 ifap1 = &ifa->ifa_next;
352 inet_hash_remove(ifa);
353 *ifap1 = ifa->ifa_next;
355 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
356 blocking_notifier_call_chain(&inetaddr_chain,
366 /* On promotion all secondaries from subnet are changing
367 * the primary IP, we must remove all their routes silently
368 * and later to add them back with new prefsrc. Do this
369 * while all addresses are on the device list.
371 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
372 if (ifa1->ifa_mask == ifa->ifa_mask &&
373 inet_ifa_match(ifa1->ifa_address, ifa))
374 fib_del_ifaddr(ifa, ifa1);
379 *ifap = ifa1->ifa_next;
380 inet_hash_remove(ifa1);
382 /* 3. Announce address deletion */
384 /* Send message first, then call notifier.
385 At first sight, FIB update triggered by notifier
386 will refer to already deleted ifaddr, that could confuse
387 netlink listeners. It is not true: look, gated sees
388 that route deleted and if it still thinks that ifaddr
389 is valid, it will try to restore deleted routes... Grr.
390 So that, this order is correct.
392 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
393 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
396 struct in_ifaddr *next_sec = promote->ifa_next;
399 prev_prom->ifa_next = promote->ifa_next;
400 promote->ifa_next = last_prim->ifa_next;
401 last_prim->ifa_next = promote;
404 promote->ifa_flags &= ~IFA_F_SECONDARY;
405 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
406 blocking_notifier_call_chain(&inetaddr_chain,
408 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
409 if (ifa1->ifa_mask != ifa->ifa_mask ||
410 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request: passes a NULL header and port id 0.
 */
420 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
423 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Address lifetime checking runs from a delayed work item:
 * check_lifetime_work invokes check_lifetime().
 */
426 static void check_lifetime(struct work_struct *work);
428 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/*
 * Insert @ifa into the address list of its device (ifa->ifa_dev).
 *
 * The address starts out primary.  While walking the list, the slot after
 * the last primary whose scope is >= @ifa's scope is remembered, and an
 * existing address in the same subnet (same mask, matching prefix) turns
 * @ifa into a secondary.  An identical local address, or a scope mismatch
 * inside the subnet, is handled specially (NOTE(review): outcome not visible
 * from these lines - presumably the insert is rejected; confirm).
 *
 * A primary address also seeds the network random generator from its local
 * address.  After linking, the address is hashed, the lifetime worker is
 * rescheduled to run immediately, and RTM_NEWADDR is sent before the
 * NETDEV_UP notifier chain runs.  @nlh/@portid are passed to rtmsg_ifa().
 */
430 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
433 struct in_device *in_dev = ifa->ifa_dev;
434 struct in_ifaddr *ifa1, **ifap, **last_primary;
438 if (!ifa->ifa_local) {
443 ifa->ifa_flags &= ~IFA_F_SECONDARY;
444 last_primary = &in_dev->ifa_list;
446 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
447 ifap = &ifa1->ifa_next) {
448 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
449 ifa->ifa_scope <= ifa1->ifa_scope)
450 last_primary = &ifa1->ifa_next;
451 if (ifa1->ifa_mask == ifa->ifa_mask &&
452 inet_ifa_match(ifa1->ifa_address, ifa)) {
453 if (ifa1->ifa_local == ifa->ifa_local) {
457 if (ifa1->ifa_scope != ifa->ifa_scope) {
461 ifa->ifa_flags |= IFA_F_SECONDARY;
465 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
466 net_srandom(ifa->ifa_local);
470 ifa->ifa_next = *ifap;
473 inet_hash_insert(dev_net(in_dev->dev), ifa);
475 cancel_delayed_work(&check_lifetime_work);
476 schedule_delayed_work(&check_lifetime_work, 0);
478 /* Send message first, then call notifier.
479 Notifier will trigger FIB update, so that
480 listeners of netlink will know about new ifaddr */
481 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
482 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * originating netlink request: passes a NULL header and port id 0.
 */
487 static int inet_insert_ifa(struct in_ifaddr *ifa)
489 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach @ifa to @dev's in_device and insert it.  The in_device's devconf
 * "set" state is marked via ipv4_devconf_setall(); if @ifa is not yet bound
 * to this in_device it is bound now (warning when it was bound to some
 * other one).  Loopback addresses are forced to host scope.  Returns the
 * result of inet_insert_ifa().
 */
492 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
494 struct in_device *in_dev = __in_dev_get_rtnl(dev);
502 ipv4_devconf_setall(in_dev);
503 if (ifa->ifa_dev != in_dev) {
504 WARN_ON(ifa->ifa_dev);
506 ifa->ifa_dev = in_dev;
508 if (ipv4_is_loopback(ifa->ifa_local))
509 ifa->ifa_scope = RT_SCOPE_HOST;
510 return inet_insert_ifa(ifa);
513 /* Caller must hold RCU or RTNL :
514 * We don't take a reference on the found in_device
/*
 * Look up the device with index @ifindex in @net (RCU lookup) and return
 * the in_device hanging off dev->ip_ptr.  The result variable starts out
 * NULL, which is what the caller gets when nothing is assigned.
 */
516 struct in_device *inetdev_by_index(struct net *net, int ifindex)
518 struct net_device *dev;
519 struct in_device *in_dev = NULL;
522 dev = dev_get_by_index_rcu(net, ifindex);
524 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
528 EXPORT_SYMBOL(inetdev_by_index);
530 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search @in_dev's primary addresses for one whose netmask equals the
 * requested mask and whose subnet contains @prefix.
 */
532 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
537 for_primary_ifa(in_dev) {
538 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
540 } endfor_ifa(in_dev);
/*
 * Netlink handler for deleting an IPv4 address.  Parses the request against
 * ifa_ipv4_policy, resolves the in_device from ifm->ifa_index, and walks its
 * address list for an entry that agrees with every attribute supplied:
 * IFA_LOCAL (local address), IFA_LABEL (label), and IFA_ADDRESS together
 * with the prefix length.  The first such entry is removed through
 * __inet_del_ifa() with destroy == 1 and the requester's port id.
 * -EADDRNOTAVAIL is the error set when the walk finds nothing.
 */
544 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
546 struct net *net = sock_net(skb->sk);
547 struct nlattr *tb[IFA_MAX+1];
548 struct in_device *in_dev;
549 struct ifaddrmsg *ifm;
550 struct in_ifaddr *ifa, **ifap;
555 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
559 ifm = nlmsg_data(nlh);
560 in_dev = inetdev_by_index(net, ifm->ifa_index);
561 if (in_dev == NULL) {
566 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
567 ifap = &ifa->ifa_next) {
569 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
572 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
575 if (tb[IFA_ADDRESS] &&
576 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
577 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
580 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
584 err = -EADDRNOTAVAIL;
/* Lifetime value (all 32 bits set) that marks a valid/preferred lifetime as never expiring. */
589 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every hash bucket is examined in two passes.  The first pass iterates
 * with the RCU list walker and only decides whether anything must change
 * (an address whose valid lifetime has run out, or one past its preferred
 * lifetime that is not yet flagged IFA_F_DEPRECATED) while pulling "next"
 * forward to the earliest upcoming expiry.  The second pass uses the
 * removal-safe list walker: expired addresses are located in their device's
 * list and deleted through inet_del_ifa(); addresses past their preferred
 * lifetime get IFA_F_DEPRECATED and an RTM_NEWADDR message.  Addresses
 * flagged IFA_F_PERMANENT are tested for in both passes.
 *
 * Age is computed in seconds from ifa_tstamp.  Finally the work item is
 * re-armed: the rounded wake-up time is used when it is within
 * ADDRCONF_TIMER_FUZZ of "next", and the delay is never shorter than
 * ADDRCONF_TIMER_FUZZ_MAX.
 * NOTE(review): locking around the two passes and how change_needed gates
 * the second pass are not visible from these lines - confirm.
 */
591 static void check_lifetime(struct work_struct *work)
593 unsigned long now, next, next_sec, next_sched;
594 struct in_ifaddr *ifa;
595 struct hlist_node *n;
599 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
601 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
602 bool change_needed = false;
605 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
608 if (ifa->ifa_flags & IFA_F_PERMANENT)
611 /* We try to batch several events at once. */
612 age = (now - ifa->ifa_tstamp +
613 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
615 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
616 age >= ifa->ifa_valid_lft) {
617 change_needed = true;
618 } else if (ifa->ifa_preferred_lft ==
619 INFINITY_LIFE_TIME) {
621 } else if (age >= ifa->ifa_preferred_lft) {
622 if (time_before(ifa->ifa_tstamp +
623 ifa->ifa_valid_lft * HZ, next))
624 next = ifa->ifa_tstamp +
625 ifa->ifa_valid_lft * HZ;
627 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
628 change_needed = true;
629 } else if (time_before(ifa->ifa_tstamp +
630 ifa->ifa_preferred_lft * HZ,
632 next = ifa->ifa_tstamp +
633 ifa->ifa_preferred_lft * HZ;
640 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
643 if (ifa->ifa_flags & IFA_F_PERMANENT)
646 /* We try to batch several events at once. */
647 age = (now - ifa->ifa_tstamp +
648 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
650 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
651 age >= ifa->ifa_valid_lft) {
652 struct in_ifaddr **ifap;
654 for (ifap = &ifa->ifa_dev->ifa_list;
655 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
657 inet_del_ifa(ifa->ifa_dev,
662 } else if (ifa->ifa_preferred_lft !=
663 INFINITY_LIFE_TIME &&
664 age >= ifa->ifa_preferred_lft &&
665 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
666 ifa->ifa_flags |= IFA_F_DEPRECATED;
667 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
673 next_sec = round_jiffies_up(next);
676 /* If rounded timeout is accurate enough, accept it. */
677 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
678 next_sched = next_sec;
681 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
682 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
683 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
685 schedule_delayed_work(&check_lifetime_work, next_sched - now);
/*
 * Apply a valid and a preferred lifetime to @ifa.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are cleared first.  Each lifetime is
 * normalised with addrconf_timeout_fixup(); a finite valid lifetime is
 * stored in ifa_valid_lft, a finite preferred lifetime in
 * ifa_preferred_lft.  IFA_F_PERMANENT and IFA_F_DEPRECATED may be set again
 * depending on the outcome (NOTE(review): the exact conditions are not
 * visible from these lines - presumably "permanent" for a non-finite valid
 * lifetime and "deprecated" for a zero preferred lifetime; confirm).
 * ifa_tstamp is set to the current jiffies, and ifa_cstamp is initialised
 * from it on first use only.
 */
688 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
691 unsigned long timeout;
693 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
695 timeout = addrconf_timeout_fixup(valid_lft, HZ);
696 if (addrconf_finite_timeout(timeout))
697 ifa->ifa_valid_lft = timeout;
699 ifa->ifa_flags |= IFA_F_PERMANENT;
701 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702 if (addrconf_finite_timeout(timeout)) {
704 ifa->ifa_flags |= IFA_F_DEPRECATED;
705 ifa->ifa_preferred_lft = timeout;
707 ifa->ifa_tstamp = jiffies;
708 if (!ifa->ifa_cstamp)
709 ifa->ifa_cstamp = ifa->ifa_tstamp;
/*
 * Build a new in_ifaddr from a netlink address request.
 *
 * The message is parsed against ifa_ipv4_policy; a prefix length above 32
 * or a missing IFA_LOCAL is rejected.  The device is looked up by
 * ifm->ifa_index and its in_device fetched under RTNL.  The new entry takes
 * prefix length, mask, flags and scope from the header, IFA_LOCAL as local
 * address and IFA_ADDRESS as peer address (falling back to IFA_LOCAL when
 * absent), plus the optional broadcast address.  The label comes from
 * IFA_LABEL or from the device name.
 *
 * When IFA_CACHEINFO is present, a zero valid lifetime or a preferred
 * lifetime larger than the valid one is treated as an error; otherwise the
 * values are returned through @pvalid_lft and @pprefered_lft.
 * NOTE(review): error codes and the failure return convention are not
 * visible from these lines - confirm.
 */
712 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713 __u32 *pvalid_lft, __u32 *pprefered_lft)
715 struct nlattr *tb[IFA_MAX+1];
716 struct in_ifaddr *ifa;
717 struct ifaddrmsg *ifm;
718 struct net_device *dev;
719 struct in_device *in_dev;
722 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
726 ifm = nlmsg_data(nlh);
728 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
731 dev = __dev_get_by_index(net, ifm->ifa_index);
736 in_dev = __in_dev_get_rtnl(dev);
741 ifa = inet_alloc_ifa();
744 * A potential indev allocation can be left alive, it stays
745 * assigned to its device and is destroy with it.
749 ipv4_devconf_setall(in_dev);
752 if (tb[IFA_ADDRESS] == NULL)
753 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
755 INIT_HLIST_NODE(&ifa->hash);
756 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
757 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
758 ifa->ifa_flags = ifm->ifa_flags;
759 ifa->ifa_scope = ifm->ifa_scope;
760 ifa->ifa_dev = in_dev;
762 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
763 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
765 if (tb[IFA_BROADCAST])
766 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
769 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
771 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
773 if (tb[IFA_CACHEINFO]) {
774 struct ifa_cacheinfo *ci;
776 ci = nla_data(tb[IFA_CACHEINFO]);
777 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
781 *pvalid_lft = ci->ifa_valid;
782 *pprefered_lft = ci->ifa_prefered;
/*
 * Scan the address list of @ifa's device for an entry describing the same
 * address: identical mask, same subnet, and identical local address.
 */
793 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
795 struct in_device *in_dev = ifa->ifa_dev;
796 struct in_ifaddr *ifa1, **ifap;
801 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
802 ifap = &ifa1->ifa_next) {
803 if (ifa1->ifa_mask == ifa->ifa_mask &&
804 inet_ifa_match(ifa1->ifa_address, ifa) &&
805 ifa1->ifa_local == ifa->ifa_local)
/*
 * Netlink handler for adding or updating an IPv4 address.
 *
 * The request is converted with rtm_to_ifaddr(); both lifetimes default to
 * INFINITY_LIFE_TIME.  When no equivalent address exists yet, lifetimes are
 * applied and the address is inserted through __inet_insert_ifa().  When
 * one already exists, NLM_F_EXCL or a missing NLM_F_REPLACE blocks the
 * update; otherwise the lifetimes are refreshed, the lifetime worker is
 * rescheduled to run immediately, RTM_NEWADDR is sent and the NETDEV_UP
 * notifier chain runs.
 * NOTE(review): which in_ifaddr the update path operates on, and the error
 * code returned for the blocked case, are not visible from these lines -
 * confirm.
 */
811 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
813 struct net *net = sock_net(skb->sk);
814 struct in_ifaddr *ifa;
815 struct in_ifaddr *ifa_existing;
816 __u32 valid_lft = INFINITY_LIFE_TIME;
817 __u32 prefered_lft = INFINITY_LIFE_TIME;
821 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
825 ifa_existing = find_matching_ifa(ifa);
827 /* It would be best to check for !NLM_F_CREATE here but
828 * userspace alreay relies on not having to provide this.
830 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
831 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
835 if (nlh->nlmsg_flags & NLM_F_EXCL ||
836 !(nlh->nlmsg_flags & NLM_F_REPLACE))
839 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
840 cancel_delayed_work(&check_lifetime_work);
841 schedule_delayed_work(&check_lifetime_work, 0);
842 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
843 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
849 * Determine a default network mask, based on the IP address.
/*
 * Classful default prefix length for @addr.  The zero network and classes
 * A, B and C are recognised; for anything else the result stays at its
 * initial value of -1.  Callers in devinet_ioctl() treat a negative result
 * as "not usable as an interface address".
 */
852 static int inet_abc_len(__be32 addr)
854 int rc = -1; /* Something else, probably a multicast. */
856 if (ipv4_is_zeronet(addr))
859 __u32 haddr = ntohl(addr);
861 if (IN_CLASSA(haddr))
863 else if (IN_CLASSB(haddr))
865 else if (IN_CLASSC(haddr))
/*
 * ioctl entry point for the classic SIOC[GS]IF* interface address calls.
 *
 * The ifreq is copied in from @arg, its name is forcibly NUL-terminated and
 * the address supplied by the caller is saved for later comparison.  The
 * device is loaded by name if needed.  "Get" commands reset the returned
 * sockaddr to an empty AF_INET address; "set" commands require
 * CAP_NET_ADMIN in the namespace's user namespace and an AF_INET address.
 *
 * The address entry is selected by label and address (4.4BSD alias style,
 * only when the caller passed an AF_INET address), falling back to a
 * label-only comparison.  Without a matching entry only SIOCSIFADDR and
 * SIOCSIFFLAGS may proceed; everything else fails with -EADDRNOTAVAIL.
 *
 * Per command:
 *   SIOCGIFADDR/BRDADDR/DSTADDR/NETMASK - report the local, broadcast, peer
 *     address or netmask of the entry.
 *   SIOCSIFFLAGS - clearing IFF_UP on a matched entry deletes that address;
 *     otherwise the flags are applied with dev_change_flags().
 *   SIOCSIFADDR - reject addresses inet_abc_len() cannot classify; allocate
 *     a new entry or reuse the matched one (deleting it from the list first
 *     when the address changes), derive prefix/mask/broadcast from the
 *     address class (or /32 on point-to-point devices), set infinite
 *     lifetimes and insert via inet_set_ifa().
 *   SIOCSIFBRDADDR/DSTADDR/NETMASK - when the value changes, remove the
 *     entry, update the field and re-insert.  A new netmask must pass
 *     bad_mask(); the broadcast address is recomputed only if it matched
 *     the old mask.
 *
 * Successful "get" commands copy the ifreq back to user space.
 * NOTE(review): RTNL locking, label handling around the ':' separator and
 * the individual error returns are not visible from these lines - confirm.
 */
873 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
876 struct sockaddr_in sin_orig;
877 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
878 struct in_device *in_dev;
879 struct in_ifaddr **ifap = NULL;
880 struct in_ifaddr *ifa = NULL;
881 struct net_device *dev;
884 int tryaddrmatch = 0;
887 * Fetch the caller's info block into kernel space
890 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
892 ifr.ifr_name[IFNAMSIZ - 1] = 0;
894 /* save original address for comparison */
895 memcpy(&sin_orig, sin, sizeof(*sin));
897 colon = strchr(ifr.ifr_name, ':');
901 dev_load(net, ifr.ifr_name);
904 case SIOCGIFADDR: /* Get interface address */
905 case SIOCGIFBRDADDR: /* Get the broadcast address */
906 case SIOCGIFDSTADDR: /* Get the destination address */
907 case SIOCGIFNETMASK: /* Get the netmask for the interface */
908 /* Note that these ioctls will not sleep,
909 so that we do not impose a lock.
910 One day we will be forced to put shlock here (I mean SMP)
912 tryaddrmatch = (sin_orig.sin_family == AF_INET);
913 memset(sin, 0, sizeof(*sin));
914 sin->sin_family = AF_INET;
919 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
922 case SIOCSIFADDR: /* Set interface address (and family) */
923 case SIOCSIFBRDADDR: /* Set the broadcast address */
924 case SIOCSIFDSTADDR: /* Set the destination address */
925 case SIOCSIFNETMASK: /* Set the netmask for the interface */
927 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
930 if (sin->sin_family != AF_INET)
941 dev = __dev_get_by_name(net, ifr.ifr_name);
948 in_dev = __in_dev_get_rtnl(dev);
951 /* Matthias Andree */
952 /* compare label and address (4.4BSD style) */
953 /* note: we only do this for a limited set of ioctls
954 and only if the original address family was AF_INET.
955 This is checked above. */
956 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
957 ifap = &ifa->ifa_next) {
958 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
959 sin_orig.sin_addr.s_addr ==
965 /* we didn't get a match, maybe the application is
966 4.3BSD-style and passed in junk so we fall back to
967 comparing just the label */
969 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
970 ifap = &ifa->ifa_next)
971 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
976 ret = -EADDRNOTAVAIL;
977 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
981 case SIOCGIFADDR: /* Get interface address */
982 sin->sin_addr.s_addr = ifa->ifa_local;
985 case SIOCGIFBRDADDR: /* Get the broadcast address */
986 sin->sin_addr.s_addr = ifa->ifa_broadcast;
989 case SIOCGIFDSTADDR: /* Get the destination address */
990 sin->sin_addr.s_addr = ifa->ifa_address;
993 case SIOCGIFNETMASK: /* Get the netmask for the interface */
994 sin->sin_addr.s_addr = ifa->ifa_mask;
999 ret = -EADDRNOTAVAIL;
1003 if (!(ifr.ifr_flags & IFF_UP))
1004 inet_del_ifa(in_dev, ifap, 1);
1007 ret = dev_change_flags(dev, ifr.ifr_flags);
1010 case SIOCSIFADDR: /* Set interface address (and family) */
1012 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1017 ifa = inet_alloc_ifa();
1020 INIT_HLIST_NODE(&ifa->hash);
1022 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1024 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1027 if (ifa->ifa_local == sin->sin_addr.s_addr)
1029 inet_del_ifa(in_dev, ifap, 0);
1030 ifa->ifa_broadcast = 0;
1034 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1036 if (!(dev->flags & IFF_POINTOPOINT)) {
1037 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1038 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1039 if ((dev->flags & IFF_BROADCAST) &&
1040 ifa->ifa_prefixlen < 31)
1041 ifa->ifa_broadcast = ifa->ifa_address |
1044 ifa->ifa_prefixlen = 32;
1045 ifa->ifa_mask = inet_make_mask(32);
1047 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1048 ret = inet_set_ifa(dev, ifa);
1051 case SIOCSIFBRDADDR: /* Set the broadcast address */
1053 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1054 inet_del_ifa(in_dev, ifap, 0);
1055 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1056 inet_insert_ifa(ifa);
1060 case SIOCSIFDSTADDR: /* Set the destination address */
1062 if (ifa->ifa_address == sin->sin_addr.s_addr)
1065 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1068 inet_del_ifa(in_dev, ifap, 0);
1069 ifa->ifa_address = sin->sin_addr.s_addr;
1070 inet_insert_ifa(ifa);
1073 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1076 * The mask we set must be legal.
1079 if (bad_mask(sin->sin_addr.s_addr, 0))
1082 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1083 __be32 old_mask = ifa->ifa_mask;
1084 inet_del_ifa(in_dev, ifap, 0);
1085 ifa->ifa_mask = sin->sin_addr.s_addr;
1086 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1088 /* See if current broadcast address matches
1089 * with current netmask, then recalculate
1090 * the broadcast address. Otherwise it's a
1091 * funny address, so don't touch it since
1092 * the user seems to know what (s)he's doing...
1094 if ((dev->flags & IFF_BROADCAST) &&
1095 (ifa->ifa_prefixlen < 31) &&
1096 (ifa->ifa_broadcast ==
1097 (ifa->ifa_local|~old_mask))) {
1098 ifa->ifa_broadcast = (ifa->ifa_local |
1099 ~sin->sin_addr.s_addr);
1101 inet_insert_ifa(ifa);
1111 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Produce one struct ifreq per IPv4 address of @dev for an interface
 * configuration listing.  Each record is zeroed, given the address label as
 * its name and an AF_INET sockaddr, then copied to @buf; @buf and @len
 * advance by sizeof(struct ifreq) and the running byte count grows by the
 * same amount.  Filling stops once @len can no longer hold a full record.
 * NOTE(review): the path that adds sizeof(ifr) without copying looks like a
 * size-only query (presumably when @buf is NULL) - confirm.
 */
1115 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1117 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1118 struct in_ifaddr *ifa;
1125 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1127 done += sizeof(ifr);
1130 if (len < (int) sizeof(ifr))
1132 memset(&ifr, 0, sizeof(struct ifreq));
1133 strcpy(ifr.ifr_name, ifa->ifa_label);
1135 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1136 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1139 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1143 buf += sizeof(struct ifreq);
1144 len -= sizeof(struct ifreq);
1145 done += sizeof(struct ifreq);
/*
 * Choose a local source address on @dev for talking to @dst within @scope.
 *
 * Primary addresses of @dev whose scope exceeds @scope are passed over.
 * An address whose subnet contains @dst (or any address when @dst is zero)
 * is taken; failing that, a primary address of acceptable scope is kept as
 * the candidate.  If the device yields nothing, all devices of the
 * namespace are scanned (RCU walk) for a primary address that is not
 * link-scoped and whose scope is <= @scope.
 */
1151 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1154 struct in_device *in_dev;
1155 struct net *net = dev_net(dev);
1158 in_dev = __in_dev_get_rcu(dev);
1162 for_primary_ifa(in_dev) {
1163 if (ifa->ifa_scope > scope)
1165 if (!dst || inet_ifa_match(dst, ifa)) {
1166 addr = ifa->ifa_local;
1170 addr = ifa->ifa_local;
1171 } endfor_ifa(in_dev);
1177 /* Not loopback addresses on loopback should be preferred
1178 in this case. It is importnat that lo is the first interface
1181 for_each_netdev_rcu(net, dev) {
1182 in_dev = __in_dev_get_rcu(dev);
1186 for_primary_ifa(in_dev) {
1187 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1188 ifa->ifa_scope <= scope) {
1189 addr = ifa->ifa_local;
1192 } endfor_ifa(in_dev);
1198 EXPORT_SYMBOL(inet_select_addr);
/*
 * Per-device worker for inet_confirm_addr().  Walks the addresses of
 * @in_dev looking for a local address that satisfies the wildcards:
 * @local (zero = any), @dst (zero = any subnet) and a scope no larger than
 * @scope.  A candidate address is picked first; "same" then records whether
 * an entry places @local and @dst in one subnet, and the candidate may be
 * replaced by another in-scope address while searching.  Returns the
 * chosen address when "same" holds, zero otherwise.
 * NOTE(review): loop control between the visible steps is not shown here -
 * confirm details against the full body.
 */
1200 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1201 __be32 local, int scope)
1208 (local == ifa->ifa_local || !local) &&
1209 ifa->ifa_scope <= scope) {
1210 addr = ifa->ifa_local;
1215 same = (!local || inet_ifa_match(local, ifa)) &&
1216 (!dst || inet_ifa_match(dst, ifa));
1220 /* Is the selected addr into dst subnet? */
1221 if (inet_ifa_match(addr, ifa))
1223 /* No, then can we use new local src? */
1224 if (ifa->ifa_scope <= scope) {
1225 addr = ifa->ifa_local;
1228 /* search for large dst subnet for addr */
1232 } endfor_ifa(in_dev);
1234 return same ? addr : 0;
1238 * Confirm that local IP address exists using wildcards:
1239 * - in_dev: only on this interface, 0=any interface
1240 * - dst: only in the same subnet as dst, 0=any dst
1241 * - local: address, 0=autoselect the local address
1242 * - scope: maximum allowed scope value for the local address
/*
 * For any @scope other than RT_SCOPE_LINK the check is confined to
 * @in_dev.  For link scope, every device in @in_dev's namespace is tried in
 * turn (RCU walk) through confirm_addr_indev().
 */
1244 __be32 inet_confirm_addr(struct in_device *in_dev,
1245 __be32 dst, __be32 local, int scope)
1248 struct net_device *dev;
1251 if (scope != RT_SCOPE_LINK)
1252 return confirm_addr_indev(in_dev, dst, local, scope);
1254 net = dev_net(in_dev->dev);
1256 for_each_netdev_rcu(net, dev) {
1257 in_dev = __in_dev_get_rcu(dev);
1259 addr = confirm_addr_indev(in_dev, dst, local, scope);
1268 EXPORT_SYMBOL(inet_confirm_addr);
/*
 * Subscribe @nb to IPv4 address events published on inetaddr_chain.
 * Returns the result of blocking_notifier_chain_register().
 */
1274 int register_inetaddr_notifier(struct notifier_block *nb)
1276 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1278 EXPORT_SYMBOL(register_inetaddr_notifier);
/*
 * Remove @nb from inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
1280 int unregister_inetaddr_notifier(struct notifier_block *nb)
1282 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1284 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1286 /* Rename ifa_labels for a device name change. Make some effort to preserve
1287 * existing alias numbering and to create unique labels if possible.
/*
 * For every address of @in_dev: remember the old label, overwrite the label
 * with the new device name, then re-attach an alias suffix.  The suffix is
 * the ":..." part of the old label, or a generated ":<n>" one.  It is
 * appended when name plus suffix fit in IFNAMSIZ; otherwise it is written
 * over the tail of the label so that it ends at the last usable byte.
 * RTM_NEWADDR is sent for the relabelled address.
 * NOTE(review): which addresses keep the bare device name, and how the
 * counter for generated suffixes advances, are not visible from these
 * lines - confirm.
 */
1289 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1291 struct in_ifaddr *ifa;
1294 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1295 char old[IFNAMSIZ], *dot;
1297 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1298 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1301 dot = strchr(old, ':');
1303 sprintf(old, ":%d", named);
1306 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1307 strcat(ifa->ifa_label, dot);
1309 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1311 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/*
 * Tell whether @mtu is acceptable for running IPv4 on a device.
 * inetdev_event() uses the answer to decide whether IP is (re)enabled or
 * torn down when the MTU changes or the device comes up.
 * NOTE(review): the threshold itself is not visible here - confirm.
 */
1315 static bool inetdev_valid_mtu(unsigned int mtu)
/*
 * Announce every address of @in_dev on @dev with an ARP request in which
 * both the sender and the target protocol address are the address itself
 * and the sender hardware address is dev->dev_addr.
 */
1320 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1321 struct in_device *in_dev)
1324 struct in_ifaddr *ifa;
1326 for (ifa = in_dev->ifa_list; ifa;
1327 ifa = ifa->ifa_next) {
1328 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1329 ifa->ifa_local, dev,
1330 ifa->ifa_local, NULL,
1331 dev->dev_addr, NULL);
1335 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback that keeps per-device IPv4 state in step
 * with device events.
 *
 * Creation path: NETDEV_REGISTER builds the in_device via inetdev_init()
 * (-ENOMEM is reported through notifier_from_errno() on failure) and sets
 * NOXFRM and NOPOLICY on loopback devices; NETDEV_CHANGEMTU re-enables IP
 * once the MTU is valid again.
 *
 * Event dispatch:
 *   NETDEV_REGISTER           - unexpected here: logs "bug" and clears
 *                               dev->ip_ptr.
 *   device coming up          - requires a valid MTU; on loopback devices a
 *                               host-scope INADDR_LOOPBACK/8 address with
 *                               infinite lifetimes is created and inserted.
 *   NETDEV_CHANGEADDR         - consults the ARP_NOTIFY setting before
 *                               continuing to the next case.
 *   NETDEV_NOTIFY_PEERS       - sends gratuitous ARP.
 *   NETDEV_PRE/POST_TYPE_CHANGE - unmaps / remaps multicast state.
 *   NETDEV_CHANGEMTU          - checks the MTU; an insufficient MTU
 *                               continues into the unregister case.
 *   NETDEV_UNREGISTER         - destroys the in_device.
 *   NETDEV_CHANGENAME         - relabels addresses and re-registers the
 *                               sysctl entries under the new name.
 * NOTE(review): the case label for the "device coming up" branch and the
 * break/fallthrough structure are not visible from these lines - confirm.
 */
1337 static int inetdev_event(struct notifier_block *this, unsigned long event,
1340 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1341 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1346 if (event == NETDEV_REGISTER) {
1347 in_dev = inetdev_init(dev);
1349 return notifier_from_errno(-ENOMEM);
1350 if (dev->flags & IFF_LOOPBACK) {
1351 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1352 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1354 } else if (event == NETDEV_CHANGEMTU) {
1355 /* Re-enabling IP */
1356 if (inetdev_valid_mtu(dev->mtu))
1357 in_dev = inetdev_init(dev);
1363 case NETDEV_REGISTER:
1364 pr_debug("%s: bug\n", __func__);
1365 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1368 if (!inetdev_valid_mtu(dev->mtu))
1370 if (dev->flags & IFF_LOOPBACK) {
1371 struct in_ifaddr *ifa = inet_alloc_ifa();
1374 INIT_HLIST_NODE(&ifa->hash);
1376 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1377 ifa->ifa_prefixlen = 8;
1378 ifa->ifa_mask = inet_make_mask(8);
1379 in_dev_hold(in_dev);
1380 ifa->ifa_dev = in_dev;
1381 ifa->ifa_scope = RT_SCOPE_HOST;
1382 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1383 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1384 INFINITY_LIFE_TIME);
1385 inet_insert_ifa(ifa);
1390 case NETDEV_CHANGEADDR:
1391 if (!IN_DEV_ARP_NOTIFY(in_dev))
1394 case NETDEV_NOTIFY_PEERS:
1395 /* Send gratuitous ARP to notify of link change */
1396 inetdev_send_gratuitous_arp(dev, in_dev);
1401 case NETDEV_PRE_TYPE_CHANGE:
1402 ip_mc_unmap(in_dev);
1404 case NETDEV_POST_TYPE_CHANGE:
1405 ip_mc_remap(in_dev);
1407 case NETDEV_CHANGEMTU:
1408 if (inetdev_valid_mtu(dev->mtu))
1410 /* disable IP when MTU is not enough */
1411 case NETDEV_UNREGISTER:
1412 inetdev_destroy(in_dev);
1414 case NETDEV_CHANGENAME:
1415 /* Do not notify about label change, this event is
1416 * not interesting to applications using netlink.
1418 inetdev_changename(dev, in_dev);
1420 devinet_sysctl_unregister(in_dev);
1421 devinet_sysctl_register(in_dev);
/* Notifier block that routes netdevice events to inetdev_event(). */
1428 static struct notifier_block ip_netdev_notifier = {
1429 .notifier_call = inetdev_event,
1432 static size_t inet_nlmsg_size(void)
1434 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1435 + nla_total_size(4) /* IFA_ADDRESS */
1436 + nla_total_size(4) /* IFA_LOCAL */
1437 + nla_total_size(4) /* IFA_BROADCAST */
1438 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Convert a jiffies timestamp into hundredths of a second elapsed since
 * INITIAL_JIFFIES (ticks * 100 / HZ), truncated to 32 bits by the return
 * type.
 */
1441 static inline u32 cstamp_delta(unsigned long cstamp)
1443 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/*
 * Append an IFA_CACHEINFO attribute to @skb.  The creation and update
 * timestamps are converted with cstamp_delta(); @preferred and @valid are
 * stored as given.  Returns the result of nla_put().
 */
1446 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1447 unsigned long tstamp, u32 preferred, u32 valid)
1449 struct ifa_cacheinfo ci;
1451 ci.cstamp = cstamp_delta(cstamp);
1452 ci.tstamp = cstamp_delta(tstamp);
1453 ci.ifa_prefered = preferred;
1454 ci.ifa_valid = valid;
1456 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/*
 * Serialise @ifa into @skb as one netlink address message.
 *
 * A message header of type @event is started with @portid, @seq and
 * @flags, and the ifaddrmsg is filled from the address (family AF_INET,
 * prefix length, flags, scope, interface index).  For addresses without
 * IFA_F_PERMANENT the stored preferred/valid lifetimes are adjusted by the
 * whole seconds elapsed since ifa_tstamp, unless they are infinite;
 * permanent addresses report infinite lifetimes.  IFA_ADDRESS,
 * IFA_BROADCAST and IFA_LABEL are emitted only when non-zero/non-empty,
 * followed by IFA_LOCAL and the cache info.  Any attribute failure jumps to
 * nla_put_failure, which cancels the partially built message.
 * Returns the result of nlmsg_end() on success.
 * NOTE(review): the value returned after nlmsg_cancel() is not visible
 * from these lines - confirm.
 */
1459 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1460 u32 portid, u32 seq, int event, unsigned int flags)
1462 struct ifaddrmsg *ifm;
1463 struct nlmsghdr *nlh;
1464 u32 preferred, valid;
1466 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1470 ifm = nlmsg_data(nlh);
1471 ifm->ifa_family = AF_INET;
1472 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1473 ifm->ifa_flags = ifa->ifa_flags;
1474 ifm->ifa_scope = ifa->ifa_scope;
1475 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1477 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1478 preferred = ifa->ifa_preferred_lft;
1479 valid = ifa->ifa_valid_lft;
1480 if (preferred != INFINITY_LIFE_TIME) {
1481 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1483 if (preferred > tval)
1487 if (valid != INFINITY_LIFE_TIME) {
1495 preferred = INFINITY_LIFE_TIME;
1496 valid = INFINITY_LIFE_TIME;
1498 if ((ifa->ifa_address &&
1499 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1501 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1502 (ifa->ifa_broadcast &&
1503 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1504 (ifa->ifa_label[0] &&
1505 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1506 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1508 goto nla_put_failure;
1510 return nlmsg_end(skb, nlh);
1513 nlmsg_cancel(skb, nlh);
/*
 * Netlink dump callback that emits one RTM_NEWADDR message (flagged
 * NLM_F_MULTI) per IPv4 address in the socket's network namespace.
 *
 * The walk goes over the net->dev_index_head[] hash buckets, then over
 * each device's in_dev->ifa_list.  The resume position is read from
 * cb->args[1] and cb->args[2], and the address index is written back to
 * cb->args[2] before returning.
 *
 * NOTE(review): this listing omits several source lines; declarations of
 * h, s_h, idx and s_idx and the loop exits are not visible here.
 */
1517 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1519 struct net *net = sock_net(skb->sk);
1522 int ip_idx, s_ip_idx;
1523 struct net_device *dev;
1524 struct in_device *in_dev;
1525 struct in_ifaddr *ifa;
1526 struct hlist_head *head;
/* Resume point saved by a previous invocation of this callback. */
1529 s_idx = idx = cb->args[1];
1530 s_ip_idx = ip_idx = cb->args[2];
/* s_idx is reset to 0 whenever the walk advances to the next bucket. */
1532 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1534 head = &net->dev_index_head[h];
/* cb->seq is derived from the namespace's address generation counter. */
1536 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1538 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1541 if (h > s_h || idx > s_idx)
1543 in_dev = __in_dev_get_rcu(dev);
1547 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1548 ifa = ifa->ifa_next, ip_idx++) {
/* Entries below the saved index are tested here so they can be skipped. */
1549 if (ip_idx < s_ip_idx)
1551 if (inet_fill_ifaddr(skb, ifa,
1552 NETLINK_CB(cb->skb).portid,
1554 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1558 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1569 cb->args[2] = ip_idx;
/*
 * Send an address notification for @ifa to the RTNLGRP_IPV4_IFADDR
 * multicast group.
 *
 * An skb sized by inet_nlmsg_size() is allocated with GFP_KERNEL, filled
 * by inet_fill_ifaddr() with @event as message type, and passed to
 * rtnl_notify().  The sequence number is taken from @nlh when it is
 * non-NULL, otherwise 0.  rtnl_set_sk_err() reports an error code to the
 * group's listeners.
 *
 * NOTE(review): this listing omits several source lines (remaining
 * parameters, error checks and labels); only visible statements are
 * described.
 */
1574 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1577 struct sk_buff *skb;
1578 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1582 net = dev_net(ifa->ifa_dev->dev);
1583 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1587 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1589 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1590 WARN_ON(err == -EMSGSIZE);
1594 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1598 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * rtnl_af_ops callback (.get_link_af_size): size of the AF_INET link
 * attributes for @dev.  The visible return value is the space needed for
 * IFLA_INET_CONF, i.e. IPV4_DEVCONF_MAX 4-byte values.
 *
 * NOTE(review): lines between the in_dev lookup and the return are not
 * visible in this listing.
 */
1601 static size_t inet_get_link_af_size(const struct net_device *dev)
1603 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1608 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * rtnl_af_ops callback (.fill_link_af): write the device's IPv4
 * configuration values into @skb as an IFLA_INET_CONF attribute.
 *
 * Space for IPV4_DEVCONF_MAX 4-byte values is reserved with nla_reserve()
 * and each in_dev->cnf.data[i] is stored as a u32 in index order.
 *
 * NOTE(review): the checks on in_dev and nla and the return statements are
 * not visible in this listing.
 */
1611 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1613 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1620 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1624 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1625 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must be
 * a nested attribute.
 */
1630 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1631 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * rtnl_af_ops callback (.validate_link_af): check the AF_INET link
 * attributes in @nla before they are applied.
 *
 * Returns -EAFNOSUPPORT when @dev is given but has no in_device.  The
 * attributes are parsed against inet_af_policy, and every entry nested in
 * IFLA_INET_CONF has its type checked against the range
 * 1..IPV4_DEVCONF_MAX.
 *
 * NOTE(review): the error returns for the parse and range checks are not
 * visible in this listing.
 */
1634 static int inet_validate_link_af(const struct net_device *dev,
1635 const struct nlattr *nla)
1637 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1640 if (dev && !__in_dev_get_rtnl(dev))
1641 return -EAFNOSUPPORT;
1643 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1647 if (tb[IFLA_INET_CONF]) {
1648 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
/* The nested attribute type is the devconf id being set. */
1649 int cfgid = nla_type(a);
1654 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * rtnl_af_ops callback (.set_link_af): apply the AF_INET link attributes
 * in @nla to @dev.
 *
 * The attributes are parsed without a policy (NULL).  For every entry
 * nested in IFLA_INET_CONF, ipv4_devconf_set() is called with the
 * attribute type as the config id and its u32 payload as the value.
 *
 * NOTE(review): the condition guarding the -EAFNOSUPPORT return and the
 * handling of a parse failure are not visible in this listing.
 */
1662 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1664 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1665 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1669 return -EAFNOSUPPORT;
1671 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1674 if (tb[IFLA_INET_CONF]) {
1675 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1676 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Compute the message size needed for a netconf message carrying the
 * attribute selected by @type.
 *
 * The base is the aligned netconfmsg header plus NETCONFA_IFINDEX.  One
 * 4-byte attribute is added for each of NETCONFA_FORWARDING,
 * NETCONFA_RP_FILTER and NETCONFA_MC_FORWARDING that matches @type; a
 * @type of -1 selects all of them.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the accumulated size is returned.
 */
1682 static int inet_netconf_msgsize_devconf(int type)
1684 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1685 + nla_total_size(4); /* NETCONFA_IFINDEX */
1687 /* type -1 is used for ALL */
1688 if (type == -1 || type == NETCONFA_FORWARDING)
1689 size += nla_total_size(4);
1690 if (type == -1 || type == NETCONFA_RP_FILTER)
1691 size += nla_total_size(4);
1692 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1693 size += nla_total_size(4);
/*
 * Build one netconf message in @skb for the configuration in @devconf.
 *
 * The header is created with nlmsg_put() and the family is set to
 * AF_INET.  NETCONFA_IFINDEX is always written; FORWARDING, RP_FILTER and
 * MC_FORWARDING are written when they match the selector (-1 selects all
 * three).  Any failing put jumps to nla_put_failure.  On success the
 * result of nlmsg_end() is returned; the failure path visible here
 * cancels the message with nlmsg_cancel().
 *
 * NOTE(review): the final parameter, the check after nlmsg_put() and the
 * failure return value are not visible in this listing.
 */
1698 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1699 struct ipv4_devconf *devconf, u32 portid,
1700 u32 seq, int event, unsigned int flags,
1703 struct nlmsghdr *nlh;
1704 struct netconfmsg *ncm;
1706 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1711 ncm = nlmsg_data(nlh);
1712 ncm->ncm_family = AF_INET;
1714 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1715 goto nla_put_failure;
1717 /* type -1 is used for ALL */
1718 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1719 nla_put_s32(skb, NETCONFA_FORWARDING,
1720 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1721 goto nla_put_failure;
1722 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1723 nla_put_s32(skb, NETCONFA_RP_FILTER,
1724 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1725 goto nla_put_failure;
1726 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1727 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1728 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1729 goto nla_put_failure;
1731 return nlmsg_end(skb, nlh);
1734 nlmsg_cancel(skb, nlh);
/*
 * Notify RTNLGRP_IPV4_NETCONF listeners about attribute @type of
 * @devconf for interface index @ifindex.
 *
 * An skb sized by inet_netconf_msgsize_devconf(@type) is allocated with
 * GFP_ATOMIC, filled as an RTM_NEWNETCONF message with portid 0 and
 * sequence 0, and passed to rtnl_notify().  rtnl_set_sk_err() reports an
 * error code to the group's listeners.
 *
 * NOTE(review): the allocation/fill error checks and labels are not
 * visible in this listing.
 */
1738 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1739 struct ipv4_devconf *devconf)
1741 struct sk_buff *skb;
1744 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1748 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1749 RTM_NEWNETCONF, 0, type);
1751 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1752 WARN_ON(err == -EMSGSIZE);
1756 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1760 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Attribute policy passed to nlmsg_parse() in inet_netconf_get_devconf():
 * each listed attribute has .len set to sizeof(int).
 */
1763 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1764 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1765 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1766 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/*
 * RTM_GETNETCONF "doit" handler (registered in devinet_init()): reply to
 * the requester with the netconf values of one configuration set.
 *
 * The request is parsed against devconf_ipv4_policy and must carry
 * NETCONFA_IFINDEX.  That index selects the namespace-wide "all" set, the
 * "default" set, or the set of the device found by __dev_get_by_index().
 * The reply is an RTM_NEWNETCONF message sized for all attributes
 * (type -1) and is sent with rtnl_unicast() to the requesting portid.
 *
 * NOTE(review): the switch statement head, error checks and labels are not
 * visible in this listing.
 */
1769 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1770 struct nlmsghdr *nlh)
1772 struct net *net = sock_net(in_skb->sk);
1773 struct nlattr *tb[NETCONFA_MAX+1];
1774 struct netconfmsg *ncm;
1775 struct sk_buff *skb;
1776 struct ipv4_devconf *devconf;
1777 struct in_device *in_dev;
1778 struct net_device *dev;
1782 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1783 devconf_ipv4_policy);
1788 if (!tb[NETCONFA_IFINDEX])
1791 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
/* Pick the configuration set that corresponds to the requested index. */
1793 case NETCONFA_IFINDEX_ALL:
1794 devconf = net->ipv4.devconf_all;
1796 case NETCONFA_IFINDEX_DEFAULT:
1797 devconf = net->ipv4.devconf_dflt;
1800 dev = __dev_get_by_index(net, ifindex);
1803 in_dev = __in_dev_get_rtnl(dev);
1806 devconf = &in_dev->cnf;
1811 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1815 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1816 NETLINK_CB(in_skb).portid,
1817 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1820 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1821 WARN_ON(err == -EMSGSIZE);
1825 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * RTM_GETNETCONF dump callback (registered in devinet_init()).
 *
 * Walks the net->dev_index_head[] hash buckets and emits a netconf
 * message for each device's in_device.  Two extra positions follow the
 * buckets: h == NETDEV_HASHENTRIES emits the namespace-wide "all" set and
 * h == NETDEV_HASHENTRIES + 1 emits the "default" set, both as
 * RTM_NEWNETCONF with NLM_F_MULTI.
 *
 * NOTE(review): declarations of h, s_h, idx and s_idx, the loop exits and
 * the saving of the resume state are not visible in this listing.
 */
1830 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1831 struct netlink_callback *cb)
1833 struct net *net = sock_net(skb->sk);
1836 struct net_device *dev;
1837 struct in_device *in_dev;
1838 struct hlist_head *head;
/* Device index to resume from, saved by a previous invocation. */
1841 s_idx = idx = cb->args[1];
1843 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1845 head = &net->dev_index_head[h];
1847 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1849 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1852 in_dev = __in_dev_get_rcu(dev);
1856 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1858 NETLINK_CB(cb->skb).portid,
1866 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Pseudo-entry after the last bucket: the "all" configuration. */
1872 if (h == NETDEV_HASHENTRIES) {
1873 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1874 net->ipv4.devconf_all,
1875 NETLINK_CB(cb->skb).portid,
1877 RTM_NEWNETCONF, NLM_F_MULTI,
/* Second pseudo-entry: the "default" configuration. */
1883 if (h == NETDEV_HASHENTRIES + 1) {
1884 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1885 net->ipv4.devconf_dflt,
1886 NETLINK_CB(cb->skb).portid,
1888 RTM_NEWNETCONF, NLM_F_MULTI,
1901 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry @i of the namespace's default configuration to every
 * device in @net whose in_device does not have bit @i set in cnf.state.
 * devinet_conf_proc() sets that bit when an entry is written, so entries
 * written on a device are left alone.
 *
 * NOTE(review): the locking around the for_each_netdev_rcu() walk is not
 * visible in this listing.
 */
1903 static void devinet_copy_dflt_conf(struct net *net, int i)
1905 struct net_device *dev;
1908 for_each_netdev_rcu(net, dev) {
1909 struct in_device *in_dev;
1911 in_dev = __in_dev_get_rcu(dev);
1912 if (in_dev && !test_bit(i, in_dev->cnf.state))
1913 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Apply a change of the namespace-wide "all" FORWARDING value.
 *
 * With on = the current "all" FORWARDING value: the "all"
 * ACCEPT_REDIRECTS is set to !on, the "default" FORWARDING is set to on,
 * and NETCONFA_FORWARDING notifications are sent for both sets.  Then,
 * for each device in @net, the visible statements call dev_disable_lro(),
 * set the in_device's FORWARDING to on and send a per-device
 * notification.
 *
 * NOTE(review): several lines inside the device loop (conditions and
 * locking) are not visible in this listing.
 */
1918 /* called with RTNL locked */
1919 static void inet_forward_change(struct net *net)
1921 struct net_device *dev;
1922 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1924 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1925 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1926 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1927 NETCONFA_IFINDEX_ALL,
1928 net->ipv4.devconf_all);
1929 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1930 NETCONFA_IFINDEX_DEFAULT,
1931 net->ipv4.devconf_dflt);
1933 for_each_netdev(net, dev) {
1934 struct in_device *in_dev;
1936 dev_disable_lro(dev);
1938 in_dev = __in_dev_get_rcu(dev);
1940 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1941 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1942 dev->ifindex, &in_dev->cnf);
/*
 * Generic sysctl handler for per-device IPv4 configuration integers.
 *
 * The value is read or written through proc_dointvec(); the old and new
 * values are sampled around that call.  The visible follow-up work:
 *  - mark entry i as set in cnf->state;
 *  - when the table is the namespace default, copy the entry to devices
 *    via devinet_copy_dflt_conf();
 *  - flush the route cache when ACCEPT_LOCAL or ROUTE_LOCALNET goes from
 *    non-zero to zero;
 *  - send a NETCONFA_RP_FILTER notification when RP_FILTER changes, using
 *    the "default", "all" or owning device's interface index.
 *
 * ctl->extra1 holds the ipv4_devconf being edited and ctl->extra2 the
 * owning network namespace.
 *
 * NOTE(review): the condition guarding the follow-up work and the return
 * statement are not visible in this listing.
 */
1948 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1949 void __user *buffer,
1950 size_t *lenp, loff_t *ppos)
1952 int old_value = *(int *)ctl->data;
1953 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1954 int new_value = *(int *)ctl->data;
1957 struct ipv4_devconf *cnf = ctl->extra1;
1958 struct net *net = ctl->extra2;
/* Index of the edited entry within cnf->data[]. */
1959 int i = (int *)ctl->data - cnf->data;
1961 set_bit(i, cnf->state);
1963 if (cnf == net->ipv4.devconf_dflt)
1964 devinet_copy_dflt_conf(net, i);
1965 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1966 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1967 if ((new_value == 0) && (old_value != 0))
1968 rt_cache_flush(net);
1969 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1970 new_value != old_value) {
1973 if (cnf == net->ipv4.devconf_dflt)
1974 ifindex = NETCONFA_IFINDEX_DEFAULT;
1975 else if (cnf == net->ipv4.devconf_all)
1976 ifindex = NETCONFA_IFINDEX_ALL;
/* Otherwise cnf is embedded in an in_device; use that device's index. */
1978 struct in_device *idev =
1979 container_of(cnf, struct in_device,
1981 ifindex = idev->dev->ifindex;
1983 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/*
 * Sysctl handler for the "forwarding" entries and for ip_forward (see
 * ctl_forward_entry).
 *
 * After proc_dointvec() the handler acts only on a write that changed the
 * value.  For anything other than the namespace "default" entry it takes
 * the RTNL lock with rtnl_trylock(); if that fails the syscall is
 * restarted via restart_syscall().  A change of the "all" entry is
 * applied with inet_forward_change().  A change of a per-device entry
 * calls dev_disable_lro() and sends a NETCONFA_FORWARDING notification.
 * The route cache is flushed with rt_cache_flush().  A notification for
 * NETCONFA_IFINDEX_DEFAULT is also visible, presumably for the "default"
 * entry.
 *
 * NOTE(review): the declaration of val, the statements that restore the
 * original value before restarting, the unlock and the return are not
 * visible in this listing.
 */
1991 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1992 void __user *buffer,
1993 size_t *lenp, loff_t *ppos)
1995 int *valp = ctl->data;
1998 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2000 if (write && *valp != val) {
2001 struct net *net = ctl->extra2;
2003 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2004 if (!rtnl_trylock()) {
2005 /* Restore the original values before restarting */
2008 return restart_syscall();
2010 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2011 inet_forward_change(net);
2013 struct ipv4_devconf *cnf = ctl->extra1;
2014 struct in_device *idev =
2015 container_of(cnf, struct in_device, cnf);
2017 dev_disable_lro(idev->dev);
2018 inet_netconf_notify_devconf(net,
2019 NETCONFA_FORWARDING,
2024 rt_cache_flush(net);
2026 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2027 NETCONFA_IFINDEX_DEFAULT,
2028 net->ipv4.devconf_dflt);
/*
 * Sysctl handler used by DEVINET_SYSCTL_FLUSHING_ENTRY: read or write an
 * integer with proc_dointvec() and flush the namespace's route cache when
 * a write changed the value.  ctl->extra2 holds the network namespace.
 *
 * NOTE(review): the declaration of val and the return statement are not
 * visible in this listing; val is presumably the value before the write.
 */
2034 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2035 void __user *buffer,
2036 size_t *lenp, loff_t *ppos)
2038 int *valp = ctl->data;
2040 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2041 struct net *net = ctl->extra2;
2043 if (write && *valp != val)
2044 rt_cache_flush(net);
/*
 * Helpers for building the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) describes one ctl_table
 * entry: .data points at slot IPV4_DEVCONF_<attr> - 1 of the static
 * ipv4_devconf, .maxlen is sizeof(int), .proc_handler is @proc and
 * .extra1 is &ipv4_devconf.  __devinet_sysctl_register() later rebases
 * .data and .extra1 onto the actual configuration being registered.
 *
 * The wrappers fix the mode and handler:
 *   RW_ENTRY       - 0644, devinet_conf_proc
 *   RO_ENTRY       - 0444, devinet_conf_proc
 *   COMPLEX_ENTRY  - 0644, caller-supplied handler
 *   FLUSHING_ENTRY - COMPLEX_ENTRY with ipv4_doint_and_flush
 *
 * NOTE(review): some lines of DEVINET_SYSCTL_ENTRY are not visible in
 * this listing.
 */
2049 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2052 .data = ipv4_devconf.data + \
2053 IPV4_DEVCONF_ ## attr - 1, \
2054 .maxlen = sizeof(int), \
2056 .proc_handler = proc, \
2057 .extra1 = &ipv4_devconf, \
2060 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2061 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2063 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2064 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2066 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2067 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2069 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2070 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template sysctl table for one IPv4 configuration set.
 *
 * __devinet_sysctl_register() duplicates this structure with kmemdup()
 * and re-points each entry at the configuration being registered.
 * "forwarding" uses devinet_sysctl_forward, "mc_forwarding" is read-only,
 * the FLUSHING entries use ipv4_doint_and_flush, and the rest are plain
 * read-write entries handled by devinet_conf_proc.
 *
 * NOTE(review): some lines of the initializer are not visible in this
 * listing.
 */
2072 static struct devinet_sysctl_table {
2073 struct ctl_table_header *sysctl_header;
2074 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2075 } devinet_sysctl = {
2077 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2078 devinet_sysctl_forward),
2079 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2081 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2082 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2083 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2084 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2085 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2086 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2087 "accept_source_route"),
2088 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2089 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2090 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2091 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2092 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2093 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2094 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2095 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2096 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2097 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2098 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2099 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2100 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2101 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2102 "force_igmp_version"),
2103 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2104 "igmpv2_unsolicited_report_interval"),
2105 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2106 "igmpv3_unsolicited_report_interval"),
2108 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2109 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2110 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2111 "promote_secondaries"),
2112 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the sysctl directory "net/ipv4/conf/<dev_name>" in @net for
 * the configuration set @p.
 *
 * The devinet_sysctl template is duplicated with kmemdup().  For every
 * entry except the last array slot, .data is shifted by the byte offset
 * between @p and the static ipv4_devconf, .extra1 is set to @p and
 * .extra2 to @net.  The table is then registered with
 * register_net_sysctl().
 *
 * NOTE(review): the allocation check, the failure paths, the storing of
 * the table and the return statements are not visible in this listing.
 */
2117 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2118 struct ipv4_devconf *p)
2121 struct devinet_sysctl_table *t;
/* Room for the fixed prefix (including its NUL) plus an interface name. */
2122 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2124 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2128 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
/* Rebase the template pointer from ipv4_devconf onto @p. */
2129 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2130 t->devinet_vars[i].extra1 = p;
2131 t->devinet_vars[i].extra2 = net;
2134 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2136 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2137 if (!t->sysctl_header)
/*
 * Unregister the sysctl table attached to @cnf (cnf->sysctl) with
 * unregister_net_sysctl_table().
 *
 * NOTE(review): lines between the lookup and the unregister call, and
 * after it, are not visible in this listing.
 */
2149 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2151 struct devinet_sysctl_table *t = cnf->sysctl;
2157 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries for @idev: first the neighbour parameters
 * (idev->arp_parms, under "ipv4"), then the per-device configuration
 * directory named after the device.
 *
 * NOTE(review): the final argument of the second call is not visible in
 * this listing.
 */
2161 static void devinet_sysctl_register(struct in_device *idev)
2163 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2164 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Remove the sysctl entries for @idev: the per-device configuration table
 * first, then the neighbour parameter entries.
 */
2168 static void devinet_sysctl_unregister(struct in_device *idev)
2170 __devinet_sysctl_unregister(&idev->cnf);
2171 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Sysctl table for "ip_forward", registered under "net/ipv4" by
 * devinet_init_net().  It shares its storage with the FORWARDING slot of
 * ipv4_devconf and uses devinet_sysctl_forward as its handler.  The
 * initial values refer to init_net; devinet_init_net() duplicates and
 * re-points the table for other namespaces.
 *
 * NOTE(review): some lines of the initializer are not visible in this
 * listing.
 */
2174 static struct ctl_table ctl_forward_entry[] = {
2176 .procname = "ip_forward",
2177 .data = &ipv4_devconf.data[
2178 IPV4_DEVCONF_FORWARDING - 1],
2179 .maxlen = sizeof(int),
2181 .proc_handler = devinet_sysctl_forward,
2182 .extra1 = &ipv4_devconf,
2183 .extra2 = &init_net,
/*
 * Per-namespace init (devinet_ops.init): set up the "all" and "default"
 * IPv4 configuration sets for @net.
 *
 * init_net uses the static ipv4_devconf and ipv4_devconf_dflt directly.
 * Any other namespace gets kmemdup() copies of both and, with
 * CONFIG_SYSCTL, a copy of ctl_forward_entry re-pointed at its own "all"
 * set.  With CONFIG_SYSCTL the "all" and "default" directories and the
 * "net/ipv4" forward entry are registered.  Finally the two sets are
 * stored in net->ipv4.
 *
 * The tail of the function is the error unwind.  It unregisters what was
 * registered; the comparisons against the static objects presumably guard
 * freeing of the duplicated copies.
 *
 * NOTE(review): allocation checks, labels, kfree() calls and returns are
 * not visible in this listing.
 */
2189 static __net_init int devinet_init_net(struct net *net)
2192 struct ipv4_devconf *all, *dflt;
2193 #ifdef CONFIG_SYSCTL
2194 struct ctl_table *tbl = ctl_forward_entry;
2195 struct ctl_table_header *forw_hdr;
2199 all = &ipv4_devconf;
2200 dflt = &ipv4_devconf_dflt;
2202 if (!net_eq(net, &init_net)) {
2203 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2207 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2209 goto err_alloc_dflt;
2211 #ifdef CONFIG_SYSCTL
2212 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Point the duplicated ip_forward entry at this namespace's "all" set. */
2216 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2217 tbl[0].extra1 = all;
2218 tbl[0].extra2 = net;
2222 #ifdef CONFIG_SYSCTL
2223 err = __devinet_sysctl_register(net, "all", all);
2227 err = __devinet_sysctl_register(net, "default", dflt);
2232 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2233 if (forw_hdr == NULL)
2235 net->ipv4.forw_hdr = forw_hdr;
2238 net->ipv4.devconf_all = all;
2239 net->ipv4.devconf_dflt = dflt;
2242 #ifdef CONFIG_SYSCTL
2244 __devinet_sysctl_unregister(dflt);
2246 __devinet_sysctl_unregister(all);
2248 if (tbl != ctl_forward_entry)
2252 if (dflt != &ipv4_devconf_dflt)
2255 if (all != &ipv4_devconf)
/*
 * Per-namespace exit (devinet_ops.exit): with CONFIG_SYSCTL, unregister
 * the forward sysctl header and the "default" and "all" sysctl
 * directories, then free the namespace's "default" and "all"
 * configuration sets.
 *
 * tbl is read from the header before the header is unregistered.
 *
 * NOTE(review): the lines that use tbl afterwards and the closing #endif
 * are not visible in this listing.
 */
2261 static __net_exit void devinet_exit_net(struct net *net)
2263 #ifdef CONFIG_SYSCTL
2264 struct ctl_table *tbl;
2266 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2267 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2268 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2269 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2272 kfree(net->ipv4.devconf_dflt);
2273 kfree(net->ipv4.devconf_all);
/*
 * Per-network-namespace hooks for this file; registered by devinet_init()
 * through register_pernet_subsys().
 */
2276 static __net_initdata struct pernet_operations devinet_ops = {
2277 .init = devinet_init_net,
2278 .exit = devinet_exit_net,
/*
 * Address-family link attribute operations, registered by devinet_init()
 * through rtnl_af_register().
 *
 * NOTE(review): one initializer line is not visible in this listing.
 */
2281 static struct rtnl_af_ops inet_af_ops = {
2283 .fill_link_af = inet_fill_link_af,
2284 .get_link_af_size = inet_get_link_af_size,
2285 .validate_link_af = inet_validate_link_af,
2286 .set_link_af = inet_set_link_af,
/*
 * Boot-time initialisation of the IPv4 device layer.
 *
 * In order: initialise the IN4_ADDR_HSIZE heads of inet_addr_lst[];
 * register the per-namespace operations; register the PF_INET gifconf
 * handler; register the netdevice notifier; schedule check_lifetime_work
 * with a delay of 0; register the AF link ops; and register the
 * rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR, RTM_GETADDR (dump
 * only) and RTM_GETNETCONF (get and dump).
 *
 * NOTE(review): the declaration of i and the function's braces are not
 * visible in this listing.
 */
2289 void __init devinet_init(void)
2293 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2294 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2296 register_pernet_subsys(&devinet_ops);
2298 register_gifconf(PF_INET, inet_gifconf);
2299 register_netdevice_notifier(&ip_netdev_notifier);
2301 schedule_delayed_work(&check_lifetime_work, 0);
2303 rtnl_af_register(&inet_af_ops);
2305 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2306 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2307 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2308 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2309 inet_netconf_dump_devconf, NULL);