2 * NET3 Protocol independent device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the non IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
51 * Rudi Cilibrasi : Pass the right thing to
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
130 #include "net-sysfs.h"
132 /* Instead of increasing this, you should create a hash table. */
133 #define MAX_GRO_SKBS 8
136 * The list of packet types we will receive (as opposed to discard)
137 * and the routines to invoke.
139 * Why 16. Because with 16 the only overlap we get on a hash of the
140 * low nibble of the protocol value is RARP/SNAP/X.25.
142 * NOTE: That is no longer true with the addition of VLAN tags. Not
143 * sure which should go first, but I bet it won't make much
144 * difference if we are running VLANs. The good news is that
145 * this protocol won't be in the list unless compiled in, so
146 * the average user (w/out VLANs) will not be adversely affected.
163 #define PTYPE_HASH_SIZE (16)
164 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
166 static DEFINE_SPINLOCK(ptype_lock);
167 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
168 static struct list_head ptype_all __read_mostly; /* Taps */
171 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
174 * Pure readers hold dev_base_lock for reading.
176 * Writers must hold the rtnl semaphore while they loop through the
177 * dev_base_head list, and hold dev_base_lock for writing when they do the
178 * actual updates. This allows pure readers to access the list even
179 * while a writer is preparing to update it.
181 * To put it another way, dev_base_lock is held for writing only to
182 * protect against pure readers; the rtnl semaphore provides the
183 * protection against other writers.
185 * See, for example usages, register_netdevice() and
186 * unregister_netdevice(), which must be called with the rtnl
189 DEFINE_RWLOCK(dev_base_lock);
191 EXPORT_SYMBOL(dev_base_lock);
193 #define NETDEV_HASHBITS 8
194 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
196 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
198 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
199 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
202 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
204 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
207 /* Device list insertion */
208 static int list_netdevice(struct net_device *dev)
210 struct net *net = dev_net(dev);
214 write_lock_bh(&dev_base_lock);
215 list_add_tail(&dev->dev_list, &net->dev_base_head);
216 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
217 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
218 write_unlock_bh(&dev_base_lock);
222 /* Device list removal */
223 static void unlist_netdevice(struct net_device *dev)
227 /* Unlink dev from the device chain */
228 write_lock_bh(&dev_base_lock);
229 list_del(&dev->dev_list);
230 hlist_del(&dev->name_hlist);
231 hlist_del(&dev->index_hlist);
232 write_unlock_bh(&dev_base_lock);
239 static RAW_NOTIFIER_HEAD(netdev_chain);
242 * Device drivers call our routines to queue packets here. We empty the
243 * queue in the local softnet handler.
246 DEFINE_PER_CPU(struct softnet_data, softnet_data);
248 #ifdef CONFIG_LOCKDEP
250 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
251 * according to dev->type
253 static const unsigned short netdev_lock_type[] =
254 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
255 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
256 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
257 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
258 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
259 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
260 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
261 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
262 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
263 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
264 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
265 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
266 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
267 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
268 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
270 static const char *netdev_lock_name[] =
271 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
272 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
273 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
274 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
275 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
276 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
277 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
278 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
279 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
280 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
281 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
282 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
283 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
284 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
285 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
287 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
288 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
290 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
294 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
295 if (netdev_lock_type[i] == dev_type)
297 /* the last key is used by default */
298 return ARRAY_SIZE(netdev_lock_type) - 1;
301 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
302 unsigned short dev_type)
306 i = netdev_lock_pos(dev_type);
307 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
308 netdev_lock_name[i]);
311 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
315 i = netdev_lock_pos(dev->type);
316 lockdep_set_class_and_name(&dev->addr_list_lock,
317 &netdev_addr_lock_key[i],
318 netdev_lock_name[i]);
321 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
322 unsigned short dev_type)
325 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
330 /*******************************************************************************
332 Protocol management and registration routines
334 *******************************************************************************/
337 * Add a protocol ID to the list. Now that the input handler is
338 * smarter we can dispense with all the messy stuff that used to be
341 * BEWARE!!! Protocol handlers, mangling input packets,
342 * MUST BE last in hash buckets and checking protocol handlers
343 * MUST start from promiscuous ptype_all chain in net_bh.
344 * It is true now, do not change it.
345 * Explanation follows: if protocol handler, mangling packet, will
346 * be the first on list, it is not able to sense, that packet
347 * is cloned and should be copied-on-write, so that it will
348 * change it and subsequent readers will get broken packet.
353 * dev_add_pack - add packet handler
354 * @pt: packet type declaration
356 * Add a protocol handler to the networking stack. The passed &packet_type
357 * is linked into kernel lists and may not be freed until it has been
358 * removed from the kernel lists.
360 * This call does not sleep therefore it can not
361 * guarantee all CPU's that are in middle of receiving packets
362 * will see the new packet type (until the next received packet).
365 void dev_add_pack(struct packet_type *pt)
369 spin_lock_bh(&ptype_lock);
370 if (pt->type == htons(ETH_P_ALL))
371 list_add_rcu(&pt->list, &ptype_all);
373 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
374 list_add_rcu(&pt->list, &ptype_base[hash]);
376 spin_unlock_bh(&ptype_lock);
380 * __dev_remove_pack - remove packet handler
381 * @pt: packet type declaration
383 * Remove a protocol handler that was previously added to the kernel
384 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
385 * from the kernel lists and can be freed or reused once this function
388 * The packet type might still be in use by receivers
389 * and must not be freed until after all the CPU's have gone
390 * through a quiescent state.
392 void __dev_remove_pack(struct packet_type *pt)
394 struct list_head *head;
395 struct packet_type *pt1;
397 spin_lock_bh(&ptype_lock);
399 if (pt->type == htons(ETH_P_ALL))
402 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
404 list_for_each_entry(pt1, head, list) {
406 list_del_rcu(&pt->list);
411 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
413 spin_unlock_bh(&ptype_lock);
416 * dev_remove_pack - remove packet handler
417 * @pt: packet type declaration
419 * Remove a protocol handler that was previously added to the kernel
420 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
421 * from the kernel lists and can be freed or reused once this function
424 * This call sleeps to guarantee that no CPU is looking at the packet
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	/* Wait for all in-flight receivers before the caller frees pt. */
	synchronize_net();
}
434 /******************************************************************************
436 Device Boot-time Settings Routines
438 *******************************************************************************/
440 /* Boot time configuration table */
441 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
444 * netdev_boot_setup_add - add new setup entry
445 * @name: name of the device
446 * @map: configured settings for the device
448 * Adds new setup entry to the dev_boot_setup list. The function
449 * returns 0 on error and 1 on success. This is a generic routine to
452 static int netdev_boot_setup_add(char *name, struct ifmap *map)
454 struct netdev_boot_setup *s;
458 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
459 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
460 memset(s[i].name, 0, sizeof(s[i].name));
461 strlcpy(s[i].name, name, IFNAMSIZ);
462 memcpy(&s[i].map, map, sizeof(s[i].map));
467 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
471 * netdev_boot_setup_check - check boot time settings
472 * @dev: the netdevice
474 * Check boot time settings for the device.
475 * The found settings are set for the device to be used
476 * later in the device probing.
477 * Returns 0 if no settings found, 1 if they are.
479 int netdev_boot_setup_check(struct net_device *dev)
481 struct netdev_boot_setup *s = dev_boot_setup;
484 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
485 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
486 !strcmp(dev->name, s[i].name)) {
487 dev->irq = s[i].map.irq;
488 dev->base_addr = s[i].map.base_addr;
489 dev->mem_start = s[i].map.mem_start;
490 dev->mem_end = s[i].map.mem_end;
499 * netdev_boot_base - get address from boot time settings
500 * @prefix: prefix for network device
501 * @unit: id for network device
503 * Check boot time settings for the base address of device.
504 * The found settings are set for the device to be used
505 * later in the device probing.
506 * Returns 0 if no settings found.
508 unsigned long netdev_boot_base(const char *prefix, int unit)
510 const struct netdev_boot_setup *s = dev_boot_setup;
514 sprintf(name, "%s%d", prefix, unit);
517 * If device already registered then return base of 1
518 * to indicate not to probe for this interface
520 if (__dev_get_by_name(&init_net, name))
523 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
524 if (!strcmp(name, s[i].name))
525 return s[i].map.base_addr;
530 * Saves at boot time configured settings for any netdevice.
532 int __init netdev_boot_setup(char *str)
537 str = get_options(str, ARRAY_SIZE(ints), ints);
542 memset(&map, 0, sizeof(map));
546 map.base_addr = ints[2];
548 map.mem_start = ints[3];
550 map.mem_end = ints[4];
552 /* Add new entry to the list */
553 return netdev_boot_setup_add(str, &map);
556 __setup("netdev=", netdev_boot_setup);
558 /*******************************************************************************
560 Device Interface Subroutines
562 *******************************************************************************/
565 * __dev_get_by_name - find a device by its name
566 * @net: the applicable net namespace
567 * @name: name to find
569 * Find an interface by name. Must be called under RTNL semaphore
570 * or @dev_base_lock. If the name is found a pointer to the device
571 * is returned. If the name is not found then %NULL is returned. The
572 * reference counters are not incremented so the caller must be
573 * careful with locks.
576 struct net_device *__dev_get_by_name(struct net *net, const char *name)
578 struct hlist_node *p;
580 hlist_for_each(p, dev_name_hash(net, name)) {
581 struct net_device *dev
582 = hlist_entry(p, struct net_device, name_hlist);
583 if (!strncmp(dev->name, name, IFNAMSIZ))
590 * dev_get_by_name - find a device by its name
591 * @net: the applicable net namespace
592 * @name: name to find
594 * Find an interface by name. This can be called from any
595 * context and does its own locking. The returned handle has
596 * the usage count incremented and the caller must use dev_put() to
597 * release it when it is no longer needed. %NULL is returned if no
598 * matching device is found.
601 struct net_device *dev_get_by_name(struct net *net, const char *name)
603 struct net_device *dev;
605 read_lock(&dev_base_lock);
606 dev = __dev_get_by_name(net, name);
609 read_unlock(&dev_base_lock);
614 * __dev_get_by_index - find a device by its ifindex
615 * @net: the applicable net namespace
616 * @ifindex: index of device
618 * Search for an interface by index. Returns %NULL if the device
619 * is not found or a pointer to the device. The device has not
620 * had its reference counter increased so the caller must be careful
621 * about locking. The caller must hold either the RTNL semaphore
625 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
627 struct hlist_node *p;
629 hlist_for_each(p, dev_index_hash(net, ifindex)) {
630 struct net_device *dev
631 = hlist_entry(p, struct net_device, index_hlist);
632 if (dev->ifindex == ifindex)
640 * dev_get_by_index - find a device by its ifindex
641 * @net: the applicable net namespace
642 * @ifindex: index of device
644 * Search for an interface by index. Returns NULL if the device
645 * is not found or a pointer to the device. The device returned has
646 * had a reference added and the pointer is safe until the user calls
647 * dev_put to indicate they have finished with it.
650 struct net_device *dev_get_by_index(struct net *net, int ifindex)
652 struct net_device *dev;
654 read_lock(&dev_base_lock);
655 dev = __dev_get_by_index(net, ifindex);
658 read_unlock(&dev_base_lock);
663 * dev_getbyhwaddr - find a device by its hardware address
664 * @net: the applicable net namespace
665 * @type: media type of device
666 * @ha: hardware address
668 * Search for an interface by MAC address. Returns NULL if the device
669 * is not found or a pointer to the device. The caller must hold the
670 * rtnl semaphore. The returned device has not had its ref count increased
671 * and the caller must therefore be careful about locking
674 * If the API was consistent this would be __dev_get_by_hwaddr
677 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
679 struct net_device *dev;
683 for_each_netdev(net, dev)
684 if (dev->type == type &&
685 !memcmp(dev->dev_addr, ha, dev->addr_len))
691 EXPORT_SYMBOL(dev_getbyhwaddr);
693 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
695 struct net_device *dev;
698 for_each_netdev(net, dev)
699 if (dev->type == type)
705 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
/* Locked wrapper: returns the first device of @type with a reference held. */
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);	/* caller must dev_put() when done */
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);
722 * dev_get_by_flags - find any device with given flags
723 * @net: the applicable net namespace
724 * @if_flags: IFF_* values
725 * @mask: bitmask of bits in if_flags to check
727 * Search for any interface with the given flags. Returns NULL if a device
728 * is not found or a pointer to the device. The device returned has
729 * had a reference added and the pointer is safe until the user calls
730 * dev_put to indicate they have finished with it.
733 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
735 struct net_device *dev, *ret;
738 read_lock(&dev_base_lock);
739 for_each_netdev(net, dev) {
740 if (((dev->flags ^ if_flags) & mask) == 0) {
746 read_unlock(&dev_base_lock);
751 * dev_valid_name - check if name is okay for network device
754 * Network device names need to be valid file names to
755 * to allow sysfs to work. We also disallow any kind of
758 int dev_valid_name(const char *name)
762 if (strlen(name) >= IFNAMSIZ)
764 if (!strcmp(name, ".") || !strcmp(name, ".."))
768 if (*name == '/' || isspace(*name))
776 * __dev_alloc_name - allocate a name for a device
777 * @net: network namespace to allocate the device name in
778 * @name: name format string
779 * @buf: scratch buffer and result name string
781 * Passed a format string - eg "lt%d" it will try and find a suitable
782 * id. It scans list of devices to build up a free map, then chooses
783 * the first empty slot. The caller must hold the dev_base or rtnl lock
784 * while allocating the name and adding the device in order to avoid
786 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
787 * Returns the number of the unit assigned or a negative errno code.
790 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
794 const int max_netdevices = 8*PAGE_SIZE;
795 unsigned long *inuse;
796 struct net_device *d;
798 p = strnchr(name, IFNAMSIZ-1, '%');
801 * Verify the string as this thing may have come from
802 * the user. There must be either one "%d" and no other "%"
805 if (p[1] != 'd' || strchr(p + 2, '%'))
808 /* Use one page as a bit array of possible slots */
809 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
813 for_each_netdev(net, d) {
814 if (!sscanf(d->name, name, &i))
816 if (i < 0 || i >= max_netdevices)
819 /* avoid cases where sscanf is not exact inverse of printf */
820 snprintf(buf, IFNAMSIZ, name, i);
821 if (!strncmp(buf, d->name, IFNAMSIZ))
825 i = find_first_zero_bit(inuse, max_netdevices);
826 free_page((unsigned long) inuse);
829 snprintf(buf, IFNAMSIZ, name, i);
830 if (!__dev_get_by_name(net, buf))
833 /* It is possible to run out of possible slots
834 * when the name is long and there isn't enough space left
835 * for the digits, or if all bits are used.
841 * dev_alloc_name - allocate a name for a device
843 * @name: name format string
845 * Passed a format string - eg "lt%d" it will try and find a suitable
846 * id. It scans list of devices to build up a free map, then chooses
847 * the first empty slot. The caller must hold the dev_base or rtnl lock
848 * while allocating the name and adding the device in order to avoid
850 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
851 * Returns the number of the unit assigned or a negative errno code.
854 int dev_alloc_name(struct net_device *dev, const char *name)
860 BUG_ON(!dev_net(dev));
862 ret = __dev_alloc_name(net, name, buf);
864 strlcpy(dev->name, buf, IFNAMSIZ);
870 * dev_change_name - change name of a device
872 * @newname: name (or format string) must be at least IFNAMSIZ
874 * Change name of a device, can pass format strings "eth%d".
877 int dev_change_name(struct net_device *dev, const char *newname)
879 char oldname[IFNAMSIZ];
885 BUG_ON(!dev_net(dev));
888 if (dev->flags & IFF_UP)
891 if (!dev_valid_name(newname))
894 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
897 memcpy(oldname, dev->name, IFNAMSIZ);
899 if (strchr(newname, '%')) {
900 err = dev_alloc_name(dev, newname);
904 else if (__dev_get_by_name(net, newname))
907 strlcpy(dev->name, newname, IFNAMSIZ);
910 /* For now only devices in the initial network namespace
913 if (net == &init_net) {
914 ret = device_rename(&dev->dev, dev->name);
916 memcpy(dev->name, oldname, IFNAMSIZ);
921 write_lock_bh(&dev_base_lock);
922 hlist_del(&dev->name_hlist);
923 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
924 write_unlock_bh(&dev_base_lock);
926 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
927 ret = notifier_to_errno(ret);
932 "%s: name change rollback failed: %d.\n",
936 memcpy(dev->name, oldname, IFNAMSIZ);
945 * dev_set_alias - change ifalias of a device
947 * @alias: name up to IFALIASZ
948 * @len: limit of bytes to copy from info
950 * Set ifalias for a device,
952 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
967 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
971 strlcpy(dev->ifalias, alias, len+1);
977 * netdev_features_change - device changes features
978 * @dev: device to cause notification
980 * Called to indicate a device has changed features.
982 void netdev_features_change(struct net_device *dev)
984 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
986 EXPORT_SYMBOL(netdev_features_change);
989 * netdev_state_change - device changes state
990 * @dev: device to cause notification
992 * Called to indicate a device has changed state. This function calls
993 * the notifier chains for netdev_chain and sends a NEWLINK message
994 * to the routing socket.
996 void netdev_state_change(struct net_device *dev)
998 if (dev->flags & IFF_UP) {
999 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1000 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1004 void netdev_bonding_change(struct net_device *dev)
1006 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1008 EXPORT_SYMBOL(netdev_bonding_change);
1011 * dev_load - load a network module
1012 * @net: the applicable net namespace
1013 * @name: name of interface
1015 * If a network interface is not present and the process has suitable
1016 * privileges this function loads the module. If module loading is not
1017 * available in this kernel then it becomes a nop.
1020 void dev_load(struct net *net, const char *name)
1022 struct net_device *dev;
1024 read_lock(&dev_base_lock);
1025 dev = __dev_get_by_name(net, name);
1026 read_unlock(&dev_base_lock);
1028 if (!dev && capable(CAP_SYS_MODULE))
1029 request_module("%s", name);
1033 * dev_open - prepare an interface for use.
1034 * @dev: device to open
1036 * Takes a device from down to up state. The device's private open
1037 * function is invoked and then the multicast lists are loaded. Finally
1038 * the device is moved into the up state and a %NETDEV_UP message is
1039 * sent to the netdev notifier chain.
1041 * Calling this function on an active interface is a nop. On a failure
1042 * a negative errno code is returned.
1044 int dev_open(struct net_device *dev)
1046 const struct net_device_ops *ops = dev->netdev_ops;
1055 if (dev->flags & IFF_UP)
1059 * Is it even present?
1061 if (!netif_device_present(dev))
1065 * Call device private open method
1067 set_bit(__LINK_STATE_START, &dev->state);
1069 if (ops->ndo_validate_addr)
1070 ret = ops->ndo_validate_addr(dev);
1072 if (!ret && ops->ndo_open)
1073 ret = ops->ndo_open(dev);
1076 * If it went open OK then:
1080 clear_bit(__LINK_STATE_START, &dev->state);
1085 dev->flags |= IFF_UP;
1088 * Initialize multicasting status
1090 dev_set_rx_mode(dev);
1093 * Wakeup transmit queue engine
1098 * ... and announce new interface.
1100 call_netdevice_notifiers(NETDEV_UP, dev);
1107 * dev_close - shutdown an interface.
1108 * @dev: device to shutdown
1110 * This function moves an active device into down state. A
1111 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1112 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1115 int dev_close(struct net_device *dev)
1117 const struct net_device_ops *ops = dev->netdev_ops;
1122 if (!(dev->flags & IFF_UP))
1126 * Tell people we are going down, so that they can
1127 * prepare to death, when device is still operating.
1129 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1131 clear_bit(__LINK_STATE_START, &dev->state);
1133 /* Synchronize to scheduled poll. We cannot touch poll list,
1134 * it can be even on different cpu. So just clear netif_running().
1136 * dev->stop() will invoke napi_disable() on all of it's
1137 * napi_struct instances on this device.
1139 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1141 dev_deactivate(dev);
1144 * Call the device specific close. This cannot fail.
1145 * Only if device is UP
1147 * We allow it to be called even after a DETACH hot-plug
1154 * Device is now down.
1157 dev->flags &= ~IFF_UP;
1160 * Tell people we are down
1162 call_netdevice_notifiers(NETDEV_DOWN, dev);
1169 * dev_disable_lro - disable Large Receive Offload on a device
1172 * Disable Large Receive Offload (LRO) on a net device. Must be
1173 * called under RTNL. This is needed if received packets may be
1174 * forwarded to another interface.
1176 void dev_disable_lro(struct net_device *dev)
1178 if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1179 dev->ethtool_ops->set_flags) {
1180 u32 flags = dev->ethtool_ops->get_flags(dev);
1181 if (flags & ETH_FLAG_LRO) {
1182 flags &= ~ETH_FLAG_LRO;
1183 dev->ethtool_ops->set_flags(dev, flags);
1186 WARN_ON(dev->features & NETIF_F_LRO);
1188 EXPORT_SYMBOL(dev_disable_lro);
1191 static int dev_boot_phase = 1;
1194 * Device change register/unregister. These are not inline or static
1195 * as we export them to the world.
1199 * register_netdevice_notifier - register a network notifier block
1202 * Register a notifier to be called when network device events occur.
1203 * The notifier passed is linked into the kernel structures and must
1204 * not be reused until it has been unregistered. A negative errno code
1205 * is returned on a failure.
1207 * When registered all registration and up events are replayed
1208 * to the new notifier to allow device to have a race free
1209 * view of the network device list.
1212 int register_netdevice_notifier(struct notifier_block *nb)
1214 struct net_device *dev;
1215 struct net_device *last;
1220 err = raw_notifier_chain_register(&netdev_chain, nb);
1226 for_each_netdev(net, dev) {
1227 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1228 err = notifier_to_errno(err);
1232 if (!(dev->flags & IFF_UP))
1235 nb->notifier_call(nb, NETDEV_UP, dev);
1246 for_each_netdev(net, dev) {
1250 if (dev->flags & IFF_UP) {
1251 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1252 nb->notifier_call(nb, NETDEV_DOWN, dev);
1254 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1258 raw_notifier_chain_unregister(&netdev_chain, nb);
1263 * unregister_netdevice_notifier - unregister a network notifier block
1266 * Unregister a notifier previously registered by
1267 * register_netdevice_notifier(). The notifier is unlinked into the
1268 * kernel structures and may then be reused. A negative errno code
1269 * is returned on a failure.
1272 int unregister_netdevice_notifier(struct notifier_block *nb)
1277 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1283 * call_netdevice_notifiers - call all network notifier blocks
1284 * @val: value passed unmodified to notifier function
1285 * @dev: net_device pointer passed unmodified to notifier function
1287 * Call all network notifier blocks. Parameters and return value
1288 * are as for raw_notifier_call_chain().
1291 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1293 return raw_notifier_call_chain(&netdev_chain, val, dev);
1296 /* When > 0 there are consumers of rx skb time stamps */
1297 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1299 void net_enable_timestamp(void)
1301 atomic_inc(&netstamp_needed);
1304 void net_disable_timestamp(void)
1306 atomic_dec(&netstamp_needed);
1309 static inline void net_timestamp(struct sk_buff *skb)
1311 if (atomic_read(&netstamp_needed))
1312 __net_timestamp(skb);
1314 skb->tstamp.tv64 = 0;
1318 * Support routine. Sends outgoing frames to any network
1319 * taps currently in use.
/*
 * NOTE(review): elided extract - the rcu_read_lock()/rcu_read_unlock()
 * pair, the skb2 NULL check/break and several closing braces are
 * missing from this view.
 */
1322 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1324 struct packet_type *ptype;
1329 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1330 /* Never send packets back to the socket
1331 * they originated from - MvS (miquels@drinkel.ow.org)
1333 if ((ptype->dev == dev || !ptype->dev) &&
1334 (ptype->af_packet_priv == NULL ||
1335 (struct sock *)ptype->af_packet_priv != skb->sk)) {
/* Each matching tap gets its own clone; the tap consumes it. */
1336 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1340 /* skb->nh should be correctly
1341 set by sender, so that the second statement is
1342 just protection against buggy protocols.
1344 skb_reset_mac_header(skb2);
1346 if (skb_network_header(skb2) < skb2->data ||
1347 skb2->network_header > skb2->tail) {
1348 if (net_ratelimit())
1349 printk(KERN_CRIT "protocol %04x is "
1351 skb2->protocol, dev->name);
1352 skb_reset_network_header(skb2);
/* Deliver the clone marked as an outgoing frame. */
1355 skb2->transport_header = skb2->network_header;
1356 skb2->pkt_type = PACKET_OUTGOING;
1357 ptype->func(skb2, skb->dev, ptype, skb->dev);
1364 static inline void __netif_reschedule(struct Qdisc *q)
1366 struct softnet_data *sd;
1367 unsigned long flags;
1369 local_irq_save(flags);
1370 sd = &__get_cpu_var(softnet_data);
1371 q->next_sched = sd->output_queue;
1372 sd->output_queue = q;
1373 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1374 local_irq_restore(flags);
1377 void __netif_schedule(struct Qdisc *q)
1379 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1380 __netif_reschedule(q);
1382 EXPORT_SYMBOL(__netif_schedule);
1384 void dev_kfree_skb_irq(struct sk_buff *skb)
1386 if (atomic_dec_and_test(&skb->users)) {
1387 struct softnet_data *sd;
1388 unsigned long flags;
1390 local_irq_save(flags);
1391 sd = &__get_cpu_var(softnet_data);
1392 skb->next = sd->completion_queue;
1393 sd->completion_queue = skb;
1394 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1395 local_irq_restore(flags);
1398 EXPORT_SYMBOL(dev_kfree_skb_irq);
/*
 * Free @skb from any context: take the deferred irq-safe path when in
 * hard irq or with irqs disabled, otherwise free immediately.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1411 * netif_device_detach - mark device as removed
1412 * @dev: network device
1414 * Mark device as removed from system and therefore no longer available.
1416 void netif_device_detach(struct net_device *dev)
1418 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1419 netif_running(dev)) {
1420 netif_stop_queue(dev);
1423 EXPORT_SYMBOL(netif_device_detach);
1426 * netif_device_attach - mark device as attached
1427 * @dev: network device
1429 * Mark device as attached from system and restart if needed.
1431 void netif_device_attach(struct net_device *dev)
1433 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1434 netif_running(dev)) {
1435 netif_wake_queue(dev);
1436 __netdev_watchdog_up(dev);
1439 EXPORT_SYMBOL(netif_device_attach);
1441 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1443 return ((features & NETIF_F_GEN_CSUM) ||
1444 ((features & NETIF_F_IP_CSUM) &&
1445 protocol == htons(ETH_P_IP)) ||
1446 ((features & NETIF_F_IPV6_CSUM) &&
1447 protocol == htons(ETH_P_IPV6)));
1450 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1452 if (can_checksum_protocol(dev->features, skb->protocol))
1455 if (skb->protocol == htons(ETH_P_8021Q)) {
1456 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1457 if (can_checksum_protocol(dev->features & dev->vlan_features,
1458 veh->h_vlan_encapsulated_proto))
1466 * Invalidate hardware checksum when packet is to be mangled, and
1467 * complete checksum manually on outgoing path.
/*
 * NOTE(review): elided extract - the __wsum csum declaration, the
 * out/out_set_summed labels and the final return are missing here.
 */
1469 int skb_checksum_help(struct sk_buff *skb)
1472 int ret = 0, offset;
1474 if (skb->ip_summed == CHECKSUM_COMPLETE)
1475 goto out_set_summed;
1477 if (unlikely(skb_shinfo(skb)->gso_size)) {
1478 /* Let GSO fix up the checksum. */
1479 goto out_set_summed;
/* Checksum everything from csum_start to the end of the packet. */
1482 offset = skb->csum_start - skb_headroom(skb);
1483 BUG_ON(offset >= skb_headlen(skb));
1484 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1486 offset += skb->csum_offset;
1487 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
/* Writing into a shared clone would corrupt the other copies;
 * unshare the header first. */
1489 if (skb_cloned(skb) &&
1490 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1491 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1496 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1498 skb->ip_summed = CHECKSUM_NONE;
1504 * skb_gso_segment - Perform segmentation on skb.
1505 * @skb: buffer to segment
1506 * @features: features for the output path (see dev->features)
1508 * This function segments the given skb and returns a list of segments.
1510 * It may return NULL if the skb requires no segmentation. This is
1511 * only possible when GSO is used for verifying header integrity.
/*
 * NOTE(review): elided extract - the int err declaration, the rcu
 * read-side lock around the ptype walk, break statements and the final
 * return are missing from this view.
 */
1513 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1515 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1516 struct packet_type *ptype;
1517 __be16 type = skb->protocol;
/* Strip the link-layer header; gso_segment callbacks expect the skb to
 * start at the network header. */
1520 skb_reset_mac_header(skb);
1521 skb->mac_len = skb->network_header - skb->mac_header;
1522 __skb_pull(skb, skb->mac_len);
1524 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1525 if (skb_header_cloned(skb) &&
1526 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1527 return ERR_PTR(err);
/* Find the protocol handler registered for this ethertype. */
1531 list_for_each_entry_rcu(ptype,
1532 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1533 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1534 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1535 err = ptype->gso_send_check(skb);
1536 segs = ERR_PTR(err);
1537 if (err || skb_gso_ok(skb, features))
1539 __skb_push(skb, (skb->data -
1540 skb_network_header(skb)));
1542 segs = ptype->gso_segment(skb, features);
/* Restore the link-layer header before returning. */
1548 __skb_push(skb, skb->data - skb_mac_header(skb));
1553 EXPORT_SYMBOL(skb_gso_segment);
1555 /* Take action when hardware reception checksum errors are detected. */
1557 void netdev_rx_csum_fault(struct net_device *dev)
1559 if (net_ratelimit()) {
1560 printk(KERN_ERR "%s: hw csum failure.\n",
1561 dev ? dev->name : "<unknown>");
1565 EXPORT_SYMBOL(netdev_rx_csum_fault);
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	/* Device can DMA straight from high memory - nothing to check. */
	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	/* Any highmem page fragment makes the skb illegal for this device. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}
/*
 * NOTE(review): elided extract - the "struct dev_gso_cb {" opener, the
 * do { opener, the per-segment kfree_skb() and closing braces are
 * missing from this view. The struct stashes the skb's original
 * destructor in its control block while GSO segments are in flight.
 */
1590 void (*destructor)(struct sk_buff *skb);
1593 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
/* Destructor installed on a GSO skb: free any unsent segments chained
 * on skb->next, then chain to the skb's original destructor. */
1595 static void dev_gso_skb_destructor(struct sk_buff *skb)
1597 struct dev_gso_cb *cb;
1600 struct sk_buff *nskb = skb->next;
1602 skb->next = nskb->next;
1605 } while (skb->next);
1607 cb = DEV_GSO_CB(skb);
1609 cb->destructor(skb);
1613 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1614 * @skb: buffer to segment
1616 * This function segments the given skb and stores the list of segments
/*
 * NOTE(review): elided extract - the IS_ERR(segs) check, the chaining
 * of segments onto skb->next and the return statements are missing.
 */
1619 static int dev_gso_segment(struct sk_buff *skb)
1621 struct net_device *dev = skb->dev;
1622 struct sk_buff *segs;
/* Mask out SG/checksum features the device cannot use for this skb
 * (e.g. highmem fragments it cannot DMA from). */
1623 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1626 segs = skb_gso_segment(skb, features);
1628 /* Verifying header integrity only. */
1633 return PTR_ERR(segs);
/* Hijack the destructor so unsent segments are freed with the skb. */
1636 DEV_GSO_CB(skb)->destructor = skb->destructor;
1637 skb->destructor = dev_gso_skb_destructor;
/*
 * Hand one skb (or its GSO segment chain) to the driver's
 * ndo_start_xmit. NOTE(review): elided extract - the gso: label, the
 * rc declaration, the out_kfree_skb path and returns are missing.
 */
1642 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1643 struct netdev_queue *txq)
1645 const struct net_device_ops *ops = dev->netdev_ops;
1647 prefetch(&dev->netdev_ops->ndo_start_xmit);
1648 if (likely(!skb->next)) {
/* Feed taps (packet sockets) before the device sees the frame. */
1649 if (!list_empty(&ptype_all))
1650 dev_queue_xmit_nit(skb, dev);
1652 if (netif_needs_gso(dev, skb)) {
1653 if (unlikely(dev_gso_segment(skb)))
1659 return ops->ndo_start_xmit(skb, dev);
/* GSO path: transmit the segment chain one skb at a time, restoring
 * the chain head if the driver rejects a segment. */
1664 struct sk_buff *nskb = skb->next;
1667 skb->next = nskb->next;
1669 rc = ops->ndo_start_xmit(nskb, dev);
1671 nskb->next = skb->next;
/* Queue stopped mid-chain: report busy so the rest is requeued. */
1675 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1676 return NETDEV_TX_BUSY;
1677 } while (skb->next);
/* All segments sent: undo the destructor hijack from dev_gso_segment. */
1679 skb->destructor = DEV_GSO_CB(skb)->destructor;
/* Random salt for the TX flow hash, seeded lazily on first use. */
1686 static u32 simple_tx_hashrnd;
1687 static int simple_tx_hashrnd_initialized = 0;
/*
 * Map a flow (addresses + ports when available) to a TX queue index so
 * one flow always lands on the same queue. NOTE(review): elided
 * extract - ip_proto/ihl/hash declarations, break/default cases of
 * both switches and the ports=0 fallbacks are missing. The lazy
 * seeding below has no locking; looks racy on SMP (later kernels use
 * net_get_random_once) - confirm before reusing this pattern.
 */
1689 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1691 u32 addr1, addr2, ports;
1695 if (unlikely(!simple_tx_hashrnd_initialized)) {
1696 get_random_bytes(&simple_tx_hashrnd, 4);
1697 simple_tx_hashrnd_initialized = 1;
1700 switch (skb->protocol) {
1701 case htons(ETH_P_IP):
/* Fragments other than the first have no L4 header to hash. */
1702 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1703 ip_proto = ip_hdr(skb)->protocol;
1704 addr1 = ip_hdr(skb)->saddr;
1705 addr2 = ip_hdr(skb)->daddr;
1706 ihl = ip_hdr(skb)->ihl;
1708 case htons(ETH_P_IPV6):
1709 ip_proto = ipv6_hdr(skb)->nexthdr;
1710 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1711 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
/* For TCP/UDP-style protocols fold the port pair into the hash. */
1726 case IPPROTO_UDPLITE:
1727 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1735 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
/* Scale the 32-bit hash onto [0, real_num_tx_queues). */
1737 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1740 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1741 struct sk_buff *skb)
1743 const struct net_device_ops *ops = dev->netdev_ops;
1744 u16 queue_index = 0;
1746 if (ops->ndo_select_queue)
1747 queue_index = ops->ndo_select_queue(dev, skb);
1748 else if (dev->real_num_tx_queues > 1)
1749 queue_index = simple_tx_hash(dev, skb);
1751 skb_set_queue_mapping(skb, queue_index);
1752 return netdev_get_tx_queue(dev, queue_index);
1756 * dev_queue_xmit - transmit a buffer
1757 * @skb: buffer to transmit
1759 * Queue a buffer for transmission to a network device. The caller must
1760 * have set the device and priority and built the buffer before calling
1761 * this function. The function can be called from an interrupt.
1763 * A negative errno code is returned on a failure. A success does not
1764 * guarantee the frame will be transmitted as it may be dropped due
1765 * to congestion or traffic shaping.
1767 * -----------------------------------------------------------------------------------
1768 * I notice this method can also return errors from the queue disciplines,
1769 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1772 * Regardless of the return value, the skb is consumed, so it is currently
1773 * difficult to retry a send to this method. (You can bump the ref count
1774 * before sending to hold a reference for retry if you are careful.)
1776 * When calling this method, interrupts MUST be enabled. This is because
1777 * the BH enable code must have IRQs enabled so that it will not deadlock.
/*
 * NOTE(review): elided extract - the struct Qdisc *q and rc
 * declarations, gso:/out/out_kfree_skb labels, rcu_read_lock_bh(),
 * several gotos and returns are missing from this view.
 */
1780 int dev_queue_xmit(struct sk_buff *skb)
1782 struct net_device *dev = skb->dev;
1783 struct netdev_queue *txq;
1787 /* GSO will handle the following emulations directly. */
1788 if (netif_needs_gso(dev, skb))
/* Linearize the frag_list if the device cannot take chained frags. */
1791 if (skb_shinfo(skb)->frag_list &&
1792 !(dev->features & NETIF_F_FRAGLIST) &&
1793 __skb_linearize(skb))
1796 /* Fragmented skb is linearized if device does not support SG,
1797 * or if at least one of fragments is in highmem and device
1798 * does not support DMA from it.
1800 if (skb_shinfo(skb)->nr_frags &&
1801 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1802 __skb_linearize(skb))
1805 /* If packet is not checksummed and device does not support
1806 * checksumming for this protocol, complete checksumming here.
1808 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1809 skb_set_transport_header(skb, skb->csum_start -
1811 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1816 /* Disable soft irqs for various locks below. Also
1817 * stops preemption for RCU.
/* Qdisc path: enqueue on the queue discipline under its root lock. */
1821 txq = dev_pick_tx(dev, skb);
1822 q = rcu_dereference(txq->qdisc);
1824 #ifdef CONFIG_NET_CLS_ACT
1825 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1828 spinlock_t *root_lock = qdisc_lock(q);
1830 spin_lock(root_lock);
1832 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1836 rc = qdisc_enqueue_root(skb, q);
1839 spin_unlock(root_lock);
1844 /* The device has no queue. Common case for software devices:
1845 loopback, all the sorts of tunnels...
1847 Really, it is unlikely that netif_tx_lock protection is necessary
1848 here. (f.e. loopback and IP tunnels are clean ignoring statistics
1850 However, it is possible, that they rely on protection
1853 Check this and shot the lock. It is not prone from deadlocks.
1854 Either shot noqueue qdisc, it is even simpler 8)
1856 if (dev->flags & IFF_UP) {
1857 int cpu = smp_processor_id(); /* ok because BHs are off */
/* xmit_lock_owner == cpu would mean we are recursing into ourselves. */
1859 if (txq->xmit_lock_owner != cpu) {
1861 HARD_TX_LOCK(dev, txq, cpu);
1863 if (!netif_tx_queue_stopped(txq)) {
1865 if (!dev_hard_start_xmit(skb, dev, txq)) {
1866 HARD_TX_UNLOCK(dev, txq);
1870 HARD_TX_UNLOCK(dev, txq);
1871 if (net_ratelimit())
1872 printk(KERN_CRIT "Virtual device %s asks to "
1873 "queue packet!\n", dev->name);
1875 /* Recursion is detected! It is possible,
1877 if (net_ratelimit())
1878 printk(KERN_CRIT "Dead loop on virtual device "
1879 "%s, fix it urgently!\n", dev->name);
1884 rcu_read_unlock_bh();
1890 rcu_read_unlock_bh();
1895 /*=======================================================================
1897 =======================================================================*/
/* Max packets queued per CPU backlog before netif_rx() starts dropping. */
1899 int netdev_max_backlog __read_mostly = 1000;
/* Upper bound on packets processed per net_rx_action() invocation. */
1900 int netdev_budget __read_mostly = 300;
1901 int weight_p __read_mostly = 64; /* old backlog weight */
/* Per-cpu rx statistics exported via /proc/net/softnet_stat. */
1903 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1907 * netif_rx - post buffer to the network code
1908 * @skb: buffer to post
1910 * This function receives a packet from a device driver and queues it for
1911 * the upper (protocol) levels to process. It always succeeds. The buffer
1912 * may be dropped during processing for congestion control or by the
1916 * NET_RX_SUCCESS (no congestion)
1917 * NET_RX_DROP (packet was dropped)
/*
 * NOTE(review): elided extract - the NET_RX_DROP return, the
 * net_timestamp() call, the enqueue/goto labels and kfree_skb() on the
 * drop path are missing from this view.
 */
1921 int netif_rx(struct sk_buff *skb)
1923 struct softnet_data *queue;
1924 unsigned long flags;
1926 /* if netpoll wants it, pretend we never saw it */
1927 if (netpoll_rx(skb))
1930 if (!skb->tstamp.tv64)
1934 * The code is rearranged so that the path is the most
1935 * short when CPU is congested, but is still operating.
/* Everything below runs on this CPU's private backlog queue. */
1937 local_irq_save(flags);
1938 queue = &__get_cpu_var(softnet_data);
1940 __get_cpu_var(netdev_rx_stat).total++;
1941 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1942 if (queue->input_pkt_queue.qlen) {
1944 __skb_queue_tail(&queue->input_pkt_queue, skb);
1945 local_irq_restore(flags);
1946 return NET_RX_SUCCESS;
/* Queue was empty: schedule the backlog NAPI poller, then enqueue. */
1949 napi_schedule(&queue->backlog);
/* Backlog full: count the drop and free the packet. */
1953 __get_cpu_var(netdev_rx_stat).dropped++;
1954 local_irq_restore(flags);
/*
 * Process-context variant of netif_rx(): queue the packet, then run
 * any raised softirqs by hand since there is no interrupt return to do
 * it for us. Preemption is disabled so we stay on the CPU whose
 * backlog we just used.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}
EXPORT_SYMBOL(netif_rx_ni);
/*
 * TX softirq handler: free skbs parked on the per-cpu completion queue
 * and run qdiscs scheduled via __netif_schedule(). NOTE(review):
 * elided extract - while-loop openers, __kfree_skb() on each completed
 * skb, the struct Qdisc *head declaration, qdisc_run(q) and closing
 * braces are missing from this view.
 */
1975 static void net_tx_action(struct softirq_action *h)
1977 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1979 if (sd->completion_queue) {
1980 struct sk_buff *clist;
/* Detach the whole completion list with irqs off, then free lazily. */
1982 local_irq_disable();
1983 clist = sd->completion_queue;
1984 sd->completion_queue = NULL;
1988 struct sk_buff *skb = clist;
1989 clist = clist->next;
1991 WARN_ON(atomic_read(&skb->users));
1996 if (sd->output_queue) {
1999 local_irq_disable();
2000 head = sd->output_queue;
2001 sd->output_queue = NULL;
2005 struct Qdisc *q = head;
2006 spinlock_t *root_lock;
2008 head = head->next_sched;
2010 root_lock = qdisc_lock(q);
2011 if (spin_trylock(root_lock)) {
2012 smp_mb__before_clear_bit();
2013 clear_bit(__QDISC_STATE_SCHED,
2016 spin_unlock(root_lock);
/* Lock contended: reschedule the qdisc unless it is being torn down. */
2018 if (!test_bit(__QDISC_STATE_DEACTIVATED,
2020 __netif_reschedule(q);
2022 smp_mb__before_clear_bit();
2023 clear_bit(__QDISC_STATE_SCHED,
2031 static inline int deliver_skb(struct sk_buff *skb,
2032 struct packet_type *pt_prev,
2033 struct net_device *orig_dev)
2035 atomic_inc(&skb->users);
2036 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2039 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2040 /* These hooks defined here for ATM */
/* Function pointers installed by the bridge module when it loads. */
2042 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2043 unsigned char *addr);
2044 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2047 * If bridge module is loaded call bridging hook.
2048 * returns NULL if packet was consumed.
2050 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2051 struct sk_buff *skb) __read_mostly;
/*
 * NOTE(review): elided extract - the early "return skb" and the flush
 * of *pt_prev around the deliver_skb() call are missing here.
 */
2052 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2053 struct packet_type **pt_prev, int *ret,
2054 struct net_device *orig_dev)
2056 struct net_bridge_port *port;
/* Not on a bridge port (or looped back): leave the skb alone. */
2058 if (skb->pkt_type == PACKET_LOOPBACK ||
2059 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2063 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2067 return br_handle_frame_hook(port, skb);
/* Stub used when the bridge is not built: pass the skb through. */
2070 #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
2073 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
/* Hook installed by the macvlan module when it loads. */
2074 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2075 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
/*
 * NOTE(review): elided extract - the early "return skb" and the flush
 * of *pt_prev around deliver_skb() are missing from this view.
 */
2077 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2078 struct packet_type **pt_prev,
2080 struct net_device *orig_dev)
/* Device is not a macvlan lower device: pass the skb through. */
2082 if (skb->dev->macvlan_port == NULL)
2086 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2089 return macvlan_handle_frame_hook(skb);
/* Stub used when macvlan is not built: pass the skb through. */
2092 #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2095 #ifdef CONFIG_NET_CLS_ACT
2096 /* TODO: Maybe we should just force sch_ingress to be compiled in
2097 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2098 * a compare and 2 stores extra right now if we dont have it on
2099 * but have CONFIG_NET_CLS_ACT
2100 * NOTE: This doesnt stop any functionality; if you dont have
2101 * the ingress scheduler, you just cant add policies on ingress.
/*
 * Run the ingress qdisc over @skb and return its TC verdict.
 * NOTE(review): elided extract - the struct Qdisc *q declaration and
 * its assignment from rxq->qdisc, the TC_ACT_SHOT return on loop
 * detection and the final return are missing from this view.
 */
2104 static int ing_filter(struct sk_buff *skb)
2106 struct net_device *dev = skb->dev;
2107 u32 ttl = G_TC_RTTL(skb->tc_verd);
2108 struct netdev_queue *rxq;
2109 int result = TC_ACT_OK;
/* A redirect loop keeps bouncing the skb; cap the round-trip count. */
2112 if (MAX_RED_LOOP < ttl++) {
2114 "Redir loop detected Dropping packet (%d->%d)\n",
2115 skb->iif, dev->ifindex);
2119 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2120 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2122 rxq = &dev->rx_queue;
2125 if (q != &noop_qdisc) {
2126 spin_lock(qdisc_lock(q));
2127 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2128 result = qdisc_enqueue_root(skb, q);
2129 spin_unlock(qdisc_lock(q));
/*
 * Apply ingress policing to @skb before protocol delivery.
 * NOTE(review): elided extract - the early "return skb", the
 * TC_ACT_SHOT/STOLEN cases that free the skb and return NULL, and
 * closing braces are missing from this view.
 */
2135 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2136 struct packet_type **pt_prev,
2137 int *ret, struct net_device *orig_dev)
/* No ingress qdisc configured: nothing to police. */
2139 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2143 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2146 /* Huh? Why does turning on AF_PACKET affect this? */
2147 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2150 switch (ing_filter(skb)) {
2164 * netif_nit_deliver - deliver received packets to network taps
2167 * This function is used to deliver incoming packets to network
2168 * taps. It should be used when the normal netif_receive_skb path
2169 * is bypassed, for example because of VLAN acceleration.
/*
 * NOTE(review): elided extract - the rcu_read_lock()/unlock() pair
 * around the tap walk is missing from this view.
 */
2171 void netif_nit_deliver(struct sk_buff *skb)
2173 struct packet_type *ptype;
/* Fast exit when no taps (e.g. packet sockets) are open. */
2175 if (list_empty(&ptype_all))
2178 skb_reset_network_header(skb);
2179 skb_reset_transport_header(skb);
2180 skb->mac_len = skb->network_header - skb->mac_header;
/* Deliver to every tap bound to this device or to all devices. */
2183 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2184 if (!ptype->dev || ptype->dev == skb->dev)
2185 deliver_skb(skb, ptype, skb->dev);
2191 * netif_receive_skb - process receive buffer from network
2192 * @skb: buffer to process
2194 * netif_receive_skb() is the main receive data processing function.
2195 * It always succeeds. The buffer may be dropped during processing
2196 * for congestion control or by the protocol layers.
2198 * This function may only be called from softirq context and interrupts
2199 * should be enabled.
2201 * Return values (usually ignored):
2202 * NET_RX_SUCCESS: no congestion
2203 * NET_RX_DROP: packet was dropped
/*
 * NOTE(review): elided extract - the __be16 type declaration,
 * rcu_read_lock()/unlock(), the ncls:/out labels, the kfree_skb()
 * drop path and several returns are missing from this view.
 */
2205 int netif_receive_skb(struct sk_buff *skb)
2207 struct packet_type *ptype, *pt_prev;
2208 struct net_device *orig_dev;
2209 struct net_device *null_or_orig;
2210 int ret = NET_RX_DROP;
2213 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2214 return NET_RX_SUCCESS;
2216 /* if we've gotten here through NAPI, check netpoll */
2217 if (netpoll_receive_skb(skb))
2220 if (!skb->tstamp.tv64)
2224 skb->iif = skb->dev->ifindex;
/* Bonding: decide whether delivery should be restricted to handlers
 * bound to the exact slave device. */
2226 null_or_orig = NULL;
2227 orig_dev = skb->dev;
2228 if (orig_dev->master) {
2229 if (skb_bond_should_drop(skb))
2230 null_or_orig = orig_dev; /* deliver only exact match */
2232 skb->dev = orig_dev->master;
2235 __get_cpu_var(netdev_rx_stat).total++;
2237 skb_reset_network_header(skb);
2238 skb_reset_transport_header(skb);
2239 skb->mac_len = skb->network_header - skb->mac_header;
2245 /* Don't receive packets in an exiting network namespace */
2246 if (!net_alive(dev_net(skb->dev))) {
2251 #ifdef CONFIG_NET_CLS_ACT
2252 if (skb->tc_verd & TC_NCLS) {
2253 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
/* First pass: wildcard taps (ptype_all), e.g. packet sockets. */
2258 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2259 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2260 ptype->dev == orig_dev) {
2262 ret = deliver_skb(skb, pt_prev, orig_dev);
2267 #ifdef CONFIG_NET_CLS_ACT
2268 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
/* Bridge/macvlan hooks may consume the skb (return NULL). */
2274 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2277 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
/* Second pass: protocol handlers hashed by ethertype. */
2281 type = skb->protocol;
2282 list_for_each_entry_rcu(ptype,
2283 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2284 if (ptype->type == type &&
2285 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2286 ptype->dev == orig_dev)) {
2288 ret = deliver_skb(skb, pt_prev, orig_dev);
/* Last handler gets the original skb instead of a clone. */
2294 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2297 /* Jamal, now you will not able to escape explaining
2298 * me how you were going to use this. :-)
2309 static void flush_backlog(void *arg)
2311 struct net_device *dev = arg;
2312 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2313 struct sk_buff *skb, *tmp;
2315 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2316 if (skb->dev == dev) {
2317 __skb_unlink(skb, &queue->input_pkt_queue);
/*
 * Finish a GRO-merged skb: let the protocol fix up headers across the
 * merged segments, then inject it into the normal receive path.
 * NOTE(review): elided extract - the int err declaration, the
 * rcu_read_lock()/unlock() pair, break, the error kfree_skb() path and
 * the out: label are missing from this view.
 */
2322 static int napi_gro_complete(struct sk_buff *skb)
2324 struct packet_type *ptype;
2325 __be16 type = skb->protocol;
2326 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
/* Nothing was merged into this skb: no completion work needed. */
2329 if (!skb_shinfo(skb)->frag_list)
2333 list_for_each_entry_rcu(ptype, head, list) {
2334 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2337 err = ptype->gro_complete(skb);
/* Walked off the list without finding a gro_complete handler. */
2343 WARN_ON(&ptype->list == head);
2345 return NET_RX_SUCCESS;
/* Restore the mac header pulled during GRO before normal delivery. */
2349 __skb_push(skb, -skb_network_offset(skb));
2350 return netif_receive_skb(skb);
2353 void napi_gro_flush(struct napi_struct *napi)
2355 struct sk_buff *skb, *next;
2357 for (skb = napi->gro_list; skb; skb = next) {
2360 napi_gro_complete(skb);
2363 napi->gro_list = NULL;
2365 EXPORT_SYMBOL(napi_gro_flush);
/*
 * GRO entry point: try to merge @skb into an already-held flow on
 * @napi's gro_list; fall back to plain netif_receive_skb() otherwise.
 * NOTE(review): elided extract - same_flow/mac_len/count declarations,
 * rcu lock pair, the normal/ok/err labels and the merged-skb unlink
 * logic are missing from this view.
 */
2367 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2369 struct sk_buff **pp = NULL;
2370 struct packet_type *ptype;
2371 __be16 type = skb->protocol;
2372 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
/* Device did not opt in to GRO: take the normal path. */
2377 if (!(skb->dev->features & NETIF_F_GRO))
2381 list_for_each_entry_rcu(ptype, head, list) {
2384 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2387 skb_reset_network_header(skb);
2388 mac_len = skb->network_header - skb->mac_header;
2389 skb->mac_len = mac_len;
2390 NAPI_GRO_CB(skb)->same_flow = 0;
2391 NAPI_GRO_CB(skb)->flush = 0;
/* Pre-mark held skbs whose MAC header matches as same-flow
 * candidates; the protocol gro_receive refines this. */
2393 for (p = napi->gro_list; p; p = p->next) {
2395 NAPI_GRO_CB(p)->same_flow =
2396 p->mac_len == mac_len &&
2397 !memcmp(skb_mac_header(p), skb_mac_header(skb),
2399 NAPI_GRO_CB(p)->flush = 0;
2402 pp = ptype->gro_receive(&napi->gro_list, skb);
2407 if (&ptype->list == head)
2410 same_flow = NAPI_GRO_CB(skb)->same_flow;
/* gro_receive asked us to flush one held flow immediately. */
2413 struct sk_buff *nskb = *pp;
2417 napi_gro_complete(nskb);
/* Unmergeable, or too many flows held: deliver straight away. */
2424 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2425 __skb_push(skb, -skb_network_offset(skb));
/* New flow: park the skb at the head of the GRO list. */
2429 NAPI_GRO_CB(skb)->count = 1;
2430 skb->next = napi->gro_list;
2431 napi->gro_list = skb;
2434 return NET_RX_SUCCESS;
2437 return netif_receive_skb(skb);
2439 EXPORT_SYMBOL(napi_gro_receive);
/*
 * NAPI poll handler for the legacy netif_rx() per-cpu backlog queue.
 * NOTE(review): elided extract - the int work counter, the do-loop
 * opener, irq re-enables, the empty-queue break and the final return
 * of the work count are missing from this view.
 */
2441 static int process_backlog(struct napi_struct *napi, int quota)
2444 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2445 unsigned long start_time = jiffies;
2447 napi->weight = weight_p;
2449 struct sk_buff *skb;
/* Dequeue under irq-off: netif_rx() enqueues from irq context. */
2451 local_irq_disable();
2452 skb = __skb_dequeue(&queue->input_pkt_queue);
2454 __napi_complete(napi);
2460 napi_gro_receive(napi, skb);
/* Stop at quota or when a jiffy has elapsed, to bound latency. */
2461 } while (++work < quota && jiffies == start_time);
2463 napi_gro_flush(napi);
2469 * __napi_schedule - schedule for receive
2470 * @n: entry to schedule
2472 * The entry's receive function will be scheduled to run
2474 void __napi_schedule(struct napi_struct *n)
2476 unsigned long flags;
2478 local_irq_save(flags);
2479 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2480 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2481 local_irq_restore(flags);
2483 EXPORT_SYMBOL(__napi_schedule);
2485 void __napi_complete(struct napi_struct *n)
2487 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2488 BUG_ON(n->gro_list);
2490 list_del(&n->poll_list);
2491 smp_mb__before_clear_bit();
2492 clear_bit(NAPI_STATE_SCHED, &n->state);
2494 EXPORT_SYMBOL(__napi_complete);
2496 void napi_complete(struct napi_struct *n)
2498 unsigned long flags;
2501 * don't let napi dequeue from the cpu poll list
2502 * just in case its running on a different cpu
2504 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2508 local_irq_save(flags);
2510 local_irq_restore(flags);
2512 EXPORT_SYMBOL(napi_complete);
2514 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2515 int (*poll)(struct napi_struct *, int), int weight)
2517 INIT_LIST_HEAD(&napi->poll_list);
2518 napi->gro_list = NULL;
2520 napi->weight = weight;
2521 list_add(&napi->dev_list, &dev->napi_list);
2522 #ifdef CONFIG_NETPOLL
2524 spin_lock_init(&napi->poll_lock);
2525 napi->poll_owner = -1;
2527 set_bit(NAPI_STATE_SCHED, &napi->state);
2529 EXPORT_SYMBOL(netif_napi_add);
2531 void netif_napi_del(struct napi_struct *napi)
2533 struct sk_buff *skb, *next;
2535 list_del_init(&napi->dev_list);
2537 for (skb = napi->gro_list; skb; skb = next) {
2543 napi->gro_list = NULL;
2545 EXPORT_SYMBOL(netif_napi_del);
/*
 * RX softirq handler: round-robin the per-cpu NAPI poll list within a
 * packet budget and a 2-jiffy time limit. NOTE(review): elided extract
 * - have/work/weight declarations, irq re-enable before polling, the
 * budget decrement, napi_complete handling for disabled instances, the
 * out/softnet_break labels and closing braces are missing here.
 */
2548 static void net_rx_action(struct softirq_action *h)
2550 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2551 unsigned long time_limit = jiffies + 2;
2552 int budget = netdev_budget;
2555 local_irq_disable();
2557 while (!list_empty(list)) {
2558 struct napi_struct *n;
2561 /* If softirq window is exhuasted then punt.
2562 * Allow this to run for 2 jiffies since which will allow
2563 * an average latency of 1.5/HZ.
2565 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2570 /* Even though interrupts have been re-enabled, this
2571 * access is safe because interrupts can only add new
2572 * entries to the tail of this list, and only ->poll()
2573 * calls can remove this head entry from the list.
2575 n = list_entry(list->next, struct napi_struct, poll_list);
2577 have = netpoll_poll_lock(n);
2581 /* This NAPI_STATE_SCHED test is for avoiding a race
2582 * with netpoll's poll_napi(). Only the entity which
2583 * obtains the lock and sees NAPI_STATE_SCHED set will
2584 * actually make the ->poll() call. Therefore we avoid
2585 * accidently calling ->poll() when NAPI is not scheduled.
2588 if (test_bit(NAPI_STATE_SCHED, &n->state))
2589 work = n->poll(n, weight);
2591 WARN_ON_ONCE(work > weight);
2595 local_irq_disable();
2597 /* Drivers must not modify the NAPI state if they
2598 * consume the entire weight. In such cases this code
2599 * still "owns" the NAPI instance and therefore can
2600 * move the instance around on the list at-will.
2602 if (unlikely(work == weight)) {
2603 if (unlikely(napi_disable_pending(n)))
/* Full weight consumed: rotate to the list tail for fairness. */
2606 list_move_tail(&n->poll_list, list);
2609 netpoll_poll_unlock(have);
2614 #ifdef CONFIG_NET_DMA
2616 * There may not be any more sk_buffs coming right now, so push
2617 * any pending DMA copies to hardware
2619 dma_issue_pending_all();
/* Budget/time exhausted with work left: account it and re-raise. */
2625 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2626 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2630 static gifconf_func_t * gifconf_list [NPROTO];
2633 * register_gifconf - register a SIOCGIF handler
2634 * @family: Address family
2635 * @gifconf: Function handler
2637 * Register protocol dependent address dumping routines. The handler
2638 * that is passed must not be freed or reused until it has been replaced
2639 * by another handler.
2641 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2643 if (family >= NPROTO)
2645 gifconf_list[family] = gifconf;
2651 * Map an interface index to its name (SIOCGIFNAME)
2655 * We need this ioctl for efficient implementation of the
2656 * if_indextoname() function required by the IPv6 API. Without
2657 * it, we would have to search all the interfaces to find a
/*
 * NOTE(review): elided extract - the struct ifreq ifr declaration, the
 * -EFAULT/-ENODEV returns and the final return 0 are missing here.
 */
2661 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2663 struct net_device *dev;
2667 * Fetch the caller's info block.
2670 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
/* Look up the index under the device-list read lock. */
2673 read_lock(&dev_base_lock);
2674 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2676 read_unlock(&dev_base_lock);
2680 strcpy(ifr.ifr_name, dev->name);
2681 read_unlock(&dev_base_lock);
2683 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2689 * Perform a SIOCGIFCONF call. This structure will change
2690 * size eventually, and there is nothing I can do about it.
2691 * Thus we will need a 'compatibility mode'.
/*
 * NOTE(review): elided extract - ifc/pos/len/total/done declarations,
 * the overflow clamp on the user buffer and error returns are missing
 * from this view.
 */
2694 static int dev_ifconf(struct net *net, char __user *arg)
2697 struct net_device *dev;
2704 * Fetch the caller's info block.
2707 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2714 * Loop over the interfaces, and write an info block for each.
/* Each registered family handler appends its entries; with a NULL
 * buffer the handler only reports the size it would need. */
2718 for_each_netdev(net, dev) {
2719 for (i = 0; i < NPROTO; i++) {
2720 if (gifconf_list[i]) {
2723 done = gifconf_list[i](dev, NULL, 0);
2725 done = gifconf_list[i](dev, pos + total,
2735 * All done. Write the updated control block back to the caller.
2737 ifc.ifc_len = total;
2740 * Both BSD and Solaris return 0 here, so we do too.
2742 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2745 #ifdef CONFIG_PROC_FS
2747 * This is invoked by the /proc filesystem handler to display a device
/*
 * seq_file start for /proc/net/dev: take the device-list lock, return
 * the header token at position 0, otherwise walk to the *pos-th
 * device. NOTE(review): elided extract - the loff_t off counter, the
 * position test and the return statements inside the loop are missing.
 */
2750 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2751 __acquires(dev_base_lock)
2753 struct net *net = seq_file_net(seq);
2755 struct net_device *dev;
2757 read_lock(&dev_base_lock);
2759 return SEQ_START_TOKEN;
2762 for_each_netdev(net, dev)
2769 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2771 struct net *net = seq_file_net(seq);
2773 return v == SEQ_START_TOKEN ?
2774 first_net_device(net) : next_net_device((struct net_device *)v);
2777 void dev_seq_stop(struct seq_file *seq, void *v)
2778 __releases(dev_base_lock)
2780 read_unlock(&dev_base_lock);
/*
 * Emit one /proc/net/dev line for @dev: 8 rx columns then 8 tx
 * columns. NOTE(review): elided extract - at least one argument line
 * (presumably stats->rx_errors) is missing between rx_packets and the
 * rx_dropped sum; do not treat the argument list as complete.
 */
2783 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2785 const struct net_device_stats *stats = dev_get_stats(dev);
2787 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2788 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2789 dev->name, stats->rx_bytes, stats->rx_packets,
2791 stats->rx_dropped + stats->rx_missed_errors,
2792 stats->rx_fifo_errors,
/* "frame" column folds several distinct rx error counters together. */
2793 stats->rx_length_errors + stats->rx_over_errors +
2794 stats->rx_crc_errors + stats->rx_frame_errors,
2795 stats->rx_compressed, stats->multicast,
2796 stats->tx_bytes, stats->tx_packets,
2797 stats->tx_errors, stats->tx_dropped,
2798 stats->tx_fifo_errors, stats->collisions,
/* "carrier" column likewise aggregates four tx error counters. */
2799 stats->tx_carrier_errors +
2800 stats->tx_aborted_errors +
2801 stats->tx_window_errors +
2802 stats->tx_heartbeat_errors,
2803 stats->tx_compressed);
2807 * Called from the PROCfs module. This now uses the new arbitrary sized
2808 * /proc/net interface to create /proc/net/dev
/* seq_file .show: print the two-line column header for the start token,
 * otherwise one stats line for the device in @v. */
2810 static int dev_seq_show(struct seq_file *seq, void *v)
2812 if (v == SEQ_START_TOKEN)
2813 seq_puts(seq, "Inter-| Receive "
2815 " face |bytes packets errs drop fifo frame "
2816 "compressed multicast|bytes packets errs "
2817 "drop fifo colls carrier compressed\n")
2819 dev_seq_printf_stats(seq, v);
/*
 * Scan CPU ids from *pos upward for the next online CPU and return its
 * per-cpu netdev_rx_stat entry, or NULL when no more online CPUs exist.
 * NOTE(review): the *pos increment and loop close are truncated here.
 */
2823 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2825 struct netif_rx_stats *rc = NULL;
2827 while (*pos < nr_cpu_ids)
2828 if (cpu_online(*pos)) {
2829 rc = &per_cpu(netdev_rx_stat, *pos);
/* seq_file .start for /proc/net/softnet_stat: stats of the *pos-th online CPU. */
2836 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2838 return softnet_get_online(pos);
/* seq_file .next: advance to the following online CPU (pos bump truncated here). */
2841 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2844 return softnet_get_online(pos);
/* seq_file .stop: nothing to release — no lock is held across the walk. */
2847 static void softnet_seq_stop(struct seq_file *seq, void *v)
/*
 * Print one hex-formatted softnet line per CPU; several columns are
 * hard-coded 0 (the old fastroute counters, per the inline comment).
 */
2851 static int softnet_seq_show(struct seq_file *seq, void *v)
2853 struct netif_rx_stats *s = v;
2855 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2856 s->total, s->dropped, s->time_squeeze, 0,
2857 0, 0, 0, 0, /* was fastroute */
/* seq_file operations backing /proc/net/dev. */
2862 static const struct seq_operations dev_seq_ops = {
2863 .start = dev_seq_start,
2864 .next = dev_seq_next,
2865 .stop = dev_seq_stop,
2866 .show = dev_seq_show,
/* open() for /proc/net/dev: per-net seq_file with net-namespace private data. */
2869 static int dev_seq_open(struct inode *inode, struct file *file)
2871 return seq_open_net(inode, file, &dev_seq_ops,
2872 sizeof(struct seq_net_private));
/* file_operations for /proc/net/dev (the .read line is truncated in this extraction). */
2875 static const struct file_operations dev_seq_fops = {
2876 .owner = THIS_MODULE,
2877 .open = dev_seq_open,
2879 .llseek = seq_lseek,
2880 .release = seq_release_net,
/* seq_file operations backing /proc/net/softnet_stat. */
2883 static const struct seq_operations softnet_seq_ops = {
2884 .start = softnet_seq_start,
2885 .next = softnet_seq_next,
2886 .stop = softnet_seq_stop,
2887 .show = softnet_seq_show,
/* open() for /proc/net/softnet_stat: plain (non per-net) seq_file. */
2890 static int softnet_seq_open(struct inode *inode, struct file *file)
2892 return seq_open(file, &softnet_seq_ops);
/* file_operations for /proc/net/softnet_stat (.read line truncated here). */
2895 static const struct file_operations softnet_seq_fops = {
2896 .owner = THIS_MODULE,
2897 .open = softnet_seq_open,
2899 .llseek = seq_lseek,
2900 .release = seq_release,
/*
 * Return the @pos-th registered packet_type, counting ptype_all first and
 * then each ptype_base hash bucket in order; NULL when pos is past the end.
 * NOTE(review): the index counter and returns are truncated in this extraction.
 * Walks RCU-protected lists — caller must be in an RCU read section.
 */
2903 static void *ptype_get_idx(loff_t pos)
2905 struct packet_type *pt = NULL;
2909 list_for_each_entry_rcu(pt, &ptype_all, list) {
2915 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2916 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
/* seq_file .start for /proc/net/ptype: header token at pos 0, else (pos-1)-th entry.
 * Presumably enters rcu_read_lock() on a truncated line — confirm in full source. */
2925 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2929 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file .next: step from @v to the following packet_type, crossing from
 * the ptype_all list into the ptype_base hash buckets, and from one bucket
 * to the next non-empty one. Returns NULL at the end (truncated lines).
 */
2932 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2934 struct packet_type *pt;
2935 struct list_head *nxt;
2939 if (v == SEQ_START_TOKEN)
2940 return ptype_get_idx(0);
2943 nxt = pt->list.next;
/* ETH_P_ALL entries live on ptype_all; falling off its end moves us into bucket 0. */
2944 if (pt->type == htons(ETH_P_ALL)) {
2945 if (nxt != &ptype_all)
2948 nxt = ptype_base[0].next;
2950 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
/* Skip empty buckets until a populated one (or the table is exhausted). */
2952 while (nxt == &ptype_base[hash]) {
2953 if (++hash >= PTYPE_HASH_SIZE)
2955 nxt = ptype_base[hash].next;
2958 return list_entry(nxt, struct packet_type, list);
/* seq_file .stop: presumably drops rcu_read_lock() (body truncated) — confirm. */
2961 static void ptype_seq_stop(struct seq_file *seq, void *v)
/*
 * seq_file .show for /proc/net/ptype: header for the start token, otherwise
 * one "type device function" line. Entries bound to a device in another
 * net namespace are filtered out.
 */
2967 static int ptype_seq_show(struct seq_file *seq, void *v)
2969 struct packet_type *pt = v;
2971 if (v == SEQ_START_TOKEN)
2972 seq_puts(seq, "Type Device Function\n");
2973 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2974 if (pt->type == htons(ETH_P_ALL))
2975 seq_puts(seq, "ALL ");
2977 seq_printf(seq, "%04x", ntohs(pt->type));
/* %pF prints the handler's symbolic name. */
2979 seq_printf(seq, " %-8s %pF\n",
2980 pt->dev ? pt->dev->name : "", pt->func);
/* seq_file operations backing /proc/net/ptype. */
2986 static const struct seq_operations ptype_seq_ops = {
2987 .start = ptype_seq_start,
2988 .next = ptype_seq_next,
2989 .stop = ptype_seq_stop,
2990 .show = ptype_seq_show,
/* open() for /proc/net/ptype: per-net seq_file. */
2993 static int ptype_seq_open(struct inode *inode, struct file *file)
2995 return seq_open_net(inode, file, &ptype_seq_ops,
2996 sizeof(struct seq_net_private));
/* file_operations for /proc/net/ptype (.read line truncated here). */
2999 static const struct file_operations ptype_seq_fops = {
3000 .owner = THIS_MODULE,
3001 .open = ptype_seq_open,
3003 .llseek = seq_lseek,
3004 .release = seq_release_net,
/*
 * Per-net init: create /proc/net/{dev,softnet_stat,ptype} and the wireless
 * extensions proc entries. The trailing lines are the goto-cleanup unwind
 * path removing whatever was created before the failure (labels truncated
 * in this extraction).
 */
3008 static int __net_init dev_proc_net_init(struct net *net)
3012 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3014 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3016 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3019 if (wext_proc_init(net))
3025 proc_net_remove(net, "ptype");
3027 proc_net_remove(net, "softnet_stat");
3029 proc_net_remove(net, "dev");
/* Per-net exit: remove everything dev_proc_net_init created, reverse order. */
3033 static void __net_exit dev_proc_net_exit(struct net *net)
3035 wext_proc_exit(net);
3037 proc_net_remove(net, "ptype");
3038 proc_net_remove(net, "softnet_stat");
3039 proc_net_remove(net, "dev");
/* pernet_operations tying proc setup/teardown to net-namespace lifetime. */
3042 static struct pernet_operations __net_initdata dev_proc_ops = {
3043 .init = dev_proc_net_init,
3044 .exit = dev_proc_net_exit,
/* Boot-time hook: register the per-net proc operations above. */
3047 static int __init dev_proc_init(void)
3049 return register_pernet_subsys(&dev_proc_ops);
3052 #define dev_proc_init() 0
3053 #endif /* CONFIG_PROC_FS */
3057 * netdev_set_master - set up master/slave pair
3058 * @slave: slave device
3059 * @master: new master device
3061 * Changes the master device of the slave. Pass %NULL to break the
3062 * bonding. The caller must hold the RTNL semaphore. On a failure
3063 * a negative errno code is returned. On success the reference counts
3064 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3065 * function returns zero.
/* NOTE(review): refcount hold/put on old/master and the no-op early return
 * are on lines missing from this extraction. */
3067 int netdev_set_master(struct net_device *slave, struct net_device *master)
3069 struct net_device *old = slave->master;
3079 slave->master = master;
3087 slave->flags |= IFF_SLAVE;
3089 slave->flags &= ~IFF_SLAVE;
/* Announce the link change to rtnetlink listeners. */
3091 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
/* Notify the driver of an rx-flags change, but only while the device is up
 * and the driver implements the callback. */
3095 static void dev_change_rx_flags(struct net_device *dev, int flags)
3097 const struct net_device_ops *ops = dev->netdev_ops;
3099 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3100 ops->ndo_change_rx_flags(dev, flags);
/*
 * Adjust the device's promiscuity refcount by @inc (may be negative) and
 * flip IFF_PROMISC when the count crosses zero. On counter overflow the
 * change is rolled back and an error is returned (error value on a line
 * missing from this extraction). An audit record is emitted on every
 * actual mode transition. Caller context: the uid/gid locals and return
 * statements are truncated here.
 */
3103 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3105 unsigned short old_flags = dev->flags;
3111 dev->flags |= IFF_PROMISC;
3112 dev->promiscuity += inc;
/* Count hit zero: either a genuine drop to non-promisc, or inc overflowed the counter. */
3113 if (dev->promiscuity == 0) {
3116 * If inc causes overflow, untouch promisc and return error.
3119 dev->flags &= ~IFF_PROMISC;
3121 dev->promiscuity -= inc;
3122 printk(KERN_WARNING "%s: promiscuity touches roof, "
3123 "set promiscuity failed, promiscuity feature "
3124 "of device might be broken.\n", dev->name);
/* Only log/audit/notify when the visible flag actually changed. */
3128 if (dev->flags != old_flags) {
3129 printk(KERN_INFO "device %s %s promiscuous mode\n",
3130 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3132 if (audit_enabled) {
3133 current_uid_gid(&uid, &gid);
3134 audit_log(current->audit_context, GFP_ATOMIC,
3135 AUDIT_ANOM_PROMISCUOUS,
3136 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3137 dev->name, (dev->flags & IFF_PROMISC),
3138 (old_flags & IFF_PROMISC),
3139 audit_get_loginuid(current),
3141 audit_get_sessionid(current));
3144 dev_change_rx_flags(dev, IFF_PROMISC);
3150 * dev_set_promiscuity - update promiscuity count on a device
3154 * Add or remove promiscuity from a device. While the count in the device
3155 * remains above zero the interface remains promiscuous. Once it hits zero
3156 * the device reverts back to normal filtering operation. A negative inc
3157 * value is used to drop promiscuity on the device.
3158 * Return 0 if successful or a negative errno code on error.
/* Public wrapper: applies the count change, then re-uploads the rx filter
 * if the visible flags changed. Error propagation line truncated here. */
3160 int dev_set_promiscuity(struct net_device *dev, int inc)
3162 unsigned short old_flags = dev->flags;
3165 err = __dev_set_promiscuity(dev, inc);
3168 if (dev->flags != old_flags)
3169 dev_set_rx_mode(dev);
3174 * dev_set_allmulti - update allmulti count on a device
3178 * Add or remove reception of all multicast frames to a device. While the
3179 * count in the device remains above zero the interface remains listening
3180 * to all interfaces. Once it hits zero the device reverts back to normal
3181 * filtering operation. A negative @inc value is used to drop the counter
3182 * when releasing a resource needing all multicasts.
3183 * Return 0 if successful or a negative errno code on error.
/* Mirrors __dev_set_promiscuity: refcounted IFF_ALLMULTI with overflow
 * rollback; driver notified and rx filter re-uploaded on a real change. */
3186 int dev_set_allmulti(struct net_device *dev, int inc)
3188 unsigned short old_flags = dev->flags;
3192 dev->flags |= IFF_ALLMULTI;
3193 dev->allmulti += inc;
3194 if (dev->allmulti == 0) {
3197 * If inc causes overflow, untouch allmulti and return error.
3200 dev->flags &= ~IFF_ALLMULTI;
3202 dev->allmulti -= inc;
3203 printk(KERN_WARNING "%s: allmulti touches roof, "
3204 "set allmulti failed, allmulti feature of "
3205 "device might be broken.\n", dev->name);
3209 if (dev->flags ^ old_flags) {
3210 dev_change_rx_flags(dev, IFF_ALLMULTI);
3211 dev_set_rx_mode(dev);
3217 * Upload unicast and multicast address lists to device and
3218 * configure RX filtering. When the device doesn't support unicast
3219 * filtering it is put in promiscuous mode while unicast addresses
/* Caller must hold the device's addr_list lock (see dev_set_rx_mode). */
3222 void __dev_set_rx_mode(struct net_device *dev)
3224 const struct net_device_ops *ops = dev->netdev_ops;
3226 /* dev_open will call this function so the list will stay sane. */
3227 if (!(dev->flags&IFF_UP))
3230 if (!netif_device_present(dev))
/* Drivers with ndo_set_rx_mode handle unicast filtering themselves. */
3233 if (ops->ndo_set_rx_mode)
3234 ops->ndo_set_rx_mode(dev);
3236 /* Unicast addresses changes may only happen under the rtnl,
3237 * therefore calling __dev_set_promiscuity here is safe.
/* uc_promisc tracks whether *we* forced promisc as a unicast-filter
 * fallback, so we only drop it when we set it. */
3239 if (dev->uc_count > 0 && !dev->uc_promisc) {
3240 __dev_set_promiscuity(dev, 1);
3241 dev->uc_promisc = 1;
3242 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3243 __dev_set_promiscuity(dev, -1);
3244 dev->uc_promisc = 0;
3247 if (ops->ndo_set_multicast_list)
3248 ops->ndo_set_multicast_list(dev);
/* Locked wrapper around __dev_set_rx_mode (addr_list lock, BH disabled). */
3252 void dev_set_rx_mode(struct net_device *dev)
3254 netif_addr_lock_bh(dev);
3255 __dev_set_rx_mode(dev);
3256 netif_addr_unlock_bh(dev);
/*
 * Drop one reference to @addr in the dev_addr_list; unlink and free the
 * entry when its user count reaches zero. @glbl selects "global user"
 * refcounting (da_gusers). NOTE(review): the refcount decrement, unlink,
 * count update and return paths are all on lines missing from this
 * extraction — treat this fragment as read-only.
 */
3259 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3260 void *addr, int alen, int glbl)
3262 struct dev_addr_list *da;
3264 for (; (da = *list) != NULL; list = &da->next) {
3265 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3266 alen == da->da_addrlen) {
3268 int old_glbl = da->da_gusers;
/*
 * Add @addr to the dev_addr_list, or bump the refcount of an existing
 * matching entry. New entries are GFP_ATOMIC-allocated (callers hold the
 * addr_list spinlock). NOTE(review): the da_users init, list linkage,
 * *count increment and return statements are truncated in this extraction.
 */
3285 int __dev_addr_add(struct dev_addr_list **list, int *count,
3286 void *addr, int alen, int glbl)
3288 struct dev_addr_list *da;
/* First pass: look for an existing entry with the same address/length. */
3290 for (da = *list; da != NULL; da = da->next) {
3291 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3292 da->da_addrlen == alen) {
3294 int old_glbl = da->da_gusers;
3304 da = kzalloc(sizeof(*da), GFP_ATOMIC);
3307 memcpy(da->da_addr, addr, alen);
3308 da->da_addrlen = alen;
3310 da->da_gusers = glbl ? 1 : 0;
3318 * dev_unicast_delete - Release secondary unicast address.
3320 * @addr: address to delete
3321 * @alen: length of @addr
3323 * Release reference to a secondary unicast address and remove it
3324 * from the device if the reference count drops to zero.
3326 * The caller must hold the rtnl_mutex.
3328 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
/* ASSERT_RTNL and err declaration are on truncated lines. */
3334 netif_addr_lock_bh(dev);
3335 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
/* Re-upload the filter only when the list actually changed (err check truncated). */
3337 __dev_set_rx_mode(dev);
3338 netif_addr_unlock_bh(dev);
3341 EXPORT_SYMBOL(dev_unicast_delete);
3344 * dev_unicast_add - add a secondary unicast address
3346 * @addr: address to add
3347 * @alen: length of @addr
3349 * Add a secondary unicast address to the device or increase
3350 * the reference count if it already exists.
3352 * The caller must hold the rtnl_mutex.
3354 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
/* ASSERT_RTNL and err declaration are on truncated lines. */
3360 netif_addr_lock_bh(dev);
3361 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3363 __dev_set_rx_mode(dev);
3364 netif_addr_unlock_bh(dev);
3367 EXPORT_SYMBOL(dev_unicast_add);
/*
 * Sync @from's address list into @to: push not-yet-synced entries to @to
 * and, for entries whose last user went away, remove them from both lists.
 * NOTE(review): da initialization, the da_synced bookkeeping, loop advance
 * (next) and return are truncated in this extraction.
 */
3369 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3370 struct dev_addr_list **from, int *from_count)
3372 struct dev_addr_list *da, *next;
3376 while (da != NULL) {
3378 if (!da->da_synced) {
/* Not yet propagated to @to — add it there. */
3379 err = __dev_addr_add(to, to_count,
3380 da->da_addr, da->da_addrlen, 0);
3385 } else if (da->da_users == 1) {
/* Last reference on @from: retract from both sides. */
3386 __dev_addr_delete(to, to_count,
3387 da->da_addr, da->da_addrlen, 0);
3388 __dev_addr_delete(from, from_count,
3389 da->da_addr, da->da_addrlen, 0);
/*
 * Undo __dev_addr_sync: remove every previously-synced entry from @to and
 * drop the corresponding sync reference from @from. NOTE(review): da init,
 * loop advance and da_synced clearing are truncated in this extraction.
 */
3396 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3397 struct dev_addr_list **from, int *from_count)
3399 struct dev_addr_list *da, *next;
3402 while (da != NULL) {
3404 if (da->da_synced) {
3405 __dev_addr_delete(to, to_count,
3406 da->da_addr, da->da_addrlen, 0);
3408 __dev_addr_delete(from, from_count,
3409 da->da_addr, da->da_addrlen, 0);
3416 * dev_unicast_sync - Synchronize device's unicast list to another device
3417 * @to: destination device
3418 * @from: source device
3420 * Add newly added addresses to the destination device and release
3421 * addresses that have no users left. The source device must be
3422 * locked by netif_tx_lock_bh.
3424 * This function is intended to be called from the dev->set_rx_mode
3425 * function of layered software devices.
3427 int dev_unicast_sync(struct net_device *to, struct net_device *from)
/* err declaration truncated. Only the destination's addr lock is taken here;
 * per the comment above, the caller already holds the source's lock. */
3431 netif_addr_lock_bh(to);
3432 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3433 &from->uc_list, &from->uc_count);
3435 __dev_set_rx_mode(to);
3436 netif_addr_unlock_bh(to);
3439 EXPORT_SYMBOL(dev_unicast_sync);
3442 * dev_unicast_unsync - Remove synchronized addresses from the destination device
3443 * @to: destination device
3444 * @from: source device
3446 * Remove all addresses that were added to the destination device by
3447 * dev_unicast_sync(). This function is intended to be called from the
3448 * dev->stop function of layered software devices.
3450 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
/* Both devices' addr locks are taken, @from first; the nested plain
 * netif_addr_lock on @to relies on BHs already being disabled by the outer
 * _bh lock. Lock-ordering is presumably consistent with dev_unicast_sync's
 * callers — confirm against full source. */
3452 netif_addr_lock_bh(from);
3453 netif_addr_lock(to);
3455 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3456 &from->uc_list, &from->uc_count);
3457 __dev_set_rx_mode(to);
3459 netif_addr_unlock(to);
3460 netif_addr_unlock_bh(from);
3462 EXPORT_SYMBOL(dev_unicast_unsync);
/*
 * Free the entire dev_addr_list, warning if any entry still has
 * non-global users (would indicate a refcount leak). The unlink/kfree
 * lines are truncated in this extraction.
 */
3464 static void __dev_addr_discard(struct dev_addr_list **list)
3466 struct dev_addr_list *tmp;
3468 while (*list != NULL) {
3471 if (tmp->da_users > tmp->da_gusers)
3472 printk("__dev_addr_discard: address leakage! "
3473 "da_users=%d\n", tmp->da_users);
/* Flush both the unicast and multicast address chains under the addr lock
 * (used during device teardown; the uc_count/mc_count resets are on
 * truncated lines). */
3478 static void dev_addr_discard(struct net_device *dev)
3480 netif_addr_lock_bh(dev);
3482 __dev_addr_discard(&dev->uc_list);
3485 __dev_addr_discard(&dev->mc_list);
3488 netif_addr_unlock_bh(dev);
3492 * dev_get_flags - get flags reported to userspace
3495 * Get the combination of flag bits exported through APIs to userspace.
3497 unsigned dev_get_flags(const struct net_device *dev)
/* Merge the real flags (minus the bits userspace sees via gflags) with the
 * user-visible gflags copies of PROMISC/ALLMULTI; flag list partially
 * truncated in this extraction. */
3501 flags = (dev->flags & ~(IFF_PROMISC |
3506 (dev->gflags & (IFF_PROMISC |
/* Operational-state bits are synthesized from the live device state. */
3509 if (netif_running(dev)) {
3510 if (netif_oper_up(dev))
3511 flags |= IFF_RUNNING;
3512 if (netif_carrier_ok(dev))
3513 flags |= IFF_LOWER_UP;
3514 if (netif_dormant(dev))
3515 flags |= IFF_DORMANT;
3522 * dev_change_flags - change device settings
3524 * @flags: device state flags
3526 * Change settings on device based state flags. The flags are
3527 * in the userspace exported format.
/* SIOCSIFFLAGS backend. Caller holds RTNL (per kernel convention for this
 * path — confirm in full source). Several flag names in the masks below
 * are on truncated lines. */
3529 int dev_change_flags(struct net_device *dev, unsigned flags)
3532 int old_flags = dev->flags;
3537 * Set the flags on our device.
/* Only the user-settable bits are taken from @flags; state bits like
 * IFF_UP/IFF_PROMISC are preserved from the current flags. */
3540 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3541 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3543 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3547 * Load in the correct multicast list now the flags have changed.
3550 if ((old_flags ^ flags) & IFF_MULTICAST)
3551 dev_change_rx_flags(dev, IFF_MULTICAST);
3553 dev_set_rx_mode(dev);
3556 * Have we downed the interface. We handle IFF_UP ourselves
3557 * according to user attempts to set it, rather than blindly
3562 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3563 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3566 dev_set_rx_mode(dev);
3569 if (dev->flags & IFF_UP &&
3570 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3572 call_netdevice_notifiers(NETDEV_CHANGE, dev);
/* gflags mirrors what userspace asked for; convert the delta into a
 * refcounted promiscuity change. */
3574 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3575 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3576 dev->gflags ^= IFF_PROMISC;
3577 dev_set_promiscuity(dev, inc);
3580 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3581 is important. Some (broken) drivers set IFF_PROMISC, when
3582 IFF_ALLMULTI is requested not asking us and not reporting.
3584 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3585 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3586 dev->gflags ^= IFF_ALLMULTI;
3587 dev_set_allmulti(dev, inc);
3590 /* Exclude state transition flags, already notified */
3591 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3593 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3599 * dev_set_mtu - Change maximum transfer unit
3601 * @new_mtu: new transfer unit
3603 * Change the maximum transfer size of the network device.
3605 int dev_set_mtu(struct net_device *dev, int new_mtu)
3607 const struct net_device_ops *ops = dev->netdev_ops;
/* No-op if unchanged (return value truncated). */
3610 if (new_mtu == dev->mtu)
3613 /* MTU must be positive. */
3617 if (!netif_device_present(dev))
/* Drivers without ndo_change_mtu get dev->mtu set directly (that default
 * assignment is on a truncated line). */
3621 if (ops->ndo_change_mtu)
3622 err = ops->ndo_change_mtu(dev, new_mtu);
3626 if (!err && dev->flags & IFF_UP)
3627 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3632 * dev_set_mac_address - Change Media Access Control Address
3636 * Change the hardware (MAC) address of the device
3638 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3640 const struct net_device_ops *ops = dev->netdev_ops;
/* Driver must support it, the address family must match the link type and
 * the device must be present (error values on truncated lines). */
3643 if (!ops->ndo_set_mac_address)
3645 if (sa->sa_family != dev->type)
3647 if (!netif_device_present(dev))
3649 err = ops->ndo_set_mac_address(dev, sa);
/* Success-gated notifier call — the !err check is presumably on a truncated line. */
3651 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3656 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
/* Read-only ioctl subset: fills @ifr from device state. Safe under just the
 * read lock because nothing is modified. Returns -ENODEV for an unknown
 * name (that return is on a truncated line). */
3658 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3661 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3667 case SIOCGIFFLAGS: /* Get interface flags */
3668 ifr->ifr_flags = dev_get_flags(dev);
3671 case SIOCGIFMETRIC: /* Get the metric on the interface
3672 (currently unused) */
3673 ifr->ifr_metric = 0;
3676 case SIOCGIFMTU: /* Get the MTU of a device */
3677 ifr->ifr_mtu = dev->mtu;
/* SIOCGIFHWADDR (case label truncated): zero-pad then copy up to addr_len. */
3682 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3684 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3685 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3686 ifr->ifr_hwaddr.sa_family = dev->type;
/* SIOCGIFMAP (case label truncated): export the hardware resource map. */
3694 ifr->ifr_map.mem_start = dev->mem_start;
3695 ifr->ifr_map.mem_end = dev->mem_end;
3696 ifr->ifr_map.base_addr = dev->base_addr;
3697 ifr->ifr_map.irq = dev->irq;
3698 ifr->ifr_map.dma = dev->dma;
3699 ifr->ifr_map.port = dev->if_port;
3703 ifr->ifr_ifindex = dev->ifindex;
3707 ifr->ifr_qlen = dev->tx_queue_len;
3711 /* dev_ioctl() should ensure this case
3723 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
/* Mutating ioctl subset: dispatches SIOCSIFxxx commands to the helpers
 * above / driver ops. Caller holds RTNL. -ENODEV for unknown name and most
 * "return 0" / error lines are truncated in this extraction. */
3725 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3728 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3729 const struct net_device_ops *ops;
3734 ops = dev->netdev_ops;
3737 case SIOCSIFFLAGS: /* Set interface flags */
3738 return dev_change_flags(dev, ifr->ifr_flags);
3740 case SIOCSIFMETRIC: /* Set the metric on the interface
3741 (currently unused) */
3744 case SIOCSIFMTU: /* Set the MTU of a device */
3745 return dev_set_mtu(dev, ifr->ifr_mtu);
/* SIOCSIFHWADDR (case label truncated). */
3748 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3750 case SIOCSIFHWBROADCAST:
3751 if (ifr->ifr_hwaddr.sa_family != dev->type)
3753 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3754 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3755 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
/* SIOCSIFMAP (case label truncated): forward to the driver if supported. */
3759 if (ops->ndo_set_config) {
3760 if (!netif_device_present(dev))
3762 return ops->ndo_set_config(dev, &ifr->ifr_map);
/* SIOCADDMULTI (case label truncated): requires a multicast-capable driver
 * and an AF_UNSPEC layer-2 address. */
3767 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3768 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3770 if (!netif_device_present(dev))
3772 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
/* SIOCDELMULTI (case label truncated): symmetric removal. */
3776 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3777 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3779 if (!netif_device_present(dev))
3781 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
/* SIOCSIFTXQLEN (case label truncated). */
3785 if (ifr->ifr_qlen < 0)
3787 dev->tx_queue_len = ifr->ifr_qlen;
/* SIOCSIFNAME (case label truncated): NUL-terminate defensively first. */
3791 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3792 return dev_change_name(dev, ifr->ifr_newname);
3795 * Unknown or private ioctl
3799 if ((cmd >= SIOCDEVPRIVATE &&
3800 cmd <= SIOCDEVPRIVATE + 15) ||
3801 cmd == SIOCBONDENSLAVE ||
3802 cmd == SIOCBONDRELEASE ||
3803 cmd == SIOCBONDSETHWADDR ||
3804 cmd == SIOCBONDSLAVEINFOQUERY ||
3805 cmd == SIOCBONDINFOQUERY ||
3806 cmd == SIOCBONDCHANGEACTIVE ||
3807 cmd == SIOCGMIIPHY ||
3808 cmd == SIOCGMIIREG ||
3809 cmd == SIOCSMIIREG ||
3810 cmd == SIOCBRADDIF ||
3811 cmd == SIOCBRDELIF ||
3812 cmd == SIOCWANDEV) {
/* Private / bonding / MII / bridge commands go to the driver's ioctl hook. */
3814 if (ops->ndo_do_ioctl) {
3815 if (netif_device_present(dev))
3816 err = ops->ndo_do_ioctl(dev, ifr, cmd);
3828 * This function handles all "interface"-type I/O control requests. The actual
3829 * 'doing' part of this is dev_ifsioc above.
3833 * dev_ioctl - network device ioctl
3834 * @net: the applicable net namespace
3835 * @cmd: command to issue
3836 * @arg: pointer to a struct ifreq in user space
3838 * Issue ioctl functions to devices. This is normally called by the
3839 * user space syscall interfaces but can sometimes be useful for
3840 * other purposes. The return value is the return from the syscall if
3841 * positive or a negative errno code on error.
/* Top-level dispatcher. Groups commands by locking/permission needs; the
 * case labels, rtnl_lock()/rtnl_unlock() pairs and many returns are on
 * lines missing from this extraction. */
3844 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3850 /* One special case: SIOCGIFCONF takes ifconf argument
3851 and requires shared lock, because it sleeps writing
3855 if (cmd == SIOCGIFCONF) {
3857 ret = dev_ifconf(net, (char __user *) arg);
3861 if (cmd == SIOCGIFNAME)
3862 return dev_ifname(net, (struct ifreq __user *)arg);
/* Everything else works on a struct ifreq copied in from userspace. */
3864 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3867 ifr.ifr_name[IFNAMSIZ-1] = 0;
/* Aliased names ("eth0:1") are handled via the colon; the strip/restore
 * logic is on truncated lines. */
3869 colon = strchr(ifr.ifr_name, ':');
3874 * See which interface the caller is talking about.
3879 * These ioctl calls:
3880 * - can be done by all.
3881 * - atomic and do not require locking.
/* Group 1: unprivileged getters, served under dev_base_lock. */
3892 dev_load(net, ifr.ifr_name);
3893 read_lock(&dev_base_lock);
3894 ret = dev_ifsioc_locked(net, &ifr, cmd);
3895 read_unlock(&dev_base_lock);
3899 if (copy_to_user(arg, &ifr,
3900 sizeof(struct ifreq)))
/* SIOCETHTOOL (case label truncated). */
3906 dev_load(net, ifr.ifr_name);
3908 ret = dev_ethtool(net, &ifr);
3913 if (copy_to_user(arg, &ifr,
3914 sizeof(struct ifreq)))
3920 * These ioctl calls:
3921 * - require superuser power.
3922 * - require strict serialization.
/* Group 2: privileged commands that also copy the result back. */
3928 if (!capable(CAP_NET_ADMIN))
3930 dev_load(net, ifr.ifr_name);
3932 ret = dev_ifsioc(net, &ifr, cmd);
3937 if (copy_to_user(arg, &ifr,
3938 sizeof(struct ifreq)))
3944 * These ioctl calls:
3945 * - require superuser power.
3946 * - require strict serialization.
3947 * - do not return a value
3957 case SIOCSIFHWBROADCAST:
3960 case SIOCBONDENSLAVE:
3961 case SIOCBONDRELEASE:
3962 case SIOCBONDSETHWADDR:
3963 case SIOCBONDCHANGEACTIVE:
3966 if (!capable(CAP_NET_ADMIN))
/* fall through: the two bond query commands below are unprivileged. */
3969 case SIOCBONDSLAVEINFOQUERY:
3970 case SIOCBONDINFOQUERY:
3971 dev_load(net, ifr.ifr_name);
3973 ret = dev_ifsioc(net, &ifr, cmd);
3978 /* Get the per device memory space. We can add this but
3979 * currently do not support it */
3981 /* Set the per device memory buffer space.
3982 * Not applicable in our case */
3987 * Unknown or private ioctl.
3990 if (cmd == SIOCWANDEV ||
3991 (cmd >= SIOCDEVPRIVATE &&
3992 cmd <= SIOCDEVPRIVATE + 15)) {
3993 dev_load(net, ifr.ifr_name);
3995 ret = dev_ifsioc(net, &ifr, cmd);
3997 if (!ret && copy_to_user(arg, &ifr,
3998 sizeof(struct ifreq)))
4002 /* Take care of Wireless Extensions */
4003 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4004 return wext_handle_ioctl(net, &ifr, cmd, arg);
4011 * dev_new_index - allocate an ifindex
4012 * @net: the applicable net namespace
4014 * Returns a suitable unique value for a new device interface
4015 * number. The caller must hold the rtnl semaphore or the
4016 * dev_base_lock to be sure it remains unique.
4018 static int dev_new_index(struct net *net)
/* Candidate loop: increment a static ifindex until an unused one is found
 * (declaration, wrap handling and return are on truncated lines). */
4024 if (!__dev_get_by_index(net, ifindex))
4029 /* Delayed registration/unregisteration */
4030 static LIST_HEAD(net_todo_list);
/* Queue a device for deferred processing by netdev_run_todo(). */
4032 static void net_set_todo(struct net_device *dev)
4034 list_add_tail(&dev->todo_list, &net_todo_list);
/*
 * Tear down a registered device, undoing register_netdevice() step by step
 * (close, unlist, qdisc shutdown, notifiers, address flush, uninit, sysfs).
 * Tolerates devices that never completed registration. Caller holds RTNL
 * (per kernel convention — ASSERT truncated). Several lines (synchronize
 * calls, dev_close, early return) are missing from this extraction.
 */
4037 static void rollback_registered(struct net_device *dev)
4039 BUG_ON(dev_boot_phase);
4042 /* Some devices call without registering for initialization unwind. */
4043 if (dev->reg_state == NETREG_UNINITIALIZED) {
4044 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4045 "was registered\n", dev->name, dev);
4051 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4053 /* If device is running, close it first. */
4056 /* And unlink it from device chain. */
4057 unlist_netdevice(dev);
4059 dev->reg_state = NETREG_UNREGISTERING;
4063 /* Shutdown queueing discipline. */
4067 /* Notify protocols, that we are about to destroy
4068 this device. They should clean all the things.
4070 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4073 * Flush the unicast and multicast chains
4075 dev_addr_discard(dev);
4077 if (dev->netdev_ops->ndo_uninit)
4078 dev->netdev_ops->ndo_uninit(dev);
4080 /* Notifier chain MUST detach us from master device. */
4081 WARN_ON(dev->master);
4083 /* Remove entries from kobject tree */
4084 netdev_unregister_kobject(dev);
/* Per-queue callback: init the xmit spinlock, assign its lockdep class by
 * device type, and mark the lock as unowned. */
4091 static void __netdev_init_queue_locks_one(struct net_device *dev,
4092 struct netdev_queue *dev_queue,
4095 spin_lock_init(&dev_queue->_xmit_lock);
4096 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4097 dev_queue->xmit_lock_owner = -1;
/* Initialize the xmit locks of every tx queue plus the rx queue. */
4100 static void netdev_init_queue_locks(struct net_device *dev)
4102 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4103 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
/*
 * Sanitize a driver-supplied feature mask: drop features whose
 * prerequisites are absent (SG needs checksumming, TSO needs SG,
 * UFO needs generic checksum and SG), logging each removal.
 * Returns the corrected mask.
 */
4106 unsigned long netdev_fix_features(unsigned long features, const char *name)
4108 /* Fix illegal SG+CSUM combinations. */
4109 if ((features & NETIF_F_SG) &&
4110 !(features & NETIF_F_ALL_CSUM)) {
4112 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4113 "checksum feature.\n", name);
4114 features &= ~NETIF_F_SG;
4117 /* TSO requires that SG is present as well. */
4118 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4120 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4121 "SG feature.\n", name);
4122 features &= ~NETIF_F_TSO;
4125 if (features & NETIF_F_UFO) {
4126 if (!(features & NETIF_F_GEN_CSUM)) {
4128 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4129 "since no NETIF_F_HW_CSUM feature.\n",
4131 features &= ~NETIF_F_UFO;
4134 if (!(features & NETIF_F_SG)) {
4136 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4137 "since no NETIF_F_SG feature.\n", name);
4138 features &= ~NETIF_F_UFO;
4144 EXPORT_SYMBOL(netdev_fix_features);
4147 * register_netdevice - register a network device
4148 * @dev: device to register
4150 * Take a completed network device structure and add it to the kernel
4151 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4152 * chain. 0 is returned on success. A negative errno code is returned
4153 * on a failure to set up the device, or if the name is a duplicate.
4155 * Callers must hold the rtnl semaphore. You may want
4156 * register_netdev() instead of this.
4159 * The locking appears insufficient to guarantee two parallel registers
4160 * will not get the same name.
/* NOTE(review): the error-unwind labels (goto out / err_uninit), the
 * name-duplicate error assignment and several state inits are on lines
 * missing from this extraction. */
4163 int register_netdevice(struct net_device *dev)
4165 struct hlist_head *head;
4166 struct hlist_node *p;
4168 struct net *net = dev_net(dev);
4170 BUG_ON(dev_boot_phase);
4175 /* When net_device's are persistent, this will be fatal. */
4176 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4179 spin_lock_init(&dev->addr_list_lock);
4180 netdev_set_addr_lockdep_class(dev);
4181 netdev_init_queue_locks(dev);
4185 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4186 /* Netdevice_ops API compatiability support.
4187 * This is temporary until all network devices are converted.
4189 if (dev->netdev_ops) {
4190 const struct net_device_ops *ops = dev->netdev_ops;
/* Mirror the new-style ops into the legacy per-device function pointers. */
4192 dev->init = ops->ndo_init;
4193 dev->uninit = ops->ndo_uninit;
4194 dev->open = ops->ndo_open;
4195 dev->change_rx_flags = ops->ndo_change_rx_flags;
4196 dev->set_rx_mode = ops->ndo_set_rx_mode;
4197 dev->set_multicast_list = ops->ndo_set_multicast_list;
4198 dev->set_mac_address = ops->ndo_set_mac_address;
4199 dev->validate_addr = ops->ndo_validate_addr;
4200 dev->do_ioctl = ops->ndo_do_ioctl;
4201 dev->set_config = ops->ndo_set_config;
4202 dev->change_mtu = ops->ndo_change_mtu;
4203 dev->tx_timeout = ops->ndo_tx_timeout;
4204 dev->get_stats = ops->ndo_get_stats;
4205 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4206 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4207 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4208 #ifdef CONFIG_NET_POLL_CONTROLLER
4209 dev->poll_controller = ops->ndo_poll_controller;
4212 char drivername[64];
4213 pr_info("%s (%s): not using net_device_ops yet\n",
4214 dev->name, netdev_drivername(dev, drivername, 64));
4216 /* This works only because net_device_ops and the
4217 compatiablity structure are the same. */
4218 dev->netdev_ops = (void *) &(dev->init);
4222 /* Init, if this function is available */
4223 if (dev->netdev_ops->ndo_init) {
4224 ret = dev->netdev_ops->ndo_init(dev);
4232 if (!dev_valid_name(dev->name)) {
4237 dev->ifindex = dev_new_index(net);
4238 if (dev->iflink == -1)
4239 dev->iflink = dev->ifindex;
4241 /* Check for existence of name */
4242 head = dev_name_hash(net, dev->name);
4243 hlist_for_each(p, head) {
4244 struct net_device *d
4245 = hlist_entry(p, struct net_device, name_hlist);
4246 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4252 /* Fix illegal checksum combinations */
4253 if ((dev->features & NETIF_F_HW_CSUM) &&
4254 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4255 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4257 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4260 if ((dev->features & NETIF_F_NO_CSUM) &&
4261 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4262 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4264 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4267 dev->features = netdev_fix_features(dev->features, dev->name);
4269 /* Enable software GSO if SG is supported. */
4270 if (dev->features & NETIF_F_SG)
4271 dev->features |= NETIF_F_GSO;
4273 netdev_initialize_kobject(dev);
4274 ret = netdev_register_kobject(dev);
4277 dev->reg_state = NETREG_REGISTERED;
4280 * Default initial state at registry is that the
4281 * device is present.
4284 set_bit(__LINK_STATE_PRESENT, &dev->state);
4286 dev_init_scheduler(dev);
4288 list_netdevice(dev);
4290 /* Notify protocols, that a new device appeared. */
4291 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4292 ret = notifier_to_errno(ret);
/* A notifier veto unwinds the whole registration. */
4294 rollback_registered(dev);
4295 dev->reg_state = NETREG_UNREGISTERED;
/* err_uninit path (label truncated): undo the driver's ndo_init. */
4302 if (dev->netdev_ops->ndo_uninit)
4303 dev->netdev_ops->ndo_uninit(dev);
4308 * register_netdev - register a network device
4309 * @dev: device to register
4311 * Take a completed network device structure and add it to the kernel
4312 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4313 * chain. 0 is returned on success. A negative errno code is returned
4314 * on a failure to set up the device, or if the name is a duplicate.
4316 * This is a wrapper around register_netdevice that takes the rtnl semaphore
4317 * and expands the device name if you passed a format string to
/* The rtnl_lock()/rtnl_unlock() pair and the out label are on truncated lines. */
4320 int register_netdev(struct net_device *dev)
4327 * If the name is a format string the caller wants us to do a
/* e.g. "eth%d" — allocate the next free name before registering. */
4330 if (strchr(dev->name, '%')) {
4331 err = dev_alloc_name(dev, dev->name);
4336 err = register_netdevice(dev);
4341 EXPORT_SYMBOL(register_netdev);
4344 * netdev_wait_allrefs - wait until all references are gone.
4346 * This is called when unregistering network devices.
4348 * Any protocol or device that holds a reference should register
4349 * for netdevice notification, and cleanup and put back the
4350 * reference if they receive an UNREGISTER event.
4351 * We can get stuck here if buggy protocols don't correctly
4354 static void netdev_wait_allrefs(struct net_device *dev)
4356 unsigned long rebroadcast_time, warning_time;
4358 rebroadcast_time = warning_time = jiffies;
4359 while (atomic_read(&dev->refcnt) != 0) {
4360 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4363 /* Rebroadcast unregister notification */
4364 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4366 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4368 /* We must not have linkwatch events
4369 * pending on unregister. If this
4370 * happens, we simply run the queue
4371 * unscheduled, resulting in a noop
4374 linkwatch_run_queue();
4379 rebroadcast_time = jiffies;
4384 if (time_after(jiffies, warning_time + 10 * HZ)) {
4385 printk(KERN_EMERG "unregister_netdevice: "
4386 "waiting for %s to become free. Usage "
4388 dev->name, atomic_read(&dev->refcnt));
4389 warning_time = jiffies;
4398 * register_netdevice(x1);
4399 * register_netdevice(x2);
4401 * unregister_netdevice(y1);
4402 * unregister_netdevice(y2);
4408 * We are invoked by rtnl_unlock().
4409 * This allows us to deal with problems:
4410 * 1) We can delete sysfs objects which invoke hotplug
4411 * without deadlocking with linkwatch via keventd.
4412 * 2) Since we run with the RTNL semaphore not held, we can sleep
4413 * safely in order to wait for the netdev refcnt to drop to zero.
4415 * We must not return until all unregister events added during
4416 * the interval the lock was held have been completed.
4418 void netdev_run_todo(void)
4420 struct list_head list;
4422 /* Snapshot list, allow later requests */
4423 list_replace_init(&net_todo_list, &list);
4427 while (!list_empty(&list)) {
4428 struct net_device *dev
4429 = list_entry(list.next, struct net_device, todo_list);
4430 list_del(&dev->todo_list);
4432 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4433 printk(KERN_ERR "network todo '%s' but state %d\n",
4434 dev->name, dev->reg_state);
4439 dev->reg_state = NETREG_UNREGISTERED;
4441 on_each_cpu(flush_backlog, dev, 1);
4443 netdev_wait_allrefs(dev);
4446 BUG_ON(atomic_read(&dev->refcnt));
4447 WARN_ON(dev->ip_ptr);
4448 WARN_ON(dev->ip6_ptr);
4449 WARN_ON(dev->dn_ptr);
4451 if (dev->destructor)
4452 dev->destructor(dev);
4454 /* Free network device */
4455 kobject_put(&dev->dev.kobj);
4460 * dev_get_stats - get network device statistics
4461 * @dev: device to get statistics from
4463 * Get network statistics from device. The device driver may provide
4464 * its own method by setting dev->netdev_ops->get_stats; otherwise
4465 * the internal statistics structure is used.
4467 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4469 const struct net_device_ops *ops = dev->netdev_ops;
4471 if (ops->ndo_get_stats)
4472 return ops->ndo_get_stats(dev);
4476 EXPORT_SYMBOL(dev_get_stats);
4478 static void netdev_init_one_queue(struct net_device *dev,
4479 struct netdev_queue *queue,
4485 static void netdev_init_queues(struct net_device *dev)
4487 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4488 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4489 spin_lock_init(&dev->tx_global_lock);
4493 * alloc_netdev_mq - allocate network device
4494 * @sizeof_priv: size of private data to allocate space for
4495 * @name: device name format string
4496 * @setup: callback to initialize device
4497 * @queue_count: the number of subqueues to allocate
4499 * Allocates a struct net_device with private data area for driver use
4500 * and performs basic initialization. Also allocates subquue structs
4501 * for each queue on the device at the end of the netdevice.
4503 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4504 void (*setup)(struct net_device *), unsigned int queue_count)
4506 struct netdev_queue *tx;
4507 struct net_device *dev;
4511 BUG_ON(strlen(name) >= sizeof(dev->name));
4513 alloc_size = sizeof(struct net_device);
4515 /* ensure 32-byte alignment of private area */
4516 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4517 alloc_size += sizeof_priv;
4519 /* ensure 32-byte alignment of whole construct */
4520 alloc_size += NETDEV_ALIGN_CONST;
4522 p = kzalloc(alloc_size, GFP_KERNEL);
4524 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4528 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4530 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4536 dev = (struct net_device *)
4537 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4538 dev->padded = (char *)dev - (char *)p;
4539 dev_net_set(dev, &init_net);
4542 dev->num_tx_queues = queue_count;
4543 dev->real_num_tx_queues = queue_count;
4545 dev->gso_max_size = GSO_MAX_SIZE;
4547 netdev_init_queues(dev);
4549 INIT_LIST_HEAD(&dev->napi_list);
4551 strcpy(dev->name, name);
4554 EXPORT_SYMBOL(alloc_netdev_mq);
4557 * free_netdev - free network device
4560 * This function does the last stage of destroying an allocated device
4561 * interface. The reference to the device object is released.
4562 * If this is the last reference then it will be freed.
4564 void free_netdev(struct net_device *dev)
4566 struct napi_struct *p, *n;
4568 release_net(dev_net(dev));
4572 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
4575 /* Compatibility with error handling in drivers */
4576 if (dev->reg_state == NETREG_UNINITIALIZED) {
4577 kfree((char *)dev - dev->padded);
4581 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4582 dev->reg_state = NETREG_RELEASED;
4584 /* will free via device release */
4585 put_device(&dev->dev);
/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */
void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
4641 * dev_change_net_namespace - move device to different nethost namespace
4643 * @net: network namespace
4644 * @pat: If not NULL name pattern to try if the current device name
4645 * is already taken in the destination network namespace.
4647 * This function shuts down a device interface and moves it
4648 * to a new network namespace. On success 0 is returned, on
4649 * a failure a netagive errno code is returned.
4651 * Callers must hold the rtnl semaphore.
4654 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4657 const char *destname;
4662 /* Don't allow namespace local devices to be moved. */
4664 if (dev->features & NETIF_F_NETNS_LOCAL)
4668 /* Don't allow real devices to be moved when sysfs
4672 if (dev->dev.parent)
4676 /* Ensure the device has been registrered */
4678 if (dev->reg_state != NETREG_REGISTERED)
4681 /* Get out if there is nothing todo */
4683 if (net_eq(dev_net(dev), net))
4686 /* Pick the destination device name, and ensure
4687 * we can use it in the destination network namespace.
4690 destname = dev->name;
4691 if (__dev_get_by_name(net, destname)) {
4692 /* We get here if we can't use the current device name */
4695 if (!dev_valid_name(pat))
4697 if (strchr(pat, '%')) {
4698 if (__dev_alloc_name(net, pat, buf) < 0)
4703 if (__dev_get_by_name(net, destname))
4708 * And now a mini version of register_netdevice unregister_netdevice.
4711 /* If device is running close it first. */
4714 /* And unlink it from device chain */
4716 unlist_netdevice(dev);
4720 /* Shutdown queueing discipline. */
4723 /* Notify protocols, that we are about to destroy
4724 this device. They should clean all the things.
4726 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4729 * Flush the unicast and multicast chains
4731 dev_addr_discard(dev);
4733 netdev_unregister_kobject(dev);
4735 /* Actually switch the network namespace */
4736 dev_net_set(dev, net);
4738 /* Assign the new device name */
4739 if (destname != dev->name)
4740 strcpy(dev->name, destname);
4742 /* If there is an ifindex conflict assign a new one */
4743 if (__dev_get_by_index(net, dev->ifindex)) {
4744 int iflink = (dev->iflink == dev->ifindex);
4745 dev->ifindex = dev_new_index(net);
4747 dev->iflink = dev->ifindex;
4750 /* Fixup kobjects */
4751 err = netdev_register_kobject(dev);
4754 /* Add the device back in the hashes */
4755 list_netdevice(dev);
4757 /* Notify protocols, that a new device appeared. */
4758 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4766 static int dev_cpu_callback(struct notifier_block *nfb,
4767 unsigned long action,
4770 struct sk_buff **list_skb;
4771 struct Qdisc **list_net;
4772 struct sk_buff *skb;
4773 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4774 struct softnet_data *sd, *oldsd;
4776 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4779 local_irq_disable();
4780 cpu = smp_processor_id();
4781 sd = &per_cpu(softnet_data, cpu);
4782 oldsd = &per_cpu(softnet_data, oldcpu);
4784 /* Find end of our completion_queue. */
4785 list_skb = &sd->completion_queue;
4787 list_skb = &(*list_skb)->next;
4788 /* Append completion queue from offline CPU. */
4789 *list_skb = oldsd->completion_queue;
4790 oldsd->completion_queue = NULL;
4792 /* Find end of our output_queue. */
4793 list_net = &sd->output_queue;
4795 list_net = &(*list_net)->next_sched;
4796 /* Append output queue from offline CPU. */
4797 *list_net = oldsd->output_queue;
4798 oldsd->output_queue = NULL;
4800 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4803 /* Process offline CPU's input_pkt_queue */
4804 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4812 * netdev_increment_features - increment feature set by one
4813 * @all: current feature set
4814 * @one: new feature set
4815 * @mask: mask feature set
4817 * Computes a new feature set after adding a device with feature set
4818 * @one to the master device with current feature set @all. Will not
4819 * enable anything that is off in @mask. Returns the new feature set.
4821 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4824 /* If device needs checksumming, downgrade to it. */
4825 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4826 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4827 else if (mask & NETIF_F_ALL_CSUM) {
4828 /* If one device supports v4/v6 checksumming, set for all. */
4829 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4830 !(all & NETIF_F_GEN_CSUM)) {
4831 all &= ~NETIF_F_ALL_CSUM;
4832 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4835 /* If one device supports hw checksumming, set for all. */
4836 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4837 all &= ~NETIF_F_ALL_CSUM;
4838 all |= NETIF_F_HW_CSUM;
4842 one |= NETIF_F_ALL_CSUM;
4844 one |= all & NETIF_F_ONE_FOR_ALL;
4845 all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4846 all |= one & mask & NETIF_F_ONE_FOR_ALL;
4850 EXPORT_SYMBOL(netdev_increment_features);
4852 static struct hlist_head *netdev_create_hash(void)
4855 struct hlist_head *hash;
4857 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4859 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4860 INIT_HLIST_HEAD(&hash[i]);
4865 /* Initialize per network namespace state */
4866 static int __net_init netdev_init(struct net *net)
4868 INIT_LIST_HEAD(&net->dev_base_head);
4870 net->dev_name_head = netdev_create_hash();
4871 if (net->dev_name_head == NULL)
4874 net->dev_index_head = netdev_create_hash();
4875 if (net->dev_index_head == NULL)
4881 kfree(net->dev_name_head);
4887 * netdev_drivername - network driver for the device
4888 * @dev: network device
4889 * @buffer: buffer for resulting name
4890 * @len: size of buffer
4892 * Determine network driver for device.
4894 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4896 const struct device_driver *driver;
4897 const struct device *parent;
4899 if (len <= 0 || !buffer)
4903 parent = dev->dev.parent;
4908 driver = parent->driver;
4909 if (driver && driver->name)
4910 strlcpy(buffer, driver->name, len);
4914 static void __net_exit netdev_exit(struct net *net)
4916 kfree(net->dev_name_head);
4917 kfree(net->dev_index_head);
4920 static struct pernet_operations __net_initdata netdev_net_ops = {
4921 .init = netdev_init,
4922 .exit = netdev_exit,
4925 static void __net_exit default_device_exit(struct net *net)
4927 struct net_device *dev;
4929 * Push all migratable of the network devices back to the
4930 * initial network namespace
4934 for_each_netdev(net, dev) {
4936 char fb_name[IFNAMSIZ];
4938 /* Ignore unmoveable devices (i.e. loopback) */
4939 if (dev->features & NETIF_F_NETNS_LOCAL)
4942 /* Delete virtual devices */
4943 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4944 dev->rtnl_link_ops->dellink(dev);
4948 /* Push remaing network devices to init_net */
4949 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4950 err = dev_change_net_namespace(dev, &init_net, fb_name);
4952 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4953 __func__, dev->name, err);
4961 static struct pernet_operations __net_initdata default_device_ops = {
4962 .exit = default_device_exit,
4966 * Initialize the DEV module. At boot time this walks the device list and
4967 * unhooks any devices that fail to initialise (normally hardware not
4968 * present) and leaves us with a valid list of present and active devices.
4973 * This is called single threaded during boot, so no need
4974 * to take the rtnl semaphore.
4976 static int __init net_dev_init(void)
4978 int i, rc = -ENOMEM;
4980 BUG_ON(!dev_boot_phase);
4982 if (dev_proc_init())
4985 if (netdev_kobject_init())
4988 INIT_LIST_HEAD(&ptype_all);
4989 for (i = 0; i < PTYPE_HASH_SIZE; i++)
4990 INIT_LIST_HEAD(&ptype_base[i]);
4992 if (register_pernet_subsys(&netdev_net_ops))
4996 * Initialise the packet receive queues.
4999 for_each_possible_cpu(i) {
5000 struct softnet_data *queue;
5002 queue = &per_cpu(softnet_data, i);
5003 skb_queue_head_init(&queue->input_pkt_queue);
5004 queue->completion_queue = NULL;
5005 INIT_LIST_HEAD(&queue->poll_list);
5007 queue->backlog.poll = process_backlog;
5008 queue->backlog.weight = weight_p;
5009 queue->backlog.gro_list = NULL;
5014 /* The loopback device is special if any other network devices
5015 * is present in a network namespace the loopback device must
5016 * be present. Since we now dynamically allocate and free the
5017 * loopback device ensure this invariant is maintained by
5018 * keeping the loopback device as the first device on the
5019 * list of network devices. Ensuring the loopback devices
5020 * is the first device that appears and the last network device
5023 if (register_pernet_device(&loopback_net_ops))
5026 if (register_pernet_device(&default_device_ops))
5029 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5030 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5032 hotcpu_notifier(dev_cpu_callback, 0);
5035 #ifdef CONFIG_NET_DMA
5043 subsys_initcall(net_dev_init);
/* Symbols exported to modules.  Kept together at the end of the file,
 * grouped roughly by subsystem.
 */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

EXPORT_SYMBOL(dev_load);

EXPORT_PER_CPU_SYMBOL(softnet_data);