net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/ethtool.h>
  94 #include <linux/notifier.h>
  95 #include <linux/skbuff.h>
  96 #include <net/net_namespace.h>
  97 #include <net/sock.h>
  98 #include <linux/rtnetlink.h>
  99 #include <linux/proc_fs.h>
 100 #include <linux/seq_file.h>
 101 #include <linux/stat.h>
 102 #include <linux/if_bridge.h>
 103 #include <linux/if_macvlan.h>
 104 #include <net/dst.h>
 105 #include <net/pkt_sched.h>
 106 #include <net/checksum.h>
 107 #include <linux/highmem.h>
 108 #include <linux/init.h>
 109 #include <linux/kmod.h>
 110 #include <linux/module.h>
 111 #include <linux/netpoll.h>
 112 #include <linux/rcupdate.h>
 113 #include <linux/delay.h>
 114 #include <net/wext.h>
 115 #include <net/iw_handler.h>
 116 #include <asm/current.h>
 117 #include <linux/audit.h>
 118 #include <linux/dmaengine.h>
 119 #include <linux/err.h>
 120 #include <linux/ctype.h>
 121 #include <linux/if_arp.h>
 122 #include <linux/if_vlan.h>
 123 #include <linux/ip.h>
 124 #include <net/ip.h>
 125 #include <linux/ipv6.h>
 126 #include <linux/in.h>
 127 #include <linux/jhash.h>
 128 #include <linux/random.h>
 129
 130 #include "net-sysfs.h"
 131
 132 /* Instead of increasing this, you should create a hash table. */
 133 #define MAX_GRO_SKBS 8
 134
 135 /*
 136  *      The list of packet types we will receive (as opposed to discard)
 137  *      and the routines to invoke.
 138  *
 139  *      Why 16. Because with 16 the only overlap we get on a hash of the
 140  *      low nibble of the protocol value is RARP/SNAP/X.25.
 141  *
 142  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 143  *             sure which should go first, but I bet it won't make much
 144  *             difference if we are running VLANs.  The good news is that
 145  *             this protocol won't be in the list unless compiled in, so
 146  *             the average user (w/out VLANs) will not be adversely affected.
 147  *             --BLG
 148  *
 149  *              0800    IP
 150  *              8100    802.1Q VLAN
 151  *              0001    802.3
 152  *              0002    AX.25
 153  *              0004    802.2
 154  *              8035    RARP
 155  *              0005    SNAP
 156  *              0805    X.25
 157  *              0806    ARP
 158  *              8137    IPX
 159  *              0009    Localtalk
 160  *              86DD    IPv6
 161  */
 162
 163 #define PTYPE_HASH_SIZE (16)
 164 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
 165
 166 static DEFINE_SPINLOCK(ptype_lock);
 167 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 168 static struct list_head ptype_all __read_mostly;        /* Taps */
 169
 170 /*
 171  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 172  * semaphore.
 173  *
 174  * Pure readers hold dev_base_lock for reading.
 175  *
 176  * Writers must hold the rtnl semaphore while they loop through the
 177  * dev_base_head list, and hold dev_base_lock for writing when they do the
 178  * actual updates.  This allows pure readers to access the list even
 179  * while a writer is preparing to update it.
 180  *
 181  * To put it another way, dev_base_lock is held for writing only to
 182  * protect against pure readers; the rtnl semaphore provides the
 183  * protection against other writers.
 184  *
 185  * See, for example usages, register_netdevice() and
 186  * unregister_netdevice(), which must be called with the rtnl
 187  * semaphore held.
 188  */
 189 DEFINE_RWLOCK(dev_base_lock);
 190
 191 EXPORT_SYMBOL(dev_base_lock);
 192
 193 #define NETDEV_HASHBITS 8
 194 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 195
 196 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 197 {
 198         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 199         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 200 }
 201
 202 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 203 {
 204         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 205 }
 206
 207 /* Device list insertion */
 208 static int list_netdevice(struct net_device *dev)
 209 {
 210         struct net *net = dev_net(dev);
 211
 212         ASSERT_RTNL();
 213
 214         write_lock_bh(&dev_base_lock);
 215         list_add_tail(&dev->dev_list, &net->dev_base_head);
 216         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 217         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 218         write_unlock_bh(&dev_base_lock);
 219         return 0;
 220 }
 221
 222 /* Device list removal */
 223 static void unlist_netdevice(struct net_device *dev)
 224 {
 225         ASSERT_RTNL();
 226
 227         /* Unlink dev from the device chain */
 228         write_lock_bh(&dev_base_lock);
 229         list_del(&dev->dev_list);
 230         hlist_del(&dev->name_hlist);
 231         hlist_del(&dev->index_hlist);
 232         write_unlock_bh(&dev_base_lock);
 233 }
 234
 235 /*
 236  *      Our notifier list
 237  */
 238
 239 static RAW_NOTIFIER_HEAD(netdev_chain);
 240
 241 /*
 242  *      Device drivers call our routines to queue packets here. We empty the
 243  *      queue in the local softnet handler.
 244  */
 245
 246 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 247
 248 #ifdef CONFIG_LOCKDEP
 249 /*
 250  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 251  * according to dev->type
 252  */
 253 static const unsigned short netdev_lock_type[] =
 254         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 255          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 256          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 257          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 258          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 259          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 260          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 261          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 262          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 263          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 264          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 265          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 266          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 267          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
 268          ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
 269
 270 static const char *netdev_lock_name[] =
 271         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 272          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 273          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 274          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 275          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 276          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 277          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 278          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 279          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 280          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 281          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 282          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 283          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 284          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
 285          "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
 286
 287 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 288 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 289
 290 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 291 {
 292         int i;
 293
 294         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 295                 if (netdev_lock_type[i] == dev_type)
 296                         return i;
 297         /* the last key is used by default */
 298         return ARRAY_SIZE(netdev_lock_type) - 1;
 299 }
 300
 301 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 302                                                  unsigned short dev_type)
 303 {
 304         int i;
 305
 306         i = netdev_lock_pos(dev_type);
 307         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 308                                    netdev_lock_name[i]);
 309 }
 310
 311 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 312 {
 313         int i;
 314
 315         i = netdev_lock_pos(dev->type);
 316         lockdep_set_class_and_name(&dev->addr_list_lock,
 317                                    &netdev_addr_lock_key[i],
 318                                    netdev_lock_name[i]);
 319 }
 320 #else
 321 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 322                                                  unsigned short dev_type)
 323 {
 324 }
 325 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 326 {
 327 }
 328 #endif
 329
 330 /*******************************************************************************
 331
 332                 Protocol management and registration routines
 333
 334 *******************************************************************************/
 335
 336 /*
 337  *      Add a protocol ID to the list. Now that the input handler is
 338  *      smarter we can dispense with all the messy stuff that used to be
 339  *      here.
 340  *
 341  *      BEWARE!!! Protocol handlers, mangling input packets,
 342  *      MUST BE last in hash buckets and checking protocol handlers
 343  *      MUST start from promiscuous ptype_all chain in net_bh.
 344  *      It is true now, do not change it.
 345  *      Explanation follows: if protocol handler, mangling packet, will
 346  *      be the first on list, it is not able to sense, that packet
 347  *      is cloned and should be copied-on-write, so that it will
 348  *      change it and subsequent readers will get broken packet.
 349  *                                                      --ANK (980803)
 350  */
 351
 352 /**
 353  *      dev_add_pack - add packet handler
 354  *      @pt: packet type declaration
 355  *
 356  *      Add a protocol handler to the networking stack. The passed &packet_type
 357  *      is linked into kernel lists and may not be freed until it has been
 358  *      removed from the kernel lists.
 359  *
 360  *      This call does not sleep therefore it can not
 361  *      guarantee all CPU's that are in middle of receiving packets
 362  *      will see the new packet type (until the next received packet).
 363  */
 364
 365 void dev_add_pack(struct packet_type *pt)
 366 {
 367         int hash;
 368
 369         spin_lock_bh(&ptype_lock);
 370         if (pt->type == htons(ETH_P_ALL))
 371                 list_add_rcu(&pt->list, &ptype_all);
 372         else {
 373                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 374                 list_add_rcu(&pt->list, &ptype_base[hash]);
 375         }
 376         spin_unlock_bh(&ptype_lock);
 377 }
 378
 379 /**
 380  *      __dev_remove_pack        - remove packet handler
 381  *      @pt: packet type declaration
 382  *
 383  *      Remove a protocol handler that was previously added to the kernel
 384  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 385  *      from the kernel lists and can be freed or reused once this function
 386  *      returns.
 387  *
 388  *      The packet type might still be in use by receivers
 389  *      and must not be freed until after all the CPU's have gone
 390  *      through a quiescent state.
 391  */
 392 void __dev_remove_pack(struct packet_type *pt)
 393 {
 394         struct list_head *head;
 395         struct packet_type *pt1;
 396
 397         spin_lock_bh(&ptype_lock);
 398
 399         if (pt->type == htons(ETH_P_ALL))
 400                 head = &ptype_all;
 401         else
 402                 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 403
 404         list_for_each_entry(pt1, head, list) {
 405                 if (pt == pt1) {
 406                         list_del_rcu(&pt->list);
 407                         goto out;
 408                 }
 409         }
 410
 411         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 412 out:
 413         spin_unlock_bh(&ptype_lock);
 414 }
 415 /**
 416  *      dev_remove_pack  - remove packet handler
 417  *      @pt: packet type declaration
 418  *
 419  *      Remove a protocol handler that was previously added to the kernel
 420  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 421  *      from the kernel lists and can be freed or reused once this function
 422  *      returns.
 423  *
 424  *      This call sleeps to guarantee that no CPU is looking at the packet
 425  *      type after return.
 426  */
 427 void dev_remove_pack(struct packet_type *pt)
 428 {
 429         __dev_remove_pack(pt);
 430
 431         synchronize_net();
 432 }
 433
 434 /******************************************************************************
 435
 436                       Device Boot-time Settings Routines
 437
 438 *******************************************************************************/
 439
 440 /* Boot time configuration table */
 441 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 442
 443 /**
 444  *      netdev_boot_setup_add   - add new setup entry
 445  *      @name: name of the device
 446  *      @map: configured settings for the device
 447  *
 448  *      Adds new setup entry to the dev_boot_setup list.  The function
 449  *      returns 0 on error and 1 on success.  This is a generic routine to
 450  *      all netdevices.
 451  */
 452 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 453 {
 454         struct netdev_boot_setup *s;
 455         int i;
 456
 457         s = dev_boot_setup;
 458         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 459                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 460                         memset(s[i].name, 0, sizeof(s[i].name));
 461                         strlcpy(s[i].name, name, IFNAMSIZ);
 462                         memcpy(&s[i].map, map, sizeof(s[i].map));
 463                         break;
 464                 }
 465         }
 466
 467         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 468 }
 469
 470 /**
 471  *      netdev_boot_setup_check - check boot time settings
 472  *      @dev: the netdevice
 473  *
 474  *      Check boot time settings for the device.
 475  *      The found settings are set for the device to be used
 476  *      later in the device probing.
 477  *      Returns 0 if no settings found, 1 if they are.
 478  */
 479 int netdev_boot_setup_check(struct net_device *dev)
 480 {
 481         struct netdev_boot_setup *s = dev_boot_setup;
 482         int i;
 483
 484         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 485                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 486                     !strcmp(dev->name, s[i].name)) {
 487                         dev->irq        = s[i].map.irq;
 488                         dev->base_addr  = s[i].map.base_addr;
 489                         dev->mem_start  = s[i].map.mem_start;
 490                         dev->mem_end    = s[i].map.mem_end;
 491                         return 1;
 492                 }
 493         }
 494         return 0;
 495 }
 496
 497
 498 /**
 499  *      netdev_boot_base        - get address from boot time settings
 500  *      @prefix: prefix for network device
 501  *      @unit: id for network device
 502  *
 503  *      Check boot time settings for the base address of device.
 504  *      The found settings are set for the device to be used
 505  *      later in the device probing.
 506  *      Returns 0 if no settings found.
 507  */
 508 unsigned long netdev_boot_base(const char *prefix, int unit)
 509 {
 510         const struct netdev_boot_setup *s = dev_boot_setup;
 511         char name[IFNAMSIZ];
 512         int i;
 513
 514         sprintf(name, "%s%d", prefix, unit);
 515
 516         /*
 517          * If device already registered then return base of 1
 518          * to indicate not to probe for this interface
 519          */
 520         if (__dev_get_by_name(&init_net, name))
 521                 return 1;
 522
 523         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 524                 if (!strcmp(name, s[i].name))
 525                         return s[i].map.base_addr;
 526         return 0;
 527 }
 528
 529 /*
 530  * Saves at boot time configured settings for any netdevice.
 531  */
 532 int __init netdev_boot_setup(char *str)
 533 {
 534         int ints[5];
 535         struct ifmap map;
 536
 537         str = get_options(str, ARRAY_SIZE(ints), ints);
 538         if (!str || !*str)
 539                 return 0;
 540
 541         /* Save settings */
 542         memset(&map, 0, sizeof(map));
 543         if (ints[0] > 0)
 544                 map.irq = ints[1];
 545         if (ints[0] > 1)
 546                 map.base_addr = ints[2];
 547         if (ints[0] > 2)
 548                 map.mem_start = ints[3];
 549         if (ints[0] > 3)
 550                 map.mem_end = ints[4];
 551
 552         /* Add new entry to the list */
 553         return netdev_boot_setup_add(str, &map);
 554 }
 555
 556 __setup("netdev=", netdev_boot_setup);
 557
 558 /*******************************************************************************
 559
 560                             Device Interface Subroutines
 561
 562 *******************************************************************************/
 563
 564 /**
 565  *      __dev_get_by_name       - find a device by its name
 566  *      @net: the applicable net namespace
 567  *      @name: name to find
 568  *
 569  *      Find an interface by name. Must be called under RTNL semaphore
 570  *      or @dev_base_lock. If the name is found a pointer to the device
 571  *      is returned. If the name is not found then %NULL is returned. The
 572  *      reference counters are not incremented so the caller must be
 573  *      careful with locks.
 574  */
 575
 576 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 577 {
 578         struct hlist_node *p;
 579
 580         hlist_for_each(p, dev_name_hash(net, name)) {
 581                 struct net_device *dev
 582                         = hlist_entry(p, struct net_device, name_hlist);
 583                 if (!strncmp(dev->name, name, IFNAMSIZ))
 584                         return dev;
 585         }
 586         return NULL;
 587 }
 588
 589 /**
 590  *      dev_get_by_name         - find a device by its name
 591  *      @net: the applicable net namespace
 592  *      @name: name to find
 593  *
 594  *      Find an interface by name. This can be called from any
 595  *      context and does its own locking. The returned handle has
 596  *      the usage count incremented and the caller must use dev_put() to
 597  *      release it when it is no longer needed. %NULL is returned if no
 598  *      matching device is found.
 599  */
 600
 601 struct net_device *dev_get_by_name(struct net *net, const char *name)
 602 {
 603         struct net_device *dev;
 604
 605         read_lock(&dev_base_lock);
 606         dev = __dev_get_by_name(net, name);
 607         if (dev)
 608                 dev_hold(dev);
 609         read_unlock(&dev_base_lock);
 610         return dev;
 611 }
 612
 613 /**
 614  *      __dev_get_by_index - find a device by its ifindex
 615  *      @net: the applicable net namespace
 616  *      @ifindex: index of device
 617  *
 618  *      Search for an interface by index. Returns %NULL if the device
 619  *      is not found or a pointer to the device. The device has not
 620  *      had its reference counter increased so the caller must be careful
 621  *      about locking. The caller must hold either the RTNL semaphore
 622  *      or @dev_base_lock.
 623  */
 624
 625 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 626 {
 627         struct hlist_node *p;
 628
 629         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 630                 struct net_device *dev
 631                         = hlist_entry(p, struct net_device, index_hlist);
 632                 if (dev->ifindex == ifindex)
 633                         return dev;
 634         }
 635         return NULL;
 636 }
 637
 638
 639 /**
 640  *      dev_get_by_index - find a device by its ifindex
 641  *      @net: the applicable net namespace
 642  *      @ifindex: index of device
 643  *
 644  *      Search for an interface by index. Returns NULL if the device
 645  *      is not found or a pointer to the device. The device returned has
 646  *      had a reference added and the pointer is safe until the user calls
 647  *      dev_put to indicate they have finished with it.
 648  */
 649
 650 struct net_device *dev_get_by_index(struct net *net, int ifindex)
 651 {
 652         struct net_device *dev;
 653
 654         read_lock(&dev_base_lock);
 655         dev = __dev_get_by_index(net, ifindex);
 656         if (dev)
 657                 dev_hold(dev);
 658         read_unlock(&dev_base_lock);
 659         return dev;
 660 }
 661
 662 /**
 663  *      dev_getbyhwaddr - find a device by its hardware address
 664  *      @net: the applicable net namespace
 665  *      @type: media type of device
 666  *      @ha: hardware address
 667  *
 668  *      Search for an interface by MAC address. Returns NULL if the device
 669  *      is not found or a pointer to the device. The caller must hold the
 670  *      rtnl semaphore. The returned device has not had its ref count increased
 671  *      and the caller must therefore be careful about locking
 672  *
 673  *      BUGS:
 674  *      If the API was consistent this would be __dev_get_by_hwaddr
 675  */
 676
 677 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 678 {
 679         struct net_device *dev;
 680
 681         ASSERT_RTNL();
 682
 683         for_each_netdev(net, dev)
 684                 if (dev->type == type &&
 685                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 686                         return dev;
 687
 688         return NULL;
 689 }
 690
 691 EXPORT_SYMBOL(dev_getbyhwaddr);
 692
 693 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 694 {
 695         struct net_device *dev;
 696
 697         ASSERT_RTNL();
 698         for_each_netdev(net, dev)
 699                 if (dev->type == type)
 700                         return dev;
 701
 702         return NULL;
 703 }
 704
 705 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 706
 707 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 708 {
 709         struct net_device *dev;
 710
 711         rtnl_lock();
 712         dev = __dev_getfirstbyhwtype(net, type);
 713         if (dev)
 714                 dev_hold(dev);
 715         rtnl_unlock();
 716         return dev;
 717 }
 718
 719 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 720
 721 /**
 722  *      dev_get_by_flags - find any device with given flags
 723  *      @net: the applicable net namespace
 724  *      @if_flags: IFF_* values
 725  *      @mask: bitmask of bits in if_flags to check
 726  *
 727  *      Search for any interface with the given flags. Returns NULL if a device
 728  *      is not found or a pointer to the device. The device returned has
 729  *      had a reference added and the pointer is safe until the user calls
 730  *      dev_put to indicate they have finished with it.
 731  */
 732
 733 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 734 {
 735         struct net_device *dev, *ret;
 736
 737         ret = NULL;
 738         read_lock(&dev_base_lock);
 739         for_each_netdev(net, dev) {
 740                 if (((dev->flags ^ if_flags) & mask) == 0) {
 741                         dev_hold(dev);
 742                         ret = dev;
 743                         break;
 744                 }
 745         }
 746         read_unlock(&dev_base_lock);
 747         return ret;
 748 }
 749
 750 /**
 751  *      dev_valid_name - check if name is okay for network device
 752  *      @name: name string
 753  *
 754  *      Network device names need to be valid file names to
 755  *      to allow sysfs to work.  We also disallow any kind of
 756  *      whitespace.
 757  */
 758 int dev_valid_name(const char *name)
 759 {
 760         if (*name == '\0')
 761                 return 0;
 762         if (strlen(name) >= IFNAMSIZ)
 763                 return 0;
 764         if (!strcmp(name, ".") || !strcmp(name, ".."))
 765                 return 0;
 766
 767         while (*name) {
 768                 if (*name == '/' || isspace(*name))
 769                         return 0;
 770                 name++;
 771         }
 772         return 1;
 773 }
 774
 775 /**
 776  *      __dev_alloc_name - allocate a name for a device
 777  *      @net: network namespace to allocate the device name in
 778  *      @name: name format string
 779  *      @buf:  scratch buffer and result name string
 780  *
 781  *      Passed a format string - eg "lt%d" it will try and find a suitable
 782  *      id. It scans list of devices to build up a free map, then chooses
 783  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 784  *      while allocating the name and adding the device in order to avoid
 785  *      duplicates.
 786  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 787  *      Returns the number of the unit assigned or a negative errno code.
 788  */
 789
 790 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 791 {
 792         int i = 0;
 793         const char *p;
 794         const int max_netdevices = 8*PAGE_SIZE;
 795         unsigned long *inuse;
 796         struct net_device *d;
 797
 798         p = strnchr(name, IFNAMSIZ-1, '%');
 799         if (p) {
 800                 /*
 801                  * Verify the string as this thing may have come from
 802                  * the user.  There must be either one "%d" and no other "%"
 803                  * characters.
 804                  */
 805                 if (p[1] != 'd' || strchr(p + 2, '%'))
 806                         return -EINVAL;
 807
 808                 /* Use one page as a bit array of possible slots */
 809                 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
 810                 if (!inuse)
 811                         return -ENOMEM;
 812
 813                 for_each_netdev(net, d) {
 814                         if (!sscanf(d->name, name, &i))
 815                                 continue;
 816                         if (i < 0 || i >= max_netdevices)
 817                                 continue;
 818
 819                         /*  avoid cases where sscanf is not exact inverse of printf */
 820                         snprintf(buf, IFNAMSIZ, name, i);
 821                         if (!strncmp(buf, d->name, IFNAMSIZ))
 822                                 set_bit(i, inuse);
 823                 }
 824
 825                 i = find_first_zero_bit(inuse, max_netdevices);
 826                 free_page((unsigned long) inuse);
 827         }
 828
 829         snprintf(buf, IFNAMSIZ, name, i);
 830         if (!__dev_get_by_name(net, buf))
 831                 return i;
 832
 833         /* It is possible to run out of possible slots
 834          * when the name is long and there isn't enough space left
 835          * for the digits, or if all bits are used.
 836          */
 837         return -ENFILE;
 838 }
 839
 840 /**
 841  *      dev_alloc_name - allocate a name for a device
 842  *      @dev: device
 843  *      @name: name format string
 844  *
 845  *      Passed a format string - eg "lt%d" it will try and find a suitable
 846  *      id. It scans list of devices to build up a free map, then chooses
 847  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 848  *      while allocating the name and adding the device in order to avoid
 849  *      duplicates.
 850  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 851  *      Returns the number of the unit assigned or a negative errno code.
 852  */
 853
 854 int dev_alloc_name(struct net_device *dev, const char *name)
 855 {
 856         char buf[IFNAMSIZ];
 857         struct net *net;
 858         int ret;
 859
 860         BUG_ON(!dev_net(dev));
 861         net = dev_net(dev);
 862         ret = __dev_alloc_name(net, name, buf);
 863         if (ret >= 0)
 864                 strlcpy(dev->name, buf, IFNAMSIZ);
 865         return ret;
 866 }
 867
 868
 869 /**
 870  *      dev_change_name - change name of a device
 871  *      @dev: device
 872  *      @newname: name (or format string) must be at least IFNAMSIZ
 873  *
 874  *      Change name of a device, can pass format strings "eth%d".
 875  *      for wildcarding.
 876  */
 877 int dev_change_name(struct net_device *dev, const char *newname)
 878 {
 879         char oldname[IFNAMSIZ];
 880         int err = 0;
 881         int ret;
 882         struct net *net;
 883
 884         ASSERT_RTNL();
 885         BUG_ON(!dev_net(dev));
 886
 887         net = dev_net(dev);
 888         if (dev->flags & IFF_UP)
 889                 return -EBUSY;
 890
 891         if (!dev_valid_name(newname))
 892                 return -EINVAL;
 893
 894         if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 895                 return 0;
 896
 897         memcpy(oldname, dev->name, IFNAMSIZ);
 898
 899         if (strchr(newname, '%')) {
 900                 err = dev_alloc_name(dev, newname);
 901                 if (err < 0)
 902                         return err;
 903         }
 904         else if (__dev_get_by_name(net, newname))
 905                 return -EEXIST;
 906         else
 907                 strlcpy(dev->name, newname, IFNAMSIZ);
 908
 909 rollback:
 910         /* For now only devices in the initial network namespace
 911          * are in sysfs.
 912          */
 913         if (net == &init_net) {
 914                 ret = device_rename(&dev->dev, dev->name);
 915                 if (ret) {
 916                         memcpy(dev->name, oldname, IFNAMSIZ);
 917                         return ret;
 918                 }
 919         }
 920
 921         write_lock_bh(&dev_base_lock);
 922         hlist_del(&dev->name_hlist);
 923         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 924         write_unlock_bh(&dev_base_lock);
 925
 926         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 927         ret = notifier_to_errno(ret);
 928
 929         if (ret) {
 930                 if (err) {
 931                         printk(KERN_ERR
 932                                "%s: name change rollback failed: %d.\n",
 933                                dev->name, ret);
 934                 } else {
 935                         err = ret;
 936                         memcpy(dev->name, oldname, IFNAMSIZ);
 937                         goto rollback;
 938                 }
 939         }
 940
 941         return err;
 942 }
 943
 944 /**
 945  *      dev_set_alias - change ifalias of a device
 946  *      @dev: device
 947  *      @alias: name up to IFALIASZ
 948  *      @len: limit of bytes to copy from info
 949  *
 950  *      Set ifalias for a device,
 951  */
 952 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 953 {
 954         ASSERT_RTNL();
 955
 956         if (len >= IFALIASZ)
 957                 return -EINVAL;
 958
 959         if (!len) {
 960                 if (dev->ifalias) {
 961                         kfree(dev->ifalias);
 962                         dev->ifalias = NULL;
 963                 }
 964                 return 0;
 965         }
 966
 967         dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
 968         if (!dev->ifalias)
 969                 return -ENOMEM;
 970
 971         strlcpy(dev->ifalias, alias, len+1);
 972         return len;
 973 }
 974
 975
 976 /**
 977  *      netdev_features_change - device changes features
 978  *      @dev: device to cause notification
 979  *
 980  *      Called to indicate a device has changed features.
 981  */
 982 void netdev_features_change(struct net_device *dev)
 983 {
 984         call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
 985 }
 986 EXPORT_SYMBOL(netdev_features_change);
 987
 988 /**
 989  *      netdev_state_change - device changes state
 990  *      @dev: device to cause notification
 991  *
 992  *      Called to indicate a device has changed state. This function calls
 993  *      the notifier chains for netdev_chain and sends a NEWLINK message
 994  *      to the routing socket.
 995  */
 996 void netdev_state_change(struct net_device *dev)
 997 {
 998         if (dev->flags & IFF_UP) {
 999                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1000                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1001         }
1002 }
1003
1004 void netdev_bonding_change(struct net_device *dev)
1005 {
1006         call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1007 }
1008 EXPORT_SYMBOL(netdev_bonding_change);
1009
1010 /**
1011  *      dev_load        - load a network module
1012  *      @net: the applicable net namespace
1013  *      @name: name of interface
1014  *
1015  *      If a network interface is not present and the process has suitable
1016  *      privileges this function loads the module. If module loading is not
1017  *      available in this kernel then it becomes a nop.
1018  */
1019
1020 void dev_load(struct net *net, const char *name)
1021 {
1022         struct net_device *dev;
1023
1024         read_lock(&dev_base_lock);
1025         dev = __dev_get_by_name(net, name);
1026         read_unlock(&dev_base_lock);
1027
1028         if (!dev && capable(CAP_SYS_MODULE))
1029                 request_module("%s", name);
1030 }
1031
1032 /**
1033  *      dev_open        - prepare an interface for use.
1034  *      @dev:   device to open
1035  *
1036  *      Takes a device from down to up state. The device's private open
1037  *      function is invoked and then the multicast lists are loaded. Finally
1038  *      the device is moved into the up state and a %NETDEV_UP message is
1039  *      sent to the netdev notifier chain.
1040  *
1041  *      Calling this function on an active interface is a nop. On a failure
1042  *      a negative errno code is returned.
1043  */
1044 int dev_open(struct net_device *dev)
1045 {
1046         const struct net_device_ops *ops = dev->netdev_ops;
1047         int ret = 0;
1048
1049         ASSERT_RTNL();
1050
1051         /*
1052          *      Is it already up?
1053          */
1054
1055         if (dev->flags & IFF_UP)
1056                 return 0;
1057
1058         /*
1059          *      Is it even present?
1060          */
1061         if (!netif_device_present(dev))
1062                 return -ENODEV;
1063
1064         /*
1065          *      Call device private open method
1066          */
1067         set_bit(__LINK_STATE_START, &dev->state);
1068
1069         if (ops->ndo_validate_addr)
1070                 ret = ops->ndo_validate_addr(dev);
1071
1072         if (!ret && ops->ndo_open)
1073                 ret = ops->ndo_open(dev);
1074
1075         /*
1076          *      If it went open OK then:
1077          */
1078
1079         if (ret)
1080                 clear_bit(__LINK_STATE_START, &dev->state);
1081         else {
1082                 /*
1083                  *      Set the flags.
1084                  */
1085                 dev->flags |= IFF_UP;
1086
1087                 /*
1088                  *      Initialize multicasting status
1089                  */
1090                 dev_set_rx_mode(dev);
1091
1092                 /*
1093                  *      Wakeup transmit queue engine
1094                  */
1095                 dev_activate(dev);
1096
1097                 /*
1098                  *      ... and announce new interface.
1099                  */
1100                 call_netdevice_notifiers(NETDEV_UP, dev);
1101         }
1102
1103         return ret;
1104 }
1105
1106 /**
1107  *      dev_close - shutdown an interface.
1108  *      @dev: device to shutdown
1109  *
1110  *      This function moves an active device into down state. A
1111  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1112  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1113  *      chain.
1114  */
1115 int dev_close(struct net_device *dev)
1116 {
1117         const struct net_device_ops *ops = dev->netdev_ops;
1118         ASSERT_RTNL();
1119
1120         might_sleep();
1121
1122         if (!(dev->flags & IFF_UP))
1123                 return 0;
1124
1125         /*
1126          *      Tell people we are going down, so that they can
1127          *      prepare to death, when device is still operating.
1128          */
1129         call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1130
1131         clear_bit(__LINK_STATE_START, &dev->state);
1132
1133         /* Synchronize to scheduled poll. We cannot touch poll list,
1134          * it can be even on different cpu. So just clear netif_running().
1135          *
1136          * dev->stop() will invoke napi_disable() on all of it's
1137          * napi_struct instances on this device.
1138          */
1139         smp_mb__after_clear_bit(); /* Commit netif_running(). */
1140
1141         dev_deactivate(dev);
1142
1143         /*
1144          *      Call the device specific close. This cannot fail.
1145          *      Only if device is UP
1146          *
1147          *      We allow it to be called even after a DETACH hot-plug
1148          *      event.
1149          */
1150         if (ops->ndo_stop)
1151                 ops->ndo_stop(dev);
1152
1153         /*
1154          *      Device is now down.
1155          */
1156
1157         dev->flags &= ~IFF_UP;
1158
1159         /*
1160          * Tell people we are down
1161          */
1162         call_netdevice_notifiers(NETDEV_DOWN, dev);
1163
1164         return 0;
1165 }
1166
1167
1168 /**
1169  *      dev_disable_lro - disable Large Receive Offload on a device
1170  *      @dev: device
1171  *
1172  *      Disable Large Receive Offload (LRO) on a net device.  Must be
1173  *      called under RTNL.  This is needed if received packets may be
1174  *      forwarded to another interface.
1175  */
1176 void dev_disable_lro(struct net_device *dev)
1177 {
1178         if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1179             dev->ethtool_ops->set_flags) {
1180                 u32 flags = dev->ethtool_ops->get_flags(dev);
1181                 if (flags & ETH_FLAG_LRO) {
1182                         flags &= ~ETH_FLAG_LRO;
1183                         dev->ethtool_ops->set_flags(dev, flags);
1184                 }
1185         }
1186         WARN_ON(dev->features & NETIF_F_LRO);
1187 }
1188 EXPORT_SYMBOL(dev_disable_lro);
1189
1190
1191 static int dev_boot_phase = 1;
1192
1193 /*
1194  *      Device change register/unregister. These are not inline or static
1195  *      as we export them to the world.
1196  */
1197
1198 /**
1199  *      register_netdevice_notifier - register a network notifier block
1200  *      @nb: notifier
1201  *
1202  *      Register a notifier to be called when network device events occur.
1203  *      The notifier passed is linked into the kernel structures and must
1204  *      not be reused until it has been unregistered. A negative errno code
1205  *      is returned on a failure.
1206  *
1207  *      When registered all registration and up events are replayed
1208  *      to the new notifier to allow device to have a race free
1209  *      view of the network device list.
1210  */
1211
1212 int register_netdevice_notifier(struct notifier_block *nb)
1213 {
1214         struct net_device *dev;
1215         struct net_device *last;
1216         struct net *net;
1217         int err;
1218
1219         rtnl_lock();
1220         err = raw_notifier_chain_register(&netdev_chain, nb);
1221         if (err)
1222                 goto unlock;
1223         if (dev_boot_phase)
1224                 goto unlock;
1225         for_each_net(net) {
1226                 for_each_netdev(net, dev) {
1227                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1228                         err = notifier_to_errno(err);
1229                         if (err)
1230                                 goto rollback;
1231
1232                         if (!(dev->flags & IFF_UP))
1233                                 continue;
1234
1235                         nb->notifier_call(nb, NETDEV_UP, dev);
1236                 }
1237         }
1238
1239 unlock:
1240         rtnl_unlock();
1241         return err;
1242
1243 rollback:
1244         last = dev;
1245         for_each_net(net) {
1246                 for_each_netdev(net, dev) {
1247                         if (dev == last)
1248                                 break;
1249
1250                         if (dev->flags & IFF_UP) {
1251                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1252                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1253                         }
1254                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1255                 }
1256         }
1257
1258         raw_notifier_chain_unregister(&netdev_chain, nb);
1259         goto unlock;
1260 }
1261
1262 /**
1263  *      unregister_netdevice_notifier - unregister a network notifier block
1264  *      @nb: notifier
1265  *
1266  *      Unregister a notifier previously registered by
1267  *      register_netdevice_notifier(). The notifier is unlinked into the
1268  *      kernel structures and may then be reused. A negative errno code
1269  *      is returned on a failure.
1270  */
1271
1272 int unregister_netdevice_notifier(struct notifier_block *nb)
1273 {
1274         int err;
1275
1276         rtnl_lock();
1277         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1278         rtnl_unlock();
1279         return err;
1280 }
1281
1282 /**
1283  *      call_netdevice_notifiers - call all network notifier blocks
1284  *      @val: value passed unmodified to notifier function
1285  *      @dev: net_device pointer passed unmodified to notifier function
1286  *
1287  *      Call all network notifier blocks.  Parameters and return value
1288  *      are as for raw_notifier_call_chain().
1289  */
1290
1291 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1292 {
1293         return raw_notifier_call_chain(&netdev_chain, val, dev);
1294 }
1295
1296 /* When > 0 there are consumers of rx skb time stamps */
1297 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1298
1299 void net_enable_timestamp(void)
1300 {
1301         atomic_inc(&netstamp_needed);
1302 }
1303
1304 void net_disable_timestamp(void)
1305 {
1306         atomic_dec(&netstamp_needed);
1307 }
1308
1309 static inline void net_timestamp(struct sk_buff *skb)
1310 {
1311         if (atomic_read(&netstamp_needed))
1312                 __net_timestamp(skb);
1313         else
1314                 skb->tstamp.tv64 = 0;
1315 }
1316
1317 /*
1318  *      Support routine. Sends outgoing frames to any network
1319  *      taps currently in use.
1320  */
1321
1322 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1323 {
1324         struct packet_type *ptype;
1325
1326         net_timestamp(skb);
1327
1328         rcu_read_lock();
1329         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1330                 /* Never send packets back to the socket
1331                  * they originated from - MvS (miquels@drinkel.ow.org)
1332                  */
1333                 if ((ptype->dev == dev || !ptype->dev) &&
1334                     (ptype->af_packet_priv == NULL ||
1335                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1336                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1337                         if (!skb2)
1338                                 break;
1339
1340                         /* skb->nh should be correctly
1341                            set by sender, so that the second statement is
1342                            just protection against buggy protocols.
1343                          */
1344                         skb_reset_mac_header(skb2);
1345
1346                         if (skb_network_header(skb2) < skb2->data ||
1347                             skb2->network_header > skb2->tail) {
1348                                 if (net_ratelimit())
1349                                         printk(KERN_CRIT "protocol %04x is "
1350                                                "buggy, dev %s\n",
1351                                                skb2->protocol, dev->name);
1352                                 skb_reset_network_header(skb2);
1353                         }
1354
1355                         skb2->transport_header = skb2->network_header;
1356                         skb2->pkt_type = PACKET_OUTGOING;
1357                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1358                 }
1359         }
1360         rcu_read_unlock();
1361 }
1362
1363
1364 static inline void __netif_reschedule(struct Qdisc *q)
1365 {
1366         struct softnet_data *sd;
1367         unsigned long flags;
1368
1369         local_irq_save(flags);
1370         sd = &__get_cpu_var(softnet_data);
1371         q->next_sched = sd->output_queue;
1372         sd->output_queue = q;
1373         raise_softirq_irqoff(NET_TX_SOFTIRQ);
1374         local_irq_restore(flags);
1375 }
1376
1377 void __netif_schedule(struct Qdisc *q)
1378 {
1379         if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1380                 __netif_reschedule(q);
1381 }
1382 EXPORT_SYMBOL(__netif_schedule);
1383
1384 void dev_kfree_skb_irq(struct sk_buff *skb)
1385 {
1386         if (atomic_dec_and_test(&skb->users)) {
1387                 struct softnet_data *sd;
1388                 unsigned long flags;
1389
1390                 local_irq_save(flags);
1391                 sd = &__get_cpu_var(softnet_data);
1392                 skb->next = sd->completion_queue;
1393                 sd->completion_queue = skb;
1394                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1395                 local_irq_restore(flags);
1396         }
1397 }
1398 EXPORT_SYMBOL(dev_kfree_skb_irq);
1399
/*
 *	Release @skb from any context: dev_kfree_skb_irq() is used in hard
 *	interrupt context or with interrupts disabled, dev_kfree_skb()
 *	otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled()) {
		dev_kfree_skb_irq(skb);
		return;
	}

	dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1408
1409
1410 /**
1411  * netif_device_detach - mark device as removed
1412  * @dev: network device
1413  *
1414  * Mark device as removed from system and therefore no longer available.
1415  */
1416 void netif_device_detach(struct net_device *dev)
1417 {
1418         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1419             netif_running(dev)) {
1420                 netif_stop_queue(dev);
1421         }
1422 }
1423 EXPORT_SYMBOL(netif_device_detach);
1424
1425 /**
1426  * netif_device_attach - mark device as attached
1427  * @dev: network device
1428  *
1429  * Mark device as attached from system and restart if needed.
1430  */
1431 void netif_device_attach(struct net_device *dev)
1432 {
1433         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1434             netif_running(dev)) {
1435                 netif_wake_queue(dev);
1436                 __netdev_watchdog_up(dev);
1437         }
1438 }
1439 EXPORT_SYMBOL(netif_device_attach);
1440
1441 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1442 {
1443         return ((features & NETIF_F_GEN_CSUM) ||
1444                 ((features & NETIF_F_IP_CSUM) &&
1445                  protocol == htons(ETH_P_IP)) ||
1446                 ((features & NETIF_F_IPV6_CSUM) &&
1447                  protocol == htons(ETH_P_IPV6)));
1448 }
1449
1450 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1451 {
1452         if (can_checksum_protocol(dev->features, skb->protocol))
1453                 return true;
1454
1455         if (skb->protocol == htons(ETH_P_8021Q)) {
1456                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1457                 if (can_checksum_protocol(dev->features & dev->vlan_features,
1458                                           veh->h_vlan_encapsulated_proto))
1459                         return true;
1460         }
1461
1462         return false;
1463 }
1464
1465 /*
1466  * Invalidate hardware checksum when packet is to be mangled, and
1467  * complete checksum manually on outgoing path.
1468  */
1469 int skb_checksum_help(struct sk_buff *skb)
1470 {
1471         __wsum csum;
1472         int ret = 0, offset;
1473
1474         if (skb->ip_summed == CHECKSUM_COMPLETE)
1475                 goto out_set_summed;
1476
1477         if (unlikely(skb_shinfo(skb)->gso_size)) {
1478                 /* Let GSO fix up the checksum. */
1479                 goto out_set_summed;
1480         }
1481
1482         offset = skb->csum_start - skb_headroom(skb);
1483         BUG_ON(offset >= skb_headlen(skb));
1484         csum = skb_checksum(skb, offset, skb->len - offset, 0);
1485
1486         offset += skb->csum_offset;
1487         BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1488
1489         if (skb_cloned(skb) &&
1490             !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1491                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1492                 if (ret)
1493                         goto out;
1494         }
1495
1496         *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1497 out_set_summed:
1498         skb->ip_summed = CHECKSUM_NONE;
1499 out:
1500         return ret;
1501 }
1502
1503 /**
1504  *      skb_gso_segment - Perform segmentation on skb.
1505  *      @skb: buffer to segment
1506  *      @features: features for the output path (see dev->features)
1507  *
1508  *      This function segments the given skb and returns a list of segments.
1509  *
1510  *      It may return NULL if the skb requires no segmentation.  This is
1511  *      only possible when GSO is used for verifying header integrity.
1512  */
1513 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1514 {
1515         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1516         struct packet_type *ptype;
1517         __be16 type = skb->protocol;
1518         int err;
1519
1520         skb_reset_mac_header(skb);
1521         skb->mac_len = skb->network_header - skb->mac_header;
1522         __skb_pull(skb, skb->mac_len);
1523
1524         if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1525                 if (skb_header_cloned(skb) &&
1526                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1527                         return ERR_PTR(err);
1528         }
1529
1530         rcu_read_lock();
1531         list_for_each_entry_rcu(ptype,
1532                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1533                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1534                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1535                                 err = ptype->gso_send_check(skb);
1536                                 segs = ERR_PTR(err);
1537                                 if (err || skb_gso_ok(skb, features))
1538                                         break;
1539                                 __skb_push(skb, (skb->data -
1540                                                  skb_network_header(skb)));
1541                         }
1542                         segs = ptype->gso_segment(skb, features);
1543                         break;
1544                 }
1545         }
1546         rcu_read_unlock();
1547
1548         __skb_push(skb, skb->data - skb_mac_header(skb));
1549
1550         return segs;
1551 }
1552
1553 EXPORT_SYMBOL(skb_gso_segment);
1554
/*
 * Report a hardware receive checksum error: subject to rate limiting,
 * log the device name ("<unknown>" when @dev is NULL) and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
		dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1567
/*
 * Report whether @skb has a page fragment in high memory while @dev does
 * not advertise NETIF_F_HIGHDMA.  Always 0 without CONFIG_HIGHMEM.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
		if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
			return 1;
	}

#endif
	return 0;
}
1588
1589 struct dev_gso_cb {
1590         void (*destructor)(struct sk_buff *skb);
1591 };
1592
1593 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1594
1595 static void dev_gso_skb_destructor(struct sk_buff *skb)
1596 {
1597         struct dev_gso_cb *cb;
1598
1599         do {
1600                 struct sk_buff *nskb = skb->next;
1601
1602                 skb->next = nskb->next;
1603                 nskb->next = NULL;
1604                 kfree_skb(nskb);
1605         } while (skb->next);
1606
1607         cb = DEV_GSO_CB(skb);
1608         if (cb->destructor)
1609                 cb->destructor(skb);
1610 }
1611
1612 /**
1613  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1614  *      @skb: buffer to segment
1615  *
1616  *      This function segments the given skb and stores the list of segments
1617  *      in skb->next.
1618  */
1619 static int dev_gso_segment(struct sk_buff *skb)
1620 {
1621         struct net_device *dev = skb->dev;
1622         struct sk_buff *segs;
1623         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1624                                          NETIF_F_SG : 0);
1625
1626         segs = skb_gso_segment(skb, features);
1627
1628         /* Verifying header integrity only. */
1629         if (!segs)
1630                 return 0;
1631
1632         if (IS_ERR(segs))
1633                 return PTR_ERR(segs);
1634
1635         skb->next = segs;
1636         DEV_GSO_CB(skb)->destructor = skb->destructor;
1637         skb->destructor = dev_gso_skb_destructor;
1638
1639         return 0;
1640 }
1641
1642 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1643                         struct netdev_queue *txq)
1644 {
1645         const struct net_device_ops *ops = dev->netdev_ops;
1646
1647         prefetch(&dev->netdev_ops->ndo_start_xmit);
1648         if (likely(!skb->next)) {
1649                 if (!list_empty(&ptype_all))
1650                         dev_queue_xmit_nit(skb, dev);
1651
1652                 if (netif_needs_gso(dev, skb)) {
1653                         if (unlikely(dev_gso_segment(skb)))
1654                                 goto out_kfree_skb;
1655                         if (skb->next)
1656                                 goto gso;
1657                 }
1658
1659                 return ops->ndo_start_xmit(skb, dev);
1660         }
1661
1662 gso:
1663         do {
1664                 struct sk_buff *nskb = skb->next;
1665                 int rc;
1666
1667                 skb->next = nskb->next;
1668                 nskb->next = NULL;
1669                 rc = ops->ndo_start_xmit(nskb, dev);
1670                 if (unlikely(rc)) {
1671                         nskb->next = skb->next;
1672                         skb->next = nskb;
1673                         return rc;
1674                 }
1675                 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1676                         return NETDEV_TX_BUSY;
1677         } while (skb->next);
1678
1679         skb->destructor = DEV_GSO_CB(skb)->destructor;
1680
1681 out_kfree_skb:
1682         kfree_skb(skb);
1683         return 0;
1684 }
1685
1686 static u32 simple_tx_hashrnd;      /* seed for jhash_3words(), set on first use */
1687 static int simple_tx_hashrnd_initialized = 0;
1688
     /*
      *      simple_tx_hash - map a packet to a TX queue index by flow hash
      *      @dev: device; dev->real_num_tx_queues bounds the result
      *      @skb: packet to classify; skb->protocol selects the header parser
      *
      *      For IPv4 and IPv6 packets, hashes two 32-bit address words plus a
      *      32-bit "ports" word with jhash_3words() and scales the result into
      *      [0, dev->real_num_tx_queues).  Any other protocol returns 0.
      */
1689 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1690 {
1691         u32 addr1, addr2, ports;
1692         u32 hash, ihl;
1693         u8 ip_proto = 0;
1694
             /* One-time lazy seeding of the hash; no lock is taken around it here. */
1695         if (unlikely(!simple_tx_hashrnd_initialized)) {
1696                 get_random_bytes(&simple_tx_hashrnd, 4);
1697                 simple_tx_hashrnd_initialized = 1;
1698         }
1699
1700         switch (skb->protocol) {
1701         case htons(ETH_P_IP):
                     /*
                      * ip_proto stays 0 when the MF flag or a fragment offset is
                      * set, so no "ports" word is read for such packets.
                      */
1702                 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1703                         ip_proto = ip_hdr(skb)->protocol;
1704                 addr1 = ip_hdr(skb)->saddr;
1705                 addr2 = ip_hdr(skb)->daddr;
                     /* ihl is kept in 32-bit words; it is multiplied by 4 below */
1706                 ihl = ip_hdr(skb)->ihl;
1707                 break;
1708         case htons(ETH_P_IPV6):
                     /* nexthdr is used as-is; no extension headers are walked here */
1709                 ip_proto = ipv6_hdr(skb)->nexthdr;
                     /* only the last 32-bit word of each address feeds the hash */
1710                 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1711                 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
                     /* fixed 40-byte header, expressed in 32-bit words */
1712                 ihl = (40 >> 2);
1713                 break;
1714         default:
1715                 return 0;
1716         }
1717
1718
1719         switch (ip_proto) {
1720         case IPPROTO_TCP:
1721         case IPPROTO_UDP:
1722         case IPPROTO_DCCP:
1723         case IPPROTO_ESP:
1724         case IPPROTO_AH:
1725         case IPPROTO_SCTP:
1726         case IPPROTO_UDPLITE:
                     /* first 32 bits located ihl*4 bytes past the network header */
1727                 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1728                 break;
1729
1730         default:
1731                 ports = 0;
1732                 break;
1733         }
1734
1735         hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1736
             /* (hash * nqueues) >> 32 scales the 32-bit hash to 0..nqueues-1 */
1737         return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1738 }
1739
     /*
      *      dev_pick_tx - choose the TX queue for a packet and record it in the skb
      *      @dev: transmitting device
      *      @skb: packet being queued
      *
      *      The driver's ndo_select_queue() wins if present; otherwise devices
      *      with more than one real TX queue use simple_tx_hash(); everything
      *      else uses queue 0.  The chosen index is stored with
      *      skb_set_queue_mapping() and the matching netdev_queue is returned.
      */
1740 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1741                                         struct sk_buff *skb)
1742 {
1743         const struct net_device_ops *ops = dev->netdev_ops;
1744         u16 txq_idx;
1745
1746         if (ops->ndo_select_queue)
1747                 txq_idx = ops->ndo_select_queue(dev, skb);
1748         else if (dev->real_num_tx_queues > 1)
1749                 txq_idx = simple_tx_hash(dev, skb);
             else
                     txq_idx = 0;
1750
1751         skb_set_queue_mapping(skb, txq_idx);
1752         return netdev_get_tx_queue(dev, txq_idx);
1753 }
1754
1755 /**
1756  *      dev_queue_xmit - transmit a buffer
1757  *      @skb: buffer to transmit
1758  *
1759  *      Queue a buffer for transmission to a network device. The caller must
1760  *      have set the device and priority and built the buffer before calling
1761  *      this function. The function can be called from an interrupt.
1762  *
1763  *      A negative errno code is returned on a failure. A success does not
1764  *      guarantee the frame will be transmitted as it may be dropped due
1765  *      to congestion or traffic shaping.
1766  *
1767  * -----------------------------------------------------------------------------------
1768  *      I notice this method can also return errors from the queue disciplines,
1769  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1770  *      be positive.
1771  *
1772  *      Regardless of the return value, the skb is consumed, so it is currently
1773  *      difficult to retry a send to this method.  (You can bump the ref count
1774  *      before sending to hold a reference for retry if you are careful.)
1775  *
1776  *      When calling this method, interrupts MUST be enabled.  This is because
1777  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1778  *          --BLG
1779  */
1780 int dev_queue_xmit(struct sk_buff *skb)
1781 {
1782         struct net_device *dev = skb->dev;
1783         struct netdev_queue *txq;
1784         struct Qdisc *q;
1785         int rc = -ENOMEM;  /* returned if linearizing or checksumming fails below */
1786
1787         /* GSO will handle the following emulations directly. */
1788         if (netif_needs_gso(dev, skb))
1789                 goto gso;
1790
             /* Device cannot take a frag_list: flatten the skb first. */
1791         if (skb_shinfo(skb)->frag_list &&
1792             !(dev->features & NETIF_F_FRAGLIST) &&
1793             __skb_linearize(skb))
1794                 goto out_kfree_skb;
1795
1796         /* Fragmented skb is linearized if device does not support SG,
1797          * or if at least one of fragments is in highmem and device
1798          * does not support DMA from it.
1799          */
1800         if (skb_shinfo(skb)->nr_frags &&
1801             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1802             __skb_linearize(skb))
1803                 goto out_kfree_skb;
1804
1805         /* If packet is not checksummed and device does not support
1806          * checksumming for this protocol, complete checksumming here.
1807          */
1808         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1809                 skb_set_transport_header(skb, skb->csum_start -
1810                                               skb_headroom(skb));
1811                 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1812                         goto out_kfree_skb;
1813         }
1814
1815 gso:
1816         /* Disable soft irqs for various locks below. Also
1817          * stops preemption for RCU.
1818          */
1819         rcu_read_lock_bh();
1820
             /* Pick the TX queue, then fetch its qdisc under RCU. */
1821         txq = dev_pick_tx(dev, skb);
1822         q = rcu_dereference(txq->qdisc);
1823
1824 #ifdef CONFIG_NET_CLS_ACT
1825         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1826 #endif
             /*
              * Qdisc with an enqueue operation: queue the packet under the
              * qdisc's root lock and run the qdisc.  A deactivated qdisc drops
              * the packet instead.  Either way rc is the result and we are done.
              */
1827         if (q->enqueue) {
1828                 spinlock_t *root_lock = qdisc_lock(q);
1829
1830                 spin_lock(root_lock);
1831
1832                 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1833                         kfree_skb(skb);
1834                         rc = NET_XMIT_DROP;
1835                 } else {
1836                         rc = qdisc_enqueue_root(skb, q);
1837                         qdisc_run(q);
1838                 }
1839                 spin_unlock(root_lock);
1840
1841                 goto out;
1842         }
1843
1844         /* The device has no queue. Common case for software devices:
1845            loopback, all the sorts of tunnels...
1846
1847            Really, it is unlikely that netif_tx_lock protection is necessary
1848            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1849            counters.)
1850            However, it is possible, that they rely on protection
1851            made by us here.
1852
1853            Check this and shot the lock. It is not prone from deadlocks.
1854            Either shot noqueue qdisc, it is even simpler 8)
1855          */
1856         if (dev->flags & IFF_UP) {
1857                 int cpu = smp_processor_id(); /* ok because BHs are off */
1858
                     /* Same CPU already owns this queue's xmit lock => recursion. */
1859                 if (txq->xmit_lock_owner != cpu) {
1860
1861                         HARD_TX_LOCK(dev, txq, cpu);
1862
1863                         if (!netif_tx_queue_stopped(txq)) {
                                     /* rc is only returned as 0 via the goto out below */
1864                                 rc = 0;
1865                                 if (!dev_hard_start_xmit(skb, dev, txq)) {
1866                                         HARD_TX_UNLOCK(dev, txq);
1867                                         goto out;
1868                                 }
1869                         }
                             /* Queue stopped or driver returned non-zero: complain. */
1870                         HARD_TX_UNLOCK(dev, txq);
1871                         if (net_ratelimit())
1872                                 printk(KERN_CRIT "Virtual device %s asks to "
1873                                        "queue packet!\n", dev->name);
1874                 } else {
1875                         /* Recursion is detected! It is possible,
1876                          * unfortunately */
1877                         if (net_ratelimit())
1878                                 printk(KERN_CRIT "Dead loop on virtual device "
1879                                        "%s, fix it urgently!\n", dev->name);
1880                 }
1881         }
1882
             /*
              * Every queue-less failure ends here: device not up, queue
              * stopped, driver refused the packet, or recursion.  All are
              * reported as -ENETDOWN and the skb is freed below.
              */
1883         rc = -ENETDOWN;
1884         rcu_read_unlock_bh();
1885
             /* Early failures jump here before rcu_read_lock_bh() was taken. */
1886 out_kfree_skb:
1887         kfree_skb(skb);
1888         return rc;
             /* Paths that already consumed the skb only need to drop the RCU-bh lock. */
1889 out:
1890         rcu_read_unlock_bh();
1891         return rc;
1892 }
1893
1894
1895 /*=======================================================================
1896                         Receiver routines
1897   =======================================================================*/
1898
1899 int netdev_max_backlog __read_mostly = 1000;       /* netif_rx() drops once the per-CPU input queue is longer than this */
1900 int netdev_budget __read_mostly = 300;     /* total poll work net_rx_action() allows per run */
1901 int weight_p __read_mostly = 64;            /* old backlog weight */
1902
     /* Per-CPU receive counters (total, dropped, time_squeeze are updated in this file). */
1903 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1904
1905
1906 /**
1907  *      netif_rx        -       post buffer to the network code
1908  *      @skb: buffer to post
1909  *
1910  *      This function receives a packet from a device driver and queues it for
1911  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1912  *      may be dropped during processing for congestion control or by the
1913  *      protocol layers.
      *
      *      The packet is appended to the current CPU's input_pkt_queue with
      *      local interrupts disabled.  If that queue already holds more than
      *      netdev_max_backlog packets, the buffer is freed instead.
1914  *
1915  *      return values:
1916  *      NET_RX_SUCCESS  (no congestion)
1917  *      NET_RX_DROP     (packet was dropped)
1918  *
1919  */
1920
1921 int netif_rx(struct sk_buff *skb)
1922 {
1923         struct softnet_data *queue;
1924         unsigned long flags;
1925
1926         /* if netpoll wants it, pretend we never saw it */
1927         if (netpoll_rx(skb))
1928                 return NET_RX_DROP;
1929
             /* Stamp the packet only if it does not carry a timestamp yet. */
1930         if (!skb->tstamp.tv64)
1931                 net_timestamp(skb);
1932
1933         /*
1934          * The code is rearranged so that the path is the most
1935          * short when CPU is congested, but is still operating.
1936          */
1937         local_irq_save(flags);
1938         queue = &__get_cpu_var(softnet_data);
1939
1940         __get_cpu_var(netdev_rx_stat).total++;
1941         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
                     /* Non-empty queue: just append the packet. */
1942                 if (queue->input_pkt_queue.qlen) {
1943 enqueue:
1944                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1945                         local_irq_restore(flags);
1946                         return NET_RX_SUCCESS;
1947                 }
1948
                     /*
                      * Queue was empty: schedule the backlog NAPI instance first,
                      * then jump back to the enqueue path.
                      */
1949                 napi_schedule(&queue->backlog);
1950                 goto enqueue;
1951         }
1952
             /* Backlog is over the limit: count the drop and free the packet. */
1953         __get_cpu_var(netdev_rx_stat).dropped++;
1954         local_irq_restore(flags);
1955
1956         kfree_skb(skb);
1957         return NET_RX_DROP;
1958 }
1959
     /*
      *      netif_rx_ni - netif_rx() followed by running pending softirqs
      *      @skb: buffer to post
      *
      *      Calls netif_rx() with preemption disabled and, if a softirq is
      *      pending afterwards, runs do_softirq() before re-enabling
      *      preemption.  Returns the netif_rx() result.
      */
1960 int netif_rx_ni(struct sk_buff *skb)
1961 {
1962         int err;
1963
1964         preempt_disable();
1965         err = netif_rx(skb);
1966         if (local_softirq_pending())
1967                 do_softirq();
1968         preempt_enable();
1969
1970         return err;
1971 }
1972
1973 EXPORT_SYMBOL(netif_rx_ni);
1974
     /*
      *      net_tx_action - softirq handler for the transmit side
      *      @h: softirq action (not used in the body)
      *
      *      Works on the current CPU's softnet_data in two steps: frees the
      *      skbs on completion_queue, then runs (or reschedules) the qdiscs
      *      on output_queue.
      */
1975 static void net_tx_action(struct softirq_action *h)
1976 {
1977         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1978
1979         if (sd->completion_queue) {
1980                 struct sk_buff *clist;
1981
                     /* Detach the whole list with IRQs off; free it with IRQs on. */
1982                 local_irq_disable();
1983                 clist = sd->completion_queue;
1984                 sd->completion_queue = NULL;
1985                 local_irq_enable();
1986
1987                 while (clist) {
1988                         struct sk_buff *skb = clist;
1989                         clist = clist->next;
1990
                             /* Warn if the skb still has a non-zero user count. */
1991                         WARN_ON(atomic_read(&skb->users));
1992                         __kfree_skb(skb);
1993                 }
1994         }
1995
1996         if (sd->output_queue) {
1997                 struct Qdisc *head;
1998
                     /* Same detach-then-process pattern for the scheduled qdiscs. */
1999                 local_irq_disable();
2000                 head = sd->output_queue;
2001                 sd->output_queue = NULL;
2002                 local_irq_enable();
2003
2004                 while (head) {
2005                         struct Qdisc *q = head;
2006                         spinlock_t *root_lock;
2007
2008                         head = head->next_sched;
2009
2010                         root_lock = qdisc_lock(q);
                             /*
                              * Lock obtained: clear the SCHED bit and run the qdisc.
                              * Lock busy: reschedule the qdisc unless it has been
                              * deactivated, in which case only the SCHED bit is
                              * cleared.
                              */
2011                         if (spin_trylock(root_lock)) {
2012                                 smp_mb__before_clear_bit();
2013                                 clear_bit(__QDISC_STATE_SCHED,
2014                                           &q->state);
2015                                 qdisc_run(q);
2016                                 spin_unlock(root_lock);
2017                         } else {
2018                                 if (!test_bit(__QDISC_STATE_DEACTIVATED,
2019                                               &q->state)) {
2020                                         __netif_reschedule(q);
2021                                 } else {
2022                                         smp_mb__before_clear_bit();
2023                                         clear_bit(__QDISC_STATE_SCHED,
2024                                                   &q->state);
2025                                 }
2026                         }
2027                 }
2028         }
2029 }
2030
     /*
      *      deliver_skb - hand a packet to one protocol handler
      *      @skb: packet to deliver
      *      @pt_prev: handler whose ->func is called
      *      @orig_dev: device passed to the handler as the original device
      *
      *      Increments skb->users before the call (presumably so the handler
      *      can release one reference while the caller keeps its own) and
      *      returns the handler's result.
      */
2031 static inline int deliver_skb(struct sk_buff *skb,
2032                               struct packet_type *pt_prev,
2033                               struct net_device *orig_dev)
2034 {
2035         atomic_inc(&skb->users);
2036         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2037 }
2038
2039 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2040 /* These hooks defined here for ATM */
2041 struct net_bridge;
2042 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2043                                                 unsigned char *addr);
2044 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2045
2046 /*
2047  * If bridge module is loaded call bridging hook.
2048  *  returns NULL if packet was consumed.
2049  */
2050 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2051                                         struct sk_buff *skb) __read_mostly;
     /*
      * handle_bridge - pass a received packet to the bridge hook if applicable.
      * @skb: received packet
      * @pt_prev: pending handler; delivered and cleared before the hook runs
      * @ret: receives the result of delivering *pt_prev
      * @orig_dev: device passed on to the pending handler
      *
      * Returns skb untouched for PACKET_LOOPBACK packets and for devices
      * without a br_port; otherwise returns what br_handle_frame_hook() returns.
      */
2052 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2053                                             struct packet_type **pt_prev, int *ret,
2054                                             struct net_device *orig_dev)
2055 {
2056         struct net_bridge_port *port;
2057
2058         if (skb->pkt_type == PACKET_LOOPBACK ||
2059             (port = rcu_dereference(skb->dev->br_port)) == NULL)
2060                 return skb;
2061
             /* Deliver the handler still pending before the bridge takes the skb. */
2062         if (*pt_prev) {
2063                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2064                 *pt_prev = NULL;
2065         }
2066
2067         return br_handle_frame_hook(port, skb);
2068 }
2069 #else
     /* Without bridge support the packet passes through unchanged. */
2070 #define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
2071 #endif
2072
2073 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
     /* Hook called by handle_macvlan() for devices that have a macvlan_port. */
2074 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2075 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2076
     /*
      * handle_macvlan - pass a received packet to the macvlan hook if applicable.
      * @skb: received packet
      * @pt_prev: pending handler; delivered and cleared before the hook runs
      * @ret: receives the result of delivering *pt_prev
      * @orig_dev: device passed on to the pending handler
      *
      * Returns skb untouched when the device has no macvlan_port; otherwise
      * returns what macvlan_handle_frame_hook() returns.
      */
2077 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2078                                              struct packet_type **pt_prev,
2079                                              int *ret,
2080                                              struct net_device *orig_dev)
2081 {
2082         if (skb->dev->macvlan_port == NULL)
2083                 return skb;
2084
             /* Deliver the handler still pending before macvlan takes the skb. */
2085         if (*pt_prev) {
2086                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2087                 *pt_prev = NULL;
2088         }
2089         return macvlan_handle_frame_hook(skb);
2090 }
2091 #else
     /* Without macvlan support the packet passes through unchanged. */
2092 #define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
2093 #endif
2094
2095 #ifdef CONFIG_NET_CLS_ACT
2096 /* TODO: Maybe we should just force sch_ingress to be compiled in
2097  * when CONFIG_NET_CLS_ACT is? Otherwise there are some useless
2098  * instructions (a compare and 2 extra stores) right now if we don't
2099  * have it on but do have CONFIG_NET_CLS_ACT.
2100  * NOTE: This doesn't stop any functionality; if you don't have
2101  * the ingress scheduler, you just can't add policies on ingress.
2102  *
      * ing_filter - run a received packet through the device's rx_queue qdisc.
      * Returns TC_ACT_SHOT when the redirect counter in tc_verd exceeds
      * MAX_RED_LOOP, the qdisc_enqueue_root() result when the qdisc is used,
      * and TC_ACT_OK otherwise.
2103  */
2104 static int ing_filter(struct sk_buff *skb)
2105 {
2106         struct net_device *dev = skb->dev;
2107         u32 ttl = G_TC_RTTL(skb->tc_verd);
2108         struct netdev_queue *rxq;
2109         int result = TC_ACT_OK;
2110         struct Qdisc *q;
2111
             /* The comparison uses the value before the increment. */
2112         if (MAX_RED_LOOP < ttl++) {
2113                 printk(KERN_WARNING
2114                        "Redir loop detected Dropping packet (%d->%d)\n",
2115                        skb->iif, dev->ifindex);
2116                 return TC_ACT_SHOT;
2117         }
2118
             /* Store the incremented counter and mark the packet as ingress. */
2119         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2120         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2121
2122         rxq = &dev->rx_queue;
2123
2124         q = rxq->qdisc;
             /* noop_qdisc and deactivated qdiscs leave result at TC_ACT_OK. */
2125         if (q != &noop_qdisc) {
2126                 spin_lock(qdisc_lock(q));
2127                 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2128                         result = qdisc_enqueue_root(skb, q);
2129                 spin_unlock(qdisc_lock(q));
2130         }
2131
2132         return result;
2133 }
2134
     /*
      * handle_ing - apply ingress filtering to a received packet.
      * @skb: received packet
      * @pt_prev: pending handler; delivered and cleared before filtering
      * @ret: receives the result of delivering *pt_prev
      * @orig_dev: device passed on to the pending handler
      *
      * Returns NULL (after freeing skb) when ing_filter() reports
      * TC_ACT_SHOT or TC_ACT_STOLEN; otherwise returns skb with tc_verd
      * cleared.  Devices whose rx_queue qdisc is noop_qdisc skip filtering.
      */
2135 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2136                                          struct packet_type **pt_prev,
2137                                          int *ret, struct net_device *orig_dev)
2138 {
2139         if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2140                 goto out;
2141
2142         if (*pt_prev) {
2143                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2144                 *pt_prev = NULL;
2145         } else {
2146                 /* Huh? Why does turning on AF_PACKET affect this? */
2147                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2148         }
2149
             /* Only SHOT and STOLEN end processing; other verdicts fall through. */
2150         switch (ing_filter(skb)) {
2151         case TC_ACT_SHOT:
2152         case TC_ACT_STOLEN:
2153                 kfree_skb(skb);
2154                 return NULL;
2155         }
2156
2157 out:
2158         skb->tc_verd = 0;
2159         return skb;
2160 }
2161 #endif
2162
2163 /*
2164  *      netif_nit_deliver - deliver received packets to network taps
2165  *      @skb: buffer
2166  *
2167  *      This function is used to deliver incoming packets to network
2168  *      taps. It should be used when the normal netif_receive_skb path
2169  *      is bypassed, for example because of VLAN acceleration.
      *
      *      Only handlers on the ptype_all list are called, and only those
      *      bound to no device or to skb->dev.
2170  */
2171 void netif_nit_deliver(struct sk_buff *skb)
2172 {
2173         struct packet_type *ptype;
2174
             /* No taps registered: nothing to do. */
2175         if (list_empty(&ptype_all))
2176                 return;
2177
2178         skb_reset_network_header(skb);
2179         skb_reset_transport_header(skb);
2180         skb->mac_len = skb->network_header - skb->mac_header;
2181
2182         rcu_read_lock();
2183         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2184                 if (!ptype->dev || ptype->dev == skb->dev)
2185                         deliver_skb(skb, ptype, skb->dev);
2186         }
2187         rcu_read_unlock();
2188 }
2189
2190 /**
2191  *      netif_receive_skb - process receive buffer from network
2192  *      @skb: buffer to process
2193  *
2194  *      netif_receive_skb() is the main receive data processing function.
2195  *      It always succeeds. The buffer may be dropped during processing
2196  *      for congestion control or by the protocol layers.
2197  *
2198  *      This function may only be called from softirq context and interrupts
2199  *      should be enabled.
2200  *
2201  *      Return values (usually ignored):
2202  *      NET_RX_SUCCESS: no congestion
2203  *      NET_RX_DROP: packet was dropped
2204  */
2205 int netif_receive_skb(struct sk_buff *skb)
2206 {
2207         struct packet_type *ptype, *pt_prev;
2208         struct net_device *orig_dev;
2209         struct net_device *null_or_orig;
2210         int ret = NET_RX_DROP;
2211         __be16 type;
2212
             /* Tagged packet for which vlan_hwaccel_do_receive() returned non-zero. */
2213         if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2214                 return NET_RX_SUCCESS;
2215
2216         /* if we've gotten here through NAPI, check netpoll */
2217         if (netpoll_receive_skb(skb))
2218                 return NET_RX_DROP;
2219
2220         if (!skb->tstamp.tv64)
2221                 net_timestamp(skb);
2222
             /* Record the receiving interface index if it is not set yet. */
2223         if (!skb->iif)
2224                 skb->iif = skb->dev->ifindex;
2225
             /*
              * null_or_orig is compared against ptype->dev below.  While it is
              * NULL, handlers not bound to a device match; once it is set to
              * orig_dev, only handlers bound to orig_dev (or skb->dev) match.
              */
2226         null_or_orig = NULL;
2227         orig_dev = skb->dev;
2228         if (orig_dev->master) {
2229                 if (skb_bond_should_drop(skb))
2230                         null_or_orig = orig_dev; /* deliver only exact match */
2231                 else
2232                         skb->dev = orig_dev->master;
2233         }
2234
2235         __get_cpu_var(netdev_rx_stat).total++;
2236
2237         skb_reset_network_header(skb);
2238         skb_reset_transport_header(skb);
2239         skb->mac_len = skb->network_header - skb->mac_header;
2240
             /*
              * pt_prev holds the most recent matching handler.  It is only
              * delivered (with an extra skb reference, via deliver_skb()) when
              * the next match turns up; the last one is called directly at the
              * end without taking an extra reference.
              */
2241         pt_prev = NULL;
2242
2243         rcu_read_lock();
2244
2245         /* Don't receive packets in an exiting network namespace */
2246         if (!net_alive(dev_net(skb->dev))) {
2247                 kfree_skb(skb);
2248                 goto out;
2249         }
2250
2251 #ifdef CONFIG_NET_CLS_ACT
             /* TC_NCLS set: clear it and skip both the taps and ingress filtering. */
2252         if (skb->tc_verd & TC_NCLS) {
2253                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2254                 goto ncls;
2255         }
2256 #endif
2257
             /* First pass: handlers registered on ptype_all. */
2258         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2259                 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2260                     ptype->dev == orig_dev) {
2261                         if (pt_prev)
2262                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2263                         pt_prev = ptype;
2264                 }
2265         }
2266
2267 #ifdef CONFIG_NET_CLS_ACT
2268         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2269         if (!skb)
2270                 goto out;
2271 ncls:
2272 #endif
2273
             /* Each hook returns NULL once it has taken the packet. */
2274         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2275         if (!skb)
2276                 goto out;
2277         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2278         if (!skb)
2279                 goto out;
2280
             /* Second pass: handlers hashed by protocol type in ptype_base. */
2281         type = skb->protocol;
2282         list_for_each_entry_rcu(ptype,
2283                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2284                 if (ptype->type == type &&
2285                     (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2286                      ptype->dev == orig_dev)) {
2287                         if (pt_prev)
2288                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2289                         pt_prev = ptype;
2290                 }
2291         }
2292
             /* Final handler gets the skb directly; with no handler it is freed. */
2293         if (pt_prev) {
2294                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2295         } else {
2296                 kfree_skb(skb);
2297                 /* Jamal, now you will not able to escape explaining
2298                  * me how you were going to use this. :-)
2299                  */
2300                 ret = NET_RX_DROP;
2301         }
2302
2303 out:
2304         rcu_read_unlock();
2305         return ret;
2306 }
2307
2308 /*
      * flush_backlog - a network device is going away: unlink and free every
      * packet on this CPU's input_pkt_queue that still points at it.
      * @arg: the struct net_device being removed, passed as void *
      */
2309 static void flush_backlog(void *arg)
2310 {
2311         struct net_device *dev = arg;
2312         struct softnet_data *sd = &__get_cpu_var(softnet_data);
2313         struct sk_buff *pkt, *next;
2314
2315         skb_queue_walk_safe(&sd->input_pkt_queue, pkt, next) {
2316                 if (pkt->dev != dev)
                             continue;
2317                 __skb_unlink(pkt, &sd->input_pkt_queue);
2318                 kfree_skb(pkt);
2319         }
2320 }
2321
     /*
      *      napi_gro_complete - finish GRO for one packet and pass it up the stack
      *      @skb: packet taken off a gro_list
      *
      *      If the packet has a frag_list, the first packet_type handler in
      *      the matching ptype_base bucket with the same type, no device
      *      binding and a gro_complete callback is called.  If that fails, or
      *      no handler is found, the packet is freed and NET_RX_SUCCESS is
      *      returned.  Otherwise the packet goes to netif_receive_skb().
      */
2322 static int napi_gro_complete(struct sk_buff *skb)
2323 {
2324         struct packet_type *ptype;
2325         __be16 type = skb->protocol;
2326         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2327         int err = -ENOENT;
2328
             /* No frag_list: skip the gro_complete callback entirely. */
2329         if (!skb_shinfo(skb)->frag_list)
2330                 goto out;
2331
2332         rcu_read_lock();
2333         list_for_each_entry_rcu(ptype, head, list) {
2334                 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2335                         continue;
2336
2337                 err = ptype->gro_complete(skb);
2338                 break;
2339         }
2340         rcu_read_unlock();
2341
2342         if (err) {
                     /* Warn when the loop ran off the list, i.e. no handler matched. */
2343                 WARN_ON(&ptype->list == head);
2344                 kfree_skb(skb);
2345                 return NET_RX_SUCCESS;
2346         }
2347
2348 out:
             /* Push by the negated network offset before handing the skb on. */
2349         __skb_push(skb, -skb_network_offset(skb));
2350         return netif_receive_skb(skb);
2351 }
2352
     /*
      *      napi_gro_flush - complete every packet held on a NAPI gro_list
      *      @napi: NAPI instance whose gro_list is drained
      *
      *      Each held packet is detached from the chain and passed to
      *      napi_gro_complete(); the list head is cleared afterwards.
      */
2353 void napi_gro_flush(struct napi_struct *napi)
2354 {
2355         struct sk_buff *pkt, *rest = napi->gro_list;
2356
2357         while ((pkt = rest) != NULL) {
2358                 rest = pkt->next;
2359                 pkt->next = NULL;
2360                 napi_gro_complete(pkt);
2361         }
2362
2363         napi->gro_list = NULL;
2364 }
2365 EXPORT_SYMBOL(napi_gro_flush);
2366
     /*
      *      napi_gro_receive - try to hold or merge a packet via GRO
      *      @napi: NAPI instance owning the gro_list
      *      @skb: received packet
      *
      *      Offers the packet to the gro_receive callback of the first
      *      matching packet_type.  Depending on the outcome the packet is
      *      kept on napi->gro_list, reported as NET_RX_SUCCESS because
      *      same_flow was set, or passed to netif_receive_skb().
      */
2367 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2368 {
2369         struct sk_buff **pp = NULL;
2370         struct packet_type *ptype;
2371         __be16 type = skb->protocol;
2372         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2373         int count = 0;
2374         int same_flow;
2375         int mac_len;
2376
             /* Device does not have GRO enabled: take the normal path. */
2377         if (!(skb->dev->features & NETIF_F_GRO))
2378                 goto normal;
2379
2380         rcu_read_lock();
2381         list_for_each_entry_rcu(ptype, head, list) {
2382                 struct sk_buff *p;
2383
2384                 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2385                         continue;
2386
2387                 skb_reset_network_header(skb);
2388                 mac_len = skb->network_header - skb->mac_header;
2389                 skb->mac_len = mac_len;
2390                 NAPI_GRO_CB(skb)->same_flow = 0;
2391                 NAPI_GRO_CB(skb)->flush = 0;
2392
                     /*
                      * Count the held packets and pre-mark each one as a
                      * same-flow candidate only if its MAC header is byte-for-byte
                      * identical to the new packet's.
                      */
2393                 for (p = napi->gro_list; p; p = p->next) {
2394                         count++;
2395                         NAPI_GRO_CB(p)->same_flow =
2396                                 p->mac_len == mac_len &&
2397                                 !memcmp(skb_mac_header(p), skb_mac_header(skb),
2398                                         mac_len);
2399                         NAPI_GRO_CB(p)->flush = 0;
2400                 }
2401
2402                 pp = ptype->gro_receive(&napi->gro_list, skb);
2403                 break;
2404         }
2405         rcu_read_unlock();
2406
             /* The loop ran off the end of the list: no GRO handler for this type. */
2407         if (&ptype->list == head)
2408                 goto normal;
2409
2410         same_flow = NAPI_GRO_CB(skb)->same_flow;
2411
             /* A non-NULL pp points at a held packet to unlink and complete now. */
2412         if (pp) {
2413                 struct sk_buff *nskb = *pp;
2414
2415                 *pp = nskb->next;
2416                 nskb->next = NULL;
2417                 napi_gro_complete(nskb);
2418                 count--;
2419         }
2420
             /* same_flow set: done (presumably skb was merged by the handler). */
2421         if (same_flow)
2422                 goto ok;
2423
             /* Flush requested or list full: do not hold skb, deliver it normally. */
2424         if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2425                 __skb_push(skb, -skb_network_offset(skb));
2426                 goto normal;
2427         }
2428
             /* Otherwise hold skb at the head of the gro_list. */
2429         NAPI_GRO_CB(skb)->count = 1;
2430         skb->next = napi->gro_list;
2431         napi->gro_list = skb;
2432
2433 ok:
2434         return NET_RX_SUCCESS;
2435
2436 normal:
2437         return netif_receive_skb(skb);
2438 }
2439 EXPORT_SYMBOL(napi_gro_receive);
2440
     /*
      *      process_backlog - poll function draining the per-CPU input queue
      *      @napi: backlog NAPI instance
      *      @quota: maximum number of packets to process in this call
      *
      *      Dequeues packets from this CPU's input_pkt_queue and feeds them
      *      to napi_gro_receive() until the queue is empty, the quota is used
      *      up, or jiffies advances.  Returns the number of packets processed.
      */
2441 static int process_backlog(struct napi_struct *napi, int quota)
2442 {
2443         int work = 0;
2444         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2445         unsigned long start_time = jiffies;
2446
2447         napi->weight = weight_p;
2448         do {
2449                 struct sk_buff *skb;
2450
                     /* Dequeue with IRQs off; an empty queue completes the NAPI poll. */
2451                 local_irq_disable();
2452                 skb = __skb_dequeue(&queue->input_pkt_queue);
2453                 if (!skb) {
2454                         __napi_complete(napi);
2455                         local_irq_enable();
2456                         break;
2457                 }
2458                 local_irq_enable();
2459
2460                 napi_gro_receive(napi, skb);
2461         } while (++work < quota && jiffies == start_time);
2462
             /* Complete whatever GRO is still holding before returning. */
2463         napi_gro_flush(napi);
2464
2465         return work;
2466 }
2467
2468 /**
2469  * __napi_schedule - schedule for receive
2470  * @n: entry to schedule
2471  *
2472  * The entry's receive function will be scheduled to run
      *
      * With local interrupts disabled, the entry is appended to the current
      * CPU's softnet_data poll_list and NET_RX_SOFTIRQ is raised.
2473  */
2474 void __napi_schedule(struct napi_struct *n)
2475 {
2476         unsigned long flags;
2477
2478         local_irq_save(flags);
2479         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2480         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2481         local_irq_restore(flags);
2482 }
2483 EXPORT_SYMBOL(__napi_schedule);
2484
     /*
      * __napi_complete - take a NAPI instance off the poll list
      * @n: instance to complete
      *
      * BUGs if NAPI_STATE_SCHED is not set or if gro_list is not empty.
      * Removes the instance from its poll list and then clears
      * NAPI_STATE_SCHED.  The callers in this file invoke it with local
      * interrupts disabled.
      */
2485 void __napi_complete(struct napi_struct *n)
2486 {
2487         BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2488         BUG_ON(n->gro_list);
2489
2490         list_del(&n->poll_list);
2491         smp_mb__before_clear_bit();
2492         clear_bit(NAPI_STATE_SCHED, &n->state);
2493 }
2494 EXPORT_SYMBOL(__napi_complete);
2495
     /*
      * napi_complete - flush GRO and mark NAPI processing as finished
      * @n: instance to complete
      *
      * Does nothing when NAPI_STATE_NPSVC is set.  Otherwise flushes the
      * packets held by GRO and calls __napi_complete() with local interrupts
      * disabled.
      */
2496 void napi_complete(struct napi_struct *n)
2497 {
2498         unsigned long flags;
2499
2500         /*
2501          * don't let napi dequeue from the cpu poll list
2502          * just in case its running on a different cpu
2503          */
2504         if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2505                 return;
2506
             /* gro_list must be empty before __napi_complete(), which BUGs otherwise. */
2507         napi_gro_flush(n);
2508         local_irq_save(flags);
2509         __napi_complete(n);
2510         local_irq_restore(flags);
2511 }
2512 EXPORT_SYMBOL(napi_complete);
2513
     /*
      * netif_napi_add - initialise a NAPI instance and attach it to a device
      * @dev: device whose napi_list receives the instance
      * @napi: instance to initialise
      * @poll: poll callback stored in napi->poll
      * @weight: value stored in napi->weight
      *
      * The instance starts with NAPI_STATE_SCHED set.
      */
2514 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2515                     int (*poll)(struct napi_struct *, int), int weight)
2516 {
2517         INIT_LIST_HEAD(&napi->poll_list);
2518         napi->gro_list = NULL;
2519         napi->poll = poll;
2520         napi->weight = weight;
2521         list_add(&napi->dev_list, &dev->napi_list);
2522 #ifdef CONFIG_NETPOLL
2523         napi->dev = dev;
2524         spin_lock_init(&napi->poll_lock);
2525         napi->poll_owner = -1;
2526 #endif
             /* NOTE(review): SCHED is presumably cleared later by an enable path not shown here. */
2527         set_bit(NAPI_STATE_SCHED, &napi->state);
2528 }
2529 EXPORT_SYMBOL(netif_napi_add);
2530
     /*
      * netif_napi_del - detach a NAPI instance from its device
      * @napi: instance to remove
      *
      * Unlinks the instance from the device's list and frees every packet
      * still held on its gro_list.
      */
2531 void netif_napi_del(struct napi_struct *napi)
2532 {
2533         struct sk_buff *pkt, *rest;
2534
2535         list_del_init(&napi->dev_list);
2536
             rest = napi->gro_list;
2537         while ((pkt = rest) != NULL) {
2538                 rest = pkt->next;
2539                 pkt->next = NULL;
2540                 kfree_skb(pkt);
2541         }
2542
2543         napi->gro_list = NULL;
2544 }
2545 EXPORT_SYMBOL(netif_napi_del);
2546
2547
     /*
      * net_rx_action - softirq handler for the receive side
      * @h: softirq action (not used in the body)
      *
      * Polls the NAPI instances on this CPU's poll_list until the list is
      * empty, the netdev_budget is spent, or more than two jiffies have
      * passed.  In the latter two cases time_squeeze is incremented and the
      * softirq is raised again.
      */
2548 static void net_rx_action(struct softirq_action *h)
2549 {
2550         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2551         unsigned long time_limit = jiffies + 2;
2552         int budget = netdev_budget;
2553         void *have;
2554
2555         local_irq_disable();
2556
2557         while (!list_empty(list)) {
2558                 struct napi_struct *n;
2559                 int work, weight;
2560
2561                 /* If softirq window is exhausted then punt.
2562                  * Allow this to run for 2 jiffies, which will allow
2563                  * an average latency of 1.5/HZ.
2564                  */
2565                 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2566                         goto softnet_break;
2567
2568                 local_irq_enable();
2569
2570                 /* Even though interrupts have been re-enabled, this
2571                  * access is safe because interrupts can only add new
2572                  * entries to the tail of this list, and only ->poll()
2573                  * calls can remove this head entry from the list.
2574                  */
2575                 n = list_entry(list->next, struct napi_struct, poll_list);
2576
2577                 have = netpoll_poll_lock(n);
2578
2579                 weight = n->weight;
2580
2581                 /* This NAPI_STATE_SCHED test is for avoiding a race
2582                  * with netpoll's poll_napi().  Only the entity which
2583                  * obtains the lock and sees NAPI_STATE_SCHED set will
2584                  * actually make the ->poll() call.  Therefore we avoid
2585                  * accidentally calling ->poll() when NAPI is not scheduled.
2586                  */
2587                 work = 0;
2588                 if (test_bit(NAPI_STATE_SCHED, &n->state))
2589                         work = n->poll(n, weight);
2590
                     /* A poll callback must not report more work than its weight. */
2591                 WARN_ON_ONCE(work > weight);
2592
2593                 budget -= work;
2594
2595                 local_irq_disable();
2596
2597                 /* Drivers must not modify the NAPI state if they
2598                  * consume the entire weight.  In such cases this code
2599                  * still "owns" the NAPI instance and therefore can
2600                  * move the instance around on the list at-will.
2601                  */
2602                 if (unlikely(work == weight)) {
2603                         if (unlikely(napi_disable_pending(n)))
2604                                 __napi_complete(n);
2605                         else
2606                                 list_move_tail(&n->poll_list, list);
2607                 }
2608
2609                 netpoll_poll_unlock(have);
2610         }
2611 out:
2612         local_irq_enable();
2613
2614 #ifdef CONFIG_NET_DMA
2615         /*
2616          * There may not be any more sk_buffs coming right now, so push
2617          * any pending DMA copies to hardware
2618          */
2619         dma_issue_pending_all();
2620 #endif
2621
2622         return;
2623
             /* Reached with IRQs disabled; re-raise the softirq to finish later. */
2624 softnet_break:
2625         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2626         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2627         goto out;
2628 }
2629
2630 static gifconf_func_t * gifconf_list [NPROTO];
2631
2632 /**
2633  *      register_gifconf        -       register a SIOCGIF handler
2634  *      @family: Address family
2635  *      @gifconf: Function handler
2636  *
2637  *      Register protocol dependent address dumping routines. The handler
2638  *      that is passed must not be freed or reused until it has been replaced
2639  *      by another handler.
2640  */
2641 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2642 {
2643         if (family >= NPROTO)
2644                 return -EINVAL;
2645         gifconf_list[family] = gifconf;
2646         return 0;
2647 }
2648
2649
2650 /*
2651  *      Map an interface index to its name (SIOCGIFNAME)
2652  */
2653
2654 /*
2655  *      We need this ioctl for efficient implementation of the
2656  *      if_indextoname() function required by the IPv6 API.  Without
2657  *      it, we would have to search all the interfaces to find a
2658  *      match.  --pb
2659  */
2660
2661 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2662 {
2663         struct net_device *dev;
2664         struct ifreq ifr;
2665
2666         /*
2667          *      Fetch the caller's info block.
2668          */
2669
2670         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2671                 return -EFAULT;
2672
2673         read_lock(&dev_base_lock);
2674         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2675         if (!dev) {
2676                 read_unlock(&dev_base_lock);
2677                 return -ENODEV;
2678         }
2679
2680         strcpy(ifr.ifr_name, dev->name);
2681         read_unlock(&dev_base_lock);
2682
2683         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2684                 return -EFAULT;
2685         return 0;
2686 }
2687
2688 /*
2689  *      Perform a SIOCGIFCONF call. This structure will change
2690  *      size eventually, and there is nothing I can do about it.
2691  *      Thus we will need a 'compatibility mode'.
2692  */
2693
2694 static int dev_ifconf(struct net *net, char __user *arg)
2695 {
2696         struct ifconf ifc;
2697         struct net_device *dev;
2698         char __user *pos;
2699         int len;
2700         int total;
2701         int i;
2702
2703         /*
2704          *      Fetch the caller's info block.
2705          */
2706
2707         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2708                 return -EFAULT;
2709
2710         pos = ifc.ifc_buf;
2711         len = ifc.ifc_len;
2712
2713         /*
2714          *      Loop over the interfaces, and write an info block for each.
2715          */
2716
2717         total = 0;
2718         for_each_netdev(net, dev) {
2719                 for (i = 0; i < NPROTO; i++) {
2720                         if (gifconf_list[i]) {
2721                                 int done;
2722                                 if (!pos)
2723                                         done = gifconf_list[i](dev, NULL, 0);
2724                                 else
2725                                         done = gifconf_list[i](dev, pos + total,
2726                                                                len - total);
2727                                 if (done < 0)
2728                                         return -EFAULT;
2729                                 total += done;
2730                         }
2731                 }
2732         }
2733
2734         /*
2735          *      All done.  Write the updated control block back to the caller.
2736          */
2737         ifc.ifc_len = total;
2738
2739         /*
2740          *      Both BSD and Solaris return 0 here, so we do too.
2741          */
2742         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2743 }
2744
2745 #ifdef CONFIG_PROC_FS
2746 /*
2747  *      This is invoked by the /proc filesystem handler to display a device
2748  *      in detail.
2749  */
2750 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2751         __acquires(dev_base_lock)
2752 {
2753         struct net *net = seq_file_net(seq);
2754         loff_t off;
2755         struct net_device *dev;
2756
2757         read_lock(&dev_base_lock);
2758         if (!*pos)
2759                 return SEQ_START_TOKEN;
2760
2761         off = 1;
2762         for_each_netdev(net, dev)
2763                 if (off++ == *pos)
2764                         return dev;
2765
2766         return NULL;
2767 }
2768
2769 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2770 {
2771         struct net *net = seq_file_net(seq);
2772         ++*pos;
2773         return v == SEQ_START_TOKEN ?
2774                 first_net_device(net) : next_net_device((struct net_device *)v);
2775 }
2776
2777 void dev_seq_stop(struct seq_file *seq, void *v)
2778         __releases(dev_base_lock)
2779 {
2780         read_unlock(&dev_base_lock);
2781 }
2782
2783 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2784 {
2785         const struct net_device_stats *stats = dev_get_stats(dev);
2786
2787         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2788                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2789                    dev->name, stats->rx_bytes, stats->rx_packets,
2790                    stats->rx_errors,
2791                    stats->rx_dropped + stats->rx_missed_errors,
2792                    stats->rx_fifo_errors,
2793                    stats->rx_length_errors + stats->rx_over_errors +
2794                     stats->rx_crc_errors + stats->rx_frame_errors,
2795                    stats->rx_compressed, stats->multicast,
2796                    stats->tx_bytes, stats->tx_packets,
2797                    stats->tx_errors, stats->tx_dropped,
2798                    stats->tx_fifo_errors, stats->collisions,
2799                    stats->tx_carrier_errors +
2800                     stats->tx_aborted_errors +
2801                     stats->tx_window_errors +
2802                     stats->tx_heartbeat_errors,
2803                    stats->tx_compressed);
2804 }
2805
2806 /*
2807  *      Called from the PROCfs module. This now uses the new arbitrary sized
2808  *      /proc/net interface to create /proc/net/dev
2809  */
2810 static int dev_seq_show(struct seq_file *seq, void *v)
2811 {
2812         if (v == SEQ_START_TOKEN)
2813                 seq_puts(seq, "Inter-|   Receive                            "
2814                               "                    |  Transmit\n"
2815                               " face |bytes    packets errs drop fifo frame "
2816                               "compressed multicast|bytes    packets errs "
2817                               "drop fifo colls carrier compressed\n");
2818         else
2819                 dev_seq_printf_stats(seq, v);
2820         return 0;
2821 }
2822
2823 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2824 {
2825         struct netif_rx_stats *rc = NULL;
2826
2827         while (*pos < nr_cpu_ids)
2828                 if (cpu_online(*pos)) {
2829                         rc = &per_cpu(netdev_rx_stat, *pos);
2830                         break;
2831                 } else
2832                         ++*pos;
2833         return rc;
2834 }
2835
2836 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2837 {
2838         return softnet_get_online(pos);
2839 }
2840
2841 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2842 {
2843         ++*pos;
2844         return softnet_get_online(pos);
2845 }
2846
/* Nothing to undo: softnet_seq_start() takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2850
2851 static int softnet_seq_show(struct seq_file *seq, void *v)
2852 {
2853         struct netif_rx_stats *s = v;
2854
2855         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2856                    s->total, s->dropped, s->time_squeeze, 0,
2857                    0, 0, 0, 0, /* was fastroute */
2858                    s->cpu_collision );
2859         return 0;
2860 }
2861
2862 static const struct seq_operations dev_seq_ops = {
2863         .start = dev_seq_start,
2864         .next  = dev_seq_next,
2865         .stop  = dev_seq_stop,
2866         .show  = dev_seq_show,
2867 };
2868
2869 static int dev_seq_open(struct inode *inode, struct file *file)
2870 {
2871         return seq_open_net(inode, file, &dev_seq_ops,
2872                             sizeof(struct seq_net_private));
2873 }
2874
2875 static const struct file_operations dev_seq_fops = {
2876         .owner   = THIS_MODULE,
2877         .open    = dev_seq_open,
2878         .read    = seq_read,
2879         .llseek  = seq_lseek,
2880         .release = seq_release_net,
2881 };
2882
2883 static const struct seq_operations softnet_seq_ops = {
2884         .start = softnet_seq_start,
2885         .next  = softnet_seq_next,
2886         .stop  = softnet_seq_stop,
2887         .show  = softnet_seq_show,
2888 };
2889
2890 static int softnet_seq_open(struct inode *inode, struct file *file)
2891 {
2892         return seq_open(file, &softnet_seq_ops);
2893 }
2894
2895 static const struct file_operations softnet_seq_fops = {
2896         .owner   = THIS_MODULE,
2897         .open    = softnet_seq_open,
2898         .read    = seq_read,
2899         .llseek  = seq_lseek,
2900         .release = seq_release,
2901 };
2902
2903 static void *ptype_get_idx(loff_t pos)
2904 {
2905         struct packet_type *pt = NULL;
2906         loff_t i = 0;
2907         int t;
2908
2909         list_for_each_entry_rcu(pt, &ptype_all, list) {
2910                 if (i == pos)
2911                         return pt;
2912                 ++i;
2913         }
2914
2915         for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2916                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2917                         if (i == pos)
2918                                 return pt;
2919                         ++i;
2920                 }
2921         }
2922         return NULL;
2923 }
2924
2925 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2926         __acquires(RCU)
2927 {
2928         rcu_read_lock();
2929         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2930 }
2931
2932 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2933 {
2934         struct packet_type *pt;
2935         struct list_head *nxt;
2936         int hash;
2937
2938         ++*pos;
2939         if (v == SEQ_START_TOKEN)
2940                 return ptype_get_idx(0);
2941
2942         pt = v;
2943         nxt = pt->list.next;
2944         if (pt->type == htons(ETH_P_ALL)) {
2945                 if (nxt != &ptype_all)
2946                         goto found;
2947                 hash = 0;
2948                 nxt = ptype_base[0].next;
2949         } else
2950                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2951
2952         while (nxt == &ptype_base[hash]) {
2953                 if (++hash >= PTYPE_HASH_SIZE)
2954                         return NULL;
2955                 nxt = ptype_base[hash].next;
2956         }
2957 found:
2958         return list_entry(nxt, struct packet_type, list);
2959 }
2960
2961 static void ptype_seq_stop(struct seq_file *seq, void *v)
2962         __releases(RCU)
2963 {
2964         rcu_read_unlock();
2965 }
2966
2967 static int ptype_seq_show(struct seq_file *seq, void *v)
2968 {
2969         struct packet_type *pt = v;
2970
2971         if (v == SEQ_START_TOKEN)
2972                 seq_puts(seq, "Type Device      Function\n");
2973         else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2974                 if (pt->type == htons(ETH_P_ALL))
2975                         seq_puts(seq, "ALL ");
2976                 else
2977                         seq_printf(seq, "%04x", ntohs(pt->type));
2978
2979                 seq_printf(seq, " %-8s %pF\n",
2980                            pt->dev ? pt->dev->name : "", pt->func);
2981         }
2982
2983         return 0;
2984 }
2985
2986 static const struct seq_operations ptype_seq_ops = {
2987         .start = ptype_seq_start,
2988         .next  = ptype_seq_next,
2989         .stop  = ptype_seq_stop,
2990         .show  = ptype_seq_show,
2991 };
2992
2993 static int ptype_seq_open(struct inode *inode, struct file *file)
2994 {
2995         return seq_open_net(inode, file, &ptype_seq_ops,
2996                         sizeof(struct seq_net_private));
2997 }
2998
2999 static const struct file_operations ptype_seq_fops = {
3000         .owner   = THIS_MODULE,
3001         .open    = ptype_seq_open,
3002         .read    = seq_read,
3003         .llseek  = seq_lseek,
3004         .release = seq_release_net,
3005 };
3006
3007
3008 static int __net_init dev_proc_net_init(struct net *net)
3009 {
3010         int rc = -ENOMEM;
3011
3012         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3013                 goto out;
3014         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3015                 goto out_dev;
3016         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3017                 goto out_softnet;
3018
3019         if (wext_proc_init(net))
3020                 goto out_ptype;
3021         rc = 0;
3022 out:
3023         return rc;
3024 out_ptype:
3025         proc_net_remove(net, "ptype");
3026 out_softnet:
3027         proc_net_remove(net, "softnet_stat");
3028 out_dev:
3029         proc_net_remove(net, "dev");
3030         goto out;
3031 }
3032
3033 static void __net_exit dev_proc_net_exit(struct net *net)
3034 {
3035         wext_proc_exit(net);
3036
3037         proc_net_remove(net, "ptype");
3038         proc_net_remove(net, "softnet_stat");
3039         proc_net_remove(net, "dev");
3040 }
3041
3042 static struct pernet_operations __net_initdata dev_proc_ops = {
3043         .init = dev_proc_net_init,
3044         .exit = dev_proc_net_exit,
3045 };
3046
3047 static int __init dev_proc_init(void)
3048 {
3049         return register_pernet_subsys(&dev_proc_ops);
3050 }
3051 #else
3052 #define dev_proc_init() 0
3053 #endif  /* CONFIG_PROC_FS */
3054
3055
3056 /**
3057  *      netdev_set_master       -       set up master/slave pair
3058  *      @slave: slave device
3059  *      @master: new master device
3060  *
3061  *      Changes the master device of the slave. Pass %NULL to break the
3062  *      bonding. The caller must hold the RTNL semaphore. On a failure
3063  *      a negative errno code is returned. On success the reference counts
3064  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3065  *      function returns zero.
3066  */
3067 int netdev_set_master(struct net_device *slave, struct net_device *master)
3068 {
3069         struct net_device *old = slave->master;
3070
3071         ASSERT_RTNL();
3072
3073         if (master) {
3074                 if (old)
3075                         return -EBUSY;
3076                 dev_hold(master);
3077         }
3078
3079         slave->master = master;
3080
3081         synchronize_net();
3082
3083         if (old)
3084                 dev_put(old);
3085
3086         if (master)
3087                 slave->flags |= IFF_SLAVE;
3088         else
3089                 slave->flags &= ~IFF_SLAVE;
3090
3091         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3092         return 0;
3093 }
3094
3095 static void dev_change_rx_flags(struct net_device *dev, int flags)
3096 {
3097         const struct net_device_ops *ops = dev->netdev_ops;
3098
3099         if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3100                 ops->ndo_change_rx_flags(dev, flags);
3101 }
3102
3103 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3104 {
3105         unsigned short old_flags = dev->flags;
3106         uid_t uid;
3107         gid_t gid;
3108
3109         ASSERT_RTNL();
3110
3111         dev->flags |= IFF_PROMISC;
3112         dev->promiscuity += inc;
3113         if (dev->promiscuity == 0) {
3114                 /*
3115                  * Avoid overflow.
3116                  * If inc causes overflow, untouch promisc and return error.
3117                  */
3118                 if (inc < 0)
3119                         dev->flags &= ~IFF_PROMISC;
3120                 else {
3121                         dev->promiscuity -= inc;
3122                         printk(KERN_WARNING "%s: promiscuity touches roof, "
3123                                 "set promiscuity failed, promiscuity feature "
3124                                 "of device might be broken.\n", dev->name);
3125                         return -EOVERFLOW;
3126                 }
3127         }
3128         if (dev->flags != old_flags) {
3129                 printk(KERN_INFO "device %s %s promiscuous mode\n",
3130                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3131                                                                "left");
3132                 if (audit_enabled) {
3133                         current_uid_gid(&uid, &gid);
3134                         audit_log(current->audit_context, GFP_ATOMIC,
3135                                 AUDIT_ANOM_PROMISCUOUS,
3136                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3137                                 dev->name, (dev->flags & IFF_PROMISC),
3138                                 (old_flags & IFF_PROMISC),
3139                                 audit_get_loginuid(current),
3140                                 uid, gid,
3141                                 audit_get_sessionid(current));
3142                 }
3143
3144                 dev_change_rx_flags(dev, IFF_PROMISC);
3145         }
3146         return 0;
3147 }
3148
3149 /**
3150  *      dev_set_promiscuity     - update promiscuity count on a device
3151  *      @dev: device
3152  *      @inc: modifier
3153  *
3154  *      Add or remove promiscuity from a device. While the count in the device
3155  *      remains above zero the interface remains promiscuous. Once it hits zero
3156  *      the device reverts back to normal filtering operation. A negative inc
3157  *      value is used to drop promiscuity on the device.
3158  *      Return 0 if successful or a negative errno code on error.
3159  */
3160 int dev_set_promiscuity(struct net_device *dev, int inc)
3161 {
3162         unsigned short old_flags = dev->flags;
3163         int err;
3164
3165         err = __dev_set_promiscuity(dev, inc);
3166         if (err < 0)
3167                 return err;
3168         if (dev->flags != old_flags)
3169                 dev_set_rx_mode(dev);
3170         return err;
3171 }
3172
3173 /**
3174  *      dev_set_allmulti        - update allmulti count on a device
3175  *      @dev: device
3176  *      @inc: modifier
3177  *
3178  *      Add or remove reception of all multicast frames to a device. While the
3179  *      count in the device remains above zero the interface keeps receiving
3180  *      all multicast frames. Once it hits zero the device reverts back to
3181  *      normal filtering operation. A negative @inc value is used to drop the
3182  *      counter when releasing a resource needing all multicasts.
3183  *      Return 0 if successful or a negative errno code on error.
3184  */
3185
3186 int dev_set_allmulti(struct net_device *dev, int inc)
3187 {
3188         unsigned short old_flags = dev->flags;
3189
3190         ASSERT_RTNL();
3191
3192         dev->flags |= IFF_ALLMULTI;
3193         dev->allmulti += inc;
3194         if (dev->allmulti == 0) {
3195                 /*
3196                  * Avoid overflow.
3197                  * If inc causes overflow, untouch allmulti and return error.
3198                  */
3199                 if (inc < 0)
3200                         dev->flags &= ~IFF_ALLMULTI;
3201                 else {
3202                         dev->allmulti -= inc;
3203                         printk(KERN_WARNING "%s: allmulti touches roof, "
3204                                 "set allmulti failed, allmulti feature of "
3205                                 "device might be broken.\n", dev->name);
3206                         return -EOVERFLOW;
3207                 }
3208         }
3209         if (dev->flags ^ old_flags) {
3210                 dev_change_rx_flags(dev, IFF_ALLMULTI);
3211                 dev_set_rx_mode(dev);
3212         }
3213         return 0;
3214 }
3215
3216 /*
3217  *      Upload unicast and multicast address lists to device and
3218  *      configure RX filtering. When the device doesn't support unicast
3219  *      filtering it is put in promiscuous mode while unicast addresses
3220  *      are present.
3221  */
3222 void __dev_set_rx_mode(struct net_device *dev)
3223 {
3224         const struct net_device_ops *ops = dev->netdev_ops;
3225
3226         /* dev_open will call this function so the list will stay sane. */
3227         if (!(dev->flags&IFF_UP))
3228                 return;
3229
3230         if (!netif_device_present(dev))
3231                 return;
3232
3233         if (ops->ndo_set_rx_mode)
3234                 ops->ndo_set_rx_mode(dev);
3235         else {
3236                 /* Unicast addresses changes may only happen under the rtnl,
3237                  * therefore calling __dev_set_promiscuity here is safe.
3238                  */
3239                 if (dev->uc_count > 0 && !dev->uc_promisc) {
3240                         __dev_set_promiscuity(dev, 1);
3241                         dev->uc_promisc = 1;
3242                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3243                         __dev_set_promiscuity(dev, -1);
3244                         dev->uc_promisc = 0;
3245                 }
3246
3247                 if (ops->ndo_set_multicast_list)
3248                         ops->ndo_set_multicast_list(dev);
3249         }
3250 }
3251
/* Locked wrapper: run __dev_set_rx_mode() under the device's address lock. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}
3258
3259 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3260                       void *addr, int alen, int glbl)
3261 {
3262         struct dev_addr_list *da;
3263
3264         for (; (da = *list) != NULL; list = &da->next) {
3265                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3266                     alen == da->da_addrlen) {
3267                         if (glbl) {
3268                                 int old_glbl = da->da_gusers;
3269                                 da->da_gusers = 0;
3270                                 if (old_glbl == 0)
3271                                         break;
3272                         }
3273                         if (--da->da_users)
3274                                 return 0;
3275
3276                         *list = da->next;
3277                         kfree(da);
3278                         (*count)--;
3279                         return 0;
3280                 }
3281         }
3282         return -ENOENT;
3283 }
3284
3285 int __dev_addr_add(struct dev_addr_list **list, int *count,
3286                    void *addr, int alen, int glbl)
3287 {
3288         struct dev_addr_list *da;
3289
3290         for (da = *list; da != NULL; da = da->next) {
3291                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3292                     da->da_addrlen == alen) {
3293                         if (glbl) {
3294                                 int old_glbl = da->da_gusers;
3295                                 da->da_gusers = 1;
3296                                 if (old_glbl)
3297                                         return 0;
3298                         }
3299                         da->da_users++;
3300                         return 0;
3301                 }
3302         }
3303
3304         da = kzalloc(sizeof(*da), GFP_ATOMIC);
3305         if (da == NULL)
3306                 return -ENOMEM;
3307         memcpy(da->da_addr, addr, alen);
3308         da->da_addrlen = alen;
3309         da->da_users = 1;
3310         da->da_gusers = glbl ? 1 : 0;
3311         da->next = *list;
3312         *list = da;
3313         (*count)++;
3314         return 0;
3315 }
3316
3317 /**
3318  *      dev_unicast_delete      - Release secondary unicast address.
3319  *      @dev: device
3320  *      @addr: address to delete
3321  *      @alen: length of @addr
3322  *
3323  *      Release reference to a secondary unicast address and remove it
3324  *      from the device if the reference count drops to zero.
3325  *
3326  *      The caller must hold the rtnl_mutex.
3327  */
3328 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3329 {
3330         int err;
3331
3332         ASSERT_RTNL();
3333
3334         netif_addr_lock_bh(dev);
3335         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3336         if (!err)
3337                 __dev_set_rx_mode(dev);
3338         netif_addr_unlock_bh(dev);
3339         return err;
3340 }
3341 EXPORT_SYMBOL(dev_unicast_delete);
3342
3343 /**
3344  *      dev_unicast_add         - add a secondary unicast address
3345  *      @dev: device
3346  *      @addr: address to add
3347  *      @alen: length of @addr
3348  *
3349  *      Add a secondary unicast address to the device or increase
3350  *      the reference count if it already exists.
3351  *
3352  *      The caller must hold the rtnl_mutex.
3353  */
3354 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3355 {
3356         int err;
3357
3358         ASSERT_RTNL();
3359
3360         netif_addr_lock_bh(dev);
3361         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3362         if (!err)
3363                 __dev_set_rx_mode(dev);
3364         netif_addr_unlock_bh(dev);
3365         return err;
3366 }
3367 EXPORT_SYMBOL(dev_unicast_add);
3368
3369 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3370                     struct dev_addr_list **from, int *from_count)
3371 {
3372         struct dev_addr_list *da, *next;
3373         int err = 0;
3374
3375         da = *from;
3376         while (da != NULL) {
3377                 next = da->next;
3378                 if (!da->da_synced) {
3379                         err = __dev_addr_add(to, to_count,
3380                                              da->da_addr, da->da_addrlen, 0);
3381                         if (err < 0)
3382                                 break;
3383                         da->da_synced = 1;
3384                         da->da_users++;
3385                 } else if (da->da_users == 1) {
3386                         __dev_addr_delete(to, to_count,
3387                                           da->da_addr, da->da_addrlen, 0);
3388                         __dev_addr_delete(from, from_count,
3389                                           da->da_addr, da->da_addrlen, 0);
3390                 }
3391                 da = next;
3392         }
3393         return err;
3394 }
3395
3396 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3397                        struct dev_addr_list **from, int *from_count)
3398 {
3399         struct dev_addr_list *da, *next;
3400
3401         da = *from;
3402         while (da != NULL) {
3403                 next = da->next;
3404                 if (da->da_synced) {
3405                         __dev_addr_delete(to, to_count,
3406                                           da->da_addr, da->da_addrlen, 0);
3407                         da->da_synced = 0;
3408                         __dev_addr_delete(from, from_count,
3409                                           da->da_addr, da->da_addrlen, 0);
3410                 }
3411                 da = next;
3412         }
3413 }
3414
3415 /**
3416  *      dev_unicast_sync - Synchronize device's unicast list to another device
3417  *      @to: destination device
3418  *      @from: source device
3419  *
3420  *      Add newly added addresses to the destination device and release
3421  *      addresses that have no users left. The source device must be
3422  *      locked by netif_addr_lock_bh.
3423  *
3424  *      This function is intended to be called from the dev->set_rx_mode
3425  *      function of layered software devices.
3426  */
3427 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3428 {
3429         int err = 0;
3430
3431         netif_addr_lock_bh(to);
3432         err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3433                               &from->uc_list, &from->uc_count);
3434         if (!err)
3435                 __dev_set_rx_mode(to);
3436         netif_addr_unlock_bh(to);
3437         return err;
3438 }
3439 EXPORT_SYMBOL(dev_unicast_sync);
3440
3441 /**
3442  *      dev_unicast_unsync - Remove synchronized addresses from the destination device
3443  *      @to: destination device
3444  *      @from: source device
3445  *
3446  *      Remove all addresses that were added to the destination device by
3447  *      dev_unicast_sync(). This function is intended to be called from the
3448  *      dev->stop function of layered software devices.
3449  */
3450 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3451 {
3452         netif_addr_lock_bh(from);
3453         netif_addr_lock(to);
3454
3455         __dev_addr_unsync(&to->uc_list, &to->uc_count,
3456                           &from->uc_list, &from->uc_count);
3457         __dev_set_rx_mode(to);
3458
3459         netif_addr_unlock(to);
3460         netif_addr_unlock_bh(from);
3461 }
3462 EXPORT_SYMBOL(dev_unicast_unsync);
3463
3464 static void __dev_addr_discard(struct dev_addr_list **list)
3465 {
3466         struct dev_addr_list *tmp;
3467
3468         while (*list != NULL) {
3469                 tmp = *list;
3470                 *list = tmp->next;
3471                 if (tmp->da_users > tmp->da_gusers)
3472                         printk("__dev_addr_discard: address leakage! "
3473                                "da_users=%d\n", tmp->da_users);
3474                 kfree(tmp);
3475         }
3476 }
3477
3478 static void dev_addr_discard(struct net_device *dev)
3479 {
3480         netif_addr_lock_bh(dev);
3481
3482         __dev_addr_discard(&dev->uc_list);
3483         dev->uc_count = 0;
3484
3485         __dev_addr_discard(&dev->mc_list);
3486         dev->mc_count = 0;
3487
3488         netif_addr_unlock_bh(dev);
3489 }
3490
3491 /**
3492  *      dev_get_flags - get flags reported to userspace
3493  *      @dev: device
3494  *
3495  *      Get the combination of flag bits exported through APIs to userspace.
3496  */
3497 unsigned dev_get_flags(const struct net_device *dev)
3498 {
3499         unsigned flags;
3500
3501         flags = (dev->flags & ~(IFF_PROMISC |
3502                                 IFF_ALLMULTI |
3503                                 IFF_RUNNING |
3504                                 IFF_LOWER_UP |
3505                                 IFF_DORMANT)) |
3506                 (dev->gflags & (IFF_PROMISC |
3507                                 IFF_ALLMULTI));
3508
3509         if (netif_running(dev)) {
3510                 if (netif_oper_up(dev))
3511                         flags |= IFF_RUNNING;
3512                 if (netif_carrier_ok(dev))
3513                         flags |= IFF_LOWER_UP;
3514                 if (netif_dormant(dev))
3515                         flags |= IFF_DORMANT;
3516         }
3517
3518         return flags;
3519 }
3520
3521 /**
3522  *      dev_change_flags - change device settings
3523  *      @dev: device
3524  *      @flags: device state flags
3525  *
3526  *      Change settings on device based state flags. The flags are
3527  *      in the userspace exported format.
3528  */
3529 int dev_change_flags(struct net_device *dev, unsigned flags)
3530 {
3531         int ret, changes;
3532         int old_flags = dev->flags;
3533
3534         ASSERT_RTNL();
3535
3536         /*
3537          *      Set the flags on our device.
3538          */
3539
3540         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3541                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3542                                IFF_AUTOMEDIA)) |
3543                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3544                                     IFF_ALLMULTI));
3545
3546         /*
3547          *      Load in the correct multicast list now the flags have changed.
3548          */
3549
3550         if ((old_flags ^ flags) & IFF_MULTICAST)
3551                 dev_change_rx_flags(dev, IFF_MULTICAST);
3552
3553         dev_set_rx_mode(dev);
3554
3555         /*
3556          *      Have we downed the interface. We handle IFF_UP ourselves
3557          *      according to user attempts to set it, rather than blindly
3558          *      setting it.
3559          */
3560
3561         ret = 0;
3562         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3563                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3564
3565                 if (!ret)
3566                         dev_set_rx_mode(dev);
3567         }
3568
3569         if (dev->flags & IFF_UP &&
3570             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3571                                           IFF_VOLATILE)))
3572                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
3573
3574         if ((flags ^ dev->gflags) & IFF_PROMISC) {
3575                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3576                 dev->gflags ^= IFF_PROMISC;
3577                 dev_set_promiscuity(dev, inc);
3578         }
3579
3580         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3581            is important. Some (broken) drivers set IFF_PROMISC, when
3582            IFF_ALLMULTI is requested not asking us and not reporting.
3583          */
3584         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3585                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3586                 dev->gflags ^= IFF_ALLMULTI;
3587                 dev_set_allmulti(dev, inc);
3588         }
3589
3590         /* Exclude state transition flags, already notified */
3591         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3592         if (changes)
3593                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3594
3595         return ret;
3596 }
3597
3598 /**
3599  *      dev_set_mtu - Change maximum transfer unit
3600  *      @dev: device
3601  *      @new_mtu: new transfer unit
3602  *
3603  *      Change the maximum transfer size of the network device.
3604  */
3605 int dev_set_mtu(struct net_device *dev, int new_mtu)
3606 {
3607         const struct net_device_ops *ops = dev->netdev_ops;
3608         int err;
3609
3610         if (new_mtu == dev->mtu)
3611                 return 0;
3612
3613         /*      MTU must be positive.    */
3614         if (new_mtu < 0)
3615                 return -EINVAL;
3616
3617         if (!netif_device_present(dev))
3618                 return -ENODEV;
3619
3620         err = 0;
3621         if (ops->ndo_change_mtu)
3622                 err = ops->ndo_change_mtu(dev, new_mtu);
3623         else
3624                 dev->mtu = new_mtu;
3625
3626         if (!err && dev->flags & IFF_UP)
3627                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3628         return err;
3629 }
3630
3631 /**
3632  *      dev_set_mac_address - Change Media Access Control Address
3633  *      @dev: device
3634  *      @sa: new address
3635  *
3636  *      Change the hardware (MAC) address of the device
3637  */
3638 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3639 {
3640         const struct net_device_ops *ops = dev->netdev_ops;
3641         int err;
3642
3643         if (!ops->ndo_set_mac_address)
3644                 return -EOPNOTSUPP;
3645         if (sa->sa_family != dev->type)
3646                 return -EINVAL;
3647         if (!netif_device_present(dev))
3648                 return -ENODEV;
3649         err = ops->ndo_set_mac_address(dev, sa);
3650         if (!err)
3651                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3652         return err;
3653 }
3654
3655 /*
3656  *      Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3657  */
3658 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3659 {
3660         int err;
3661         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3662
3663         if (!dev)
3664                 return -ENODEV;
3665
3666         switch (cmd) {
3667                 case SIOCGIFFLAGS:      /* Get interface flags */
3668                         ifr->ifr_flags = dev_get_flags(dev);
3669                         return 0;
3670
3671                 case SIOCGIFMETRIC:     /* Get the metric on the interface
3672                                            (currently unused) */
3673                         ifr->ifr_metric = 0;
3674                         return 0;
3675
3676                 case SIOCGIFMTU:        /* Get the MTU of a device */
3677                         ifr->ifr_mtu = dev->mtu;
3678                         return 0;
3679
3680                 case SIOCGIFHWADDR:
3681                         if (!dev->addr_len)
3682                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3683                         else
3684                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3685                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3686                         ifr->ifr_hwaddr.sa_family = dev->type;
3687                         return 0;
3688
3689                 case SIOCGIFSLAVE:
3690                         err = -EINVAL;
3691                         break;
3692
3693                 case SIOCGIFMAP:
3694                         ifr->ifr_map.mem_start = dev->mem_start;
3695                         ifr->ifr_map.mem_end   = dev->mem_end;
3696                         ifr->ifr_map.base_addr = dev->base_addr;
3697                         ifr->ifr_map.irq       = dev->irq;
3698                         ifr->ifr_map.dma       = dev->dma;
3699                         ifr->ifr_map.port      = dev->if_port;
3700                         return 0;
3701
3702                 case SIOCGIFINDEX:
3703                         ifr->ifr_ifindex = dev->ifindex;
3704                         return 0;
3705
3706                 case SIOCGIFTXQLEN:
3707                         ifr->ifr_qlen = dev->tx_queue_len;
3708                         return 0;
3709
3710                 default:
3711                         /* dev_ioctl() should ensure this case
3712                          * is never reached
3713                          */
3714                         WARN_ON(1);
3715                         err = -EINVAL;
3716                         break;
3717
3718         }
3719         return err;
3720 }
3721
3722 /*
3723  *      Perform the SIOCxIFxxx calls, inside rtnl_lock()
3724  */
3725 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3726 {
3727         int err;
3728         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3729         const struct net_device_ops *ops;
3730
3731         if (!dev)
3732                 return -ENODEV;
3733
3734         ops = dev->netdev_ops;
3735
3736         switch (cmd) {
3737                 case SIOCSIFFLAGS:      /* Set interface flags */
3738                         return dev_change_flags(dev, ifr->ifr_flags);
3739
3740                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3741                                            (currently unused) */
3742                         return -EOPNOTSUPP;
3743
3744                 case SIOCSIFMTU:        /* Set the MTU of a device */
3745                         return dev_set_mtu(dev, ifr->ifr_mtu);
3746
3747                 case SIOCSIFHWADDR:
3748                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3749
3750                 case SIOCSIFHWBROADCAST:
3751                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3752                                 return -EINVAL;
3753                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3754                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3755                         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3756                         return 0;
3757
3758                 case SIOCSIFMAP:
3759                         if (ops->ndo_set_config) {
3760                                 if (!netif_device_present(dev))
3761                                         return -ENODEV;
3762                                 return ops->ndo_set_config(dev, &ifr->ifr_map);
3763                         }
3764                         return -EOPNOTSUPP;
3765
3766                 case SIOCADDMULTI:
3767                         if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3768                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3769                                 return -EINVAL;
3770                         if (!netif_device_present(dev))
3771                                 return -ENODEV;
3772                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3773                                           dev->addr_len, 1);
3774
3775                 case SIOCDELMULTI:
3776                         if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3777                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3778                                 return -EINVAL;
3779                         if (!netif_device_present(dev))
3780                                 return -ENODEV;
3781                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3782                                              dev->addr_len, 1);
3783
3784                 case SIOCSIFTXQLEN:
3785                         if (ifr->ifr_qlen < 0)
3786                                 return -EINVAL;
3787                         dev->tx_queue_len = ifr->ifr_qlen;
3788                         return 0;
3789
3790                 case SIOCSIFNAME:
3791                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3792                         return dev_change_name(dev, ifr->ifr_newname);
3793
3794                 /*
3795                  *      Unknown or private ioctl
3796                  */
3797
3798                 default:
3799                         if ((cmd >= SIOCDEVPRIVATE &&
3800                             cmd <= SIOCDEVPRIVATE + 15) ||
3801                             cmd == SIOCBONDENSLAVE ||
3802                             cmd == SIOCBONDRELEASE ||
3803                             cmd == SIOCBONDSETHWADDR ||
3804                             cmd == SIOCBONDSLAVEINFOQUERY ||
3805                             cmd == SIOCBONDINFOQUERY ||
3806                             cmd == SIOCBONDCHANGEACTIVE ||
3807                             cmd == SIOCGMIIPHY ||
3808                             cmd == SIOCGMIIREG ||
3809                             cmd == SIOCSMIIREG ||
3810                             cmd == SIOCBRADDIF ||
3811                             cmd == SIOCBRDELIF ||
3812                             cmd == SIOCWANDEV) {
3813                                 err = -EOPNOTSUPP;
3814                                 if (ops->ndo_do_ioctl) {
3815                                         if (netif_device_present(dev))
3816                                                 err = ops->ndo_do_ioctl(dev, ifr, cmd);
3817                                         else
3818                                                 err = -ENODEV;
3819                                 }
3820                         } else
3821                                 err = -EINVAL;
3822
3823         }
3824         return err;
3825 }
3826
3827 /*
3828  *      This function handles all "interface"-type I/O control requests. The actual
3829  *      'doing' part of this is dev_ifsioc above.
3830  */
3831
3832 /**
3833  *      dev_ioctl       -       network device ioctl
3834  *      @net: the applicable net namespace
3835  *      @cmd: command to issue
3836  *      @arg: pointer to a struct ifreq in user space
3837  *
3838  *      Issue ioctl functions to devices. This is normally called by the
3839  *      user space syscall interfaces but can sometimes be useful for
3840  *      other purposes. The return value is the return from the syscall if
3841  *      positive or a negative errno code on error.
      *
      *      Apart from SIOCGIFCONF and SIOCGIFNAME, the request is copied
      *      in from @arg first (-EFAULT on failure).  A ':' in the
      *      interface name is cut off before the device lookup and put
      *      back before the request is copied out again by the groups
      *      that restore it.
3842  */
3843
3844 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3845 {
3846         struct ifreq ifr;
3847         int ret;
3848         char *colon;
3849
3850         /* One special case: SIOCGIFCONF takes ifconf argument
3851            and requires shared lock, because it sleeps writing
3852            to user space.
3853          */
3854
3855         if (cmd == SIOCGIFCONF) {
3856                 rtnl_lock();
3857                 ret = dev_ifconf(net, (char __user *) arg);
3858                 rtnl_unlock();
3859                 return ret;
3860         }
             /* SIOCGIFNAME is handed to dev_ifname() with the raw user pointer. */
3861         if (cmd == SIOCGIFNAME)
3862                 return dev_ifname(net, (struct ifreq __user *)arg);
3863
3864         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3865                 return -EFAULT;
3866
             /* Force NUL termination of the user-supplied name. */
3867         ifr.ifr_name[IFNAMSIZ-1] = 0;
3868
             /* Truncate the name at the first ':'; colon remembers the spot. */
3869         colon = strchr(ifr.ifr_name, ':');
3870         if (colon)
3871                 *colon = 0;
3872
3873         /*
3874          *      See which interface the caller is talking about.
3875          */
3876
3877         switch (cmd) {
3878                 /*
3879                  *      These ioctl calls:
3880                  *      - can be done by all.
3881                  *      - are served under read_lock(&dev_base_lock) only.
3882                  *      - return a value
3883                  */
3884                 case SIOCGIFFLAGS:
3885                 case SIOCGIFMETRIC:
3886                 case SIOCGIFMTU:
3887                 case SIOCGIFHWADDR:
3888                 case SIOCGIFSLAVE:
3889                 case SIOCGIFMAP:
3890                 case SIOCGIFINDEX:
3891                 case SIOCGIFTXQLEN:
3892                         dev_load(net, ifr.ifr_name);
3893                         read_lock(&dev_base_lock);
3894                         ret = dev_ifsioc_locked(net, &ifr, cmd);
3895                         read_unlock(&dev_base_lock);
3896                         if (!ret) {
                                     /* Restore the original name before copying back. */
3897                                 if (colon)
3898                                         *colon = ':';
3899                                 if (copy_to_user(arg, &ifr,
3900                                                  sizeof(struct ifreq)))
3901                                         ret = -EFAULT;
3902                         }
3903                         return ret;
3904
                     /* ethtool: no capability check here, runs under the RTNL. */
3905                 case SIOCETHTOOL:
3906                         dev_load(net, ifr.ifr_name);
3907                         rtnl_lock();
3908                         ret = dev_ethtool(net, &ifr);
3909                         rtnl_unlock();
3910                         if (!ret) {
3911                                 if (colon)
3912                                         *colon = ':';
3913                                 if (copy_to_user(arg, &ifr,
3914                                                  sizeof(struct ifreq)))
3915                                         ret = -EFAULT;
3916                         }
3917                         return ret;
3918
3919                 /*
3920                  *      These ioctl calls:
3921                  *      - require CAP_NET_ADMIN.
3922                  *      - require strict serialization (rtnl_lock).
3923                  *      - return a value
3924                  */
3925                 case SIOCGMIIPHY:
3926                 case SIOCGMIIREG:
3927                 case SIOCSIFNAME:
3928                         if (!capable(CAP_NET_ADMIN))
3929                                 return -EPERM;
3930                         dev_load(net, ifr.ifr_name);
3931                         rtnl_lock();
3932                         ret = dev_ifsioc(net, &ifr, cmd);
3933                         rtnl_unlock();
3934                         if (!ret) {
3935                                 if (colon)
3936                                         *colon = ':';
3937                                 if (copy_to_user(arg, &ifr,
3938                                                  sizeof(struct ifreq)))
3939                                         ret = -EFAULT;
3940                         }
3941                         return ret;
3942
3943                 /*
3944                  *      These ioctl calls:
3945                  *      - require CAP_NET_ADMIN.
3946                  *      - require strict serialization (rtnl_lock).
3947                  *      - do not return a value (ifr is not copied back)
3948                  */
3949                 case SIOCSIFFLAGS:
3950                 case SIOCSIFMETRIC:
3951                 case SIOCSIFMTU:
3952                 case SIOCSIFMAP:
3953                 case SIOCSIFHWADDR:
3954                 case SIOCSIFSLAVE:
3955                 case SIOCADDMULTI:
3956                 case SIOCDELMULTI:
3957                 case SIOCSIFHWBROADCAST:
3958                 case SIOCSIFTXQLEN:
3959                 case SIOCSMIIREG:
3960                 case SIOCBONDENSLAVE:
3961                 case SIOCBONDRELEASE:
3962                 case SIOCBONDSETHWADDR:
3963                 case SIOCBONDCHANGEACTIVE:
3964                 case SIOCBRADDIF:
3965                 case SIOCBRDELIF:
3966                         if (!capable(CAP_NET_ADMIN))
3967                                 return -EPERM;
3968                         /* fall through */
                     /* The two bonding queries below skip the CAP_NET_ADMIN check. */
3969                 case SIOCBONDSLAVEINFOQUERY:
3970                 case SIOCBONDINFOQUERY:
3971                         dev_load(net, ifr.ifr_name);
3972                         rtnl_lock();
3973                         ret = dev_ifsioc(net, &ifr, cmd);
3974                         rtnl_unlock();
3975                         return ret;
3976
3977                 case SIOCGIFMEM:
3978                         /* Get the per device memory space. We can add this but
3979                          * currently do not support it */
3980                 case SIOCSIFMEM:
3981                         /* Set the per device memory buffer space.
3982                          * Not applicable in our case */
3983                 case SIOCSIFLINK:
3984                         return -EINVAL;
3985
3986                 /*
3987                  *      Unknown or private ioctl.
3988                  */
3989                 default:
                             /*
                              * SIOCWANDEV and the 16 device-private commands go to
                              * dev_ifsioc() under the RTNL with no capability check
                              * at this level.
                              * NOTE(review): a ':' cut from the name above is not
                              * restored before this copy_to_user(), unlike the
                              * other copy-back paths - confirm this is intended.
                              */
3990                         if (cmd == SIOCWANDEV ||
3991                             (cmd >= SIOCDEVPRIVATE &&
3992                              cmd <= SIOCDEVPRIVATE + 15)) {
3993                                 dev_load(net, ifr.ifr_name);
3994                                 rtnl_lock();
3995                                 ret = dev_ifsioc(net, &ifr, cmd);
3996                                 rtnl_unlock();
3997                                 if (!ret && copy_to_user(arg, &ifr,
3998                                                          sizeof(struct ifreq)))
3999                                         ret = -EFAULT;
4000                                 return ret;
4001                         }
4002                         /* Take care of Wireless Extensions */
4003                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4004                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
4005                         return -EINVAL;
4006         }
4007 }
4008
4009
/**
 *      dev_new_index   -       allocate an ifindex
 *      @net: the applicable net namespace
 *
 *      Returns a suitable unique value for a new device interface
 *      number.  The caller must hold the rtnl semaphore or the
 *      dev_base_lock to be sure it remains unique.
 *
 *      A function-static counter is advanced (restarting at 1 once it
 *      is no longer positive) until __dev_get_by_index() finds no
 *      device in @net using the candidate value.
 */
static int dev_new_index(struct net *net)
{
        static int last_ifindex;

        do {
                last_ifindex++;
                if (last_ifindex <= 0)
                        last_ifindex = 1;
        } while (__dev_get_by_index(net, last_ifindex));

        return last_ifindex;
}
4028
4029 /* Delayed registration/unregistration: devices queued by net_set_todo() */
4030 static LIST_HEAD(net_todo_list);
4031
/* Queue @dev at the tail of net_todo_list through its todo_list link. */
4032 static void net_set_todo(struct net_device *dev)
4033 {
4034         list_add_tail(&dev->todo_list, &net_todo_list);
4035 }
4036
/*
 * Take a registered device out of service: close it, unlink it from the
 * device chain, shut down its queueing discipline, send
 * NETDEV_UNREGISTER, flush its address lists, run the driver's
 * ndo_uninit hook and remove its kobject.
 *
 * Must be called with the RTNL held and not during dev_boot_phase.
 * A device still in NETREG_UNINITIALIZED state is only reported
 * (printk + WARN_ON) and left alone; any state other than
 * NETREG_REGISTERED beyond that is a BUG.
 */
4037 static void rollback_registered(struct net_device *dev)
4038 {
4039         BUG_ON(dev_boot_phase);
4040         ASSERT_RTNL();
4041
4042         /* Some devices call without registering for initialization unwind. */
4043         if (dev->reg_state == NETREG_UNINITIALIZED) {
4044                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4045                                   "was registered\n", dev->name, dev);
4046
4047                 WARN_ON(1);
4048                 return;
4049         }
4050
4051         BUG_ON(dev->reg_state != NETREG_REGISTERED);
4052
4053         /* If device is running, close it first. */
4054         dev_close(dev);
4055
4056         /* And unlink it from device chain. */
4057         unlist_netdevice(dev);
4058
4059         dev->reg_state = NETREG_UNREGISTERING;
4060
             /* Wait via synchronize_net() before tearing anything else down. */
4061         synchronize_net();
4062
4063         /* Shutdown queueing discipline. */
4064         dev_shutdown(dev);
4065
4066
4067         /* Notify protocols, that we are about to destroy
4068            this device. They should clean all the things.
4069         */
4070         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4071
4072         /*
4073          *      Flush the unicast and multicast chains
4074          */
4075         dev_addr_discard(dev);
4076
             /* Give the driver its ndo_uninit callback, if it has one. */
4077         if (dev->netdev_ops->ndo_uninit)
4078                 dev->netdev_ops->ndo_uninit(dev);
4079
4080         /* Notifier chain MUST detach us from master device. */
4081         WARN_ON(dev->master);
4082
4083         /* Remove entries from kobject tree */
4084         netdev_unregister_kobject(dev);
4085
4086         synchronize_net();
4087
             /* Presumably pairs with the dev_hold() in register_netdevice() - TODO confirm. */
4088         dev_put(dev);
4089 }
4090
4091 static void __netdev_init_queue_locks_one(struct net_device *dev,
4092                                           struct netdev_queue *dev_queue,
4093                                           void *_unused)
4094 {
4095         spin_lock_init(&dev_queue->_xmit_lock);
4096         netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4097         dev_queue->xmit_lock_owner = -1;
4098 }
4099
/*
 * Run __netdev_init_queue_locks_one() over the device's TX queues (via
 * netdev_for_each_tx_queue()) and then on its rx_queue.
 */
4100 static void netdev_init_queue_locks(struct net_device *dev)
4101 {
4102         netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4103         __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4104 }
4105
4106 unsigned long netdev_fix_features(unsigned long features, const char *name)
4107 {
4108         /* Fix illegal SG+CSUM combinations. */
4109         if ((features & NETIF_F_SG) &&
4110             !(features & NETIF_F_ALL_CSUM)) {
4111                 if (name)
4112                         printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4113                                "checksum feature.\n", name);
4114                 features &= ~NETIF_F_SG;
4115         }
4116
4117         /* TSO requires that SG is present as well. */
4118         if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4119                 if (name)
4120                         printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4121                                "SG feature.\n", name);
4122                 features &= ~NETIF_F_TSO;
4123         }
4124
4125         if (features & NETIF_F_UFO) {
4126                 if (!(features & NETIF_F_GEN_CSUM)) {
4127                         if (name)
4128                                 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4129                                        "since no NETIF_F_HW_CSUM feature.\n",
4130                                        name);
4131                         features &= ~NETIF_F_UFO;
4132                 }
4133
4134                 if (!(features & NETIF_F_SG)) {
4135                         if (name)
4136                                 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4137                                        "since no NETIF_F_SG feature.\n", name);
4138                         features &= ~NETIF_F_UFO;
4139                 }
4140         }
4141
4142         return features;
4143 }
4144 EXPORT_SYMBOL(netdev_fix_features);
4145
4146 /**
4147  *      register_netdevice      - register a network device
4148  *      @dev: device to register
4149  *
4150  *      Take a completed network device structure and add it to the kernel
4151  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4152  *      chain. 0 is returned on success. A negative errno code is returned
4153  *      on a failure to set up the device, or if the name is a duplicate.
4154  *
4155  *      Callers must hold the rtnl semaphore. You may want
4156  *      register_netdev() instead of this.
4157  *
4158  *      BUGS:
4159  *      The locking appears insufficient to guarantee two parallel registers
4160  *      will not get the same name.
4161  */
4162
4163 int register_netdevice(struct net_device *dev)
4164 {
4165         struct hlist_head *head;
4166         struct hlist_node *p;
4167         int ret;
4168         struct net *net = dev_net(dev);
4169
4170         BUG_ON(dev_boot_phase);
4171         ASSERT_RTNL();
4172
4173         might_sleep();
4174
4175         /* When net_device's are persistent, this will be fatal. */
4176         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4177         BUG_ON(!net);
4178
4179         spin_lock_init(&dev->addr_list_lock);
4180         netdev_set_addr_lockdep_class(dev);
4181         netdev_init_queue_locks(dev);
4182
             /* -1 marks iflink as "not set"; it defaults to ifindex further down. */
4183         dev->iflink = -1;
4184
4185 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4186         /* Netdevice_ops API compatibility support.
4187          * This is temporary until all network devices are converted.
4188          */
4189         if (dev->netdev_ops) {
4190                 const struct net_device_ops *ops = dev->netdev_ops;
4191
                     /* Mirror the ops table into the legacy per-device hooks. */
4192                 dev->init = ops->ndo_init;
4193                 dev->uninit = ops->ndo_uninit;
4194                 dev->open = ops->ndo_open;
4195                 dev->change_rx_flags = ops->ndo_change_rx_flags;
4196                 dev->set_rx_mode = ops->ndo_set_rx_mode;
4197                 dev->set_multicast_list = ops->ndo_set_multicast_list;
4198                 dev->set_mac_address = ops->ndo_set_mac_address;
4199                 dev->validate_addr = ops->ndo_validate_addr;
4200                 dev->do_ioctl = ops->ndo_do_ioctl;
4201                 dev->set_config = ops->ndo_set_config;
4202                 dev->change_mtu = ops->ndo_change_mtu;
4203                 dev->tx_timeout = ops->ndo_tx_timeout;
4204                 dev->get_stats = ops->ndo_get_stats;
4205                 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4206                 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4207                 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4208 #ifdef CONFIG_NET_POLL_CONTROLLER
4209                 dev->poll_controller = ops->ndo_poll_controller;
4210 #endif
4211         } else {
4212                 char drivername[64];
4213                 pr_info("%s (%s): not using net_device_ops yet\n",
4214                         dev->name, netdev_drivername(dev, drivername, 64));
4215
4216                 /* This works only because net_device_ops and the
4217                    compatibility structure are the same. */
4218                 dev->netdev_ops = (void *) &(dev->init);
4219         }
4220 #endif
4221
4222         /* Init, if this function is available */
4223         if (dev->netdev_ops->ndo_init) {
4224                 ret = dev->netdev_ops->ndo_init(dev);
4225                 if (ret) {
                             /* A positive driver return is reported as -EIO. */
4226                         if (ret > 0)
4227                                 ret = -EIO;
4228                         goto out;
4229                 }
4230         }
4231
             /* From here on failures unwind through ndo_uninit (err_uninit). */
4232         if (!dev_valid_name(dev->name)) {
4233                 ret = -EINVAL;
4234                 goto err_uninit;
4235         }
4236
4237         dev->ifindex = dev_new_index(net);
4238         if (dev->iflink == -1)
4239                 dev->iflink = dev->ifindex;
4240
4241         /* Check for existence of name */
4242         head = dev_name_hash(net, dev->name);
4243         hlist_for_each(p, head) {
4244                 struct net_device *d
4245                         = hlist_entry(p, struct net_device, name_hlist);
4246                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4247                         ret = -EEXIST;
4248                         goto err_uninit;
4249                 }
4250         }
4251
4252         /* Fix illegal checksum combinations */
4253         if ((dev->features & NETIF_F_HW_CSUM) &&
4254             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4255                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4256                        dev->name);
4257                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4258         }
4259
4260         if ((dev->features & NETIF_F_NO_CSUM) &&
4261             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4262                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4263                        dev->name);
4264                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4265         }
4266
4267         dev->features = netdev_fix_features(dev->features, dev->name);
4268
4269         /* Enable software GSO if SG is supported. */
4270         if (dev->features & NETIF_F_SG)
4271                 dev->features |= NETIF_F_GSO;
4272
4273         netdev_initialize_kobject(dev);
4274         ret = netdev_register_kobject(dev);
4275         if (ret)
4276                 goto err_uninit;
4277         dev->reg_state = NETREG_REGISTERED;
4278
4279         /*
4280          *      Default initial state at registry is that the
4281          *      device is present.
4282          */
4283
4284         set_bit(__LINK_STATE_PRESENT, &dev->state);
4285
4286         dev_init_scheduler(dev);
4287         dev_hold(dev);
4288         list_netdevice(dev);
4289
4290         /* Notify protocols, that a new device appeared. */
4291         ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4292         ret = notifier_to_errno(ret);
4293         if (ret) {
                     /* A notifier vetoed the device: undo the registration. */
4294                 rollback_registered(dev);
4295                 dev->reg_state = NETREG_UNREGISTERED;
4296         }
4297
4298 out:
4299         return ret;
4300
4301 err_uninit:
4302         if (dev->netdev_ops->ndo_uninit)
4303                 dev->netdev_ops->ndo_uninit(dev);
4304         goto out;
4305 }
4306
4307 /**
4308  *      register_netdev - register a network device
4309  *      @dev: device to register
4310  *
4311  *      Take a completed network device structure and add it to the kernel
4312  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4313  *      chain. 0 is returned on success. A negative errno code is returned
4314  *      on a failure to set up the device, or if the name is a duplicate.
4315  *
4316  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
4317  *      and expands the device name if you passed a format string to
4318  *      alloc_netdev.
4319  */
4320 int register_netdev(struct net_device *dev)
4321 {
4322         int err;
4323
4324         rtnl_lock();
4325
4326         /*
4327          * If the name is a format string the caller wants us to do a
4328          * name allocation.
4329          */
4330         if (strchr(dev->name, '%')) {
4331                 err = dev_alloc_name(dev, dev->name);
4332                 if (err < 0)
4333                         goto out;
4334         }
4335
4336         err = register_netdevice(dev);
4337 out:
4338         rtnl_unlock();
4339         return err;
4340 }
4341 EXPORT_SYMBOL(register_netdev);
4342
4343 /*
4344  * netdev_wait_allrefs - wait until all references are gone.
4345  *
4346  * This is called when unregistering network devices.
4347  *
4348  * Any protocol or device that holds a reference should register
4349  * for netdevice notification, and cleanup and put back the
4350  * reference if they receive an UNREGISTER event.
4351  * We can get stuck here if buggy protocols don't correctly
4352  * call dev_put.
4353  */
4354 static void netdev_wait_allrefs(struct net_device *dev)
4355 {
4356         unsigned long rebroadcast_time, warning_time;
4357
4358         rebroadcast_time = warning_time = jiffies;
4359         while (atomic_read(&dev->refcnt) != 0) {
4360                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4361                         rtnl_lock();
4362
4363                         /* Rebroadcast unregister notification */
4364                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4365
4366                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4367                                      &dev->state)) {
4368                                 /* We must not have linkwatch events
4369                                  * pending on unregister. If this
4370                                  * happens, we simply run the queue
4371                                  * unscheduled, resulting in a noop
4372                                  * for this device.
4373                                  */
4374                                 linkwatch_run_queue();
4375                         }
4376
4377                         __rtnl_unlock();
4378
4379                         rebroadcast_time = jiffies;
4380                 }
4381
4382                 msleep(250);
4383
4384                 if (time_after(jiffies, warning_time + 10 * HZ)) {
4385                         printk(KERN_EMERG "unregister_netdevice: "
4386                                "waiting for %s to become free. Usage "
4387                                "count = %d\n",
4388                                dev->name, atomic_read(&dev->refcnt));
4389                         warning_time = jiffies;
4390                 }
4391         }
4392 }
4393
4394 /* The sequence is:
4395  *
4396  *      rtnl_lock();
4397  *      ...
4398  *      register_netdevice(x1);
4399  *      register_netdevice(x2);
4400  *      ...
4401  *      unregister_netdevice(y1);
4402  *      unregister_netdevice(y2);
4403  *      ...
4404  *      rtnl_unlock();
4405  *      free_netdev(y1);
4406  *      free_netdev(y2);
4407  *
4408  * We are invoked by rtnl_unlock().
4409  * This allows us to deal with problems:
4410  * 1) We can delete sysfs objects which invoke hotplug
4411  *    without deadlocking with linkwatch via keventd.
4412  * 2) Since we run with the RTNL semaphore not held, we can sleep
4413  *    safely in order to wait for the netdev refcnt to drop to zero.
4414  *
4415  * We must not return until all unregister events added during
4416  * the interval the lock was held have been completed.
4417  */
4418 void netdev_run_todo(void)
4419 {
4420         struct list_head list;
4421
4422         /* Snapshot list, allow later requests */
4423         list_replace_init(&net_todo_list, &list);
4424
4425         __rtnl_unlock();
4426
4427         while (!list_empty(&list)) {
4428                 struct net_device *dev
4429                         = list_entry(list.next, struct net_device, todo_list);
4430                 list_del(&dev->todo_list);
4431
4432                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4433                         printk(KERN_ERR "network todo '%s' but state %d\n",
4434                                dev->name, dev->reg_state);
4435                         dump_stack();
4436                         continue;
4437                 }
4438
4439                 dev->reg_state = NETREG_UNREGISTERED;
4440
4441                 on_each_cpu(flush_backlog, dev, 1);
4442
4443                 netdev_wait_allrefs(dev);
4444
4445                 /* paranoia */
4446                 BUG_ON(atomic_read(&dev->refcnt));
4447                 WARN_ON(dev->ip_ptr);
4448                 WARN_ON(dev->ip6_ptr);
4449                 WARN_ON(dev->dn_ptr);
4450
4451                 if (dev->destructor)
4452                         dev->destructor(dev);
4453
4454                 /* Free network device */
4455                 kobject_put(&dev->dev.kobj);
4456         }
4457 }
4458
4459 /**
4460  *      dev_get_stats   - get network device statistics
4461  *      @dev: device to get statistics from
4462  *
4463  *      Get network statistics from device. The device driver may provide
4464  *      its own method by setting dev->netdev_ops->get_stats; otherwise
4465  *      the internal statistics structure is used.
4466  */
4467 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4468  {
4469         const struct net_device_ops *ops = dev->netdev_ops;
4470
4471         if (ops->ndo_get_stats)
4472                 return ops->ndo_get_stats(dev);
4473         else
4474                 return &dev->stats;
4475 }
4476 EXPORT_SYMBOL(dev_get_stats);
4477
4478 static void netdev_init_one_queue(struct net_device *dev,
4479                                   struct netdev_queue *queue,
4480                                   void *_unused)
4481 {
4482         queue->dev = dev;
4483 }
4484
4485 static void netdev_init_queues(struct net_device *dev)
4486 {
4487         netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4488         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4489         spin_lock_init(&dev->tx_global_lock);
4490 }
4491
4492 /**
4493  *      alloc_netdev_mq - allocate network device
4494  *      @sizeof_priv:   size of private data to allocate space for
4495  *      @name:          device name format string
4496  *      @setup:         callback to initialize device
4497  *      @queue_count:   the number of subqueues to allocate
4498  *
4499  *      Allocates a struct net_device with private data area for driver use
4500  *      and performs basic initialization.  Also allocates subquue structs
4501  *      for each queue on the device at the end of the netdevice.
4502  */
4503 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4504                 void (*setup)(struct net_device *), unsigned int queue_count)
4505 {
4506         struct netdev_queue *tx;
4507         struct net_device *dev;
4508         size_t alloc_size;
4509         void *p;
4510
4511         BUG_ON(strlen(name) >= sizeof(dev->name));
4512
4513         alloc_size = sizeof(struct net_device);
4514         if (sizeof_priv) {
4515                 /* ensure 32-byte alignment of private area */
4516                 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4517                 alloc_size += sizeof_priv;
4518         }
4519         /* ensure 32-byte alignment of whole construct */
4520         alloc_size += NETDEV_ALIGN_CONST;
4521
4522         p = kzalloc(alloc_size, GFP_KERNEL);
4523         if (!p) {
4524                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4525                 return NULL;
4526         }
4527
4528         tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4529         if (!tx) {
4530                 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4531                        "tx qdiscs.\n");
4532                 kfree(p);
4533                 return NULL;
4534         }
4535
4536         dev = (struct net_device *)
4537                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4538         dev->padded = (char *)dev - (char *)p;
4539         dev_net_set(dev, &init_net);
4540
4541         dev->_tx = tx;
4542         dev->num_tx_queues = queue_count;
4543         dev->real_num_tx_queues = queue_count;
4544
4545         dev->gso_max_size = GSO_MAX_SIZE;
4546
4547         netdev_init_queues(dev);
4548
4549         INIT_LIST_HEAD(&dev->napi_list);
4550         setup(dev);
4551         strcpy(dev->name, name);
4552         return dev;
4553 }
4554 EXPORT_SYMBOL(alloc_netdev_mq);
4555
4556 /**
4557  *      free_netdev - free network device
4558  *      @dev: device
4559  *
4560  *      This function does the last stage of destroying an allocated device
4561  *      interface. The reference to the device object is released.
4562  *      If this is the last reference then it will be freed.
4563  */
4564 void free_netdev(struct net_device *dev)
4565 {
4566         struct napi_struct *p, *n;
4567
4568         release_net(dev_net(dev));
4569
4570         kfree(dev->_tx);
4571
4572         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
4573                 netif_napi_del(p);
4574
4575         /*  Compatibility with error handling in drivers */
4576         if (dev->reg_state == NETREG_UNINITIALIZED) {
4577                 kfree((char *)dev - dev->padded);
4578                 return;
4579         }
4580
4581         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4582         dev->reg_state = NETREG_RELEASED;
4583
4584         /* will free via device release */
4585         put_device(&dev->dev);
4586 }
4587
/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Waits, by means of synchronize_rcu(), until packets that are being
 *	received right now have been dealt with.  Packets that start being
 *	received later are not held up.  The caller must be able to sleep.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
4599
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	Shuts down a device interface and removes it from the kernel tables.
 *	The remainder of the unregister work is queued and only carried out
 *	once the rtnl semaphore is released.
 *
 *	The caller must hold the rtnl semaphore; most users want
 *	unregister_netdev(), which takes it for them.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);

	/* The rest of the teardown is finished after the unlock. */
	net_set_todo(dev);
}
4619
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	Shuts down a device interface and removes it from the kernel tables.
 *
 *	Thin wrapper that calls unregister_netdevice() with the rtnl
 *	semaphore held.  This is normally the function to use, rather than
 *	unregister_netdevice() itself.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
4639
4640 /**
4641  *      dev_change_net_namespace - move device to different nethost namespace
4642  *      @dev: device
4643  *      @net: network namespace
4644  *      @pat: If not NULL name pattern to try if the current device name
4645  *            is already taken in the destination network namespace.
4646  *
4647  *      This function shuts down a device interface and moves it
4648  *      to a new network namespace. On success 0 is returned, on
4649  *      a failure a netagive errno code is returned.
4650  *
4651  *      Callers must hold the rtnl semaphore.
4652  */
4653
4654 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4655 {
4656         char buf[IFNAMSIZ];
4657         const char *destname;
4658         int err;
4659
4660         ASSERT_RTNL();
4661
4662         /* Don't allow namespace local devices to be moved. */
4663         err = -EINVAL;
4664         if (dev->features & NETIF_F_NETNS_LOCAL)
4665                 goto out;
4666
4667 #ifdef CONFIG_SYSFS
4668         /* Don't allow real devices to be moved when sysfs
4669          * is enabled.
4670          */
4671         err = -EINVAL;
4672         if (dev->dev.parent)
4673                 goto out;
4674 #endif
4675
4676         /* Ensure the device has been registrered */
4677         err = -EINVAL;
4678         if (dev->reg_state != NETREG_REGISTERED)
4679                 goto out;
4680
4681         /* Get out if there is nothing todo */
4682         err = 0;
4683         if (net_eq(dev_net(dev), net))
4684                 goto out;
4685
4686         /* Pick the destination device name, and ensure
4687          * we can use it in the destination network namespace.
4688          */
4689         err = -EEXIST;
4690         destname = dev->name;
4691         if (__dev_get_by_name(net, destname)) {
4692                 /* We get here if we can't use the current device name */
4693                 if (!pat)
4694                         goto out;
4695                 if (!dev_valid_name(pat))
4696                         goto out;
4697                 if (strchr(pat, '%')) {
4698                         if (__dev_alloc_name(net, pat, buf) < 0)
4699                                 goto out;
4700                         destname = buf;
4701                 } else
4702                         destname = pat;
4703                 if (__dev_get_by_name(net, destname))
4704                         goto out;
4705         }
4706
4707         /*
4708          * And now a mini version of register_netdevice unregister_netdevice.
4709          */
4710
4711         /* If device is running close it first. */
4712         dev_close(dev);
4713
4714         /* And unlink it from device chain */
4715         err = -ENODEV;
4716         unlist_netdevice(dev);
4717
4718         synchronize_net();
4719
4720         /* Shutdown queueing discipline. */
4721         dev_shutdown(dev);
4722
4723         /* Notify protocols, that we are about to destroy
4724            this device. They should clean all the things.
4725         */
4726         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4727
4728         /*
4729          *      Flush the unicast and multicast chains
4730          */
4731         dev_addr_discard(dev);
4732
4733         netdev_unregister_kobject(dev);
4734
4735         /* Actually switch the network namespace */
4736         dev_net_set(dev, net);
4737
4738         /* Assign the new device name */
4739         if (destname != dev->name)
4740                 strcpy(dev->name, destname);
4741
4742         /* If there is an ifindex conflict assign a new one */
4743         if (__dev_get_by_index(net, dev->ifindex)) {
4744                 int iflink = (dev->iflink == dev->ifindex);
4745                 dev->ifindex = dev_new_index(net);
4746                 if (iflink)
4747                         dev->iflink = dev->ifindex;
4748         }
4749
4750         /* Fixup kobjects */
4751         err = netdev_register_kobject(dev);
4752         WARN_ON(err);
4753
4754         /* Add the device back in the hashes */
4755         list_netdevice(dev);
4756
4757         /* Notify protocols, that a new device appeared. */
4758         call_netdevice_notifiers(NETDEV_REGISTER, dev);
4759
4760         synchronize_net();
4761         err = 0;
4762 out:
4763         return err;
4764 }
4765
4766 static int dev_cpu_callback(struct notifier_block *nfb,
4767                             unsigned long action,
4768                             void *ocpu)
4769 {
4770         struct sk_buff **list_skb;
4771         struct Qdisc **list_net;
4772         struct sk_buff *skb;
4773         unsigned int cpu, oldcpu = (unsigned long)ocpu;
4774         struct softnet_data *sd, *oldsd;
4775
4776         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4777                 return NOTIFY_OK;
4778
4779         local_irq_disable();
4780         cpu = smp_processor_id();
4781         sd = &per_cpu(softnet_data, cpu);
4782         oldsd = &per_cpu(softnet_data, oldcpu);
4783
4784         /* Find end of our completion_queue. */
4785         list_skb = &sd->completion_queue;
4786         while (*list_skb)
4787                 list_skb = &(*list_skb)->next;
4788         /* Append completion queue from offline CPU. */
4789         *list_skb = oldsd->completion_queue;
4790         oldsd->completion_queue = NULL;
4791
4792         /* Find end of our output_queue. */
4793         list_net = &sd->output_queue;
4794         while (*list_net)
4795                 list_net = &(*list_net)->next_sched;
4796         /* Append output queue from offline CPU. */
4797         *list_net = oldsd->output_queue;
4798         oldsd->output_queue = NULL;
4799
4800         raise_softirq_irqoff(NET_TX_SOFTIRQ);
4801         local_irq_enable();
4802
4803         /* Process offline CPU's input_pkt_queue */
4804         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4805                 netif_rx(skb);
4806
4807         return NOTIFY_OK;
4808 }
4809
4810
4811 /**
4812  *      netdev_increment_features - increment feature set by one
4813  *      @all: current feature set
4814  *      @one: new feature set
4815  *      @mask: mask feature set
4816  *
4817  *      Computes a new feature set after adding a device with feature set
4818  *      @one to the master device with current feature set @all.  Will not
4819  *      enable anything that is off in @mask. Returns the new feature set.
4820  */
4821 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4822                                         unsigned long mask)
4823 {
4824         /* If device needs checksumming, downgrade to it. */
4825         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4826                 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4827         else if (mask & NETIF_F_ALL_CSUM) {
4828                 /* If one device supports v4/v6 checksumming, set for all. */
4829                 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4830                     !(all & NETIF_F_GEN_CSUM)) {
4831                         all &= ~NETIF_F_ALL_CSUM;
4832                         all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4833                 }
4834
4835                 /* If one device supports hw checksumming, set for all. */
4836                 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4837                         all &= ~NETIF_F_ALL_CSUM;
4838                         all |= NETIF_F_HW_CSUM;
4839                 }
4840         }
4841
4842         one |= NETIF_F_ALL_CSUM;
4843
4844         one |= all & NETIF_F_ONE_FOR_ALL;
4845         all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4846         all |= one & mask & NETIF_F_ONE_FOR_ALL;
4847
4848         return all;
4849 }
4850 EXPORT_SYMBOL(netdev_increment_features);
4851
4852 static struct hlist_head *netdev_create_hash(void)
4853 {
4854         int i;
4855         struct hlist_head *hash;
4856
4857         hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4858         if (hash != NULL)
4859                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4860                         INIT_HLIST_HEAD(&hash[i]);
4861
4862         return hash;
4863 }
4864
4865 /* Initialize per network namespace state */
4866 static int __net_init netdev_init(struct net *net)
4867 {
4868         INIT_LIST_HEAD(&net->dev_base_head);
4869
4870         net->dev_name_head = netdev_create_hash();
4871         if (net->dev_name_head == NULL)
4872                 goto err_name;
4873
4874         net->dev_index_head = netdev_create_hash();
4875         if (net->dev_index_head == NULL)
4876                 goto err_idx;
4877
4878         return 0;
4879
4880 err_idx:
4881         kfree(net->dev_name_head);
4882 err_name:
4883         return -ENOMEM;
4884 }
4885
4886 /**
4887  *      netdev_drivername - network driver for the device
4888  *      @dev: network device
4889  *      @buffer: buffer for resulting name
4890  *      @len: size of buffer
4891  *
4892  *      Determine network driver for device.
4893  */
4894 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4895 {
4896         const struct device_driver *driver;
4897         const struct device *parent;
4898
4899         if (len <= 0 || !buffer)
4900                 return buffer;
4901         buffer[0] = 0;
4902
4903         parent = dev->dev.parent;
4904
4905         if (!parent)
4906                 return buffer;
4907
4908         driver = parent->driver;
4909         if (driver && driver->name)
4910                 strlcpy(buffer, driver->name, len);
4911         return buffer;
4912 }
4913
4914 static void __net_exit netdev_exit(struct net *net)
4915 {
4916         kfree(net->dev_name_head);
4917         kfree(net->dev_index_head);
4918 }
4919
4920 static struct pernet_operations __net_initdata netdev_net_ops = {
4921         .init = netdev_init,
4922         .exit = netdev_exit,
4923 };
4924
4925 static void __net_exit default_device_exit(struct net *net)
4926 {
4927         struct net_device *dev;
4928         /*
4929          * Push all migratable of the network devices back to the
4930          * initial network namespace
4931          */
4932         rtnl_lock();
4933 restart:
4934         for_each_netdev(net, dev) {
4935                 int err;
4936                 char fb_name[IFNAMSIZ];
4937
4938                 /* Ignore unmoveable devices (i.e. loopback) */
4939                 if (dev->features & NETIF_F_NETNS_LOCAL)
4940                         continue;
4941
4942                 /* Delete virtual devices */
4943                 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4944                         dev->rtnl_link_ops->dellink(dev);
4945                         goto restart;
4946                 }
4947
4948                 /* Push remaing network devices to init_net */
4949                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4950                 err = dev_change_net_namespace(dev, &init_net, fb_name);
4951                 if (err) {
4952                         printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4953                                 __func__, dev->name, err);
4954                         BUG();
4955                 }
4956                 goto restart;
4957         }
4958         rtnl_unlock();
4959 }
4960
4961 static struct pernet_operations __net_initdata default_device_ops = {
4962         .exit = default_device_exit,
4963 };
4964
4965 /*
4966  *      Initialize the DEV module. At boot time this walks the device list and
4967  *      unhooks any devices that fail to initialise (normally hardware not
4968  *      present) and leaves us with a valid list of present and active devices.
4969  *
4970  */
4971
4972 /*
4973  *       This is called single threaded during boot, so no need
4974  *       to take the rtnl semaphore.
4975  */
4976 static int __init net_dev_init(void)
4977 {
4978         int i, rc = -ENOMEM;
4979
4980         BUG_ON(!dev_boot_phase);
4981
4982         if (dev_proc_init())
4983                 goto out;
4984
4985         if (netdev_kobject_init())
4986                 goto out;
4987
4988         INIT_LIST_HEAD(&ptype_all);
4989         for (i = 0; i < PTYPE_HASH_SIZE; i++)
4990                 INIT_LIST_HEAD(&ptype_base[i]);
4991
4992         if (register_pernet_subsys(&netdev_net_ops))
4993                 goto out;
4994
4995         /*
4996          *      Initialise the packet receive queues.
4997          */
4998
4999         for_each_possible_cpu(i) {
5000                 struct softnet_data *queue;
5001
5002                 queue = &per_cpu(softnet_data, i);
5003                 skb_queue_head_init(&queue->input_pkt_queue);
5004                 queue->completion_queue = NULL;
5005                 INIT_LIST_HEAD(&queue->poll_list);
5006
5007                 queue->backlog.poll = process_backlog;
5008                 queue->backlog.weight = weight_p;
5009                 queue->backlog.gro_list = NULL;
5010         }
5011
5012         dev_boot_phase = 0;
5013
5014         /* The loopback device is special if any other network devices
5015          * is present in a network namespace the loopback device must
5016          * be present. Since we now dynamically allocate and free the
5017          * loopback device ensure this invariant is maintained by
5018          * keeping the loopback device as the first device on the
5019          * list of network devices.  Ensuring the loopback devices
5020          * is the first device that appears and the last network device
5021          * that disappears.
5022          */
5023         if (register_pernet_device(&loopback_net_ops))
5024                 goto out;
5025
5026         if (register_pernet_device(&default_device_ops))
5027                 goto out;
5028
5029         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5030         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5031
5032         hotcpu_notifier(dev_cpu_callback, 0);
5033         dst_init();
5034         dev_mcast_init();
5035         #ifdef CONFIG_NET_DMA
5036         dmaengine_get();
5037         #endif
5038         rc = 0;
5039 out:
5040         return rc;
5041 }
5042
5043 subsys_initcall(net_dev_init);
5044
5045 EXPORT_SYMBOL(__dev_get_by_index);
5046 EXPORT_SYMBOL(__dev_get_by_name);
5047 EXPORT_SYMBOL(__dev_remove_pack);
5048 EXPORT_SYMBOL(dev_valid_name);
5049 EXPORT_SYMBOL(dev_add_pack);
5050 EXPORT_SYMBOL(dev_alloc_name);
5051 EXPORT_SYMBOL(dev_close);
5052 EXPORT_SYMBOL(dev_get_by_flags);
5053 EXPORT_SYMBOL(dev_get_by_index);
5054 EXPORT_SYMBOL(dev_get_by_name);
5055 EXPORT_SYMBOL(dev_open);
5056 EXPORT_SYMBOL(dev_queue_xmit);
5057 EXPORT_SYMBOL(dev_remove_pack);
5058 EXPORT_SYMBOL(dev_set_allmulti);
5059 EXPORT_SYMBOL(dev_set_promiscuity);
5060 EXPORT_SYMBOL(dev_change_flags);
5061 EXPORT_SYMBOL(dev_set_mtu);
5062 EXPORT_SYMBOL(dev_set_mac_address);
5063 EXPORT_SYMBOL(free_netdev);
5064 EXPORT_SYMBOL(netdev_boot_setup_check);
5065 EXPORT_SYMBOL(netdev_set_master);
5066 EXPORT_SYMBOL(netdev_state_change);
5067 EXPORT_SYMBOL(netif_receive_skb);
5068 EXPORT_SYMBOL(netif_rx);
5069 EXPORT_SYMBOL(register_gifconf);
5070 EXPORT_SYMBOL(register_netdevice);
5071 EXPORT_SYMBOL(register_netdevice_notifier);
5072 EXPORT_SYMBOL(skb_checksum_help);
5073 EXPORT_SYMBOL(synchronize_net);
5074 EXPORT_SYMBOL(unregister_netdevice);
5075 EXPORT_SYMBOL(unregister_netdevice_notifier);
5076 EXPORT_SYMBOL(net_enable_timestamp);
5077 EXPORT_SYMBOL(net_disable_timestamp);
5078 EXPORT_SYMBOL(dev_get_flags);
5079
5080 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5081 EXPORT_SYMBOL(br_handle_frame_hook);
5082 EXPORT_SYMBOL(br_fdb_get_hook);
5083 EXPORT_SYMBOL(br_fdb_put_hook);
5084 #endif
5085
5086 EXPORT_SYMBOL(dev_load);
5087
5088 EXPORT_PER_CPU_SYMBOL(softnet_data);