Merge branch 'master' into csb1725

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e953c6ad6e6d1ea3fd7e22fddc8f5f27ba1f8b38..3b16c34ed86e93ddfacd0338ef42f687ab70fce7 100644
@@ -76,6 +76,7 @@
 #include <linux/if_vlan.h>
 #include <linux/if_bonding.h>
 #include <linux/jiffies.h>
+#include <linux/preempt.h>
 #include <net/route.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -109,6 +110,7 @@ static char *arp_validate;
 static char *fail_over_mac;
 static int all_slaves_active = 0;
 static struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
 
 module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
@@ -163,9 +165,15 @@ module_param(all_slaves_active, int, 0);
 MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface "
                                     "by setting active flag for all slaves.  "
                                     "0 for never (default), 1 for always.");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
 
 /*----------------------------- Global variables ----------------------------*/
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+atomic_t netpoll_block_tx = ATOMIC_INIT(0);
+#endif
+
 static const char * const version =
        DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
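
The block_netpoll_tx()/unblock_netpoll_tx() pairs and the is_netpoll_tx_blocked() check used throughout the hunks below are built on this counter. They are not defined in this file; a minimal sketch of the companion bonding.h helpers, assuming the header change from the same series (details may differ):

#ifdef CONFIG_NET_POLL_CONTROLLER
extern atomic_t netpoll_block_tx;

/* taken while a failover manipulates slave state under bond locks */
static inline void block_netpoll_tx(void)
{
        atomic_inc(&netpoll_block_tx);
}

static inline void unblock_netpoll_tx(void)
{
        atomic_dec(&netpoll_block_tx);
}

/* only meaningful on the netpoll path; zero cost otherwise */
static inline int is_netpoll_tx_blocked(struct net_device *dev)
{
        if (unlikely(dev->priv_flags & IFF_IN_NETPOLL))
                return atomic_read(&netpoll_block_tx);
        return 0;
}
#else
#define block_netpoll_tx()
#define unblock_netpoll_tx()
#define is_netpoll_tx_blocked(dev) (0)
#endif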
 
@@ -176,9 +184,6 @@ static int arp_ip_count;
 static int bond_mode   = BOND_MODE_ROUNDROBIN;
 static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
 static int lacp_fast;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static int disable_netpoll = 1;
-#endif
 
 const struct bond_parm_tbl bond_lacp_tbl[] = {
 {      "slow",         AD_LACP_SLOW},
@@ -307,6 +312,7 @@ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
 
        pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
 
+       block_netpoll_tx();
        write_lock_bh(&bond->lock);
 
        list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -341,6 +347,7 @@ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
 
 out:
        write_unlock_bh(&bond->lock);
+       unblock_netpoll_tx();
        return res;
 }
 
@@ -411,46 +418,19 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
  * @bond: bond device that got this skb for tx.
  * @skb: hw accel VLAN tagged skb to transmit
  * @slave_dev: slave that is supposed to xmit this skbuff
- *
- * When the bond gets an skb to transmit that is
- * already hardware accelerated VLAN tagged, and it
- * needs to relay this skb to a slave that is not
- * hw accel capable, the skb needs to be "unaccelerated",
- * i.e. strip the hwaccel tag and re-insert it as part
- * of the payload.
  */
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
                        struct net_device *slave_dev)
 {
-       unsigned short uninitialized_var(vlan_id);
-
-       /* Test vlan_list not vlgrp to catch and handle 802.1p tags */
-       if (!list_empty(&bond->vlan_list) &&
-           !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
-           vlan_get_tag(skb, &vlan_id) == 0) {
-               skb->dev = slave_dev;
-               skb = vlan_put_tag(skb, vlan_id);
-               if (!skb) {
-                       /* vlan_put_tag() frees the skb in case of error,
-                        * so return success here so the calling functions
-                        * won't attempt to free is again.
-                        */
-                       return 0;
-               }
-       } else {
-               skb->dev = slave_dev;
-       }
-
+       skb->dev = slave_dev;
        skb->priority = 1;
 #ifdef CONFIG_NET_POLL_CONTROLLER
        if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
                struct netpoll *np = bond->dev->npinfo->netpoll;
                slave_dev->npinfo = bond->dev->npinfo;
-               np->real_dev = np->dev = skb->dev;
                slave_dev->priv_flags |= IFF_IN_NETPOLL;
-               netpoll_send_skb(np, skb);
+               netpoll_send_skb_on_dev(np, skb, slave_dev);
                slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
-               np->dev = bond->dev;
        } else
 #endif
                dev_queue_xmit(skb);
@@ -488,9 +468,9 @@ static void bond_vlan_rx_register(struct net_device *bond_dev,
        struct slave *slave;
        int i;
 
-       write_lock(&bond->lock);
+       write_lock_bh(&bond->lock);
        bond->vlgrp = grp;
-       write_unlock(&bond->lock);
+       write_unlock_bh(&bond->lock);
 
        bond_for_each_slave(bond, slave, i) {
                struct net_device *slave_dev = slave->dev;
@@ -865,26 +845,61 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
        struct in_device *in_dev;
        struct ip_mc_list *im;
 
        rcu_read_lock();
-       in_dev = __in_dev_get_rcu(bond->dev);
+       in_dev = __in_dev_get_rcu(dev);
        if (in_dev) {
+               read_lock(&in_dev->mc_list_lock);
                for (im = in_dev->mc_list; im; im = im->next)
                        ip_mc_rejoin_group(im);
+               read_unlock(&in_dev->mc_list_lock);
        }
 
        rcu_read_unlock();
 }
 
+/*
+ * Retrieve the list of registered multicast addresses for the bonding
+ * device and retransmit an IGMP JOIN request to the current active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+       struct net_device *vlan_dev;
+       struct vlan_entry *vlan;
+
+       read_lock(&bond->lock);
+
+       /* rejoin all groups on bond device */
+       __bond_resend_igmp_join_requests(bond->dev);
+
+       /* rejoin all groups on vlan devices */
+       if (bond->vlgrp) {
+               list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+                       vlan_dev = vlan_group_get_device(bond->vlgrp,
+                                                        vlan->vlan_id);
+                       if (vlan_dev)
+                               __bond_resend_igmp_join_requests(vlan_dev);
+               }
+       }
+
+       if (--bond->igmp_retrans > 0)
+               queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
+       read_unlock(&bond->lock);
+}
+
+static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
+{
+       struct bonding *bond = container_of(work, struct bonding,
+                                                       mcast_work.work);
+       bond_resend_igmp_join_requests(bond);
+}
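
The rejoin machinery above leans on fields that the same series presumably adds to bonding.h; a sketch of those additions, with names inferred from their uses in this file (the default count is an assumption):

#define BOND_DEFAULT_RESEND_IGMP        1       /* assumed default */

struct bond_params {
        /* ... existing members ... */
        int resend_igmp;        /* reports to send after failover */
};

struct bonding {
        /* ... existing members ... */
        s8 igmp_retrans;                /* countdown of pending resends */
        struct delayed_work mcast_work; /* runs bond_resend_igmp_join_requests */
};

Since the work item re-queues itself at HZ/5, a resend_igmp of N spreads the N passes over roughly (N - 1) * 200 ms after the first immediate one.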
+
 /*
  * flush all members of flush->mc_list from device dev->mc_list
  */
@@ -944,7 +959,6 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 
                netdev_for_each_mc_addr(ha, bond->dev)
                        dev_mc_add(new_active->dev, ha->addr);
-               bond_resend_igmp_join_requests(bond);
        }
 }
 
@@ -1164,11 +1178,13 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                                bond_do_fail_over_mac(bond, new_active,
                                                      old_active);
 
-                       bond->send_grat_arp = bond->params.num_grat_arp;
-                       bond_send_gratuitous_arp(bond);
+                       if (netif_running(bond->dev)) {
+                               bond->send_grat_arp = bond->params.num_grat_arp;
+                               bond_send_gratuitous_arp(bond);
 
-                       bond->send_unsol_na = bond->params.num_unsol_na;
-                       bond_send_unsolicited_na(bond);
+                               bond->send_unsol_na = bond->params.num_unsol_na;
+                               bond_send_unsolicited_na(bond);
+                       }
 
                        write_unlock_bh(&bond->curr_slave_lock);
                        read_unlock(&bond->lock);
@@ -1180,9 +1196,13 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                }
        }
 
-       /* resend IGMP joins since all were sent on curr_active_slave */
-       if (bond->params.mode == BOND_MODE_ROUNDROBIN) {
-               bond_resend_igmp_join_requests(bond);
+       /* resend IGMP joins since active slave has changed or
+        * all were sent on curr_active_slave */
+       if (((USES_PRIMARY(bond->params.mode) && new_active) ||
+            bond->params.mode == BOND_MODE_ROUNDROBIN) &&
+           netif_running(bond->dev)) {
+               bond->igmp_retrans = bond->params.resend_igmp;
+               queue_delayed_work(bond->wq, &bond->mcast_work, 0);
        }
 }
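
USES_PRIMARY() picks out the modes that steer all traffic through one active slave, which is why a failover there needs the same IGMP rejoin as round-robin; a sketch of the macro as assumed from bonding.h:

#define USES_PRIMARY(mode)                              \
        (((mode) == BOND_MODE_ACTIVEBACKUP) ||          \
         ((mode) == BOND_MODE_TLB) ||                   \
         ((mode) == BOND_MODE_ALB))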
 
@@ -1294,9 +1314,14 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
 
 static void bond_poll_controller(struct net_device *bond_dev)
 {
-       struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
-       if (dev != bond_dev)
-               netpoll_poll_dev(dev);
+       struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+       int i;
+
+       bond_for_each_slave(bond, slave, i) {
+               if (slave->dev && IS_UP(slave->dev))
+                       netpoll_poll_dev(slave->dev);
+       }
 }
 
 static void bond_netpoll_cleanup(struct net_device *bond_dev)
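
The rewritten bond_poll_controller() walks every slave instead of chasing the single real_dev recorded in the netpoll struct. For reference, a sketch of the bond_for_each_slave() iterator it uses, as found in the bonding.h of this era (reproduced from memory, may differ in detail):

#define bond_for_each_slave_from(bond, pos, cnt, start) \
        for (cnt = 0, pos = start;                      \
             cnt < (bond)->slave_cnt;                   \
             cnt++, pos = (pos)->next)

#define bond_for_each_slave(bond, pos, cnt)             \
        bond_for_each_slave_from(bond, pos, cnt, (bond)->first_slave)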
@@ -1529,7 +1554,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
        /* If this is the first slave, then we need to set the master's hardware
         * address to be the same as the slave's. */
-       if (bond->slave_cnt == 0)
+       if (is_zero_ether_addr(bond->dev->dev_addr))
                memcpy(bond->dev->dev_addr, slave_dev->dev_addr,
                       slave_dev->addr_len);
 
@@ -1763,23 +1788,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
        bond_set_carrier(bond);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-       /*
-        * Netpoll and bonding is broken, make sure it is not initialized
-        * until it is fixed.
-        */
-       if (disable_netpoll) {
+       if (slaves_support_netpoll(bond_dev)) {
+               bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+               if (bond_dev->npinfo)
+                       slave_dev->npinfo = bond_dev->npinfo;
+       } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
                bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
-       } else {
-               if (slaves_support_netpoll(bond_dev)) {
-                       bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
-                       if (bond_dev->npinfo)
-                               slave_dev->npinfo = bond_dev->npinfo;
-               } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
-                       bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
-                       pr_info("New slave device %s does not support netpoll\n",
-                               slave_dev->name);
-                       pr_info("Disabling netpoll support for %s\n", bond_dev->name);
-               }
+               pr_info("New slave device %s does not support netpoll\n",
+                       slave_dev->name);
+               pr_info("Disabling netpoll support for %s\n", bond_dev->name);
        }
 #endif
        read_unlock(&bond->lock);
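
slaves_support_netpoll() (declared earlier in this file and unchanged here) answers whether every current slave can be polled; a rough sketch of the check, assuming the version in this tree (callers hold bond->lock):

static bool slaves_support_netpoll(struct net_device *bond_dev)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave;
        bool ret = true;
        int i = 0;

        bond_for_each_slave(bond, slave, i) {
                if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
                    !slave->dev->netdev_ops->ndo_poll_controller)
                        ret = false;
        }
        /* no slaves at all also means no netpoll */
        return i != 0 && ret;
}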
@@ -1851,6 +1868,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
                return -EINVAL;
        }
 
+       block_netpoll_tx();
        netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
        write_lock_bh(&bond->lock);
 
@@ -1860,6 +1878,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
                pr_info("%s: %s not enslaved\n",
                        bond_dev->name, slave_dev->name);
                write_unlock_bh(&bond->lock);
+               unblock_netpoll_tx();
                return -EINVAL;
        }
 
@@ -1953,6 +1972,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
        }
 
        write_unlock_bh(&bond->lock);
+       unblock_netpoll_tx();
 
        /* must do this from outside any spinlocks */
        bond_destroy_slave_symlinks(bond_dev, slave_dev);
@@ -1983,10 +2003,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 #ifdef CONFIG_NET_POLL_CONTROLLER
        read_lock_bh(&bond->lock);
 
-        /* Make sure netpoll over stays disabled until fixed. */
-       if (!disable_netpoll)
-               if (slaves_support_netpoll(bond_dev))
-                               bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+       if (slaves_support_netpoll(bond_dev))
+               bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
        read_unlock_bh(&bond->lock);
        if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
                slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
@@ -2019,8 +2037,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 * First release a slave and then destroy the bond if no more slaves are left.
 * Must be under rtnl_lock when this function is called.
 */
-int  bond_release_and_destroy(struct net_device *bond_dev,
-                             struct net_device *slave_dev)
+static int  bond_release_and_destroy(struct net_device *bond_dev,
+                                    struct net_device *slave_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        int ret;
@@ -2142,7 +2160,6 @@ static int bond_release_all(struct net_device *bond_dev)
 
 out:
        write_unlock_bh(&bond->lock);
-
        return 0;
 }
 
@@ -2191,9 +2208,11 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
            (old_active) &&
            (new_active->link == BOND_LINK_UP) &&
            IS_UP(new_active->dev)) {
+               block_netpoll_tx();
                write_lock_bh(&bond->curr_slave_lock);
                bond_change_active_slave(bond, new_active);
                write_unlock_bh(&bond->curr_slave_lock);
+               unblock_netpoll_tx();
        } else
                res = -EINVAL;
 
@@ -2368,8 +2387,11 @@ static void bond_miimon_commit(struct bonding *bond)
                                slave->state = BOND_STATE_BACKUP;
                        }
 
-                       pr_info("%s: link status definitely up for interface %s.\n",
-                               bond->dev->name, slave->dev->name);
+                       bond_update_speed_duplex(slave);
+
+                       pr_info("%s: link status definitely up for interface %s, %d Mbps %s duplex.\n",
+                               bond->dev->name, slave->dev->name,
+                               slave->speed, slave->duplex ? "full" : "half");
 
                        /* notify ad that the link status has changed */
                        if (bond->params.mode == BOND_MODE_8023AD)
@@ -2422,9 +2444,11 @@ static void bond_miimon_commit(struct bonding *bond)
 
 do_failover:
                ASSERT_RTNL();
+               block_netpoll_tx();
                write_lock_bh(&bond->curr_slave_lock);
                bond_select_active_slave(bond);
                write_unlock_bh(&bond->curr_slave_lock);
+               unblock_netpoll_tx();
        }
 
        bond_set_carrier(bond);
@@ -2867,11 +2891,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
        }
 
        if (do_failover) {
+               block_netpoll_tx();
                write_lock_bh(&bond->curr_slave_lock);
 
                bond_select_active_slave(bond);
 
                write_unlock_bh(&bond->curr_slave_lock);
+               unblock_netpoll_tx();
        }
 
 re_arm:
@@ -3030,9 +3056,11 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
 
 do_failover:
                ASSERT_RTNL();
+               block_netpoll_tx();
                write_lock_bh(&bond->curr_slave_lock);
                bond_select_active_slave(bond);
                write_unlock_bh(&bond->curr_slave_lock);
+               unblock_netpoll_tx();
        }
 
        bond_set_carrier(bond);
@@ -3312,6 +3340,8 @@ static void bond_info_show_slave(struct seq_file *seq,
        seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
        seq_printf(seq, "MII Status: %s\n",
                   (slave->link == BOND_LINK_UP) ?  "up" : "down");
+       seq_printf(seq, "Speed: %d Mbps\n", slave->speed);
+       seq_printf(seq, "Duplex: %s\n", slave->duplex ? "full" : "half");
        seq_printf(seq, "Link Failure Count: %u\n",
                   slave->link_failure_count);
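
With the two added seq_printf() calls, each slave stanza in /proc/net/bonding/bondN reports speed and duplex, e.g. (values illustrative):

Slave Interface: eth0
MII Status: up
Speed: 1000 Mbps
Duplex: full
Link Failure Count: 0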
 
@@ -3744,6 +3774,8 @@ static int bond_open(struct net_device *bond_dev)
 
        bond->kill_timers = 0;
 
+       INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
+
        if (bond_is_lb(bond)) {
                /* bond_alb_initialize must be called before the timer
                 * is started.
@@ -3828,6 +3860,8 @@ static int bond_close(struct net_device *bond_dev)
                break;
        }
 
+       if (delayed_work_pending(&bond->mcast_work))
+               cancel_delayed_work(&bond->mcast_work);
 
        if (bond_is_lb(bond)) {
                /* Must be called only after all
@@ -4514,6 +4548,13 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct bonding *bond = netdev_priv(dev);
 
+       /*
+        * If we risk deadlock from transmitting this in the
+        * netpoll path, tell netpoll to queue the frame for later tx
+        */
+       if (is_netpoll_tx_blocked(dev))
+               return NETDEV_TX_BUSY;
+
        if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
                if (!bond_slave_override(bond, skb))
                        return NETDEV_TX_OK;
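
Returning NETDEV_TX_BUSY only works because netpoll defers a busy frame instead of retrying under the bond's locks; a simplified, hypothetical sketch of that consumer side (modeled on net/core/netpoll.c of the period, not copied from it):

/* np_defer_or_xmit() is a hypothetical name for illustration */
static void np_defer_or_xmit(struct netpoll_info *npinfo,
                             struct net_device *dev, struct sk_buff *skb)
{
        const struct net_device_ops *ops = dev->netdev_ops;

        /* anything but NETDEV_TX_OK: park the skb on the retry queue,
         * drained later from a work item outside the locked region */
        if (ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK)
                skb_queue_tail(&npinfo->txq, skb);
}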
@@ -4678,6 +4719,10 @@ static void bond_setup(struct net_device *bond_dev)
                               NETIF_F_HW_VLAN_RX |
                               NETIF_F_HW_VLAN_FILTER);
 
+       /* By default, we enable GRO on bonding devices.
+        * Actual support requires that low-level drivers be GRO ready.
+        */
+       bond_dev->features |= NETIF_F_GRO;
 }
 
 static void bond_work_cancel_all(struct bonding *bond)
@@ -4699,6 +4744,9 @@ static void bond_work_cancel_all(struct bonding *bond)
        if (bond->params.mode == BOND_MODE_8023AD &&
            delayed_work_pending(&bond->ad_work))
                cancel_delayed_work(&bond->ad_work);
+
+       if (delayed_work_pending(&bond->mcast_work))
+               cancel_delayed_work(&bond->mcast_work);
 }
 
 /*
@@ -4891,6 +4939,13 @@ static int bond_check_params(struct bond_params *params)
                all_slaves_active = 0;
        }
 
+       if (resend_igmp < 0 || resend_igmp > 255) {
+               pr_warning("Warning: resend_igmp (%d) should be between "
+                          "0 and 255, resetting to %d\n",
+                          resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+               resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+       }
+
        /* reset values for TLB/ALB */
        if ((bond_mode == BOND_MODE_TLB) ||
            (bond_mode == BOND_MODE_ALB)) {
@@ -5063,6 +5118,7 @@ static int bond_check_params(struct bond_params *params)
        params->fail_over_mac = fail_over_mac_value;
        params->tx_queues = tx_queues;
        params->all_slaves_active = all_slaves_active;
+       params->resend_igmp = resend_igmp;
 
        if (primary) {
                strncpy(params->primary, primary, IFNAMSIZ);
@@ -5239,6 +5295,7 @@ static int __init bonding_init(void)
        if (res)
                goto err;
 
+
        register_netdevice_notifier(&bond_netdev_notifier);
        register_inetaddr_notifier(&bond_inetaddr_notifier);
        bond_register_ipv6_notifier();
@@ -5262,6 +5319,13 @@ static void __exit bonding_exit(void)
 
        rtnl_link_unregister(&bond_link_ops);
        unregister_pernet_subsys(&bond_net_ops);
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       /*
+        * Make sure we don't have an imbalance on our netpoll blocking
+        */
+       WARN_ON(atomic_read(&netpoll_block_tx));
+#endif
 }
 
 module_init(bonding_init);