]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
authorDavid S. Miller <davem@davemloft.net>
Sat, 24 Oct 2015 13:54:12 +0000 (06:54 -0700)
committerDavid S. Miller <davem@davemloft.net>
Sat, 24 Oct 2015 13:54:12 +0000 (06:54 -0700)
Conflicts:
net/ipv6/xfrm6_output.c
net/openvswitch/flow_netlink.c
net/openvswitch/vport-gre.c
net/openvswitch/vport-vxlan.c
net/openvswitch/vport.c
net/openvswitch/vport.h

The openvswitch conflicts were overlapping changes.  One was
the egress tunnel info fix in 'net' and the other was the
vport ->send() op simplification in 'net-next'.

The xfrm6_output.c conflict was also a simplification
overlapping a bug fix.

Signed-off-by: David S. Miller <davem@davemloft.net>
37 files changed:
1  2 
Documentation/devicetree/bindings/net/cpsw.txt
drivers/net/ethernet/amd/xgbe/xgbe-dev.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/usb/qmi_wwan.c
drivers/net/vxlan.c
drivers/net/xen-netfront.c
include/linux/netdevice.h
include/uapi/linux/openvswitch.h
net/core/dev.c
net/ipv4/netfilter/ipt_rpfilter.c
net/ipv4/tcp_output.c
net/ipv4/xfrm4_output.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/ipv6/xfrm6_output.c
net/ipv6/xfrm6_policy.c
net/netfilter/core.c
net/openvswitch/actions.c
net/openvswitch/conntrack.c
net/openvswitch/datapath.c
net/openvswitch/flow_netlink.c
net/openvswitch/vport-geneve.c
net/openvswitch/vport-gre.c
net/openvswitch/vport-internal_dev.c
net/openvswitch/vport-vxlan.c
net/openvswitch/vport.c
net/openvswitch/vport.h
net/tipc/msg.c
net/tipc/udp_media.c

index 676ecf62491d56f5cee714acd2e7f872914facee,a2cae4eb4a60a38c83059c66934e6462ca5a2c6c..4efca560adda4b22f0498f123f053650e4427da5
@@@ -30,13 -30,6 +30,13 @@@ Optional properties
  - dual_emac           : Specifies Switch to act as Dual EMAC
  - syscon              : Phandle to the system control device node, which is
                          the control module device of the am33x
 +- mode-gpios          : Should be added if one/multiple gpio lines are
 +                        required to be driven so that cpsw data lines
 +                        can be connected to the phy via selective mux.
 +                        For example in dra72x-evm, pcf gpio has to be
 +                        driven low so that cpsw slave 0 and phy data
 +                        lines are connected via mux.
 +
  
  Slave Properties:
  Required properties:
@@@ -46,6 -39,7 +46,7 @@@
  Optional properties:
  - dual_emac_res_vlan  : Specifies VID to be used to segregate the ports
  - mac-address         : See ethernet.txt file in the same directory
+ - phy-handle          : See ethernet.txt file in the same directory
  
  Note: "ti,hwmods" field is used to fetch the base address and irq
  resources from TI, omap hwmod data base during device registration.
index 45512242baea58caaeedf5bd3732c4f6dd85afeb,e9ab8b9f3b9cb8835f7df479a75f9e94666d608b..112f1bc8bceef908ddab996860e86847e3b610b6
@@@ -1595,7 -1595,7 +1595,7 @@@ static void xgbe_dev_xmit(struct xgbe_c
                                  packet->rdesc_count, 1);
  
        /* Make sure ownership is written to the descriptor */
-       dma_wmb();
+       wmb();
  
        ring->cur = cur_index + 1;
        if (!packet->skb->xmit_more ||
@@@ -1940,31 -1940,84 +1940,31 @@@ static void xgbe_config_mtl_mode(struc
  static unsigned int xgbe_calculate_per_queue_fifo(unsigned int fifo_size,
                                                  unsigned int queue_count)
  {
 -      unsigned int q_fifo_size = 0;
 -      enum xgbe_mtl_fifo_size p_fifo = XGMAC_MTL_FIFO_SIZE_256;
 +      unsigned int q_fifo_size;
 +      unsigned int p_fifo;
  
 -      /* Calculate Tx/Rx fifo share per queue */
 -      switch (fifo_size) {
 -      case 0:
 -              q_fifo_size = XGBE_FIFO_SIZE_B(128);
 -              break;
 -      case 1:
 -              q_fifo_size = XGBE_FIFO_SIZE_B(256);
 -              break;
 -      case 2:
 -              q_fifo_size = XGBE_FIFO_SIZE_B(512);
 -              break;
 -      case 3:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(1);
 -              break;
 -      case 4:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(2);
 -              break;
 -      case 5:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(4);
 -              break;
 -      case 6:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(8);
 -              break;
 -      case 7:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(16);
 -              break;
 -      case 8:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(32);
 -              break;
 -      case 9:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(64);
 -              break;
 -      case 10:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(128);
 -              break;
 -      case 11:
 -              q_fifo_size = XGBE_FIFO_SIZE_KB(256);
 -              break;
 -      }
 +      /* Calculate the configured fifo size */
 +      q_fifo_size = 1 << (fifo_size + 7);
  
 -      /* The configured value is not the actual amount of fifo RAM */
 +      /* The configured value may not be the actual amount of fifo RAM */
        q_fifo_size = min_t(unsigned int, XGBE_FIFO_MAX, q_fifo_size);
  
        q_fifo_size = q_fifo_size / queue_count;
  
 -      /* Set the queue fifo size programmable value */
 -      if (q_fifo_size >= XGBE_FIFO_SIZE_KB(256))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_256K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(128))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_128K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(64))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_64K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(32))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_32K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(16))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_16K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(8))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_8K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(4))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_4K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(2))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_2K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(1))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_1K;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_B(512))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_512;
 -      else if (q_fifo_size >= XGBE_FIFO_SIZE_B(256))
 -              p_fifo = XGMAC_MTL_FIFO_SIZE_256;
 +      /* Each increment in the queue fifo size represents 256 bytes of
 +       * fifo, with 0 representing 256 bytes. Distribute the fifo equally
 +       * between the queues.
 +       */
 +      p_fifo = q_fifo_size / 256;
 +      if (p_fifo)
 +              p_fifo--;
  
        return p_fifo;
  }
  
  static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata)
  {
 -      enum xgbe_mtl_fifo_size fifo_size;
 +      unsigned int fifo_size;
        unsigned int i;
  
        fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size,
  
  static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata)
  {
 -      enum xgbe_mtl_fifo_size fifo_size;
 +      unsigned int fifo_size;
        unsigned int i;
  
        fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size,
@@@ -2171,7 -2224,7 +2171,7 @@@ static u64 xgbe_mmc_read(struct xgbe_pr
  
        default:
                read_hi = false;
 -      };
 +      }
  
        val = XGMAC_IOREAD(pdata, reg_lo);
  
index 49f796aaad4f1cce23438023de0d649ac03488ea,d2b77d985441f2b1e82889db57eaca6ae133cc55..cff8940e169409d567d4f4f9fa696ba446a1d477
@@@ -360,9 -360,6 +360,9 @@@ static irqreturn_t xgbe_isr(int irq, vo
                        }
                }
  
 +              if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU))
 +                      pdata->ext_stats.rx_buffer_unavailable++;
 +
                /* Restart the device on a Fatal Bus Error */
                if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE))
                        schedule_work(&pdata->restart_work);
                                /* Read Tx Timestamp to clear interrupt */
                                pdata->tx_tstamp =
                                        hw_if->get_tx_tstamp(pdata);
 -                              schedule_work(&pdata->tx_tstamp_work);
 +                              queue_work(pdata->dev_workqueue,
 +                                         &pdata->tx_tstamp_work);
                        }
                }
        }
@@@ -454,7 -450,7 +454,7 @@@ static void xgbe_service_timer(unsigne
  {
        struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
  
 -      schedule_work(&pdata->service_work);
 +      queue_work(pdata->dev_workqueue, &pdata->service_work);
  
        mod_timer(&pdata->service_timer, jiffies + HZ);
  }
@@@ -895,7 -891,7 +895,7 @@@ static int xgbe_start(struct xgbe_prv_d
        netif_tx_start_all_queues(netdev);
  
        xgbe_start_timers(pdata);
 -      schedule_work(&pdata->service_work);
 +      queue_work(pdata->dev_workqueue, &pdata->service_work);
  
        DBGPR("<--xgbe_start\n");
  
@@@ -1811,6 -1807,7 +1811,7 @@@ static int xgbe_tx_poll(struct xgbe_cha
        struct netdev_queue *txq;
        int processed = 0;
        unsigned int tx_packets = 0, tx_bytes = 0;
+       unsigned int cur;
  
        DBGPR("-->xgbe_tx_poll\n");
  
        if (!ring)
                return 0;
  
+       cur = ring->cur;
        txq = netdev_get_tx_queue(netdev, channel->queue_index);
  
        while ((processed < XGBE_TX_DESC_MAX_PROC) &&
-              (ring->dirty != ring->cur)) {
+              (ring->dirty != cur)) {
                rdata = XGBE_GET_DESC_DATA(ring, ring->dirty);
                rdesc = rdata->rdesc;
  
index 95af75d35bc59145b2e89ba717c6a784485e34c9,4183c2abeeeb2dc206f2ca4aa90d88279e2eb6d9..8b1929e9f698c4d8da22573b483795af1bf5bc5b
@@@ -1333,6 -1333,7 +1333,6 @@@ static void bcm_enet_get_drvinfo(struc
                sizeof(drvinfo->version));
        strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
        strlcpy(drvinfo->bus_info, "bcm63xx", sizeof(drvinfo->bus_info));
 -      drvinfo->n_stats = BCM_ENET_STATS_LEN;
  }
  
  static int bcm_enet_get_sset_count(struct net_device *netdev,
@@@ -2048,7 -2049,7 +2048,7 @@@ static void swphy_poll_timer(unsigned l
  
        for (i = 0; i < priv->num_ports; i++) {
                struct bcm63xx_enetsw_port *port;
-               int val, j, up, advertise, lpa, lpa2, speed, duplex, media;
+               int val, j, up, advertise, lpa, speed, duplex, media;
                int external_phy = bcm_enet_port_is_rgmii(i);
                u8 override;
  
                lpa = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id,
                                           MII_LPA);
  
-               lpa2 = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id,
-                                           MII_STAT1000);
                /* figure out media and duplex from advertise and LPA values */
                media = mii_nway_result(lpa & advertise);
                duplex = (media & ADVERTISE_FULL) ? 1 : 0;
-               if (lpa2 & LPA_1000FULL)
-                       duplex = 1;
-               if (lpa2 & (LPA_1000FULL | LPA_1000HALF))
-                       speed = 1000;
-               else {
-                       if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF))
-                               speed = 100;
-                       else
-                               speed = 10;
+               if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF))
+                       speed = 100;
+               else
+                       speed = 10;
+               if (val & BMSR_ESTATEN) {
+                       advertise = bcmenet_sw_mdio_read(priv, external_phy,
+                                               port->phy_id, MII_CTRL1000);
+                       lpa = bcmenet_sw_mdio_read(priv, external_phy,
+                                               port->phy_id, MII_STAT1000);
+                       if (advertise & (ADVERTISE_1000FULL | ADVERTISE_1000HALF)
+                                       && lpa & (LPA_1000FULL | LPA_1000HALF)) {
+                               speed = 1000;
+                               duplex = (lpa & LPA_1000FULL);
+                       }
                }
  
                dev_info(&priv->pdev->dev,
@@@ -2596,6 -2602,7 +2601,6 @@@ static void bcm_enetsw_get_drvinfo(stru
        strncpy(drvinfo->version, bcm_enet_driver_version, 32);
        strncpy(drvinfo->fw_version, "N/A", 32);
        strncpy(drvinfo->bus_info, "bcm63xx", 32);
 -      drvinfo->n_stats = BCM_ENETSW_STATS_LEN;
  }
  
  static void bcm_enetsw_get_ethtool_stats(struct net_device *netdev,
index 8fd26fdd77059e33336754590398d31920edfa46,13a5d4cf494bc76e98fd500afe856951703bf244..3f385ffe420f712abbda79b14c46e2b6196d8dc8
@@@ -87,9 -87,11 +87,9 @@@ static const struct i40e_stats i40e_gst
        I40E_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
        I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
        I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
 +      I40E_VSI_STAT("tx_linearize", tx_linearize),
  };
  
 -static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
 -                               struct ethtool_rxnfc *cmd);
 -
  /* These PF_STATs might look like duplicates of some NETDEV_STATs,
   * but they are separate.  This device supports Virtualization, and
   * as such might have several netdevs supporting VMDq and FCoE going
@@@ -227,12 -229,10 +227,12 @@@ static const char i40e_gstrings_test[][
  
  static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
        "NPAR",
 +      "LinkPolling",
 +      "flow-director-atr",
 +      "veb-stats",
  };
  
 -#define I40E_PRIV_FLAGS_STR_LEN \
 -      (sizeof(i40e_priv_flags_strings) / ETH_GSTRING_LEN)
 +#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings)
  
  /**
   * i40e_partition_setting_complaint - generic complaint for MFP restriction
@@@ -253,8 -253,7 +253,8 @@@ static void i40e_partition_setting_comp
   **/
  static void i40e_get_settings_link_up(struct i40e_hw *hw,
                                      struct ethtool_cmd *ecmd,
 -                                    struct net_device *netdev)
 +                                    struct net_device *netdev,
 +                                    struct i40e_pf *pf)
  {
        struct i40e_link_status *hw_link_info = &hw->phy.link_info;
        u32 link_speed = hw_link_info->link_speed;
        case I40E_PHY_TYPE_40GBASE_AOC:
                ecmd->supported = SUPPORTED_40000baseCR4_Full;
                break;
 -      case I40E_PHY_TYPE_40GBASE_KR4:
 -              ecmd->supported = SUPPORTED_Autoneg |
 -                                SUPPORTED_40000baseKR4_Full;
 -              ecmd->advertising = ADVERTISED_Autoneg |
 -                                  ADVERTISED_40000baseKR4_Full;
 -              break;
        case I40E_PHY_TYPE_40GBASE_SR4:
                ecmd->supported = SUPPORTED_40000baseSR4_Full;
                break;
        case I40E_PHY_TYPE_40GBASE_LR4:
                ecmd->supported = SUPPORTED_40000baseLR4_Full;
                break;
 -      case I40E_PHY_TYPE_20GBASE_KR2:
 -              ecmd->supported = SUPPORTED_Autoneg |
 -                                SUPPORTED_20000baseKR2_Full;
 -              ecmd->advertising = ADVERTISED_Autoneg |
 -                                  ADVERTISED_20000baseKR2_Full;
 -              break;
 -      case I40E_PHY_TYPE_10GBASE_KX4:
 -              ecmd->supported = SUPPORTED_Autoneg |
 -                                SUPPORTED_10000baseKX4_Full;
 -              ecmd->advertising = ADVERTISED_Autoneg |
 -                                  ADVERTISED_10000baseKX4_Full;
 -              break;
 -      case I40E_PHY_TYPE_10GBASE_KR:
 -              ecmd->supported = SUPPORTED_Autoneg |
 -                                SUPPORTED_10000baseKR_Full;
 -              ecmd->advertising = ADVERTISED_Autoneg |
 -                                  ADVERTISED_10000baseKR_Full;
 -              break;
        case I40E_PHY_TYPE_10GBASE_SR:
        case I40E_PHY_TYPE_10GBASE_LR:
        case I40E_PHY_TYPE_1000BASE_SX:
        case I40E_PHY_TYPE_1000BASE_LX:
 -              ecmd->supported = SUPPORTED_10000baseT_Full |
 -                                SUPPORTED_1000baseT_Full;
 +              ecmd->supported = SUPPORTED_10000baseT_Full;
 +              if (hw_link_info->module_type[2] &
 +                  I40E_MODULE_TYPE_1000BASE_SX ||
 +                  hw_link_info->module_type[2] &
 +                  I40E_MODULE_TYPE_1000BASE_LX) {
 +                      ecmd->supported |= SUPPORTED_1000baseT_Full;
 +                      if (hw_link_info->requested_speeds &
 +                          I40E_LINK_SPEED_1GB)
 +                              ecmd->advertising |= ADVERTISED_1000baseT_Full;
 +              }
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
                        ecmd->advertising |= ADVERTISED_10000baseT_Full;
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
 -                      ecmd->advertising |= ADVERTISED_1000baseT_Full;
 -              break;
 -      case I40E_PHY_TYPE_1000BASE_KX:
 -              ecmd->supported = SUPPORTED_Autoneg |
 -                                SUPPORTED_1000baseKX_Full;
 -              ecmd->advertising = ADVERTISED_Autoneg |
 -                                  ADVERTISED_1000baseKX_Full;
                break;
        case I40E_PHY_TYPE_10GBASE_T:
        case I40E_PHY_TYPE_1000BASE_T:
 -      case I40E_PHY_TYPE_100BASE_TX:
                ecmd->supported = SUPPORTED_Autoneg |
                                  SUPPORTED_10000baseT_Full |
 -                                SUPPORTED_1000baseT_Full |
 -                                SUPPORTED_100baseT_Full;
 +                                SUPPORTED_1000baseT_Full;
                ecmd->advertising = ADVERTISED_Autoneg;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
                        ecmd->advertising |= ADVERTISED_10000baseT_Full;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
                        ecmd->advertising |= ADVERTISED_1000baseT_Full;
 +              break;
 +      case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
 +              ecmd->supported = SUPPORTED_Autoneg |
 +                                SUPPORTED_1000baseT_Full;
 +              ecmd->advertising = ADVERTISED_Autoneg |
 +                                  ADVERTISED_1000baseT_Full;
 +              break;
 +      case I40E_PHY_TYPE_100BASE_TX:
 +              ecmd->supported = SUPPORTED_Autoneg |
 +                                SUPPORTED_100baseT_Full;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
                        ecmd->advertising |= ADVERTISED_100baseT_Full;
                break;
                break;
        case I40E_PHY_TYPE_SGMII:
                ecmd->supported = SUPPORTED_Autoneg |
 -                                SUPPORTED_1000baseT_Full |
 -                                SUPPORTED_100baseT_Full;
 +                                SUPPORTED_1000baseT_Full;
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
                        ecmd->advertising |= ADVERTISED_1000baseT_Full;
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
 -                      ecmd->advertising |= ADVERTISED_100baseT_Full;
 +              if (pf->hw.mac.type == I40E_MAC_X722) {
 +                      ecmd->supported |= SUPPORTED_100baseT_Full;
 +                      if (hw_link_info->requested_speeds &
 +                          I40E_LINK_SPEED_100MB)
 +                              ecmd->advertising |= ADVERTISED_100baseT_Full;
 +              }
 +              break;
 +      /* Backplane is set based on supported phy types in get_settings
 +       * so don't set anything here but don't warn either
 +       */
 +      case I40E_PHY_TYPE_40GBASE_KR4:
 +      case I40E_PHY_TYPE_20GBASE_KR2:
 +      case I40E_PHY_TYPE_10GBASE_KR:
 +      case I40E_PHY_TYPE_10GBASE_KX4:
 +      case I40E_PHY_TYPE_1000BASE_KX:
                break;
        default:
                /* if we got here and link is up something bad is afoot */
   * Reports link settings that can be determined when link is down
   **/
  static void i40e_get_settings_link_down(struct i40e_hw *hw,
 -                                      struct ethtool_cmd *ecmd)
 +                                      struct ethtool_cmd *ecmd,
 +                                      struct i40e_pf *pf)
  {
 -      struct i40e_link_status *hw_link_info = &hw->phy.link_info;
 +      enum i40e_aq_capabilities_phy_type phy_types = hw->phy.phy_types;
  
        /* link is down and the driver needs to fall back on
 -       * device ID to determine what kinds of info to display,
 -       * it's mostly a guess that may change when link is up
 +       * supported phy types to figure out what info to display
         */
 -      switch (hw->device_id) {
 -      case I40E_DEV_ID_QSFP_A:
 -      case I40E_DEV_ID_QSFP_B:
 -      case I40E_DEV_ID_QSFP_C:
 -              /* pluggable QSFP */
 -              ecmd->supported = SUPPORTED_40000baseSR4_Full |
 -                                SUPPORTED_40000baseCR4_Full |
 -                                SUPPORTED_40000baseLR4_Full;
 -              ecmd->advertising = ADVERTISED_40000baseSR4_Full |
 -                                  ADVERTISED_40000baseCR4_Full |
 -                                  ADVERTISED_40000baseLR4_Full;
 -              break;
 -      case I40E_DEV_ID_KX_B:
 -              /* backplane 40G */
 -              ecmd->supported = SUPPORTED_40000baseKR4_Full;
 -              ecmd->advertising = ADVERTISED_40000baseKR4_Full;
 -              break;
 -      case I40E_DEV_ID_KX_C:
 -              /* backplane 10G */
 -              ecmd->supported = SUPPORTED_10000baseKR_Full;
 -              ecmd->advertising = ADVERTISED_10000baseKR_Full;
 -              break;
 -      case I40E_DEV_ID_10G_BASE_T:
 -              ecmd->supported = SUPPORTED_10000baseT_Full |
 -                                SUPPORTED_1000baseT_Full |
 -                                SUPPORTED_100baseT_Full;
 -              /* Figure out what has been requested */
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
 -                      ecmd->advertising |= ADVERTISED_10000baseT_Full;
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
 -                      ecmd->advertising |= ADVERTISED_1000baseT_Full;
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
 +      ecmd->supported = 0x0;
 +      ecmd->advertising = 0x0;
 +      if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
 +              ecmd->supported |= SUPPORTED_Autoneg |
 +                                 SUPPORTED_1000baseT_Full;
 +              ecmd->advertising |= ADVERTISED_Autoneg |
 +                                   ADVERTISED_1000baseT_Full;
 +              if (pf->hw.mac.type == I40E_MAC_X722) {
 +                      ecmd->supported |= SUPPORTED_100baseT_Full;
                        ecmd->advertising |= ADVERTISED_100baseT_Full;
 -              break;
 -      case I40E_DEV_ID_20G_KR2:
 -              /* backplane 20G */
 -              ecmd->supported = SUPPORTED_20000baseKR2_Full;
 -              ecmd->advertising = ADVERTISED_20000baseKR2_Full;
 -              break;
 -      default:
 -              /* all the rest are 10G/1G */
 -              ecmd->supported = SUPPORTED_10000baseT_Full |
 -                                SUPPORTED_1000baseT_Full;
 -              /* Figure out what has been requested */
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
 -                      ecmd->advertising |= ADVERTISED_10000baseT_Full;
 -              if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
 -                      ecmd->advertising |= ADVERTISED_1000baseT_Full;
 -              break;
 +              }
        }
 +      if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
 +          phy_types & I40E_CAP_PHY_TYPE_XFI ||
 +          phy_types & I40E_CAP_PHY_TYPE_SFI ||
 +          phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
 +          phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
 +              ecmd->supported |= SUPPORTED_10000baseT_Full;
 +      if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
 +          phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
 +          phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
 +          phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
 +          phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
 +              ecmd->supported |= SUPPORTED_Autoneg |
 +                                 SUPPORTED_10000baseT_Full;
 +              ecmd->advertising |= ADVERTISED_Autoneg |
 +                                   ADVERTISED_10000baseT_Full;
 +      }
 +      if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
 +          phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
 +          phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
 +              ecmd->supported |= SUPPORTED_40000baseCR4_Full;
 +      if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
 +          phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
 +              ecmd->supported |= SUPPORTED_Autoneg |
 +                                SUPPORTED_40000baseCR4_Full;
 +              ecmd->advertising |= ADVERTISED_Autoneg |
 +                                  ADVERTISED_40000baseCR4_Full;
 +      }
 +      if ((phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) &&
 +          !(phy_types & I40E_CAP_PHY_TYPE_1000BASE_T)) {
 +              ecmd->supported |= SUPPORTED_Autoneg |
 +                                 SUPPORTED_100baseT_Full;
 +              ecmd->advertising |= ADVERTISED_Autoneg |
 +                                   ADVERTISED_100baseT_Full;
 +      }
 +      if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
 +          phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
 +          phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
 +          phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
 +              ecmd->supported |= SUPPORTED_Autoneg |
 +                                 SUPPORTED_1000baseT_Full;
 +              ecmd->advertising |= ADVERTISED_Autoneg |
 +                                   ADVERTISED_1000baseT_Full;
 +      }
 +      if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
 +              ecmd->supported |= SUPPORTED_40000baseSR4_Full;
 +      if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
 +              ecmd->supported |= SUPPORTED_40000baseLR4_Full;
  
        /* With no link speed and duplex are unknown */
        ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
@@@ -481,43 -475,12 +481,43 @@@ static int i40e_get_settings(struct net
        bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
  
        if (link_up)
 -              i40e_get_settings_link_up(hw, ecmd, netdev);
 +              i40e_get_settings_link_up(hw, ecmd, netdev, pf);
        else
 -              i40e_get_settings_link_down(hw, ecmd);
 +              i40e_get_settings_link_down(hw, ecmd, pf);
  
        /* Now set the settings that don't rely on link being up/down */
  
 +      /* For backplane, supported and advertised are only reliant on the
 +       * phy types the NVM specifies are supported.
 +       */
 +      if (hw->device_id == I40E_DEV_ID_KX_B ||
 +          hw->device_id == I40E_DEV_ID_KX_C ||
 +          hw->device_id == I40E_DEV_ID_20G_KR2 ||
 +          hw->device_id ==  I40E_DEV_ID_20G_KR2_A) {
 +              ecmd->supported = SUPPORTED_Autoneg;
 +              ecmd->advertising = ADVERTISED_Autoneg;
 +              if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
 +                      ecmd->supported |= SUPPORTED_40000baseKR4_Full;
 +                      ecmd->advertising |= ADVERTISED_40000baseKR4_Full;
 +              }
 +              if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
 +                      ecmd->supported |= SUPPORTED_20000baseKR2_Full;
 +                      ecmd->advertising |= ADVERTISED_20000baseKR2_Full;
 +              }
 +              if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
 +                      ecmd->supported |= SUPPORTED_10000baseKR_Full;
 +                      ecmd->advertising |= ADVERTISED_10000baseKR_Full;
 +              }
 +              if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
 +                      ecmd->supported |= SUPPORTED_10000baseKX4_Full;
 +                      ecmd->advertising |= ADVERTISED_10000baseKX4_Full;
 +              }
 +              if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
 +                      ecmd->supported |= SUPPORTED_1000baseKX_Full;
 +                      ecmd->advertising |= ADVERTISED_1000baseKX_Full;
 +              }
 +      }
 +
        /* Set autoneg settings */
        ecmd->autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
                          AUTONEG_ENABLE : AUTONEG_DISABLE);
@@@ -617,14 -580,6 +617,14 @@@ static int i40e_set_settings(struct net
            hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
                return -EOPNOTSUPP;
  
 +      if (hw->device_id == I40E_DEV_ID_KX_B ||
 +          hw->device_id == I40E_DEV_ID_KX_C ||
 +          hw->device_id == I40E_DEV_ID_20G_KR2 ||
 +          hw->device_id == I40E_DEV_ID_20G_KR2_A) {
 +              netdev_info(netdev, "Changing settings is not supported on backplane.\n");
 +              return -EOPNOTSUPP;
 +      }
 +
        /* get our own copy of the bits to check against */
        memset(&safe_ecmd, 0, sizeof(struct ethtool_cmd));
        i40e_get_settings(netdev, &safe_ecmd);
  
        /* Check autoneg */
        if (autoneg == AUTONEG_ENABLE) {
 -              /* If autoneg is not supported, return error */
 -              if (!(safe_ecmd.supported & SUPPORTED_Autoneg)) {
 -                      netdev_info(netdev, "Autoneg not supported on this phy\n");
 -                      return -EINVAL;
 -              }
                /* If autoneg was not already enabled */
                if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
 +                      /* If autoneg is not supported, return error */
 +                      if (!(safe_ecmd.supported & SUPPORTED_Autoneg)) {
 +                              netdev_info(netdev, "Autoneg not supported on this phy\n");
 +                              return -EINVAL;
 +                      }
 +                      /* Autoneg is allowed to change */
                        config.abilities = abilities.abilities |
                                           I40E_AQ_PHY_ENABLE_AN;
                        change = true;
                }
        } else {
 -              /* If autoneg is supported 10GBASE_T is the only phy that
 -               * can disable it, so otherwise return error
 -               */
 -              if (safe_ecmd.supported & SUPPORTED_Autoneg &&
 -                  hw->phy.link_info.phy_type != I40E_PHY_TYPE_10GBASE_T) {
 -                      netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
 -                      return -EINVAL;
 -              }
                /* If autoneg is currently enabled */
                if (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) {
 +                      /* If autoneg is supported 10GBASE_T is the only PHY
 +                       * that can disable it, so otherwise return error
 +                       */
 +                      if (safe_ecmd.supported & SUPPORTED_Autoneg &&
 +                          hw->phy.link_info.phy_type !=
 +                          I40E_PHY_TYPE_10GBASE_T) {
 +                              netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
 +                              return -EINVAL;
 +                      }
 +                      /* Autoneg is allowed to change */
                        config.abilities = abilities.abilities &
                                           ~I40E_AQ_PHY_ENABLE_AN;
                        change = true;
            advertise & ADVERTISED_40000baseLR4_Full)
                config.link_speed |= I40E_LINK_SPEED_40GB;
  
 +      /* If speed didn't get set, set it to what it currently is.
 +       * This is needed because if advertise is 0 (as it is when autoneg
 +       * is disabled) then speed won't get set.
 +       */
 +      if (!config.link_speed)
 +              config.link_speed = abilities.link_speed;
 +
        if (change || (abilities.link_speed != config.link_speed)) {
                /* copy over the rest of the abilities */
                config.phy_type = abilities.phy_type;
                        /* Tell the OS link is going down, the link will go
                         * back up when fw says it is ready asynchronously
                         */
 -                      netdev_info(netdev, "PHY settings change requested, NIC Link is going down.\n");
 +                      i40e_print_link_message(vsi, false);
                        netif_carrier_off(netdev);
                        netif_tx_stop_all_queues(netdev);
                }
                        return -EAGAIN;
                }
  
 -              status = i40e_aq_get_link_info(hw, true, NULL, NULL);
 +              status = i40e_update_link_info(hw);
                if (status)
 -                      netdev_info(netdev, "Updating link info failed with err %s aq_err %s\n",
 -                                  i40e_stat_str(hw, status),
 -                                  i40e_aq_str(hw, hw->aq.asq_last_status));
 +                      netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
 +                                 i40e_stat_str(hw, status),
 +                                 i40e_aq_str(hw, hw->aq.asq_last_status));
  
        } else {
                netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
@@@ -879,7 -824,7 +879,7 @@@ static int i40e_set_pauseparam(struct n
        /* Tell the OS link is going down, the link will go back up when fw
         * says it is ready asynchronously
         */
 -      netdev_info(netdev, "Flow control settings change requested, NIC Link is going down.\n");
 +      i40e_print_link_message(vsi, false);
        netif_carrier_off(netdev);
        netif_tx_stop_all_queues(netdev);
  
@@@ -1003,7 -948,9 +1003,7 @@@ static int i40e_get_eeprom(struct net_d
  
                cmd = (struct i40e_nvm_access *)eeprom;
                ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno);
 -              if (ret_val &&
 -                  ((hw->aq.asq_last_status != I40E_AQ_RC_EACCES) ||
 -                   (hw->debug_mask & I40E_DEBUG_NVM)))
 +              if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM))
                        dev_info(&pf->pdev->dev,
                                 "NVMUpdate read failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n",
                                 ret_val, hw->aq.asq_last_status, errno,
@@@ -1107,7 -1054,10 +1107,7 @@@ static int i40e_set_eeprom(struct net_d
  
        cmd = (struct i40e_nvm_access *)eeprom;
        ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno);
 -      if (ret_val &&
 -          ((hw->aq.asq_last_status != I40E_AQ_RC_EPERM &&
 -            hw->aq.asq_last_status != I40E_AQ_RC_EBUSY) ||
 -           (hw->debug_mask & I40E_DEBUG_NVM)))
 +      if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM))
                dev_info(&pf->pdev->dev,
                         "NVMUpdate write failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n",
                         ret_val, hw->aq.asq_last_status, errno,
@@@ -1127,10 -1077,11 +1127,10 @@@ static void i40e_get_drvinfo(struct net
        strlcpy(drvinfo->driver, i40e_driver_name, sizeof(drvinfo->driver));
        strlcpy(drvinfo->version, i40e_driver_version_str,
                sizeof(drvinfo->version));
 -      strlcpy(drvinfo->fw_version, i40e_fw_version_str(&pf->hw),
 +      strlcpy(drvinfo->fw_version, i40e_nvm_version_str(&pf->hw),
                sizeof(drvinfo->fw_version));
        strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
                sizeof(drvinfo->bus_info));
 -      drvinfo->n_priv_flags = I40E_PRIV_FLAGS_STR_LEN;
  }
  
  static void i40e_get_ringparam(struct net_device *netdev,
@@@ -1215,11 -1166,6 +1215,11 @@@ static int i40e_set_ringparam(struct ne
                        /* clone ring and setup updated count */
                        tx_rings[i] = *vsi->tx_rings[i];
                        tx_rings[i].count = new_tx_count;
 +                      /* the desc and bi pointers will be reallocated in the
 +                       * setup call
 +                       */
 +                      tx_rings[i].desc = NULL;
 +                      tx_rings[i].rx_bi = NULL;
                        err = i40e_setup_tx_descriptors(&tx_rings[i]);
                        if (err) {
                                while (i) {
                        /* clone ring and setup updated count */
                        rx_rings[i] = *vsi->rx_rings[i];
                        rx_rings[i].count = new_rx_count;
 +                      /* the desc and bi pointers will be reallocated in the
 +                       * setup call
 +                       */
 +                      rx_rings[i].desc = NULL;
 +                      rx_rings[i].rx_bi = NULL;
                        err = i40e_setup_rx_descriptors(&rx_rings[i]);
                        if (err) {
                                while (i) {
@@@ -1322,8 -1263,7 +1322,8 @@@ static int i40e_get_sset_count(struct n
                if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
                        int len = I40E_PF_STATS_LEN(netdev);
  
 -                      if (pf->lan_veb != I40E_NO_VEB)
 +                      if ((pf->lan_veb != I40E_NO_VEB) &&
 +                          (pf->flags & I40E_FLAG_VEB_STATS_ENABLED))
                                len += I40E_VEB_STATS_TOTAL;
                        return len;
                } else {
@@@ -1396,16 -1336,20 +1396,22 @@@ static void i40e_get_ethtool_stats(stru
        if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
                return;
  
 -      if (pf->lan_veb != I40E_NO_VEB) {
 +      if ((pf->lan_veb != I40E_NO_VEB) &&
 +          (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
                struct i40e_veb *veb = pf->veb[pf->lan_veb];
 +
                for (j = 0; j < I40E_VEB_STATS_LEN; j++) {
                        p = (char *)veb;
                        p += i40e_gstrings_veb_stats[j].stat_offset;
                        data[i++] = (i40e_gstrings_veb_stats[j].sizeof_stat ==
                                     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
                }
+               for (j = 0; j < I40E_MAX_TRAFFIC_CLASS; j++) {
+                       data[i++] = veb->tc_stats.tc_tx_packets[j];
+                       data[i++] = veb->tc_stats.tc_tx_bytes[j];
+                       data[i++] = veb->tc_stats.tc_rx_packets[j];
+                       data[i++] = veb->tc_stats.tc_rx_bytes[j];
+               }
        }
        for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
                p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
@@@ -1471,8 -1415,7 +1477,8 @@@ static void i40e_get_strings(struct net
                if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
                        return;
  
 -              if (pf->lan_veb != I40E_NO_VEB) {
 +              if ((pf->lan_veb != I40E_NO_VEB) &&
 +                  (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
                        for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
                                snprintf(p, ETH_GSTRING_LEN, "veb.%s",
                                        i40e_gstrings_veb_stats[i].stat_string);
@@@ -1567,18 -1510,9 +1573,18 @@@ static int i40e_link_test(struct net_de
  {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_pf *pf = np->vsi->back;
 +      i40e_status status;
 +      bool link_up = false;
  
        netif_info(pf, hw, netdev, "link test\n");
 -      if (i40e_get_link_status(&pf->hw))
 +      status = i40e_get_link_status(&pf->hw, &link_up);
 +      if (status) {
 +              netif_err(pf, drv, netdev, "link query timed out, please retry test\n");
 +              *data = 1;
 +              return *data;
 +      }
 +
 +      if (link_up)
                *data = 0;
        else
                *data = 1;
@@@ -1647,7 -1581,7 +1653,7 @@@ static inline bool i40e_active_vfs(stru
        int i;
  
        for (i = 0; i < pf->num_alloc_vfs; i++)
 -              if (vfs[i].vf_states & I40E_VF_STAT_ACTIVE)
 +              if (test_bit(I40E_VF_STAT_ACTIVE, &vfs[i].vf_states))
                        return true;
        return false;
  }
@@@ -1854,14 -1788,6 +1860,14 @@@ static int i40e_get_coalesce(struct net
  
        ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
        ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
 +      /* we use the _usecs_high to store/set the interrupt rate limit
 +       * that the hardware supports, that almost but not quite
 +       * fits the original intent of the ethtool variable,
 +       * the rx_coalesce_usecs_high limits total interrupts
 +       * per second from both tx/rx sources.
 +       */
 +      ec->rx_coalesce_usecs_high = vsi->int_rate_limit;
 +      ec->tx_coalesce_usecs_high = vsi->int_rate_limit;
  
        return 0;
  }
@@@ -1880,17 -1806,6 +1886,17 @@@ static int i40e_set_coalesce(struct net
        if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
                vsi->work_limit = ec->tx_max_coalesced_frames_irq;
  
 +      /* tx_coalesce_usecs_high is ignored, use rx-usecs-high instead */
 +      if (ec->tx_coalesce_usecs_high != vsi->int_rate_limit) {
 +              netif_info(pf, drv, netdev, "tx-usecs-high is not used, please program rx-usecs-high\n");
 +              return -EINVAL;
 +      }
 +
 +      if (ec->rx_coalesce_usecs_high >= INTRL_REG_TO_USEC(I40E_MAX_INTRL)) {
 +              netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range is 0-235\n");
 +              return -EINVAL;
 +      }
 +
        vector = vsi->base_vector;
        if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
            (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) {
                return -EINVAL;
        }
  
 +      vsi->int_rate_limit = ec->rx_coalesce_usecs_high;
 +
        if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
            (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) {
                vsi->tx_itr_setting = ec->tx_coalesce_usecs;
                vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
  
        for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
 +              u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit);
 +
                q_vector = vsi->q_vectors[i];
                q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
                wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr);
                q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
                wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr);
 +              wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
                i40e_flush(hw);
        }
  
@@@ -2700,51 -2610,10 +2706,51 @@@ static u32 i40e_get_priv_flags(struct n
  
        ret_flags |= pf->hw.func_caps.npar_enable ?
                I40E_PRIV_FLAGS_NPAR_FLAG : 0;
 +      ret_flags |= pf->flags & I40E_FLAG_LINK_POLLING_ENABLED ?
 +              I40E_PRIV_FLAGS_LINKPOLL_FLAG : 0;
 +      ret_flags |= pf->flags & I40E_FLAG_FD_ATR_ENABLED ?
 +              I40E_PRIV_FLAGS_FD_ATR : 0;
 +      ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ?
 +              I40E_PRIV_FLAGS_VEB_STATS : 0;
  
        return ret_flags;
  }
  
 +/**
 + * i40e_set_priv_flags - set private flags
 + * @dev: network interface device structure
 + * @flags: bit flags to be set
 + **/
 +static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 +{
 +      struct i40e_netdev_priv *np = netdev_priv(dev);
 +      struct i40e_vsi *vsi = np->vsi;
 +      struct i40e_pf *pf = vsi->back;
 +
 +      if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG)
 +              pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED;
 +      else
 +              pf->flags &= ~I40E_FLAG_LINK_POLLING_ENABLED;
 +
 +      /* allow the user to control the state of the Flow
 +       * Director ATR (Application Targeted Routing) feature
 +       * of the driver
 +       */
 +      if (flags & I40E_PRIV_FLAGS_FD_ATR) {
 +              pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
 +      } else {
 +              pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 +              pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 +      }
 +
 +      if (flags & I40E_PRIV_FLAGS_VEB_STATS)
 +              pf->flags |= I40E_FLAG_VEB_STATS_ENABLED;
 +      else
 +              pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
 +
 +      return 0;
 +}
 +
  static const struct ethtool_ops i40e_ethtool_ops = {
        .get_settings           = i40e_get_settings,
        .set_settings           = i40e_set_settings,
        .set_channels           = i40e_set_channels,
        .get_ts_info            = i40e_get_ts_info,
        .get_priv_flags         = i40e_get_priv_flags,
 +      .set_priv_flags         = i40e_set_priv_flags,
  };
  
  void i40e_set_ethtool_ops(struct net_device *netdev)
index 3e595adfb0bfc1b346361765cc3feaef85ffc89e,3dd26cdd0bf27365ec60d084c027800bee128f93..b825f978d441d1987581b249694298bb5996538d
@@@ -39,7 -39,7 +39,7 @@@ static const char i40e_driver_string[] 
  
  #define DRV_VERSION_MAJOR 1
  #define DRV_VERSION_MINOR 3
 -#define DRV_VERSION_BUILD 9
 +#define DRV_VERSION_BUILD 46
  #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@@ -75,13 -75,10 +75,13 @@@ static const struct pci_device_id i40e_
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0},
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
 +      {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
        {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
 +      {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
 +      {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
        /* required last entry */
        {0, }
  };
@@@ -216,10 -213,10 +216,10 @@@ static int i40e_get_lump(struct i40e_p
                        ret = i;
                        pile->search_hint = i + j;
                        break;
 -              } else {
 -                      /* not enough, so skip over it and continue looking */
 -                      i += j;
                }
 +
 +              /* not enough, so skip over it and continue looking */
 +              i += j;
        }
  
        return ret;
@@@ -302,69 -299,25 +302,69 @@@ static void i40e_tx_timeout(struct net_
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
 +      struct i40e_ring *tx_ring = NULL;
 +      unsigned int i, hung_queue = 0;
 +      u32 head, val;
  
        pf->tx_timeout_count++;
  
 +      /* find the stopped queue the same way the stack does */
 +      for (i = 0; i < netdev->num_tx_queues; i++) {
 +              struct netdev_queue *q;
 +              unsigned long trans_start;
 +
 +              q = netdev_get_tx_queue(netdev, i);
 +              trans_start = q->trans_start ? : netdev->trans_start;
 +              if (netif_xmit_stopped(q) &&
 +                  time_after(jiffies,
 +                             (trans_start + netdev->watchdog_timeo))) {
 +                      hung_queue = i;
 +                      break;
 +              }
 +      }
 +
 +      if (i == netdev->num_tx_queues) {
 +              netdev_info(netdev, "tx_timeout: no netdev hung queue found\n");
 +      } else {
 +              /* now that we have an index, find the tx_ring struct */
 +              for (i = 0; i < vsi->num_queue_pairs; i++) {
 +                      if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
 +                              if (hung_queue ==
 +                                  vsi->tx_rings[i]->queue_index) {
 +                                      tx_ring = vsi->tx_rings[i];
 +                                      break;
 +                              }
 +                      }
 +              }
 +      }
 +
        if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
 -              pf->tx_timeout_recovery_level = 1;
 +              pf->tx_timeout_recovery_level = 1;  /* reset after some time */
 +      else if (time_before(jiffies,
 +                    (pf->tx_timeout_last_recovery + netdev->watchdog_timeo)))
 +              return;   /* don't do any new action before the next timeout */
 +
 +      if (tx_ring) {
 +              head = i40e_get_head(tx_ring);
 +              /* Read interrupt register */
 +              if (pf->flags & I40E_FLAG_MSIX_ENABLED)
 +                      val = rd32(&pf->hw,
 +                           I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
 +                                              tx_ring->vsi->base_vector - 1));
 +              else
 +                      val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
 +
 +              netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n",
 +                          vsi->seid, hung_queue, tx_ring->next_to_clean,
 +                          head, tx_ring->next_to_use,
 +                          readl(tx_ring->tail), val);
 +      }
 +
        pf->tx_timeout_last_recovery = jiffies;
 -      netdev_info(netdev, "tx_timeout recovery level %d\n",
 -                  pf->tx_timeout_recovery_level);
 +      netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n",
 +                  pf->tx_timeout_recovery_level, hung_queue);
  
        switch (pf->tx_timeout_recovery_level) {
 -      case 0:
 -              /* disable and re-enable queues for the VSI */
 -              if (in_interrupt()) {
 -                      set_bit(__I40E_REINIT_REQUESTED, &pf->state);
 -                      set_bit(__I40E_REINIT_REQUESTED, &vsi->state);
 -              } else {
 -                      i40e_vsi_reinit_locked(vsi);
 -              }
 -              break;
        case 1:
                set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
                break;
                break;
        default:
                netdev_err(netdev, "tx_timeout recovery unsuccessful\n");
 -              set_bit(__I40E_DOWN_REQUESTED, &pf->state);
 -              set_bit(__I40E_DOWN_REQUESTED, &vsi->state);
                break;
        }
 +
        i40e_service_event_schedule(pf);
        pf->tx_timeout_recovery_level++;
  }
@@@ -477,7 -431,6 +477,7 @@@ static struct rtnl_link_stats64 *i40e_g
        stats->tx_errors        = vsi_stats->tx_errors;
        stats->tx_dropped       = vsi_stats->tx_dropped;
        stats->rx_errors        = vsi_stats->rx_errors;
 +      stats->rx_dropped       = vsi_stats->rx_dropped;
        stats->rx_crc_errors    = vsi_stats->rx_crc_errors;
        stats->rx_length_errors = vsi_stats->rx_length_errors;
  
@@@ -503,11 -456,11 +503,11 @@@ void i40e_vsi_reset_stats(struct i40e_v
        memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
        if (vsi->rx_rings && vsi->rx_rings[0]) {
                for (i = 0; i < vsi->num_queue_pairs; i++) {
 -                      memset(&vsi->rx_rings[i]->stats, 0 ,
 +                      memset(&vsi->rx_rings[i]->stats, 0,
                               sizeof(vsi->rx_rings[i]->stats));
 -                      memset(&vsi->rx_rings[i]->rx_stats, 0 ,
 +                      memset(&vsi->rx_rings[i]->rx_stats, 0,
                               sizeof(vsi->rx_rings[i]->rx_stats));
 -                      memset(&vsi->tx_rings[i]->stats, 0 ,
 +                      memset(&vsi->tx_rings[i]->stats, 0,
                               sizeof(vsi->tx_rings[i]->stats));
                        memset(&vsi->tx_rings[i]->tx_stats, 0,
                               sizeof(vsi->tx_rings[i]->tx_stats));
@@@ -801,6 -754,7 +801,6 @@@ static void i40e_update_link_xoff_rx(st
        struct i40e_hw_port_stats *nsd = &pf->stats;
        struct i40e_hw *hw = &pf->hw;
        u64 xoff = 0;
 -      u16 i, v;
  
        if ((hw->fc.current_mode != I40E_FC_FULL) &&
            (hw->fc.current_mode != I40E_FC_RX_PAUSE))
        if (!(nsd->link_xoff_rx - xoff))
                return;
  
 -      /* Clear the __I40E_HANG_CHECK_ARMED bit for all Tx rings */
 -      for (v = 0; v < pf->num_alloc_vsi; v++) {
 -              struct i40e_vsi *vsi = pf->vsi[v];
 -
 -              if (!vsi || !vsi->tx_rings[0])
 -                      continue;
 -
 -              for (i = 0; i < vsi->num_queue_pairs; i++) {
 -                      struct i40e_ring *ring = vsi->tx_rings[i];
 -                      clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
 -              }
 -      }
  }
  
  /**
@@@ -830,7 -796,7 +830,7 @@@ static void i40e_update_prio_xoff_rx(st
        bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false};
        struct i40e_dcbx_config *dcb_cfg;
        struct i40e_hw *hw = &pf->hw;
 -      u16 i, v;
 +      u16 i;
        u8 tc;
  
        dcb_cfg = &hw->local_dcbx_config;
  
        for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
                u64 prio_xoff = nsd->priority_xoff_rx[i];
 +
                i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
                                   pf->stat_offsets_loaded,
                                   &osd->priority_xoff_rx[i],
                tc = dcb_cfg->etscfg.prioritytable[i];
                xoff[tc] = true;
        }
 -
 -      /* Clear the __I40E_HANG_CHECK_ARMED bit for Tx rings */
 -      for (v = 0; v < pf->num_alloc_vsi; v++) {
 -              struct i40e_vsi *vsi = pf->vsi[v];
 -
 -              if (!vsi || !vsi->tx_rings[0])
 -                      continue;
 -
 -              for (i = 0; i < vsi->num_queue_pairs; i++) {
 -                      struct i40e_ring *ring = vsi->tx_rings[i];
 -
 -                      tc = ring->dcb_tc;
 -                      if (xoff[tc])
 -                              clear_bit(__I40E_HANG_CHECK_ARMED,
 -                                        &ring->state);
 -              }
 -      }
  }
  
  /**
@@@ -880,7 -862,6 +880,7 @@@ static void i40e_update_vsi_stats(struc
        u32 rx_page, rx_buf;
        u64 bytes, packets;
        unsigned int start;
 +      u64 tx_linearize;
        u64 rx_p, rx_b;
        u64 tx_p, tx_b;
        u16 q;
         */
        rx_b = rx_p = 0;
        tx_b = tx_p = 0;
 -      tx_restart = tx_busy = 0;
 +      tx_restart = tx_busy = tx_linearize = 0;
        rx_page = 0;
        rx_buf = 0;
        rcu_read_lock();
                tx_p += packets;
                tx_restart += p->tx_stats.restart_queue;
                tx_busy += p->tx_stats.tx_busy;
 +              tx_linearize += p->tx_stats.tx_linearize;
  
                /* Rx queue is part of the same block as Tx queue */
                p = &p[1];
        rcu_read_unlock();
        vsi->tx_restart = tx_restart;
        vsi->tx_busy = tx_busy;
 +      vsi->tx_linearize = tx_linearize;
        vsi->rx_page_failed = rx_page;
        vsi->rx_buf_failed = rx_buf;
  
@@@ -1277,7 -1256,7 +1277,7 @@@ bool i40e_is_vsi_in_vlan(struct i40e_vs
         * so we have to go through all the list in order to make sure
         */
        list_for_each_entry(f, &vsi->mac_filter_list, list) {
 -              if (f->vlan >= 0)
 +              if (f->vlan >= 0 || vsi->info.pvid)
                        return true;
        }
  
@@@ -1355,9 -1334,6 +1355,9 @@@ static int i40e_rm_default_mac_filter(s
   * @is_netdev: make sure its a netdev filter, else doesn't matter
   *
   * Returns ptr to the filter object or NULL when no memory available.
 + *
 + * NOTE: This function is expected to be called with mac_filter_list_lock
 + * being held.
   **/
  struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
                                        u8 *macaddr, s16 vlan,
@@@ -1416,9 -1392,6 +1416,9 @@@ add_filter_out
   * @vlan: the vlan
   * @is_vf: make sure it's a VF filter, else doesn't matter
   * @is_netdev: make sure it's a netdev filter, else doesn't matter
 + *
 + * NOTE: This function is expected to be called with mac_filter_list_lock
 + * being held.
   **/
  void i40e_del_filter(struct i40e_vsi *vsi,
                     u8 *macaddr, s16 vlan,
        } else {
                /* make sure we don't remove a filter in use by VF or netdev */
                int min_f = 0;
 +
                min_f += (f->is_vf ? 1 : 0);
                min_f += (f->is_netdev ? 1 : 0);
  
@@@ -1505,7 -1477,6 +1505,7 @@@ static int i40e_set_mac(struct net_devi
  
        if (vsi->type == I40E_VSI_MAIN) {
                i40e_status ret;
 +
                ret = i40e_aq_mac_address_write(&vsi->back->hw,
                                                I40E_AQC_WRITE_TYPE_LAA_WOL,
                                                addr->sa_data, NULL);
                element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
                i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
        } else {
 +              spin_lock_bh(&vsi->mac_filter_list_lock);
                i40e_del_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY,
                                false, false);
 +              spin_unlock_bh(&vsi->mac_filter_list_lock);
        }
  
        if (ether_addr_equal(addr->sa_data, hw->mac.addr)) {
                element.flags = cpu_to_le16(I40E_AQC_MACVLAN_ADD_PERFECT_MATCH);
                i40e_aq_add_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
        } else {
 +              spin_lock_bh(&vsi->mac_filter_list_lock);
                f = i40e_add_filter(vsi, addr->sa_data, I40E_VLAN_ANY,
                                    false, false);
                if (f)
                        f->is_laa = true;
 +              spin_unlock_bh(&vsi->mac_filter_list_lock);
        }
  
 -      i40e_sync_vsi_filters(vsi);
 +      i40e_sync_vsi_filters(vsi, false);
        ether_addr_copy(netdev->dev_addr, addr->sa_data);
  
        return 0;
@@@ -1717,8 -1684,6 +1717,8 @@@ static void i40e_set_rx_mode(struct net
        struct netdev_hw_addr *mca;
        struct netdev_hw_addr *ha;
  
 +      spin_lock_bh(&vsi->mac_filter_list_lock);
 +
        /* add addr if not already in the filter list */
        netdev_for_each_uc_addr(uca, netdev) {
                if (!i40e_find_mac(vsi, uca->addr, false, true)) {
  
        /* remove filter if not in netdev list */
        list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
 -              bool found = false;
  
                if (!f->is_netdev)
                        continue;
  
 -              if (is_multicast_ether_addr(f->macaddr)) {
 -                      netdev_for_each_mc_addr(mca, netdev) {
 -                              if (ether_addr_equal(mca->addr, f->macaddr)) {
 -                                      found = true;
 -                                      break;
 -                              }
 -                      }
 -              } else {
 -                      netdev_for_each_uc_addr(uca, netdev) {
 -                              if (ether_addr_equal(uca->addr, f->macaddr)) {
 -                                      found = true;
 -                                      break;
 -                              }
 -                      }
 +              netdev_for_each_mc_addr(mca, netdev)
 +                      if (ether_addr_equal(mca->addr, f->macaddr))
 +                              goto bottom_of_search_loop;
  
 -                      for_each_dev_addr(netdev, ha) {
 -                              if (ether_addr_equal(ha->addr, f->macaddr)) {
 -                                      found = true;
 -                                      break;
 -                              }
 -                      }
 -              }
 -              if (!found)
 -                      i40e_del_filter(
 -                         vsi, f->macaddr, I40E_VLAN_ANY, false, true);
 +              netdev_for_each_uc_addr(uca, netdev)
 +                      if (ether_addr_equal(uca->addr, f->macaddr))
 +                              goto bottom_of_search_loop;
 +
 +              for_each_dev_addr(netdev, ha)
 +                      if (ether_addr_equal(ha->addr, f->macaddr))
 +                              goto bottom_of_search_loop;
 +
 +              /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
 +              i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY, false, true);
 +
 +bottom_of_search_loop:
 +              continue;
        }
 +      spin_unlock_bh(&vsi->mac_filter_list_lock);
  
        /* check for other flag changes */
        if (vsi->current_netdev_flags != vsi->netdev->flags) {
        }
  }
  
 +/**
 + * i40e_mac_filter_entry_clone - Clones a MAC filter entry
 + * @src: source MAC filter entry to be clones
 + *
 + * Returns the pointer to newly cloned MAC filter entry or NULL
 + * in case of error
 + **/
 +static struct i40e_mac_filter *i40e_mac_filter_entry_clone(
 +                                      struct i40e_mac_filter *src)
 +{
 +      struct i40e_mac_filter *f;
 +
 +      f = kzalloc(sizeof(*f), GFP_ATOMIC);
 +      if (!f)
 +              return NULL;
 +      *f = *src;
 +
 +      INIT_LIST_HEAD(&f->list);
 +
 +      return f;
 +}
 +
 +/**
 + * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
 + * @vsi: pointer to vsi struct
 + * @from: Pointer to list which contains MAC filter entries - changes to
 + *        those entries needs to be undone.
 + *
 + * MAC filter entries from list were slated to be removed from device.
 + **/
 +static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
 +                                       struct list_head *from)
 +{
 +      struct i40e_mac_filter *f, *ftmp;
 +
 +      list_for_each_entry_safe(f, ftmp, from, list) {
 +              f->changed = true;
 +              /* Move the element back into MAC filter list*/
 +              list_move_tail(&f->list, &vsi->mac_filter_list);
 +      }
 +}
 +
 +/**
 + * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries
 + * @vsi: pointer to vsi struct
 + *
 + * MAC filter entries from list were slated to be added from device.
 + **/
 +static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi)
 +{
 +      struct i40e_mac_filter *f, *ftmp;
 +
 +      list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
 +              if (!f->changed && f->counter)
 +                      f->changed = true;
 +      }
 +}
 +
 +/**
 + * i40e_cleanup_add_list - Deletes the element from add list and release
 + *                    memory
 + * @add_list: Pointer to list which contains MAC filter entries
 + **/
 +static void i40e_cleanup_add_list(struct list_head *add_list)
 +{
 +      struct i40e_mac_filter *f, *ftmp;
 +
 +      list_for_each_entry_safe(f, ftmp, add_list, list) {
 +              list_del(&f->list);
 +              kfree(f);
 +      }
 +}
 +
  /**
   * i40e_sync_vsi_filters - Update the VSI filter list to the HW
   * @vsi: ptr to the VSI
 + * @grab_rtnl: whether RTNL needs to be grabbed
   *
   * Push any outstanding VSI filter changes through the AdminQ.
   *
   * Returns 0 or error value
   **/
 -int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 +int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
  {
 -      struct i40e_mac_filter *f, *ftmp;
 +      struct list_head tmp_del_list, tmp_add_list;
 +      struct i40e_mac_filter *f, *ftmp, *fclone;
        bool promisc_forced_on = false;
        bool add_happened = false;
        int filter_list_len = 0;
        u32 changed_flags = 0;
 +      bool err_cond = false;
        i40e_status ret = 0;
        struct i40e_pf *pf;
        int num_add = 0;
                vsi->current_netdev_flags = vsi->netdev->flags;
        }
  
 +      INIT_LIST_HEAD(&tmp_del_list);
 +      INIT_LIST_HEAD(&tmp_add_list);
 +
        if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
                vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
  
 -              filter_list_len = pf->hw.aq.asq_buf_size /
 -                          sizeof(struct i40e_aqc_remove_macvlan_element_data);
 -              del_list = kcalloc(filter_list_len,
 -                          sizeof(struct i40e_aqc_remove_macvlan_element_data),
 -                          GFP_KERNEL);
 -              if (!del_list)
 -                      return -ENOMEM;
 -
 +              spin_lock_bh(&vsi->mac_filter_list_lock);
                list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
                        if (!f->changed)
                                continue;
                        if (f->counter != 0)
                                continue;
                        f->changed = false;
 +
 +                      /* Move the element into temporary del_list */
 +                      list_move_tail(&f->list, &tmp_del_list);
 +              }
 +
 +              list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
 +                      if (!f->changed)
 +                              continue;
 +
 +                      if (f->counter == 0)
 +                              continue;
 +                      f->changed = false;
 +
 +                      /* Clone MAC filter entry and add into temporary list */
 +                      fclone = i40e_mac_filter_entry_clone(f);
 +                      if (!fclone) {
 +                              err_cond = true;
 +                              break;
 +                      }
 +                      list_add_tail(&fclone->list, &tmp_add_list);
 +              }
 +
 +              /* if failed to clone MAC filter entry - undo */
 +              if (err_cond) {
 +                      i40e_undo_del_filter_entries(vsi, &tmp_del_list);
 +                      i40e_undo_add_filter_entries(vsi);
 +              }
 +              spin_unlock_bh(&vsi->mac_filter_list_lock);
 +
 +              if (err_cond)
 +                      i40e_cleanup_add_list(&tmp_add_list);
 +      }
 +
 +      /* Now process 'del_list' outside the lock */
 +      if (!list_empty(&tmp_del_list)) {
 +              filter_list_len = pf->hw.aq.asq_buf_size /
 +                          sizeof(struct i40e_aqc_remove_macvlan_element_data);
 +              del_list = kcalloc(filter_list_len,
 +                          sizeof(struct i40e_aqc_remove_macvlan_element_data),
 +                          GFP_KERNEL);
 +              if (!del_list) {
 +                      i40e_cleanup_add_list(&tmp_add_list);
 +
 +                      /* Undo VSI's MAC filter entry element updates */
 +                      spin_lock_bh(&vsi->mac_filter_list_lock);
 +                      i40e_undo_del_filter_entries(vsi, &tmp_del_list);
 +                      i40e_undo_add_filter_entries(vsi);
 +                      spin_unlock_bh(&vsi->mac_filter_list_lock);
 +                      return -ENOMEM;
 +              }
 +
 +              list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) {
                        cmd_flags = 0;
  
                        /* add to delete list */
                        del_list[num_del].flags = cmd_flags;
                        num_del++;
  
 -                      /* unlink from filter list */
 -                      list_del(&f->list);
 -                      kfree(f);
 -
                        /* flush a full buffer */
                        if (num_del == filter_list_len) {
                                ret = i40e_aq_remove_macvlan(&pf->hw,
                                memset(del_list, 0, sizeof(*del_list));
  
                                if (ret && aq_err != I40E_AQ_RC_ENOENT)
 -                                      dev_info(&pf->pdev->dev,
 -                                               "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n",
 -                                               i40e_stat_str(&pf->hw, ret),
 -                                               i40e_aq_str(&pf->hw, aq_err));
 +                                      dev_err(&pf->pdev->dev,
 +                                              "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n",
 +                                              i40e_stat_str(&pf->hw, ret),
 +                                              i40e_aq_str(&pf->hw, aq_err));
                        }
 +                      /* Release memory for MAC filter entries which were
 +                       * synced up with HW.
 +                       */
 +                      list_del(&f->list);
 +                      kfree(f);
                }
 +
                if (num_del) {
                        ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
                                                     del_list, num_del, NULL);
  
                kfree(del_list);
                del_list = NULL;
 +      }
 +
 +      if (!list_empty(&tmp_add_list)) {
  
                /* do all the adds now */
                filter_list_len = pf->hw.aq.asq_buf_size /
                add_list = kcalloc(filter_list_len,
                               sizeof(struct i40e_aqc_add_macvlan_element_data),
                               GFP_KERNEL);
 -              if (!add_list)
 +              if (!add_list) {
 +                      /* Purge element from temporary lists */
 +                      i40e_cleanup_add_list(&tmp_add_list);
 +
 +                      /* Undo add filter entries from VSI MAC filter list */
 +                      spin_lock_bh(&vsi->mac_filter_list_lock);
 +                      i40e_undo_add_filter_entries(vsi);
 +                      spin_unlock_bh(&vsi->mac_filter_list_lock);
                        return -ENOMEM;
 +              }
  
 -              list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
 -                      if (!f->changed)
 -                              continue;
 +              list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) {
  
 -                      if (f->counter == 0)
 -                              continue;
 -                      f->changed = false;
                        add_happened = true;
                        cmd_flags = 0;
  
                                        break;
                                memset(add_list, 0, sizeof(*add_list));
                        }
 +                      /* Entries from tmp_add_list were cloned from MAC
 +                       * filter list, hence clean those cloned entries
 +                       */
 +                      list_del(&f->list);
 +                      kfree(f);
                }
 +
                if (num_add) {
                        ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
                                                  add_list, num_add, NULL);
        /* check for changes in promiscuous modes */
        if (changed_flags & IFF_ALLMULTI) {
                bool cur_multipromisc;
 +
                cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
                ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
                                                            vsi->seid,
        }
        if ((changed_flags & IFF_PROMISC) || promisc_forced_on) {
                bool cur_promisc;
 +
                cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
                               test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
                                        &vsi->state));
                         */
                        if (pf->cur_promisc != cur_promisc) {
                                pf->cur_promisc = cur_promisc;
 -                              i40e_do_reset_safe(pf,
 +                              if (grab_rtnl)
 +                                      i40e_do_reset_safe(pf,
 +                                              BIT(__I40E_PF_RESET_REQUESTED));
 +                              else
 +                                      i40e_do_reset(pf,
                                                BIT(__I40E_PF_RESET_REQUESTED));
                        }
                } else {
@@@ -2167,7 -1996,7 +2167,7 @@@ static void i40e_sync_filters_subtask(s
        for (v = 0; v < pf->num_alloc_vsi; v++) {
                if (pf->vsi[v] &&
                    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED))
 -                      i40e_sync_vsi_filters(pf->vsi[v]);
 +                      i40e_sync_vsi_filters(pf->vsi[v], true);
        }
  }
  
@@@ -2308,9 -2137,6 +2308,9 @@@ int i40e_vsi_add_vlan(struct i40e_vsi *
        is_vf = (vsi->type == I40E_VSI_SRIOV);
        is_netdev = !!(vsi->netdev);
  
 +      /* Locked once because all functions invoked below iterate the list */
 +      spin_lock_bh(&vsi->mac_filter_list_lock);
 +
        if (is_netdev) {
                add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid,
                                        is_vf, is_netdev);
                        dev_info(&vsi->back->pdev->dev,
                                 "Could not add vlan filter %d for %pM\n",
                                 vid, vsi->netdev->dev_addr);
 +                      spin_unlock_bh(&vsi->mac_filter_list_lock);
                        return -ENOMEM;
                }
        }
                        dev_info(&vsi->back->pdev->dev,
                                 "Could not add vlan filter %d for %pM\n",
                                 vid, f->macaddr);
 +                      spin_unlock_bh(&vsi->mac_filter_list_lock);
                        return -ENOMEM;
                }
        }
                                dev_info(&vsi->back->pdev->dev,
                                         "Could not add filter 0 for %pM\n",
                                         vsi->netdev->dev_addr);
 +                              spin_unlock_bh(&vsi->mac_filter_list_lock);
                                return -ENOMEM;
                        }
                }
        /* Do not assume that I40E_VLAN_ANY should be reset to VLAN 0 */
        if (vid > 0 && !vsi->info.pvid) {
                list_for_each_entry(f, &vsi->mac_filter_list, list) {
 -                      if (i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
 -                                           is_vf, is_netdev)) {
 -                              i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
 -                                              is_vf, is_netdev);
 -                              add_f = i40e_add_filter(vsi, f->macaddr,
 -                                                      0, is_vf, is_netdev);
 -                              if (!add_f) {
 -                                      dev_info(&vsi->back->pdev->dev,
 -                                               "Could not add filter 0 for %pM\n",
 -                                               f->macaddr);
 -                                      return -ENOMEM;
 -                              }
 +                      if (!i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
 +                                            is_vf, is_netdev))
 +                              continue;
 +                      i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
 +                                      is_vf, is_netdev);
 +                      add_f = i40e_add_filter(vsi, f->macaddr,
 +                                              0, is_vf, is_netdev);
 +                      if (!add_f) {
 +                              dev_info(&vsi->back->pdev->dev,
 +                                       "Could not add filter 0 for %pM\n",
 +                                      f->macaddr);
 +                              spin_unlock_bh(&vsi->mac_filter_list_lock);
 +                              return -ENOMEM;
                        }
                }
        }
  
 +      /* Make sure to release before sync_vsi_filter because that
 +       * function will lock/unlock as necessary
 +       */
 +      spin_unlock_bh(&vsi->mac_filter_list_lock);
 +
        if (test_bit(__I40E_DOWN, &vsi->back->state) ||
            test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
                return 0;
  
 -      return i40e_sync_vsi_filters(vsi);
 +      return i40e_sync_vsi_filters(vsi, false);
  }
  
  /**
@@@ -2406,9 -2223,6 +2406,9 @@@ int i40e_vsi_kill_vlan(struct i40e_vsi 
        is_vf = (vsi->type == I40E_VSI_SRIOV);
        is_netdev = !!(netdev);
  
 +      /* Locked once because all functions invoked below iterate the list */
 +      spin_lock_bh(&vsi->mac_filter_list_lock);
 +
        if (is_netdev)
                i40e_del_filter(vsi, netdev->dev_addr, vid, is_vf, is_netdev);
  
                        dev_info(&vsi->back->pdev->dev,
                                 "Could not add filter %d for %pM\n",
                                 I40E_VLAN_ANY, netdev->dev_addr);
 +                      spin_unlock_bh(&vsi->mac_filter_list_lock);
                        return -ENOMEM;
                }
        }
                list_for_each_entry(f, &vsi->mac_filter_list, list) {
                        i40e_del_filter(vsi, f->macaddr, 0, is_vf, is_netdev);
                        add_f = i40e_add_filter(vsi, f->macaddr, I40E_VLAN_ANY,
 -                                          is_vf, is_netdev);
 +                                              is_vf, is_netdev);
                        if (!add_f) {
                                dev_info(&vsi->back->pdev->dev,
                                         "Could not add filter %d for %pM\n",
                                         I40E_VLAN_ANY, f->macaddr);
 +                              spin_unlock_bh(&vsi->mac_filter_list_lock);
                                return -ENOMEM;
                        }
                }
        }
  
 +      /* Make sure to release before sync_vsi_filter because that
 +       * function will lock/unlock as necessary
 +       */
 +      spin_unlock_bh(&vsi->mac_filter_list_lock);
 +
        if (test_bit(__I40E_DOWN, &vsi->back->state) ||
            test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
                return 0;
  
 -      return i40e_sync_vsi_filters(vsi);
 +      return i40e_sync_vsi_filters(vsi, false);
  }
  
  /**
@@@ -2802,6 -2609,8 +2802,6 @@@ static int i40e_configure_tx_ring(struc
        wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
        i40e_flush(hw);
  
 -      clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
 -
        /* cache tail off for easier writes later */
        ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
  
@@@ -3073,9 -2882,11 +3073,9 @@@ static int i40e_vsi_configure(struct i4
  static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
  {
        struct i40e_pf *pf = vsi->back;
 -      struct i40e_q_vector *q_vector;
        struct i40e_hw *hw = &pf->hw;
        u16 vector;
        int i, q;
 -      u32 val;
        u32 qp;
  
        /* The interrupt indexing is offset by 1 in the PFINT_ITRn
        qp = vsi->base_queue;
        vector = vsi->base_vector;
        for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
 -              q_vector = vsi->q_vectors[i];
 +              struct i40e_q_vector *q_vector = vsi->q_vectors[i];
 +
 +              q_vector->itr_countdown = ITR_COUNTDOWN_START;
                q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
                q_vector->rx.latency_range = I40E_LOW_LATENCY;
                wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
                q_vector->tx.latency_range = I40E_LOW_LATENCY;
                wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
                     q_vector->tx.itr);
 +              wr32(hw, I40E_PFINT_RATEN(vector - 1),
 +                   INTRL_USEC_TO_REG(vsi->int_rate_limit));
  
                /* Linked list for the queuepairs assigned to this vector */
                wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
                for (q = 0; q < q_vector->num_ringpairs; q++) {
 +                      u32 val;
 +
                        val = I40E_QINT_RQCTL_CAUSE_ENA_MASK |
                              (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT)  |
                              (vector      << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
@@@ -3183,7 -2988,6 +3183,7 @@@ static void i40e_configure_msi_and_lega
        u32 val;
  
        /* set the ITR configuration */
 +      q_vector->itr_countdown = ITR_COUNTDOWN_START;
        q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
        q_vector->rx.latency_range = I40E_LOW_LATENCY;
        wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
@@@ -3241,6 -3045,24 +3241,6 @@@ void i40e_irq_dynamic_enable_icr0(struc
        i40e_flush(hw);
  }
  
 -/**
 - * i40e_irq_dynamic_enable - Enable default interrupt generation settings
 - * @vsi: pointer to a vsi
 - * @vector: enable a particular Hw Interrupt vector
 - **/
 -void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
 -{
 -      struct i40e_pf *pf = vsi->back;
 -      struct i40e_hw *hw = &pf->hw;
 -      u32 val;
 -
 -      val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 -            I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
 -            (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
 -      wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
 -      /* skip the flush */
 -}
 -
  /**
   * i40e_irq_dynamic_disable - Disable default interrupt generation settings
   * @vsi: pointer to a vsi
@@@ -3269,7 -3091,7 +3269,7 @@@ static irqreturn_t i40e_msix_clean_ring
        if (!q_vector->tx.ring && !q_vector->rx.ring)
                return IRQ_HANDLED;
  
 -      napi_schedule(&q_vector->napi);
 +      napi_schedule_irqoff(&q_vector->napi);
  
        return IRQ_HANDLED;
  }
@@@ -3314,7 -3136,8 +3314,7 @@@ static int i40e_vsi_request_irq_msix(st
                                  q_vector);
                if (err) {
                        dev_info(&pf->pdev->dev,
 -                               "%s: request_irq failed, error: %d\n",
 -                               __func__, err);
 +                               "MSIX request_irq failed, error: %d\n", err);
                        goto free_queue_irqs;
                }
                /* assign the mask for this irq */
@@@ -3379,7 -3202,8 +3379,7 @@@ static int i40e_vsi_enable_irq(struct i
        int i;
  
        if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
 -              for (i = vsi->base_vector;
 -                   i < (vsi->num_q_vectors + vsi->base_vector); i++)
 +              for (i = 0; i < vsi->num_q_vectors; i++)
                        i40e_irq_dynamic_enable(vsi, i);
        } else {
                i40e_irq_dynamic_enable_icr0(pf);
@@@ -3438,12 -3262,9 +3438,12 @@@ static irqreturn_t i40e_intr(int irq, v
  
        /* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
        if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) {
 +              struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 +              struct i40e_q_vector *q_vector = vsi->q_vectors[0];
  
                /* temporarily disable queue cause for NAPI processing */
                u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
 +
                qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
                wr32(hw, I40E_QINT_RQCTL(0), qval);
  
                wr32(hw, I40E_QINT_TQCTL(0), qval);
  
                if (!test_bit(__I40E_DOWN, &pf->state))
 -                      napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0]->napi);
 +                      napi_schedule_irqoff(&q_vector->napi);
        }
  
        if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
@@@ -3613,9 -3434,10 +3613,9 @@@ static bool i40e_clean_fdir_tx_irq(stru
        i += tx_ring->count;
        tx_ring->next_to_clean = i;
  
 -      if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
 -              i40e_irq_dynamic_enable(vsi,
 -                              tx_ring->q_vector->v_idx + vsi->base_vector);
 -      }
 +      if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED)
 +              i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
 +
        return budget > 0;
  }
  
@@@ -3753,12 -3575,14 +3753,12 @@@ static void i40e_netpoll(struct net_dev
        if (test_bit(__I40E_DOWN, &vsi->state))
                return;
  
 -      pf->flags |= I40E_FLAG_IN_NETPOLL;
        if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
                for (i = 0; i < vsi->num_q_vectors; i++)
                        i40e_msix_clean_rings(0, vsi->q_vectors[i]);
        } else {
                i40e_intr(pf->pdev->irq, netdev);
        }
 -      pf->flags &= ~I40E_FLAG_IN_NETPOLL;
  }
  #endif
  
@@@ -3839,8 -3663,9 +3839,8 @@@ static int i40e_vsi_control_tx(struct i
                ret = i40e_pf_txq_wait(pf, pf_q, enable);
                if (ret) {
                        dev_info(&pf->pdev->dev,
 -                               "%s: VSI seid %d Tx ring %d %sable timeout\n",
 -                               __func__, vsi->seid, pf_q,
 -                               (enable ? "en" : "dis"));
 +                               "VSI seid %d Tx ring %d %sable timeout\n",
 +                               vsi->seid, pf_q, (enable ? "en" : "dis"));
                        break;
                }
        }
@@@ -3916,8 -3741,9 +3916,8 @@@ static int i40e_vsi_control_rx(struct i
                ret = i40e_pf_rxq_wait(pf, pf_q, enable);
                if (ret) {
                        dev_info(&pf->pdev->dev,
 -                               "%s: VSI seid %d Rx ring %d %sable timeout\n",
 -                               __func__, vsi->seid, pf_q,
 -                               (enable ? "en" : "dis"));
 +                               "VSI seid %d Rx ring %d %sable timeout\n",
 +                               vsi->seid, pf_q, (enable ? "en" : "dis"));
                        break;
                }
        }
@@@ -4212,15 -4038,17 +4212,15 @@@ static void i40e_quiesce_vsi(struct i40
        if ((test_bit(__I40E_PORT_TX_SUSPENDED, &vsi->back->state)) &&
            vsi->type == I40E_VSI_FCOE) {
                dev_dbg(&vsi->back->pdev->dev,
 -                      "%s: VSI seid %d skipping FCoE VSI disable\n",
 -                       __func__, vsi->seid);
 +                       "VSI seid %d skipping FCoE VSI disable\n", vsi->seid);
                return;
        }
  
        set_bit(__I40E_NEEDS_RESTART, &vsi->state);
 -      if (vsi->netdev && netif_running(vsi->netdev)) {
 +      if (vsi->netdev && netif_running(vsi->netdev))
                vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
 -      } else {
 +      else
                i40e_vsi_close(vsi);
 -      }
  }
  
  /**
@@@ -4285,8 -4113,8 +4285,8 @@@ static int i40e_vsi_wait_txq_disabled(s
                ret = i40e_pf_txq_wait(pf, pf_q, false);
                if (ret) {
                        dev_info(&pf->pdev->dev,
 -                               "%s: VSI seid %d Tx ring %d disable timeout\n",
 -                               __func__, vsi->seid, pf_q);
 +                               "VSI seid %d Tx ring %d disable timeout\n",
 +                               vsi->seid, pf_q);
                        return ret;
                }
        }
@@@ -4318,108 -4146,6 +4318,108 @@@ static int i40e_pf_wait_txq_disabled(st
  }
  
  #endif
 +
 +/**
 + * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue
 + * @q_idx: TX queue number
 + * @vsi: Pointer to VSI struct
 + *
 + * This function checks specified queue for given VSI. Detects hung condition.
 + * Sets hung bit since it is two step process. Before next run of service task
 + * if napi_poll runs, it resets the 'hung' bit for the respective q_vector. If not,
 + * the hung condition remains unchanged and, during the subsequent run, this function
 + * issues SW interrupt to recover from hung condition.
 + **/
 +static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
 +{
 +      struct i40e_ring *tx_ring = NULL;
 +      struct i40e_pf  *pf;
 +      u32 head, val, tx_pending;
 +      int i;
 +
 +      pf = vsi->back;
 +
 +      /* now that we have an index, find the tx_ring struct */
 +      for (i = 0; i < vsi->num_queue_pairs; i++) {
 +              if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
 +                      if (q_idx == vsi->tx_rings[i]->queue_index) {
 +                              tx_ring = vsi->tx_rings[i];
 +                              break;
 +                      }
 +              }
 +      }
 +
 +      if (!tx_ring)
 +              return;
 +
 +      /* Read interrupt register */
 +      if (pf->flags & I40E_FLAG_MSIX_ENABLED)
 +              val = rd32(&pf->hw,
 +                         I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
 +                                             tx_ring->vsi->base_vector - 1));
 +      else
 +              val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
 +
 +      head = i40e_get_head(tx_ring);
 +
 +      tx_pending = i40e_get_tx_pending(tx_ring);
 +
 +      /* Interrupts are disabled and TX pending is non-zero,
 +       * trigger the SW interrupt (don't wait). Worst case
 +       * there will be one extra interrupt which may result
 +       * in not cleaning any queues because the queues were already cleaned.
 +       */
 +      if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
 +              i40e_force_wb(vsi, tx_ring->q_vector);
 +}
 +
 +/**
 + * i40e_detect_recover_hung - Function to detect and recover hung_queues
 + * @pf:  pointer to PF struct
 + *
 + * LAN VSI has netdev and netdev has TX queues. This function is to check
 + * each of those TX queues if they are hung, trigger recovery by issuing
 + * SW interrupt.
 + **/
 +static void i40e_detect_recover_hung(struct i40e_pf *pf)
 +{
 +      struct net_device *netdev;
 +      struct i40e_vsi *vsi;
 +      int i;
 +
 +      /* Only for LAN VSI */
 +      vsi = pf->vsi[pf->lan_vsi];
 +
 +      if (!vsi)
 +              return;
 +
 +      /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */
 +      if (test_bit(__I40E_DOWN, &vsi->back->state) ||
 +          test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
 +              return;
 +
 +      /* Make sure type is MAIN VSI */
 +      if (vsi->type != I40E_VSI_MAIN)
 +              return;
 +
 +      netdev = vsi->netdev;
 +      if (!netdev)
 +              return;
 +
 +      /* Bail out if netif_carrier is not OK */
 +      if (!netif_carrier_ok(netdev))
 +              return;
 +
 +      /* Go thru' TX queues for netdev */
 +      for (i = 0; i < netdev->num_tx_queues; i++) {
 +              struct netdev_queue *q;
 +
 +              q = netdev_get_tx_queue(netdev, i);
 +              if (q)
 +                      i40e_detect_recover_hung_queue(i, vsi);
 +      }
 +}
 +
  /**
   * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
   * @pf: pointer to PF
   * i40e_print_link_message - print link up or down
   * @vsi: the VSI for which link needs a message
   */
 -static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 +void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
  {
 -      char speed[SPEED_SIZE] = "Unknown";
 -      char fc[FC_SIZE] = "RX/TX";
 +      char *speed = "Unknown";
 +      char *fc = "Unknown";
  
 +      if (vsi->current_isup == isup)
 +              return;
 +      vsi->current_isup = isup;
        if (!isup) {
                netdev_info(vsi->netdev, "NIC Link is Down\n");
                return;
  
        switch (vsi->back->hw.phy.link_info.link_speed) {
        case I40E_LINK_SPEED_40GB:
 -              strlcpy(speed, "40 Gbps", SPEED_SIZE);
 +              speed = "40 G";
                break;
        case I40E_LINK_SPEED_20GB:
 -              strncpy(speed, "20 Gbps", SPEED_SIZE);
 +              speed = "20 G";
                break;
        case I40E_LINK_SPEED_10GB:
 -              strlcpy(speed, "10 Gbps", SPEED_SIZE);
 +              speed = "10 G";
                break;
        case I40E_LINK_SPEED_1GB:
 -              strlcpy(speed, "1000 Mbps", SPEED_SIZE);
 +              speed = "1000 M";
                break;
        case I40E_LINK_SPEED_100MB:
 -              strncpy(speed, "100 Mbps", SPEED_SIZE);
 +              speed = "100 M";
                break;
        default:
                break;
  
        switch (vsi->back->hw.fc.current_mode) {
        case I40E_FC_FULL:
 -              strlcpy(fc, "RX/TX", FC_SIZE);
 +              fc = "RX/TX";
                break;
        case I40E_FC_TX_PAUSE:
 -              strlcpy(fc, "TX", FC_SIZE);
 +              fc = "TX";
                break;
        case I40E_FC_RX_PAUSE:
 -              strlcpy(fc, "RX", FC_SIZE);
 +              fc = "RX";
                break;
        default:
 -              strlcpy(fc, "None", FC_SIZE);
 +              fc = "None";
                break;
        }
  
 -      netdev_info(vsi->netdev, "NIC Link is Up %s Full Duplex, Flow Control: %s\n",
 +      netdev_info(vsi->netdev, "NIC Link is Up %sbps Full Duplex, Flow Control: %s\n",
                    speed, fc);
  }
  
@@@ -5495,13 -5218,15 +5495,13 @@@ void i40e_do_reset(struct i40e_pf *pf, 
                         "VSI reinit requested\n");
                for (v = 0; v < pf->num_alloc_vsi; v++) {
                        struct i40e_vsi *vsi = pf->vsi[v];
 +
                        if (vsi != NULL &&
                            test_bit(__I40E_REINIT_REQUESTED, &vsi->state)) {
                                i40e_vsi_reinit_locked(pf->vsi[v]);
                                clear_bit(__I40E_REINIT_REQUESTED, &vsi->state);
                        }
                }
 -
 -              /* no further action needed, so return now */
 -              return;
        } else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) {
                int v;
  
                dev_info(&pf->pdev->dev, "VSI down requested\n");
                for (v = 0; v < pf->num_alloc_vsi; v++) {
                        struct i40e_vsi *vsi = pf->vsi[v];
 +
                        if (vsi != NULL &&
                            test_bit(__I40E_DOWN_REQUESTED, &vsi->state)) {
                                set_bit(__I40E_DOWN, &vsi->state);
                                clear_bit(__I40E_DOWN_REQUESTED, &vsi->state);
                        }
                }
 -
 -              /* no further action needed, so return now */
 -              return;
        } else {
                dev_info(&pf->pdev->dev,
                         "bad reset request 0x%08x\n", reset_flags);
 -              return;
        }
  }
  
@@@ -5575,7 -5303,8 +5575,7 @@@ bool i40e_dcb_need_reconfig(struct i40e
                dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
        }
  
 -      dev_dbg(&pf->pdev->dev, "%s: need_reconfig=%d\n", __func__,
 -              need_reconfig);
 +      dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
        return need_reconfig;
  }
  
@@@ -5602,14 -5331,16 +5602,14 @@@ static int i40e_handle_lldp_event(struc
        /* Ignore if event is not for Nearest Bridge */
        type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
                & I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
 -      dev_dbg(&pf->pdev->dev,
 -              "%s: LLDP event mib bridge type 0x%x\n", __func__, type);
 +      dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type);
        if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE)
                return ret;
  
        /* Check MIB Type and return if event for Remote MIB update */
        type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
        dev_dbg(&pf->pdev->dev,
 -              "%s: LLDP event mib type %s\n", __func__,
 -              type ? "remote" : "local");
 +              "LLDP event mib type %s\n", type ? "remote" : "local");
        if (type == I40E_AQ_LLDP_MIB_REMOTE) {
                /* Update the remote cached instance and return */
                ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
@@@ -5794,9 -5525,7 +5794,9 @@@ u32 i40e_get_global_fd_count(struct i40
   **/
  void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
  {
 +      struct i40e_fdir_filter *filter;
        u32 fcnt_prog, fcnt_avail;
 +      struct hlist_node *node;
  
        if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
                return;
                                dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n");
                }
        }
 +
 +      /* if hw had a problem adding a filter, delete it */
 +      if (pf->fd_inv > 0) {
 +              hlist_for_each_entry_safe(filter, node,
 +                                        &pf->fdir_filter_list, fdir_node) {
 +                      if (filter->fd_id == pf->fd_inv) {
 +                              hlist_del(&filter->fdir_node);
 +                              kfree(filter);
 +                              pf->fdir_pf_active_filters--;
 +                      }
 +              }
 +      }
  }
  
  #define I40E_MIN_FD_FLUSH_INTERVAL 10
@@@ -5856,51 -5573,49 +5856,51 @@@ static void i40e_fdir_flush_and_replay(
        if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)))
                return;
  
 -      if (time_after(jiffies, pf->fd_flush_timestamp +
 -                              (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) {
 -              /* If the flush is happening too quick and we have mostly
 -               * SB rules we should not re-enable ATR for some time.
 -               */
 -              min_flush_time = pf->fd_flush_timestamp
 -                              + (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
 -              fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
 +      if (!time_after(jiffies, pf->fd_flush_timestamp +
 +                               (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
 +              return;
  
 -              if (!(time_after(jiffies, min_flush_time)) &&
 -                  (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
 -                      if (I40E_DEBUG_FD & pf->hw.debug_mask)
 -                              dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
 -                      disable_atr = true;
 -              }
 +      /* If the flush is happening too quick and we have mostly SB rules we
 +       * should not re-enable ATR for some time.
 +       */
 +      min_flush_time = pf->fd_flush_timestamp +
 +                       (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
 +      fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
  
 -              pf->fd_flush_timestamp = jiffies;
 -              pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 -              /* flush all filters */
 -              wr32(&pf->hw, I40E_PFQF_CTL_1,
 -                   I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
 -              i40e_flush(&pf->hw);
 -              pf->fd_flush_cnt++;
 -              pf->fd_add_err = 0;
 -              do {
 -                      /* Check FD flush status every 5-6msec */
 -                      usleep_range(5000, 6000);
 -                      reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
 -                      if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
 -                              break;
 -              } while (flush_wait_retry--);
 -              if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
 -                      dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
 -              } else {
 -                      /* replay sideband filters */
 -                      i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
 -                      if (!disable_atr)
 -                              pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
 -                      clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
 -                      if (I40E_DEBUG_FD & pf->hw.debug_mask)
 -                              dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
 -              }
 +      if (!(time_after(jiffies, min_flush_time)) &&
 +          (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
 +              if (I40E_DEBUG_FD & pf->hw.debug_mask)
 +                      dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
 +              disable_atr = true;
 +      }
 +
 +      pf->fd_flush_timestamp = jiffies;
 +      pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 +      /* flush all filters */
 +      wr32(&pf->hw, I40E_PFQF_CTL_1,
 +           I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
 +      i40e_flush(&pf->hw);
 +      pf->fd_flush_cnt++;
 +      pf->fd_add_err = 0;
 +      do {
 +              /* Check FD flush status every 5-6msec */
 +              usleep_range(5000, 6000);
 +              reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
 +              if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
 +                      break;
 +      } while (flush_wait_retry--);
 +      if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
 +              dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
 +      } else {
 +              /* replay sideband filters */
 +              i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
 +              if (!disable_atr)
 +                      pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
 +              clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
 +              if (I40E_DEBUG_FD & pf->hw.debug_mask)
 +                      dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
        }
 +
  }
  
  /**
@@@ -6008,23 -5723,15 +6008,23 @@@ static void i40e_veb_link_event(struct 
   **/
  static void i40e_link_event(struct i40e_pf *pf)
  {
 -      bool new_link, old_link;
        struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
        u8 new_link_speed, old_link_speed;
 +      i40e_status status;
 +      bool new_link, old_link;
  
        /* set this to force the get_link_status call to refresh state */
        pf->hw.phy.get_link_info = true;
  
        old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
 -      new_link = i40e_get_link_status(&pf->hw);
 +
 +      status = i40e_get_link_status(&pf->hw, &new_link);
 +      if (status) {
 +              dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
 +                      status);
 +              return;
 +      }
 +
        old_link_speed = pf->hw.phy.link_info_old.link_speed;
        new_link_speed = pf->hw.phy.link_info.link_speed;
  
                i40e_ptp_set_increment(pf);
  }
  
 -/**
 - * i40e_check_hang_subtask - Check for hung queues and dropped interrupts
 - * @pf: board private structure
 - *
 - * Set the per-queue flags to request a check for stuck queues in the irq
 - * clean functions, then force interrupts to be sure the irq clean is called.
 - **/
 -static void i40e_check_hang_subtask(struct i40e_pf *pf)
 -{
 -      int i, v;
 -
 -      /* If we're down or resetting, just bail */
 -      if (test_bit(__I40E_DOWN, &pf->state) ||
 -          test_bit(__I40E_CONFIG_BUSY, &pf->state))
 -              return;
 -
 -      /* for each VSI/netdev
 -       *     for each Tx queue
 -       *         set the check flag
 -       *     for each q_vector
 -       *         force an interrupt
 -       */
 -      for (v = 0; v < pf->num_alloc_vsi; v++) {
 -              struct i40e_vsi *vsi = pf->vsi[v];
 -              int armed = 0;
 -
 -              if (!pf->vsi[v] ||
 -                  test_bit(__I40E_DOWN, &vsi->state) ||
 -                  (vsi->netdev && !netif_carrier_ok(vsi->netdev)))
 -                      continue;
 -
 -              for (i = 0; i < vsi->num_queue_pairs; i++) {
 -                      set_check_for_tx_hang(vsi->tx_rings[i]);
 -                      if (test_bit(__I40E_HANG_CHECK_ARMED,
 -                                   &vsi->tx_rings[i]->state))
 -                              armed++;
 -              }
 -
 -              if (armed) {
 -                      if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
 -                              wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0,
 -                                   (I40E_PFINT_DYN_CTL0_INTENA_MASK |
 -                                    I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
 -                                    I40E_PFINT_DYN_CTL0_ITR_INDX_MASK |
 -                                    I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK |
 -                                    I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK));
 -                      } else {
 -                              u16 vec = vsi->base_vector - 1;
 -                              u32 val = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
 -                                    I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
 -                                    I40E_PFINT_DYN_CTLN_ITR_INDX_MASK |
 -                                    I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
 -                                    I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK);
 -                              for (i = 0; i < vsi->num_q_vectors; i++, vec++)
 -                                      wr32(&vsi->back->hw,
 -                                           I40E_PFINT_DYN_CTLN(vec), val);
 -                      }
 -                      i40e_flush(&vsi->back->hw);
 -              }
 -      }
 -}
 -
  /**
   * i40e_watchdog_subtask - periodic checks not using event driven response
   * @pf: board private structure
@@@ -6071,8 -5840,8 +6071,8 @@@ static void i40e_watchdog_subtask(struc
                return;
        pf->service_timer_previous = jiffies;
  
 -      i40e_check_hang_subtask(pf);
 -      i40e_link_event(pf);
 +      if (pf->flags & I40E_FLAG_LINK_POLLING_ENABLED)
 +              i40e_link_event(pf);
  
        /* Update the stats for active netdevs so the network stack
         * can look at updated numbers whenever it cares to
                if (pf->vsi[i] && pf->vsi[i]->netdev)
                        i40e_update_stats(pf->vsi[i]);
  
 -      /* Update the stats for the active switching components */
 -      for (i = 0; i < I40E_MAX_VEB; i++)
 -              if (pf->veb[i])
 -                      i40e_update_veb_stats(pf->veb[i]);
 +      if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) {
 +              /* Update the stats for the active switching components */
 +              for (i = 0; i < I40E_MAX_VEB; i++)
 +                      if (pf->veb[i])
 +                              i40e_update_veb_stats(pf->veb[i]);
 +      }
  
        i40e_ptp_rx_hang(pf->vsi[pf->lan_vsi]);
  }
@@@ -6397,9 -6164,8 +6397,9 @@@ static void i40e_config_bridge_mode(str
  {
        struct i40e_pf *pf = veb->pf;
  
 -      dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
 -               veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
 +      if (pf->hw.debug_mask & I40E_DEBUG_LAN)
 +              dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
 +                       veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
        if (veb->bridge_mode & BRIDGE_MODE_VEPA)
                i40e_disable_pf_switch_lb(pf);
        else
@@@ -6466,7 -6232,6 +6466,7 @@@ static int i40e_reconstitute_veb(struc
  
                if (pf->vsi[v]->veb_idx == veb->idx) {
                        struct i40e_vsi *vsi = pf->vsi[v];
 +
                        vsi->uplink_seid = veb->seid;
                        ret = i40e_add_vsi(vsi);
                        if (ret) {
@@@ -6531,6 -6296,12 +6531,6 @@@ static int i40e_get_capabilities(struc
                }
        } while (err);
  
 -      if (((pf->hw.aq.fw_maj_ver == 2) && (pf->hw.aq.fw_min_ver < 22)) ||
 -          (pf->hw.aq.fw_maj_ver < 2)) {
 -              pf->hw.func_caps.num_msix_vectors++;
 -              pf->hw.func_caps.num_msix_vectors_vf++;
 -      }
 -
        if (pf->hw.debug_mask & I40E_DEBUG_USER)
                dev_info(&pf->pdev->dev,
                         "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
@@@ -6743,7 -6514,9 +6743,7 @@@ static void i40e_reset_and_rebuild(stru
        }
  #endif /* CONFIG_I40E_DCB */
  #ifdef I40E_FCOE
 -      ret = i40e_init_pf_fcoe(pf);
 -      if (ret)
 -              dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", ret);
 +      i40e_init_pf_fcoe(pf);
  
  #endif
        /* do basic switch setup */
        /* make sure our flow control settings are restored */
        ret = i40e_set_fc(&pf->hw, &set_fc_aq_fail, true);
        if (ret)
 -              dev_info(&pf->pdev->dev, "set fc fail, err %s aq_err %s\n",
 -                       i40e_stat_str(&pf->hw, ret),
 -                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 +              dev_dbg(&pf->pdev->dev, "setting flow control: ret = %s last_status = %s\n",
 +                      i40e_stat_str(&pf->hw, ret),
 +                      i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
  
        /* Rebuild the VSIs and VEBs that existed before reset.
         * They are still in our local switch element arrays, so only
        if (pf->flags & I40E_FLAG_MSIX_ENABLED)
                ret = i40e_setup_misc_vector(pf);
  
 +      /* Add a filter to drop all Flow control frames from any VSI from being
 +       * transmitted. By doing so we stop a malicious VF from sending out
 +       * PAUSE or PFC frames and potentially controlling traffic for other
 +       * PF/VF VSIs.
 +       * The FW can still send Flow control frames if enabled.
 +       */
 +      i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
 +                                                     pf->main_vsi_seid);
 +
        /* restart the VSIs that were rebuilt and running before the reset */
        i40e_pf_unquiesce_all_vsi(pf);
  
@@@ -7044,7 -6808,6 +7044,7 @@@ static void i40e_service_task(struct wo
                return;
        }
  
 +      i40e_detect_recover_hung(pf);
        i40e_reset_subtask(pf);
        i40e_handle_mdd_event(pf);
        i40e_vc_process_vflr_event(pf);
@@@ -7228,7 -6991,6 +7228,7 @@@ static int i40e_vsi_mem_alloc(struct i4
        vsi->idx = vsi_idx;
        vsi->rx_itr_setting = pf->rx_itr_default;
        vsi->tx_itr_setting = pf->tx_itr_default;
 +      vsi->int_rate_limit = 0;
        vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
                                pf->rss_table_size : 64;
        vsi->netdev_registered = false;
        /* Setup default MSIX irq handler for VSI */
        i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
  
 +      /* Initialize VSI lock */
 +      spin_lock_init(&vsi->mac_filter_list_lock);
        pf->vsi[vsi_idx] = vsi;
        ret = vsi_idx;
        goto unlock_pf;
@@@ -7806,7 -7566,7 +7806,7 @@@ static int i40e_config_rss_aq(struct i4
                         "Cannot set RSS key, err %s aq_err %s\n",
                         i40e_stat_str(&pf->hw, ret),
                         i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 -              return ret;
 +              goto config_rss_aq_out;
        }
  
        if (vsi->type == I40E_VSI_MAIN)
                         i40e_stat_str(&pf->hw, ret),
                         i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
  
 +config_rss_aq_out:
 +      kfree(rss_lut);
        return ret;
  }
  
@@@ -8096,7 -7854,6 +8096,7 @@@ static int i40e_sw_init(struct i40e_pf 
        /* Set default capability flags */
        pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
                    I40E_FLAG_MSI_ENABLED     |
 +                  I40E_FLAG_LINK_POLLING_ENABLED |
                    I40E_FLAG_MSIX_ENABLED;
  
        if (iommu_present(&pci_bus_type))
            (pf->hw.func_caps.fd_filters_best_effort > 0)) {
                pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
                pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
 -              if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) {
 -                      pf->flags |= I40E_FLAG_FD_SB_ENABLED;
 -              } else {
 +              if (pf->flags & I40E_FLAG_MFP_ENABLED &&
 +                  pf->hw.num_partitions > 1)
                        dev_info(&pf->pdev->dev,
                                 "Flow Director Sideband mode Disabled in MFP mode\n");
 -              }
 +              else
 +                      pf->flags |= I40E_FLAG_FD_SB_ENABLED;
                pf->fdir_pf_filter_count =
                                 pf->hw.func_caps.fd_filters_guaranteed;
                pf->hw.fdir_shared_filter_count =
        if (pf->hw.func_caps.vmdq) {
                pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
                pf->flags |= I40E_FLAG_VMDQ_ENABLED;
+               pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
        }
  
  #ifdef I40E_FCOE
 -      err = i40e_init_pf_fcoe(pf);
 -      if (err)
 -              dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", err);
 +      i40e_init_pf_fcoe(pf);
  
  #endif /* I40E_FCOE */
  #ifdef CONFIG_PCI_IOV
        pf->lan_veb = I40E_NO_VEB;
        pf->lan_vsi = I40E_NO_VSI;
  
 +      /* By default FW has this off for performance reasons */
 +      pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
 +
        /* set up queue assignment tracking */
        size = sizeof(struct i40e_lump_tracking)
                + (sizeof(u16) * pf->hw.func_caps.num_tx_qp);
@@@ -8363,6 -8120,9 +8364,6 @@@ static void i40e_del_vxlan_port(struct 
                pf->vxlan_ports[idx] = 0;
                pf->pending_vxlan_bitmap |= BIT_ULL(idx);
                pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC;
 -
 -              dev_info(&pf->pdev->dev, "deleting vxlan port %d\n",
 -                       ntohs(port));
        } else {
                netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
                            ntohs(port));
@@@ -8514,15 -8274,13 +8515,15 @@@ static int i40e_ndo_bridge_setlink(stru
   * @seq: RTNL message seq #
   * @dev: the netdev being configured
   * @filter_mask: unused
 + * @nlflags: netlink flags passed in
   *
   * Return the mode in which the hardware bridge is operating in
   * i.e VEB or VEPA.
   **/
  static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                   struct net_device *dev,
 -                                 u32 filter_mask, int nlflags)
 +                                 u32 __always_unused filter_mask,
 +                                 int nlflags)
  {
        struct i40e_netdev_priv *np = netdev_priv(dev);
        struct i40e_vsi *vsi = np->vsi;
  /**
   * i40e_features_check - Validate encapsulated packet conforms to limits
   * @skb: skb buff
 - * @netdev: This physical port's netdev
 + * @dev: This physical port's netdev
   * @features: Offload features that the stack believes apply
   **/
  static netdev_features_t i40e_features_check(struct sk_buff *skb,
@@@ -8666,26 -8424,17 +8667,26 @@@ static int i40e_config_netdev(struct i4
                 * default a MAC-VLAN filter that accepts any tagged packet
                 * which must be replaced by a normal filter.
                 */
 -              if (!i40e_rm_default_mac_filter(vsi, mac_addr))
 +              if (!i40e_rm_default_mac_filter(vsi, mac_addr)) {
 +                      spin_lock_bh(&vsi->mac_filter_list_lock);
                        i40e_add_filter(vsi, mac_addr,
                                        I40E_VLAN_ANY, false, true);
 +                      spin_unlock_bh(&vsi->mac_filter_list_lock);
 +              }
        } else {
                /* relate the VSI_VMDQ name to the VSI_MAIN name */
                snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
                         pf->vsi[pf->lan_vsi]->netdev->name);
                random_ether_addr(mac_addr);
 +
 +              spin_lock_bh(&vsi->mac_filter_list_lock);
                i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false);
 +              spin_unlock_bh(&vsi->mac_filter_list_lock);
        }
 +
 +      spin_lock_bh(&vsi->mac_filter_list_lock);
        i40e_add_filter(vsi, brdcast, I40E_VLAN_ANY, false, false);
 +      spin_unlock_bh(&vsi->mac_filter_list_lock);
  
        ether_addr_copy(netdev->dev_addr, mac_addr);
        ether_addr_copy(netdev->perm_addr, mac_addr);
@@@ -8741,22 -8490,12 +8742,22 @@@ int i40e_is_vsi_uplink_mode_veb(struct 
                return 1;
  
        veb = pf->veb[vsi->veb_idx];
 +      if (!veb) {
 +              dev_info(&pf->pdev->dev,
 +                       "There is no veb associated with the bridge\n");
 +              return -ENOENT;
 +      }
 +
        /* Uplink is a bridge in VEPA mode */
 -      if (veb && (veb->bridge_mode & BRIDGE_MODE_VEPA))
 +      if (veb->bridge_mode & BRIDGE_MODE_VEPA) {
                return 0;
 +      } else {
 +              /* Uplink is a bridge in VEB mode */
 +              return 1;
 +      }
  
 -      /* Uplink is a bridge in VEB mode */
 -      return 1;
 +      /* VEPA is now default bridge, so return 0 */
 +      return 0;
  }
  
  /**
  static int i40e_add_vsi(struct i40e_vsi *vsi)
  {
        int ret = -ENODEV;
 -      struct i40e_mac_filter *f, *ftmp;
 +      u8 laa_macaddr[ETH_ALEN];
 +      bool found_laa_mac_filter = false;
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
        struct i40e_vsi_context ctxt;
 +      struct i40e_mac_filter *f, *ftmp;
 +
        u8 enabled_tc = 0x1; /* TC0 enabled */
        int f_count = 0;
  
                vsi->id = ctxt.vsi_number;
        }
  
 +      spin_lock_bh(&vsi->mac_filter_list_lock);
        /* If macvlan filters already exist, force them to get loaded */
        list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
                f->changed = true;
                f_count++;
  
 +              /* Expected to have only one MAC filter entry for LAA in list */
                if (f->is_laa && vsi->type == I40E_VSI_MAIN) {
 -                      struct i40e_aqc_remove_macvlan_element_data element;
 +                      ether_addr_copy(laa_macaddr, f->macaddr);
 +                      found_laa_mac_filter = true;
 +              }
 +      }
 +      spin_unlock_bh(&vsi->mac_filter_list_lock);
  
 -                      memset(&element, 0, sizeof(element));
 -                      ether_addr_copy(element.mac_addr, f->macaddr);
 -                      element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
 -                      ret = i40e_aq_remove_macvlan(hw, vsi->seid,
 -                                                   &element, 1, NULL);
 -                      if (ret) {
 -                              /* some older FW has a different default */
 -                              element.flags |=
 -                                             I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
 -                              i40e_aq_remove_macvlan(hw, vsi->seid,
 -                                                     &element, 1, NULL);
 -                      }
 +      if (found_laa_mac_filter) {
 +              struct i40e_aqc_remove_macvlan_element_data element;
  
 -                      i40e_aq_mac_address_write(hw,
 -                                                I40E_AQC_WRITE_TYPE_LAA_WOL,
 -                                                f->macaddr, NULL);
 +              memset(&element, 0, sizeof(element));
 +              ether_addr_copy(element.mac_addr, laa_macaddr);
 +              element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
 +              ret = i40e_aq_remove_macvlan(hw, vsi->seid,
 +                                           &element, 1, NULL);
 +              if (ret) {
 +                      /* some older FW has a different default */
 +                      element.flags |=
 +                                     I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
 +                      i40e_aq_remove_macvlan(hw, vsi->seid,
 +                                             &element, 1, NULL);
                }
 +
 +              i40e_aq_mac_address_write(hw,
 +                                        I40E_AQC_WRITE_TYPE_LAA_WOL,
 +                                        laa_macaddr, NULL);
        }
 +
        if (f_count) {
                vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
                pf->flags |= I40E_FLAG_FILTER_SYNC;
@@@ -9044,13 -8771,10 +9045,13 @@@ int i40e_vsi_release(struct i40e_vsi *v
                i40e_vsi_disable_irq(vsi);
        }
  
 +      spin_lock_bh(&vsi->mac_filter_list_lock);
        list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
                i40e_del_filter(vsi, f->macaddr, f->vlan,
                                f->is_vf, f->is_netdev);
 -      i40e_sync_vsi_filters(vsi);
 +      spin_unlock_bh(&vsi->mac_filter_list_lock);
 +
 +      i40e_sync_vsi_filters(vsi, false);
  
        i40e_vsi_delete(vsi);
        i40e_vsi_free_q_vectors(vsi);
@@@ -9275,7 -8999,8 +9276,7 @@@ struct i40e_vsi *i40e_vsi_setup(struct 
                if (veb) {
                        if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) {
                                dev_info(&vsi->back->pdev->dev,
 -                                       "%s: New VSI creation error, uplink seid of LAN VSI expected.\n",
 -                                       __func__);
 +                                       "New VSI creation error, uplink seid of LAN VSI expected.\n");
                                return NULL;
                        }
                        /* We come up by default in VEPA mode if SRIOV is not
@@@ -9925,7 -9650,6 +9926,7 @@@ static int i40e_setup_pf_switch(struct 
        } else {
                /* force a reset of TC and queue layout configurations */
                u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
 +
                pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
                pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
                i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
                i40e_config_rss(pf);
  
        /* fill in link information and enable LSE reporting */
 -      i40e_aq_get_link_info(&pf->hw, true, NULL, NULL);
 +      i40e_update_link_info(&pf->hw);
        i40e_link_event(pf);
  
        /* Initialize user-specific link properties */
@@@ -10067,14 -9791,8 +10068,14 @@@ static void i40e_determine_queue_usage(
        }
  
        pf->queues_left = queues_left;
 +      dev_dbg(&pf->pdev->dev,
 +              "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
 +              pf->hw.func_caps.num_tx_qp,
 +              !!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
 +              pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps,
 +              pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left);
  #ifdef I40E_FCOE
 -      dev_info(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
 +      dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
  #endif
  }
  
@@@ -10142,19 -9860,12 +10143,19 @@@ static void i40e_print_features(struct 
        }
        if (pf->flags & I40E_FLAG_DCB_CAPABLE)
                buf += sprintf(buf, "DCB ");
 +#if IS_ENABLED(CONFIG_VXLAN)
 +      buf += sprintf(buf, "VxLAN ");
 +#endif
        if (pf->flags & I40E_FLAG_PTP)
                buf += sprintf(buf, "PTP ");
  #ifdef I40E_FCOE
        if (pf->flags & I40E_FLAG_FCOE_ENABLED)
                buf += sprintf(buf, "FCOE ");
  #endif
 +      if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
 +              buf += sprintf(buf, "VEB ");
 +      else
 +              buf += sprintf(buf, "VEPA ");
  
        BUG_ON(buf > (string + INFO_STRING_LEN));
        dev_info(&pf->pdev->dev, "%s\n", string);
  static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
  {
        struct i40e_aq_get_phy_abilities_resp abilities;
 -      unsigned long ioremap_len;
        struct i40e_pf *pf;
        struct i40e_hw *hw;
        static u16 pfs_found;
 +      u16 wol_nvm_bits;
        u16 link_status;
 -      int err = 0;
 +      int err;
        u32 len;
        u32 i;
 +      u8 set_fc_aq_fail;
  
        err = pci_enable_device_mem(pdev);
        if (err)
        hw = &pf->hw;
        hw->back = pf;
  
 -      ioremap_len = min_t(unsigned long, pci_resource_len(pdev, 0),
 -                          I40E_MAX_CSR_SPACE);
 +      pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
 +                              I40E_MAX_CSR_SPACE);
  
 -      hw->hw_addr = ioremap(pci_resource_start(pdev, 0), ioremap_len);
 +      hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
        if (!hw->hw_addr) {
                err = -EIO;
                dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
                         (unsigned int)pci_resource_start(pdev, 0),
 -                       (unsigned int)pci_resource_len(pdev, 0), err);
 +                       pf->ioremap_len, err);
                goto err_ioremap;
        }
        hw->vendor_id = pdev->vendor;
        pf->hw.fc.requested_mode = I40E_FC_NONE;
  
        err = i40e_init_adminq(hw);
 -      dev_info(&pdev->dev, "%s\n", i40e_fw_version_str(hw));
 +
 +      /* provide nvm, fw, api versions */
 +      dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n",
 +               hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
 +               hw->aq.api_maj_ver, hw->aq.api_min_ver,
 +               i40e_nvm_version_str(hw));
 +
        if (err) {
                dev_info(&pdev->dev,
                         "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
        INIT_WORK(&pf->service_task, i40e_service_task);
        clear_bit(__I40E_SERVICE_SCHED, &pf->state);
        pf->flags |= I40E_FLAG_NEED_LINK_UPDATE;
 -      pf->link_check_timeout = jiffies;
  
 -      /* WoL defaults to disabled */
 -      pf->wol_en = false;
 +      /* NVM bit on means WoL disabled for the port */
 +      i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
 +      if ((1 << hw->port) & wol_nvm_bits || hw->partition_id != 1)
 +              pf->wol_en = false;
 +      else
 +              pf->wol_en = true;
        device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
  
        /* set up the main switch operations */
                dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
                goto err_vsis;
        }
 +
 +      /* Make sure flow control is set according to current settings */
 +      err = i40e_set_fc(hw, &set_fc_aq_fail, true);
 +      if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_GET)
 +              dev_dbg(&pf->pdev->dev,
 +                      "Set fc with err %s aq_err %s on get_phy_cap\n",
 +                      i40e_stat_str(hw, err),
 +                      i40e_aq_str(hw, hw->aq.asq_last_status));
 +      if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_SET)
 +              dev_dbg(&pf->pdev->dev,
 +                      "Set fc with err %s aq_err %s on set_phy_config\n",
 +                      i40e_stat_str(hw, err),
 +                      i40e_aq_str(hw, hw->aq.asq_last_status));
 +      if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_UPDATE)
 +              dev_dbg(&pf->pdev->dev,
 +                      "Set fc with err %s aq_err %s on get_link_info\n",
 +                      i40e_stat_str(hw, err),
 +                      i40e_aq_str(hw, hw->aq.asq_last_status));
 +
        /* if FDIR VSI was set up, start it now */
        for (i = 0; i < pf->num_alloc_vsi; i++) {
                if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
        i40e_fcoe_vsi_setup(pf);
  
  #endif
 -      /* Get the negotiated link width and speed from PCI config space */
 -      pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA, &link_status);
 +#define PCI_SPEED_SIZE 8
 +#define PCI_WIDTH_SIZE 8
 +      /* Devices on the IOSF bus do not have this information
 +       * and will report PCI Gen 1 x 1 by default so don't bother
 +       * checking them.
 +       */
 +      if (!(pf->flags & I40E_FLAG_NO_PCI_LINK_CHECK)) {
 +              char speed[PCI_SPEED_SIZE] = "Unknown";
 +              char width[PCI_WIDTH_SIZE] = "Unknown";
  
 -      i40e_set_pci_config_data(hw, link_status);
 +              /* Get the negotiated link width and speed from PCI config
 +               * space
 +               */
 +              pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA,
 +                                        &link_status);
 +
 +              i40e_set_pci_config_data(hw, link_status);
 +
 +              switch (hw->bus.speed) {
 +              case i40e_bus_speed_8000:
 +                      strncpy(speed, "8.0", PCI_SPEED_SIZE); break;
 +              case i40e_bus_speed_5000:
 +                      strncpy(speed, "5.0", PCI_SPEED_SIZE); break;
 +              case i40e_bus_speed_2500:
 +                      strncpy(speed, "2.5", PCI_SPEED_SIZE); break;
 +              default:
 +                      break;
 +              }
 +              switch (hw->bus.width) {
 +              case i40e_bus_width_pcie_x8:
 +                      strncpy(width, "8", PCI_WIDTH_SIZE); break;
 +              case i40e_bus_width_pcie_x4:
 +                      strncpy(width, "4", PCI_WIDTH_SIZE); break;
 +              case i40e_bus_width_pcie_x2:
 +                      strncpy(width, "2", PCI_WIDTH_SIZE); break;
 +              case i40e_bus_width_pcie_x1:
 +                      strncpy(width, "1", PCI_WIDTH_SIZE); break;
 +              default:
 +                      break;
 +              }
  
 -      dev_info(&pdev->dev, "PCI-Express: %s %s\n",
 -              (hw->bus.speed == i40e_bus_speed_8000 ? "Speed 8.0GT/s" :
 -               hw->bus.speed == i40e_bus_speed_5000 ? "Speed 5.0GT/s" :
 -               hw->bus.speed == i40e_bus_speed_2500 ? "Speed 2.5GT/s" :
 -               "Unknown"),
 -              (hw->bus.width == i40e_bus_width_pcie_x8 ? "Width x8" :
 -               hw->bus.width == i40e_bus_width_pcie_x4 ? "Width x4" :
 -               hw->bus.width == i40e_bus_width_pcie_x2 ? "Width x2" :
 -               hw->bus.width == i40e_bus_width_pcie_x1 ? "Width x1" :
 -               "Unknown"));
 +              dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n",
 +                       speed, width);
  
 -      if (hw->bus.width < i40e_bus_width_pcie_x8 ||
 -          hw->bus.speed < i40e_bus_speed_8000) {
 -              dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
 -              dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
 +              if (hw->bus.width < i40e_bus_width_pcie_x8 ||
 +                  hw->bus.speed < i40e_bus_speed_8000) {
 +                      dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
 +                      dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
 +              }
        }
  
        /* get the requested speeds from the fw */
        err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
        if (err)
 -              dev_info(&pf->pdev->dev,
 -                       "get phy capabilities failed, err %s aq_err %s, advertised speed settings may not be correct\n",
 -                       i40e_stat_str(&pf->hw, err),
 -                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 +              dev_dbg(&pf->pdev->dev, "get requested speeds ret =  %s last_status =  %s\n",
 +                      i40e_stat_str(&pf->hw, err),
 +                      i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
        pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
  
 +      /* get the supported phy types from the fw */
 +      err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
 +      if (err)
 +              dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %s last_status =  %s\n",
 +                      i40e_stat_str(&pf->hw, err),
 +                      i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 +      pf->hw.phy.phy_types = le32_to_cpu(abilities.phy_type);
 +
 +      /* Add a filter to drop all Flow control frames from any VSI from being
 +       * transmitted. By doing so we stop a malicious VF from sending out
 +       * PAUSE or PFC frames and potentially controlling traffic for other
 +       * PF/VF VSIs.
 +       * The FW can still send Flow control frames if enabled.
 +       */
 +      i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
 +                                                     pf->main_vsi_seid);
 +
        /* print a string summarizing features */
        i40e_print_features(pf);
  
@@@ -10680,7 -10317,6 +10681,7 @@@ err_dma
  static void i40e_remove(struct pci_dev *pdev)
  {
        struct i40e_pf *pf = pci_get_drvdata(pdev);
 +      struct i40e_hw *hw = &pf->hw;
        i40e_status ret_code;
        int i;
  
  
        i40e_ptp_stop(pf);
  
 +      /* Disable RSS in hw */
 +      wr32(hw, I40E_PFQF_HENA(0), 0);
 +      wr32(hw, I40E_PFQF_HENA(1), 0);
 +
        /* no more scheduling of any task */
        set_bit(__I40E_DOWN, &pf->state);
        del_timer_sync(&pf->service_timer);
@@@ -10808,7 -10440,7 +10809,7 @@@ static pci_ers_result_t i40e_pci_error_
        int err;
        u32 reg;
  
 -      dev_info(&pdev->dev, "%s\n", __func__);
 +      dev_dbg(&pdev->dev, "%s\n", __func__);
        if (pci_enable_device_mem(pdev)) {
                dev_info(&pdev->dev,
                         "Cannot re-enable PCI device after reset.\n");
@@@ -10848,13 -10480,13 +10849,13 @@@ static void i40e_pci_error_resume(struc
  {
        struct i40e_pf *pf = pci_get_drvdata(pdev);
  
 -      dev_info(&pdev->dev, "%s\n", __func__);
 +      dev_dbg(&pdev->dev, "%s\n", __func__);
        if (test_bit(__I40E_SUSPENDED, &pf->state))
                return;
  
        rtnl_lock();
        i40e_handle_reset_warning(pf);
 -      rtnl_lock();
 +      rtnl_unlock();
  }
  
  /**
@@@ -10940,7 -10572,9 +10941,7 @@@ static int i40e_resume(struct pci_dev *
  
        err = pci_enable_device_mem(pdev);
        if (err) {
 -              dev_err(&pdev->dev,
 -                      "%s: Cannot enable PCI device from suspend\n",
 -                      __func__);
 +              dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
                return err;
        }
        pci_set_master(pdev);
index 603d29df5832923325d74c9a128813be5bad5790,e893a35143c5229a0cd3b8aec141d302fdf19e72..6bf725921e79c6aa093275684eea722d20089497
@@@ -759,11 -759,23 +759,23 @@@ txq_put_data_tso(struct net_device *dev
  
        desc->l4i_chk = 0;
        desc->byte_cnt = length;
-       desc->buf_ptr = dma_map_single(dev->dev.parent, data,
-                                      length, DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) {
-               WARN(1, "dma_map_single failed!\n");
-               return -ENOMEM;
+       if (length <= 8 && (uintptr_t)data & 0x7) {
+               /* Copy unaligned small data fragment to TSO header data area */
+               memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
+                      data, length);
+               desc->buf_ptr = txq->tso_hdrs_dma
+                       + txq->tx_curr_desc * TSO_HEADER_SIZE;
+       } else {
+               /* Alignment is okay, map buffer and hand off to hardware */
+               txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
+               desc->buf_ptr = dma_map_single(dev->dev.parent, data,
+                       length, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(dev->dev.parent,
+                                              desc->buf_ptr))) {
+                       WARN(1, "dma_map_single failed!\n");
+                       return -ENOMEM;
+               }
        }
  
        cmd_sts = BUFFER_OWNED_BY_DMA;
  }
  
  static inline void
- txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
+ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length,
+               u32 *first_cmd_sts, bool first_desc)
  {
        struct mv643xx_eth_private *mp = txq_to_mp(txq);
        int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
        int ret;
        u32 cmd_csum = 0;
        u16 l4i_chk = 0;
+       u32 cmd_sts;
  
        tx_index = txq->tx_curr_desc;
        desc = &txq->tx_desc_area[tx_index];
        desc->byte_cnt = hdr_len;
        desc->buf_ptr = txq->tso_hdrs_dma +
                        txq->tx_curr_desc * TSO_HEADER_SIZE;
-       desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA  | TX_FIRST_DESC |
+       cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA  | TX_FIRST_DESC |
                                   GEN_CRC;
  
+       /* Defer updating the first command descriptor until all
+        * following descriptors have been written.
+        */
+       if (first_desc)
+               *first_cmd_sts = cmd_sts;
+       else
+               desc->cmd_sts = cmd_sts;
        txq->tx_curr_desc++;
        if (txq->tx_curr_desc == txq->tx_ring_size)
                txq->tx_curr_desc = 0;
@@@ -819,6 -841,8 +841,8 @@@ static int txq_submit_tso(struct tx_que
        int desc_count = 0;
        struct tso_t tso;
        int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       struct tx_desc *first_tx_desc;
+       u32 first_cmd_sts = 0;
  
        /* Count needed descriptors */
        if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) {
                return -EBUSY;
        }
  
+       first_tx_desc = &txq->tx_desc_area[txq->tx_curr_desc];
        /* Initialize the TSO handler, and prepare the first payload */
        tso_start(skb, &tso);
  
        total_len = skb->len - hdr_len;
        while (total_len > 0) {
+               bool first_desc = (desc_count == 0);
                char *hdr;
  
                data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
                /* prepare packet headers: MAC + IP + TCP */
                hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE;
                tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
-               txq_put_hdr_tso(skb, txq, data_left);
+               txq_put_hdr_tso(skb, txq, data_left, &first_cmd_sts,
+                               first_desc);
  
                while (data_left > 0) {
                        int size;
        __skb_queue_tail(&txq->tx_skb, skb);
        skb_tx_timestamp(skb);
  
+       /* ensure all other descriptors are written before first cmd_sts */
+       wmb();
+       first_tx_desc->cmd_sts = first_cmd_sts;
        /* clear TX_END status */
        mp->work_tx_end &= ~(1 << txq->index);
  
@@@ -1586,6 -1618,7 +1618,6 @@@ static void mv643xx_eth_get_drvinfo(str
                sizeof(drvinfo->version));
        strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
        strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 -      drvinfo->n_stats = ARRAY_SIZE(mv643xx_eth_stats);
  }
  
  static int mv643xx_eth_nway_reset(struct net_device *dev)
@@@ -1844,19 -1877,29 +1876,19 @@@ static void mv643xx_eth_program_multica
        struct netdev_hw_addr *ha;
        int i;
  
 -      if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
 -              int port_num;
 -              u32 accept;
 +      if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI))
 +              goto promiscuous;
  
 -oom:
 -              port_num = mp->port_num;
 -              accept = 0x01010101;
 -              for (i = 0; i < 0x100; i += 4) {
 -                      wrl(mp, SPECIAL_MCAST_TABLE(port_num) + i, accept);
 -                      wrl(mp, OTHER_MCAST_TABLE(port_num) + i, accept);
 -              }
 -              return;
 -      }
 -
 -      mc_spec = kzalloc(0x200, GFP_ATOMIC);
 -      if (mc_spec == NULL)
 -              goto oom;
 -      mc_other = mc_spec + (0x100 >> 2);
 +      /* Allocate both mc_spec and mc_other tables */
 +      mc_spec = kcalloc(128, sizeof(u32), GFP_ATOMIC);
 +      if (!mc_spec)
 +              goto promiscuous;
 +      mc_other = &mc_spec[64];
  
        netdev_for_each_mc_addr(ha, dev) {
                u8 *a = ha->addr;
                u32 *table;
 -              int entry;
 +              u8 entry;
  
                if (memcmp(a, "\x01\x00\x5e\x00\x00", 5) == 0) {
                        table = mc_spec;
                table[entry >> 2] |= 1 << (8 * (entry & 3));
        }
  
 -      for (i = 0; i < 0x100; i += 4) {
 -              wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i, mc_spec[i >> 2]);
 -              wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i, mc_other[i >> 2]);
 +      for (i = 0; i < 64; i++) {
 +              wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
 +                  mc_spec[i]);
 +              wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
 +                  mc_other[i]);
        }
  
        kfree(mc_spec);
 +      return;
 +
 +promiscuous:
 +      for (i = 0; i < 64; i++) {
 +              wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
 +                  0x01010101u);
 +              wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
 +                  0x01010101u);
 +      }
  }
  
  static void mv643xx_eth_set_rx_mode(struct net_device *dev)
index 55d2d8577d075719cc28e61430bbb1414e3750c8,874fb297e96c563525a5d275a8e2239b6ab41725..040fbc1e55080a4d025df2a2fae888da6151008c
@@@ -29,8 -29,8 +29,9 @@@
  #include <linux/workqueue.h>
  #include <linux/delay.h>
  #include <linux/pm_runtime.h>
 +#include <linux/gpio.h>
  #include <linux/of.h>
+ #include <linux/of_mdio.h>
  #include <linux/of_net.h>
  #include <linux/of_device.h>
  #include <linux/if_vlan.h>
@@@ -366,6 -366,7 +367,7 @@@ struct cpsw_priv 
        spinlock_t                      lock;
        struct platform_device          *pdev;
        struct net_device               *ndev;
+       struct device_node              *phy_node;
        struct napi_struct              napi_rx;
        struct napi_struct              napi_tx;
        struct device                   *dev;
@@@ -1146,7 -1147,11 +1148,11 @@@ static void cpsw_slave_open(struct cpsw
                cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
                                   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
  
-       slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
+       if (priv->phy_node)
+               slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
+                                &cpsw_adjust_link, 0, slave->data->phy_if);
+       else
+               slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
                                 &cpsw_adjust_link, slave->data->phy_if);
        if (IS_ERR(slave->phy)) {
                dev_err(priv->dev, "phy %s not found on slave %d\n",
@@@ -1784,6 -1789,7 +1790,6 @@@ static void cpsw_get_drvinfo(struct net
        strlcpy(info->driver, "cpsw", sizeof(info->driver));
        strlcpy(info->version, "1.0", sizeof(info->version));
        strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info));
 -      info->regdump_len = cpsw_get_regs_len(ndev);
  }
  
  static u32 cpsw_get_msglevel(struct net_device *ndev)
@@@ -1934,11 -1940,12 +1940,12 @@@ static void cpsw_slave_init(struct cpsw
        slave->port_vlan = data->dual_emac_res_vlan;
  }
  
- static int cpsw_probe_dt(struct cpsw_platform_data *data,
+ static int cpsw_probe_dt(struct cpsw_priv *priv,
                         struct platform_device *pdev)
  {
        struct device_node *node = pdev->dev.of_node;
        struct device_node *slave_node;
+       struct cpsw_platform_data *data = &priv->data;
        int i = 0, ret;
        u32 prop;
  
                if (strcmp(slave_node->name, "slave"))
                        continue;
  
+               priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
                parp = of_get_property(slave_node, "phy_id", &lenp);
                if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) {
                        dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i);
                }
                snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
                         PHY_ID_FMT, mdio->name, phyid);
                slave_data->phy_if = of_get_phy_mode(slave_node);
                if (slave_data->phy_if < 0) {
                        dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
@@@ -2057,10 -2064,13 +2064,10 @@@ no_phy_slave
                if (mac_addr) {
                        memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
                } else {
 -                      if (of_machine_is_compatible("ti,am33xx")) {
 -                              ret = cpsw_am33xx_cm_get_macid(&pdev->dev,
 -                                                      0x630, i,
 -                                                      slave_data->mac_addr);
 -                              if (ret)
 -                                      return ret;
 -                      }
 +                      ret = ti_cm_get_macid(&pdev->dev, i,
 +                                            slave_data->mac_addr);
 +                      if (ret)
 +                              return ret;
                }
                if (data->dual_emac) {
                        if (of_property_read_u32(slave_node, "dual_emac_res_vlan",
@@@ -2204,7 -2214,6 +2211,7 @@@ static int cpsw_probe(struct platform_d
        void __iomem                    *ss_regs;
        struct resource                 *res, *ss_res;
        const struct of_device_id       *of_id;
 +      struct gpio_descs               *mode;
        u32 slave_offset, sliver_offset, slave_size;
        int ret = 0, i;
        int irq;
                goto clean_ndev_ret;
        }
  
 +      mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
 +      if (IS_ERR(mode)) {
 +              ret = PTR_ERR(mode);
 +              dev_err(&pdev->dev, "gpio request failed, ret %d\n", ret);
 +              goto clean_ndev_ret;
 +      }
 +
        /*
         * This may be required here for child devices.
         */
        /* Select default pin state */
        pinctrl_pm_select_default_state(&pdev->dev);
  
-       if (cpsw_probe_dt(&priv->data, pdev)) {
+       if (cpsw_probe_dt(priv, pdev)) {
                dev_err(&pdev->dev, "cpsw: platform data missing\n");
                ret = -ENODEV;
                goto clean_runtime_disable_ret;
@@@ -2583,7 -2585,17 +2590,7 @@@ static struct platform_driver cpsw_driv
        .remove = cpsw_remove,
  };
  
 -static int __init cpsw_init(void)
 -{
 -      return platform_driver_register(&cpsw_driver);
 -}
 -late_initcall(cpsw_init);
 -
 -static void __exit cpsw_exit(void)
 -{
 -      platform_driver_unregister(&cpsw_driver);
 -}
 -module_exit(cpsw_exit);
 +module_platform_driver(cpsw_driver);
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Cyril Chemparathy <cyril@ti.com>");
diff --combined drivers/net/phy/Kconfig
index a7fb66580cee2b408ae1817e304cf16392a872c4,436972b2a746a23d27bac9ebb4e23d17b6d54715..60994a83a0d68ca2e4c229fe16140b6017a9dd55
@@@ -69,39 -69,20 +69,39 @@@ config SMSC_PH
        ---help---
          Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
  
 +config BCM_NET_PHYLIB
 +      tristate
 +
  config BROADCOM_PHY
        tristate "Drivers for Broadcom PHYs"
 +      select BCM_NET_PHYLIB
        ---help---
          Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
          BCM5481 and BCM5482 PHYs.
  
 +config BCM_CYGNUS_PHY
 +      tristate "Drivers for Broadcom Cygnus SoC internal PHY"
 +      depends on ARCH_BCM_CYGNUS || COMPILE_TEST
 +      depends on MDIO_BCM_IPROC
 +      select BCM_NET_PHYLIB
 +      ---help---
 +        This PHY driver is for the 1G internal PHYs of the Broadcom
 +        Cygnus Family SoC.
 +
 +        Currently supports internal PHY's used in the BCM11300,
 +        BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
 +        BCM58303 & BCM58305 Broadcom Cygnus SoCs.
 +
  config BCM63XX_PHY
        tristate "Drivers for Broadcom 63xx SOCs internal PHY"
        depends on BCM63XX
 +      select BCM_NET_PHYLIB
        ---help---
          Currently supports the 6348 and 6358 PHYs.
  
  config BCM7XXX_PHY
        tristate "Drivers for Broadcom 7xxx SOCs internal PHYs"
 +      select BCM_NET_PHYLIB
        ---help---
          Currently supports the BCM7366, BCM7439, BCM7445, and
          40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
@@@ -141,6 -122,11 +141,11 @@@ config MICREL_PH
        ---help---
          Supports the KSZ9021, VSC8201, KS8001 PHYs.
  
+ config DP83848_PHY
+       tristate "Driver for Texas Instruments DP83848 PHY"
+       ---help---
+         Supports the DP83848 PHY.
  config DP83867_PHY
        tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
        ---help---
@@@ -242,15 -228,6 +247,15 @@@ config MDIO_BCM_UNIMA
          This hardware can be found in the Broadcom GENET Ethernet MAC
          controllers as well as some Broadcom Ethernet switches such as the
          Starfighter 2 switches.
 +
 +config MDIO_BCM_IPROC
 +      tristate "Broadcom iProc MDIO bus controller"
 +      depends on ARCH_BCM_IPROC || COMPILE_TEST
 +      depends on HAS_IOMEM && OF_MDIO
 +      help
 +        This module provides a driver for the MDIO busses found in the
 +        Broadcom iProc SoC's.
 +
  endif # PHYLIB
  
  config MICREL_KS8995MA
diff --combined drivers/net/phy/Makefile
index 7655d47ad8d8198dd0f141d64e7e2e0f20f6f58f,b74822463930051f60151beec04c5ccfe0e7bd0b..f31a4e25cf151a58efd594daeb2ec09724c39279
@@@ -12,12 -12,10 +12,12 @@@ obj-$(CONFIG_QSEMI_PHY)            += qsemi.
  obj-$(CONFIG_SMSC_PHY)                += smsc.o
  obj-$(CONFIG_TERANETICS_PHY)  += teranetics.o
  obj-$(CONFIG_VITESSE_PHY)     += vitesse.o
 +obj-$(CONFIG_BCM_NET_PHYLIB)  += bcm-phy-lib.o
  obj-$(CONFIG_BROADCOM_PHY)    += broadcom.o
  obj-$(CONFIG_BCM63XX_PHY)     += bcm63xx.o
  obj-$(CONFIG_BCM7XXX_PHY)     += bcm7xxx.o
  obj-$(CONFIG_BCM87XX_PHY)     += bcm87xx.o
 +obj-$(CONFIG_BCM_CYGNUS_PHY)  += bcm-cygnus.o
  obj-$(CONFIG_ICPLUS_PHY)      += icplus.o
  obj-$(CONFIG_REALTEK_PHY)     += realtek.o
  obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o
@@@ -26,6 -24,7 +26,7 @@@ obj-$(CONFIG_MDIO_BITBANG)    += mdio-bitb
  obj-$(CONFIG_MDIO_GPIO)               += mdio-gpio.o
  obj-$(CONFIG_NATIONAL_PHY)    += national.o
  obj-$(CONFIG_DP83640_PHY)     += dp83640.o
+ obj-$(CONFIG_DP83848_PHY)     += dp83848.o
  obj-$(CONFIG_DP83867_PHY)     += dp83867.o
  obj-$(CONFIG_STE10XP)         += ste10Xp.o
  obj-$(CONFIG_MICREL_PHY)      += micrel.o
@@@ -40,4 -39,3 +41,4 @@@ obj-$(CONFIG_MDIO_SUN4I)      += mdio-sun4i.
  obj-$(CONFIG_MDIO_MOXART)     += mdio-moxart.o
  obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o
  obj-$(CONFIG_MICROCHIP_PHY)   += microchip.o
 +obj-$(CONFIG_MDIO_BCM_IPROC)  += mdio-bcm-iproc.o
index 4752e69de00e1f0ab366f35638564de9c14c8fa9,2a7c1be23c4f2aea38e2ec99384be91e7f9d0f62..75ae756e93cf6fdfce1216b6f09cdf6217a39b17
@@@ -229,11 -229,11 +229,11 @@@ static int qmi_wwan_bind(struct usbnet 
        u8 *buf = intf->cur_altsetting->extra;
        int len = intf->cur_altsetting->extralen;
        struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
 -      struct usb_cdc_union_desc *cdc_union = NULL;
 -      struct usb_cdc_ether_desc *cdc_ether = NULL;
 -      u32 found = 0;
 +      struct usb_cdc_union_desc *cdc_union;
 +      struct usb_cdc_ether_desc *cdc_ether;
        struct usb_driver *driver = driver_of(intf);
        struct qmi_wwan_state *info = (void *)&dev->data;
 +      struct usb_cdc_parsed_header hdr;
  
        BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) <
                      sizeof(struct qmi_wwan_state)));
        info->data = intf;
  
        /* and a number of CDC descriptors */
 -      while (len > 3) {
 -              struct usb_descriptor_header *h = (void *)buf;
 -
 -              /* ignore any misplaced descriptors */
 -              if (h->bDescriptorType != USB_DT_CS_INTERFACE)
 -                      goto next_desc;
 -
 -              /* buf[2] is CDC descriptor subtype */
 -              switch (buf[2]) {
 -              case USB_CDC_HEADER_TYPE:
 -                      if (found & 1 << USB_CDC_HEADER_TYPE) {
 -                              dev_dbg(&intf->dev, "extra CDC header\n");
 -                              goto err;
 -                      }
 -                      if (h->bLength != sizeof(struct usb_cdc_header_desc)) {
 -                              dev_dbg(&intf->dev, "CDC header len %u\n",
 -                                      h->bLength);
 -                              goto err;
 -                      }
 -                      break;
 -              case USB_CDC_UNION_TYPE:
 -                      if (found & 1 << USB_CDC_UNION_TYPE) {
 -                              dev_dbg(&intf->dev, "extra CDC union\n");
 -                              goto err;
 -                      }
 -                      if (h->bLength != sizeof(struct usb_cdc_union_desc)) {
 -                              dev_dbg(&intf->dev, "CDC union len %u\n",
 -                                      h->bLength);
 -                              goto err;
 -                      }
 -                      cdc_union = (struct usb_cdc_union_desc *)buf;
 -                      break;
 -              case USB_CDC_ETHERNET_TYPE:
 -                      if (found & 1 << USB_CDC_ETHERNET_TYPE) {
 -                              dev_dbg(&intf->dev, "extra CDC ether\n");
 -                              goto err;
 -                      }
 -                      if (h->bLength != sizeof(struct usb_cdc_ether_desc)) {
 -                              dev_dbg(&intf->dev, "CDC ether len %u\n",
 -                                      h->bLength);
 -                              goto err;
 -                      }
 -                      cdc_ether = (struct usb_cdc_ether_desc *)buf;
 -                      break;
 -              }
 -
 -              /* Remember which CDC functional descriptors we've seen.  Works
 -               * for all types we care about, of which USB_CDC_ETHERNET_TYPE
 -               * (0x0f) is the highest numbered
 -               */
 -              if (buf[2] < 32)
 -                      found |= 1 << buf[2];
 -
 -next_desc:
 -              len -= h->bLength;
 -              buf += h->bLength;
 -      }
 +      cdc_parse_cdc_header(&hdr, intf, buf, len);
 +      cdc_union = hdr.usb_cdc_union_desc;
 +      cdc_ether = hdr.usb_cdc_ether_desc;
  
        /* Use separate control and data interfaces if we found a CDC Union */
        if (cdc_union) {
@@@ -711,6 -765,10 +711,10 @@@ static const struct usb_device_id produ
        {QMI_FIXED_INTF(0x1199, 0x9056, 8)},    /* Sierra Wireless Modem */
        {QMI_FIXED_INTF(0x1199, 0x9057, 8)},
        {QMI_FIXED_INTF(0x1199, 0x9061, 8)},    /* Sierra Wireless Modem */
+       {QMI_FIXED_INTF(0x1199, 0x9070, 8)},    /* Sierra Wireless MC74xx/EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x9070, 10)},   /* Sierra Wireless MC74xx/EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x9071, 8)},    /* Sierra Wireless MC74xx/EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x9071, 10)},   /* Sierra Wireless MC74xx/EM74xx */
        {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},    /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
        {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},    /* Alcatel L800MA */
        {QMI_FIXED_INTF(0x2357, 0x0201, 4)},    /* TP-LINK HSUPA Modem MA180 */
diff --combined drivers/net/vxlan.c
index cf262ccf504739c986397022f2833666a8d7d93d,c1587ece28cfffeb3c7c3011bf087215edb77662..6369a5734d4c3e899e96ec74469b0af4b3bca865
@@@ -75,7 -75,8 +75,7 @@@ static struct rtnl_link_ops vxlan_link_
  
  static const u8 all_zeros_mac[ETH_ALEN];
  
 -static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 -                                       bool no_share, u32 flags);
 +static int vxlan_sock_add(struct vxlan_dev *vxlan);
  
  /* per-network namespace private data for this module */
  struct vxlan_net {
@@@ -993,30 -994,19 +993,30 @@@ static bool vxlan_snoop(struct net_devi
  static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
  {
        struct vxlan_dev *vxlan;
 +      unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
  
        /* The vxlan_sock is only used by dev, leaving group has
         * no effect on other vxlan devices.
         */
 -      if (atomic_read(&dev->vn_sock->refcnt) == 1)
 +      if (family == AF_INET && dev->vn4_sock &&
 +          atomic_read(&dev->vn4_sock->refcnt) == 1)
                return false;
 +#if IS_ENABLED(CONFIG_IPV6)
 +      if (family == AF_INET6 && dev->vn6_sock &&
 +          atomic_read(&dev->vn6_sock->refcnt) == 1)
 +              return false;
 +#endif
  
        list_for_each_entry(vxlan, &vn->vxlan_list, next) {
                if (!netif_running(vxlan->dev) || vxlan == dev)
                        continue;
  
 -              if (vxlan->vn_sock != dev->vn_sock)
 +              if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
                        continue;
 +#if IS_ENABLED(CONFIG_IPV6)
 +              if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
 +                      continue;
 +#endif
  
                if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
                                      &dev->default_dst.remote_ip))
        return false;
  }
  
 -static void vxlan_sock_release(struct vxlan_sock *vs)
 +static void __vxlan_sock_release(struct vxlan_sock *vs)
  {
 -      struct sock *sk = vs->sock->sk;
 -      struct net *net = sock_net(sk);
 -      struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 +      struct vxlan_net *vn;
  
 +      if (!vs)
 +              return;
        if (!atomic_dec_and_test(&vs->refcnt))
                return;
  
 +      vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
        spin_lock(&vn->sock_lock);
        hlist_del_rcu(&vs->hlist);
        vxlan_notify_del_rx_port(vs);
        queue_work(vxlan_wq, &vs->del_work);
  }
  
 +static void vxlan_sock_release(struct vxlan_dev *vxlan)
 +{
 +      __vxlan_sock_release(vxlan->vn4_sock);
 +#if IS_ENABLED(CONFIG_IPV6)
 +      __vxlan_sock_release(vxlan->vn6_sock);
 +#endif
 +}
 +
  /* Update multicast group membership when first VNI on
   * multicast address is brought up
   */
  static int vxlan_igmp_join(struct vxlan_dev *vxlan)
  {
 -      struct vxlan_sock *vs = vxlan->vn_sock;
 -      struct sock *sk = vs->sock->sk;
 +      struct sock *sk;
        union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
        int ifindex = vxlan->default_dst.remote_ifindex;
        int ret = -EINVAL;
  
 -      lock_sock(sk);
        if (ip->sa.sa_family == AF_INET) {
                struct ip_mreqn mreq = {
                        .imr_multiaddr.s_addr   = ip->sin.sin_addr.s_addr,
                        .imr_ifindex            = ifindex,
                };
  
 +              sk = vxlan->vn4_sock->sock->sk;
 +              lock_sock(sk);
                ret = ip_mc_join_group(sk, &mreq);
 +              release_sock(sk);
  #if IS_ENABLED(CONFIG_IPV6)
        } else {
 +              sk = vxlan->vn6_sock->sock->sk;
 +              lock_sock(sk);
                ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
                                                   &ip->sin6.sin6_addr);
 +              release_sock(sk);
  #endif
        }
 -      release_sock(sk);
  
        return ret;
  }
  /* Inverse of vxlan_igmp_join when last VNI is brought down */
  static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
  {
 -      struct vxlan_sock *vs = vxlan->vn_sock;
 -      struct sock *sk = vs->sock->sk;
 +      struct sock *sk;
        union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
        int ifindex = vxlan->default_dst.remote_ifindex;
        int ret = -EINVAL;
  
 -      lock_sock(sk);
        if (ip->sa.sa_family == AF_INET) {
                struct ip_mreqn mreq = {
                        .imr_multiaddr.s_addr   = ip->sin.sin_addr.s_addr,
                        .imr_ifindex            = ifindex,
                };
  
 +              sk = vxlan->vn4_sock->sock->sk;
 +              lock_sock(sk);
                ret = ip_mc_leave_group(sk, &mreq);
 +              release_sock(sk);
  #if IS_ENABLED(CONFIG_IPV6)
        } else {
 +              sk = vxlan->vn6_sock->sock->sk;
 +              lock_sock(sk);
                ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
                                                   &ip->sin6.sin6_addr);
 +              release_sock(sk);
  #endif
        }
 -      release_sock(sk);
  
        return ret;
  }
@@@ -1898,7 -1873,8 +1898,7 @@@ static void vxlan_xmit_one(struct sk_bu
  {
        struct ip_tunnel_info *info;
        struct vxlan_dev *vxlan = netdev_priv(dev);
 -      struct sock *sk = vxlan->vn_sock->sock->sk;
 -      unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
 +      struct sock *sk;
        struct rtable *rt = NULL;
        const struct iphdr *old_iph;
        struct flowi4 fl4;
                                  dev->name);
                        goto drop;
                }
 -              if (family != ip_tunnel_info_af(info))
 -                      goto drop;
 -
                dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
                vni = be64_to_cpu(info->key.tun_id);
 -              remote_ip.sa.sa_family = family;
 -              if (family == AF_INET)
 +              remote_ip.sa.sa_family = ip_tunnel_info_af(info);
 +              if (remote_ip.sa.sa_family == AF_INET)
                        remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
                else
                        remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
        }
  
        if (dst->sa.sa_family == AF_INET) {
 +              if (!vxlan->vn4_sock)
 +                      goto drop;
 +              sk = vxlan->vn4_sock->sock->sk;
 +
                if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
                        df = htons(IP_DF);
  
                struct flowi6 fl6;
                u32 rt6i_flags;
  
 +              if (!vxlan->vn6_sock)
 +                      goto drop;
 +              sk = vxlan->vn6_sock->sock->sk;
 +
                memset(&fl6, 0, sizeof(fl6));
                fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
                fl6.daddr = dst->sin6.sin6_addr;
@@@ -2233,6 -2204,7 +2233,6 @@@ static void vxlan_vs_add_dev(struct vxl
        struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
        __u32 vni = vxlan->default_dst.remote_vni;
  
 -      vxlan->vn_sock = vs;
        spin_lock(&vn->sock_lock);
        hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
        spin_unlock(&vn->sock_lock);
@@@ -2272,18 -2244,22 +2272,18 @@@ static void vxlan_uninit(struct net_dev
  static int vxlan_open(struct net_device *dev)
  {
        struct vxlan_dev *vxlan = netdev_priv(dev);
 -      struct vxlan_sock *vs;
 -      int ret = 0;
 +      int ret;
  
 -      vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port,
 -                          vxlan->cfg.no_share, vxlan->flags);
 -      if (IS_ERR(vs))
 -              return PTR_ERR(vs);
 -
 -      vxlan_vs_add_dev(vs, vxlan);
 +      ret = vxlan_sock_add(vxlan);
 +      if (ret < 0)
 +              return ret;
  
        if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
                ret = vxlan_igmp_join(vxlan);
                if (ret == -EADDRINUSE)
                        ret = 0;
                if (ret) {
 -                      vxlan_sock_release(vs);
 +                      vxlan_sock_release(vxlan);
                        return ret;
                }
        }
@@@ -2318,6 -2294,7 +2318,6 @@@ static int vxlan_stop(struct net_devic
  {
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 -      struct vxlan_sock *vs = vxlan->vn_sock;
        int ret = 0;
  
        if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
        del_timer_sync(&vxlan->age_timer);
  
        vxlan_flush(vxlan);
 -      vxlan_sock_release(vs);
 +      vxlan_sock_release(vxlan);
  
        return ret;
  }
@@@ -2360,6 -2337,46 +2360,46 @@@ static int vxlan_change_mtu(struct net_
        return 0;
  }
  
+ static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
+                               struct ip_tunnel_info *info,
+                               __be16 sport, __be16 dport)
+ {
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct rtable *rt;
+       struct flowi4 fl4;
+       memset(&fl4, 0, sizeof(fl4));
+       fl4.flowi4_tos = RT_TOS(info->key.tos);
+       fl4.flowi4_mark = skb->mark;
+       fl4.flowi4_proto = IPPROTO_UDP;
+       fl4.daddr = info->key.u.ipv4.dst;
+       rt = ip_route_output_key(vxlan->net, &fl4);
+       if (IS_ERR(rt))
+               return PTR_ERR(rt);
+       ip_rt_put(rt);
+       info->key.u.ipv4.src = fl4.saddr;
+       info->key.tp_src = sport;
+       info->key.tp_dst = dport;
+       return 0;
+ }
+ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ {
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct ip_tunnel_info *info = skb_tunnel_info(skb);
+       __be16 sport, dport;
+       sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+                                 vxlan->cfg.port_max, true);
+       dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
+       if (ip_tunnel_info_af(info) == AF_INET)
+               return egress_ipv4_tun_info(dev, skb, info, sport, dport);
+       return -EINVAL;
+ }
  static const struct net_device_ops vxlan_netdev_ops = {
        .ndo_init               = vxlan_init,
        .ndo_uninit             = vxlan_uninit,
        .ndo_fdb_add            = vxlan_fdb_add,
        .ndo_fdb_del            = vxlan_fdb_delete,
        .ndo_fdb_dump           = vxlan_fdb_dump,
+       .ndo_fill_metadata_dst  = vxlan_fill_metadata_dst,
  };
  
  /* Info for udev, that this is a virtual tunnel endpoint */
@@@ -2563,13 -2581,14 +2604,13 @@@ static struct socket *vxlan_create_sock
  }
  
  /* Create new listen socket if needed */
 -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 -                                            u32 flags)
 +static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
 +                                            __be16 port, u32 flags)
  {
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
        struct vxlan_sock *vs;
        struct socket *sock;
        unsigned int h;
 -      bool ipv6 = !!(flags & VXLAN_F_IPV6);
        struct udp_tunnel_sock_cfg tunnel_cfg;
  
        vs = kzalloc(sizeof(*vs), GFP_KERNEL);
        return vs;
  }
  
 -static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 -                                       bool no_share, u32 flags)
 +static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
  {
 -      struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 -      struct vxlan_sock *vs;
 -      bool ipv6 = flags & VXLAN_F_IPV6;
 +      struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 +      struct vxlan_sock *vs = NULL;
  
 -      if (!no_share) {
 +      if (!vxlan->cfg.no_share) {
                spin_lock(&vn->sock_lock);
 -              vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
 -                                   flags);
 -              if (vs) {
 -                      if (!atomic_add_unless(&vs->refcnt, 1, 0))
 -                              vs = ERR_PTR(-EBUSY);
 +              vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
 +                                   vxlan->cfg.dst_port, vxlan->flags);
 +              if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) {
                        spin_unlock(&vn->sock_lock);
 -                      return vs;
 +                      return -EBUSY;
                }
                spin_unlock(&vn->sock_lock);
        }
 +      if (!vs)
 +              vs = vxlan_socket_create(vxlan->net, ipv6,
 +                                       vxlan->cfg.dst_port, vxlan->flags);
 +      if (IS_ERR(vs))
 +              return PTR_ERR(vs);
 +#if IS_ENABLED(CONFIG_IPV6)
 +      if (ipv6)
 +              vxlan->vn6_sock = vs;
 +      else
 +#endif
 +              vxlan->vn4_sock = vs;
 +      vxlan_vs_add_dev(vs, vxlan);
 +      return 0;
 +}
  
 -      return vxlan_socket_create(net, port, flags);
 +static int vxlan_sock_add(struct vxlan_dev *vxlan)
 +{
 +      bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
 +      bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
 +      int ret = 0;
 +
 +      vxlan->vn4_sock = NULL;
 +#if IS_ENABLED(CONFIG_IPV6)
 +      vxlan->vn6_sock = NULL;
 +      if (ipv6 || metadata)
 +              ret = __vxlan_sock_add(vxlan, true);
 +#endif
 +      if (!ret && (!ipv6 || metadata))
 +              ret = __vxlan_sock_add(vxlan, false);
 +      if (ret < 0)
 +              vxlan_sock_release(vxlan);
 +      return ret;
  }
  
  static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
        struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_rdst *dst = &vxlan->default_dst;
 +      unsigned short needed_headroom = ETH_HLEN;
        int err;
        bool use_ipv6 = false;
        __be16 default_port = vxlan->cfg.dst_port;
                if (!IS_ENABLED(CONFIG_IPV6))
                        return -EPFNOSUPPORT;
                use_ipv6 = true;
 +              vxlan->flags |= VXLAN_F_IPV6;
        }
  
        if (conf->remote_ifindex) {
                                pr_info("IPv6 is disabled via sysctl\n");
                                return -EPERM;
                        }
 -                      vxlan->flags |= VXLAN_F_IPV6;
                }
  #endif
  
                if (!conf->mtu)
                        dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
  
 -              dev->needed_headroom = lowerdev->hard_header_len +
 -                                     (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
 -      } else if (use_ipv6) {
 -              vxlan->flags |= VXLAN_F_IPV6;
 -              dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
 -      } else {
 -              dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
 +              needed_headroom = lowerdev->hard_header_len;
        }
  
 +      if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
 +              needed_headroom += VXLAN6_HEADROOM;
 +      else
 +              needed_headroom += VXLAN_HEADROOM;
 +      dev->needed_headroom = needed_headroom;
 +
        memcpy(&vxlan->cfg, conf, sizeof(*conf));
        if (!vxlan->cfg.dst_port)
                vxlan->cfg.dst_port = default_port;
index 9bf63c27a9b7af80dc3ba46d2e7633c9f9391eb8,6febc053a37febc069d241e6811d904f18890f45..441b158d04f73c0a7d3a1dbe61cc37a4913eec2a
@@@ -1706,19 -1706,19 +1706,19 @@@ static void xennet_destroy_queues(struc
  }
  
  static int xennet_create_queues(struct netfront_info *info,
-                               unsigned int num_queues)
+                               unsigned int *num_queues)
  {
        unsigned int i;
        int ret;
  
-       info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
+       info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
                               GFP_KERNEL);
        if (!info->queues)
                return -ENOMEM;
  
        rtnl_lock();
  
-       for (i = 0; i < num_queues; i++) {
+       for (i = 0; i < *num_queues; i++) {
                struct netfront_queue *queue = &info->queues[i];
  
                queue->id = i;
                if (ret < 0) {
                        dev_warn(&info->netdev->dev,
                                 "only created %d queues\n", i);
-                       num_queues = i;
+                       *num_queues = i;
                        break;
                }
  
                        napi_enable(&queue->napi);
        }
  
-       netif_set_real_num_tx_queues(info->netdev, num_queues);
+       netif_set_real_num_tx_queues(info->netdev, *num_queues);
  
        rtnl_unlock();
  
-       if (num_queues == 0) {
+       if (*num_queues == 0) {
                dev_err(&info->netdev->dev, "no queues\n");
                return -EINVAL;
        }
@@@ -1788,7 -1788,7 +1788,7 @@@ static int talk_to_netback(struct xenbu
        if (info->queues)
                xennet_destroy_queues(info);
  
-       err = xennet_create_queues(info, num_queues);
+       err = xennet_create_queues(info, &num_queues);
        if (err < 0)
                goto destroy_ring;
  
@@@ -1819,22 -1819,19 +1819,22 @@@ again
                goto destroy_ring;
        }
  
 -      if (num_queues == 1) {
 -              err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
 -              if (err)
 -                      goto abort_transaction_no_dev_fatal;
 -      } else {
 +      if (xenbus_exists(XBT_NIL,
 +                        info->xbdev->otherend, "multi-queue-max-queues")) {
                /* Write the number of queues */
 -              err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues",
 -                                  "%u", num_queues);
 +              err = xenbus_printf(xbt, dev->nodename,
 +                                  "multi-queue-num-queues", "%u", num_queues);
                if (err) {
                        message = "writing multi-queue-num-queues";
                        goto abort_transaction_no_dev_fatal;
                }
 +      }
  
 +      if (num_queues == 1) {
 +              err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
 +              if (err)
 +                      goto abort_transaction_no_dev_fatal;
 +      } else {
                /* Write the keys for each queue */
                for (i = 0; i < num_queues; ++i) {
                        queue = &info->queues[i];
index 773383859bd90eb576d76c61963d0b6f7a78be54,210d11a75e4ff36bcdedb6496c9e5943cb24f465..4ac653b7b8ace2a9a2f2ee0feae07aab1bbcf459
@@@ -881,7 -881,6 +881,7 @@@ typedef u16 (*select_queue_fallback_t)(
   * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
   *                      int max_tx_rate);
   * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
 + * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting);
   * int (*ndo_get_vf_config)(struct net_device *dev,
   *                        int vf, struct ifla_vf_info *ivf);
   * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
   *    This function is used to pass protocol port error state information
   *    to the switch driver. The switch driver can react to the proto_down
   *      by doing a phys down on the associated switch port.
+  * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
+  *    This function is used to get egress tunnel information for given skb.
+  *    This is useful for retrieving outer tunnel header parameters while
+  *    sampling packet.
   *
   */
  struct net_device_ops {
                                                   int max_tx_rate);
        int                     (*ndo_set_vf_spoofchk)(struct net_device *dev,
                                                       int vf, bool setting);
 +      int                     (*ndo_set_vf_trust)(struct net_device *dev,
 +                                                  int vf, bool setting);
        int                     (*ndo_get_vf_config)(struct net_device *dev,
                                                     int vf,
                                                     struct ifla_vf_info *ivf);
        int                     (*ndo_get_iflink)(const struct net_device *dev);
        int                     (*ndo_change_proto_down)(struct net_device *dev,
                                                         bool proto_down);
+       int                     (*ndo_fill_metadata_dst)(struct net_device *dev,
+                                                      struct sk_buff *skb);
  };
  
  /**
   * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
   *    change when it's running
   * @IFF_MACVLAN: Macvlan device
 - * @IFF_VRF_MASTER: device is a VRF master
 + * @IFF_L3MDEV_MASTER: device is an L3 master device
   * @IFF_NO_QUEUE: device can run without qdisc attached
   * @IFF_OPENVSWITCH: device is a Open vSwitch master
 + * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
   */
  enum netdev_priv_flags {
        IFF_802_1Q_VLAN                 = 1<<0,
        IFF_XMIT_DST_RELEASE_PERM       = 1<<17,
        IFF_IPVLAN_MASTER               = 1<<18,
        IFF_IPVLAN_SLAVE                = 1<<19,
 -      IFF_VRF_MASTER                  = 1<<20,
 +      IFF_L3MDEV_MASTER               = 1<<20,
        IFF_NO_QUEUE                    = 1<<21,
        IFF_OPENVSWITCH                 = 1<<22,
 +      IFF_L3MDEV_SLAVE                = 1<<23,
  };
  
  #define IFF_802_1Q_VLAN                       IFF_802_1Q_VLAN
  #define IFF_XMIT_DST_RELEASE_PERM     IFF_XMIT_DST_RELEASE_PERM
  #define IFF_IPVLAN_MASTER             IFF_IPVLAN_MASTER
  #define IFF_IPVLAN_SLAVE              IFF_IPVLAN_SLAVE
 -#define IFF_VRF_MASTER                        IFF_VRF_MASTER
 +#define IFF_L3MDEV_MASTER             IFF_L3MDEV_MASTER
  #define IFF_NO_QUEUE                  IFF_NO_QUEUE
  #define IFF_OPENVSWITCH                       IFF_OPENVSWITCH
  
   *    @dn_ptr:        DECnet specific data
   *    @ip6_ptr:       IPv6 specific data
   *    @ax25_ptr:      AX.25 specific data
 - *    @vrf_ptr:       VRF specific data
   *    @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
   *
   *    @last_rx:       Time of last Rx
@@@ -1591,9 -1593,6 +1597,9 @@@ struct net_device 
  #ifdef CONFIG_NET_SWITCHDEV
        const struct switchdev_ops *switchdev_ops;
  #endif
 +#ifdef CONFIG_NET_L3_MASTER_DEV
 +      const struct l3mdev_ops *l3mdev_ops;
 +#endif
  
        const struct header_ops *header_ops;
  
        struct dn_dev __rcu     *dn_ptr;
        struct inet6_dev __rcu  *ip6_ptr;
        void                    *ax25_ptr;
 -      struct net_vrf_dev __rcu *vrf_ptr;
        struct wireless_dev     *ieee80211_ptr;
        struct wpan_dev         *ieee802154_ptr;
  #if IS_ENABLED(CONFIG_MPLS_ROUTING)
@@@ -2109,7 -2109,6 +2115,7 @@@ struct pcpu_sw_netstats 
  #define NETDEV_PRECHANGEMTU   0x0017 /* notify before mtu change happened */
  #define NETDEV_CHANGEINFODATA 0x0018
  #define NETDEV_BONDING_INFO   0x0019
 +#define NETDEV_PRECHANGEUPPER 0x001A
  
  int register_netdevice_notifier(struct notifier_block *nb);
  int unregister_netdevice_notifier(struct notifier_block *nb);
@@@ -2210,6 -2209,7 +2216,7 @@@ void dev_add_offload(struct packet_offl
  void dev_remove_offload(struct packet_offload *po);
  
  int dev_get_iflink(const struct net_device *dev);
+ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
  struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
                                      unsigned short mask);
  struct net_device *dev_get_by_name(struct net *net, const char *name);
@@@ -2220,8 -2220,12 +2227,8 @@@ int dev_open(struct net_device *dev)
  int dev_close(struct net_device *dev);
  int dev_close_many(struct list_head *head, bool unlink);
  void dev_disable_lro(struct net_device *dev);
 -int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb);
 -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb);
 -static inline int dev_queue_xmit(struct sk_buff *skb)
 -{
 -      return dev_queue_xmit_sk(skb->sk, skb);
 -}
 +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 +int dev_queue_xmit(struct sk_buff *skb);
  int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
  int register_netdevice(struct net_device *dev);
  void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
@@@ -2993,7 -2997,11 +3000,7 @@@ static inline void dev_consume_skb_any(
  
  int netif_rx(struct sk_buff *skb);
  int netif_rx_ni(struct sk_buff *skb);
 -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb);
 -static inline int netif_receive_skb(struct sk_buff *skb)
 -{
 -      return netif_receive_skb_sk(skb->sk, skb);
 -}
 +int netif_receive_skb(struct sk_buff *skb);
  gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
  void napi_gro_flush(struct napi_struct *napi, bool flush_old);
  struct sk_buff *napi_get_frags(struct napi_struct *napi);
@@@ -3831,14 -3839,9 +3838,14 @@@ static inline bool netif_supports_nofcs
        return dev->priv_flags & IFF_SUPP_NOFCS;
  }
  
 -static inline bool netif_is_vrf(const struct net_device *dev)
 +static inline bool netif_is_l3_master(const struct net_device *dev)
  {
 -      return dev->priv_flags & IFF_VRF_MASTER;
 +      return dev->priv_flags & IFF_L3MDEV_MASTER;
 +}
 +
 +static inline bool netif_is_l3_slave(const struct net_device *dev)
 +{
 +      return dev->priv_flags & IFF_L3MDEV_SLAVE;
  }
  
  static inline bool netif_is_bridge_master(const struct net_device *dev)
@@@ -3851,6 -3854,27 +3858,6 @@@ static inline bool netif_is_ovs_master(
        return dev->priv_flags & IFF_OPENVSWITCH;
  }
  
 -static inline bool netif_index_is_vrf(struct net *net, int ifindex)
 -{
 -      bool rc = false;
 -
 -#if IS_ENABLED(CONFIG_NET_VRF)
 -      struct net_device *dev;
 -
 -      if (ifindex == 0)
 -              return false;
 -
 -      rcu_read_lock();
 -
 -      dev = dev_get_by_index_rcu(net, ifindex);
 -      if (dev)
 -              rc = netif_is_vrf(dev);
 -
 -      rcu_read_unlock();
 -#endif
 -      return rc;
 -}
 -
  /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
  static inline void netif_keep_dst(struct net_device *dev)
  {
index fef125e2d7774aec9c3299be0cc8df6145608be8,e663627a8ef36530b4dc658e9d30d4b7bc32a90a..28ccedd000f5720f6f41660ea6f049b9fd9f145c
@@@ -349,8 -349,6 +349,8 @@@ enum ovs_tunnel_key_attr 
        OVS_TUNNEL_KEY_ATTR_TP_SRC,             /* be16 src Transport Port. */
        OVS_TUNNEL_KEY_ATTR_TP_DST,             /* be16 dst Transport Port. */
        OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS,         /* Nested OVS_VXLAN_EXT_* */
 +      OVS_TUNNEL_KEY_ATTR_IPV6_SRC,           /* struct in6_addr src IPv6 address. */
 +      OVS_TUNNEL_KEY_ATTR_IPV6_DST,           /* struct in6_addr dst IPv6 address. */
        __OVS_TUNNEL_KEY_ATTR_MAX
  };
  
@@@ -622,7 -620,8 +622,8 @@@ struct ovs_action_hash 
   * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
   * @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack
   * table. This allows future packets for the same connection to be identified
-  * as 'established' or 'related'.
+  * as 'established' or 'related'. The flow key for the current packet will
+  * retain the pre-commit connection state.
   * @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
   * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
   * mask, the corresponding bit in the value is copied to the connection
diff --combined net/core/dev.c
index 1225b4be8ed6e5b419bc7789e75a75b3339eb7ba,c14748d051e7f58343768e920e52317154ef06fb..13f49f81ae13a3e3f07b3304b8f04bcb791d84b3
@@@ -99,6 -99,7 +99,7 @@@
  #include <linux/rtnetlink.h>
  #include <linux/stat.h>
  #include <net/dst.h>
+ #include <net/dst_metadata.h>
  #include <net/pkt_sched.h>
  #include <net/checksum.h>
  #include <net/xfrm.h>
@@@ -681,6 -682,32 +682,32 @@@ int dev_get_iflink(const struct net_dev
  }
  EXPORT_SYMBOL(dev_get_iflink);
  
+ /**
+  *    dev_fill_metadata_dst - Retrieve tunnel egress information.
+  *    @dev: targeted interface
+  *    @skb: The packet.
+  *
+  *    For better visibility of tunnel traffic OVS needs to retrieve
+  *    egress tunnel information for a packet. Following API allows
+  *    user to get this info.
+  */
+ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ {
+       struct ip_tunnel_info *info;
+       if (!dev->netdev_ops  || !dev->netdev_ops->ndo_fill_metadata_dst)
+               return -EINVAL;
+       info = skb_tunnel_info_unclone(skb);
+       if (!info)
+               return -ENOMEM;
+       if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+               return -EINVAL;
+       return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+ }
+ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
  /**
   *    __dev_get_by_name       - find a device by its name
   *    @net: the applicable net namespace
@@@ -2915,11 -2942,9 +2942,11 @@@ EXPORT_SYMBOL(xmit_recursion)
  
  /**
   *    dev_loopback_xmit - loop back @skb
 + *    @net: network namespace this loopback is happening in
 + *    @sk:  sk needed to be a netfilter okfn
   *    @skb: buffer to transmit
   */
 -int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
 +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        skb_reset_mac_header(skb);
        __skb_pull(skb, skb_network_offset(skb));
@@@ -2974,7 -2999,6 +3001,7 @@@ static u16 __netdev_pick_tx(struct net_
                        new_index = skb_tx_hash(dev, skb);
  
                if (queue_index != new_index && sk &&
 +                  sk_fullsock(sk) &&
                    rcu_access_pointer(sk->sk_dst_cache))
                        sk_tx_queue_set(sk, new_index);
  
        return rc;
  }
  
 -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
 +int dev_queue_xmit(struct sk_buff *skb)
  {
        return __dev_queue_xmit(skb, NULL);
  }
 -EXPORT_SYMBOL(dev_queue_xmit_sk);
 +EXPORT_SYMBOL(dev_queue_xmit);
  
  int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
  {
@@@ -3671,14 -3695,6 +3698,14 @@@ static inline struct sk_buff *handle_in
        case TC_ACT_QUEUED:
                kfree_skb(skb);
                return NULL;
 +      case TC_ACT_REDIRECT:
 +              /* skb_mac_header check was done by cls/act_bpf, so
 +               * we can safely push the L2 header back before
 +               * redirecting to another netdev
 +               */
 +              __skb_push(skb, skb->mac_len);
 +              skb_do_redirect(skb);
 +              return NULL;
        default:
                break;
        }
@@@ -3993,13 -4009,13 +4020,13 @@@ static int netif_receive_skb_internal(s
   *    NET_RX_SUCCESS: no congestion
   *    NET_RX_DROP: packet was dropped
   */
 -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
 +int netif_receive_skb(struct sk_buff *skb)
  {
        trace_netif_receive_skb_entry(skb);
  
        return netif_receive_skb_internal(skb);
  }
 -EXPORT_SYMBOL(netif_receive_skb_sk);
 +EXPORT_SYMBOL(netif_receive_skb);
  
  /* Network device is going away, flush any packets still pending
   * Called with irqs disabled.
@@@ -4868,7 -4884,8 +4895,7 @@@ struct netdev_adjacent 
        struct rcu_head rcu;
  };
  
 -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
 -                                               struct net_device *adj_dev,
 +static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
                                                 struct list_head *adj_list)
  {
        struct netdev_adjacent *adj;
@@@ -4894,7 -4911,7 +4921,7 @@@ bool netdev_has_upper_dev(struct net_de
  {
        ASSERT_RTNL();
  
 -      return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
 +      return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
  }
  EXPORT_SYMBOL(netdev_has_upper_dev);
  
@@@ -5156,7 -5173,7 +5183,7 @@@ static int __netdev_adjacent_dev_insert
        struct netdev_adjacent *adj;
        int ret;
  
 -      adj = __netdev_find_adj(dev, adj_dev, dev_list);
 +      adj = __netdev_find_adj(adj_dev, dev_list);
  
        if (adj) {
                adj->ref_nr++;
@@@ -5212,7 -5229,7 +5239,7 @@@ static void __netdev_adjacent_dev_remov
  {
        struct netdev_adjacent *adj;
  
 -      adj = __netdev_find_adj(dev, adj_dev, dev_list);
 +      adj = __netdev_find_adj(adj_dev, dev_list);
  
        if (!adj) {
                pr_err("tried to remove device %s from %s\n",
@@@ -5333,10 -5350,10 +5360,10 @@@ static int __netdev_upper_dev_link(stru
                return -EBUSY;
  
        /* To prevent loops, check if dev is not upper device to upper_dev. */
 -      if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
 +      if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
                return -EBUSY;
  
 -      if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
 +      if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
                return -EEXIST;
  
        if (master && netdev_master_upper_dev_get(dev))
        changeupper_info.master = master;
        changeupper_info.linking = true;
  
 +      ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
 +                                          &changeupper_info.info);
 +      ret = notifier_to_errno(ret);
 +      if (ret)
 +              return ret;
 +
        ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
                                                   master);
        if (ret)
@@@ -5494,9 -5505,6 +5521,9 @@@ void netdev_upper_dev_unlink(struct net
        changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
        changeupper_info.linking = false;
  
 +      call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
 +                                    &changeupper_info.info);
 +
        __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
  
        /* Here is the tricky part. We must remove all dev's lower
@@@ -5623,7 -5631,7 +5650,7 @@@ void *netdev_lower_dev_get_private(stru
  
        if (!lower_dev)
                return NULL;
 -      lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
 +      lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
        if (!lower)
                return NULL;
  
index 74dd6671b66da53a8919944d1ff563ce6cc063c4,c4ffc9de165420f5839006ec053859aff77147fe..78cc64eddfc1855849652c563b3c931cd7cc72e6
@@@ -32,11 -32,12 +32,11 @@@ static __be32 rpfilter_get_saddr(__be3
        return addr;
  }
  
 -static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
 +static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
                                const struct net_device *dev, u8 flags)
  {
        struct fib_result res;
        bool dev_match;
 -      struct net *net = dev_net(dev);
        int ret __maybe_unused;
  
        if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
@@@ -60,9 -61,7 +60,7 @@@
        if (FIB_RES_DEV(res) == dev)
                dev_match = true;
  #endif
-       if (dev_match || flags & XT_RPFILTER_LOOSE)
-               return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
-       return dev_match;
+       return dev_match || flags & XT_RPFILTER_LOOSE;
  }
  
  static bool rpfilter_is_local(const struct sk_buff *skb)
@@@ -97,7 -96,7 +95,7 @@@ static bool rpfilter_mt(const struct sk
        flow.flowi4_tos = RT_TOS(iph->tos);
        flow.flowi4_scope = RT_SCOPE_UNIVERSE;
  
 -      return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
 +      return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
  }
  
  static int rpfilter_check(const struct xt_mtchk_param *par)
diff --combined net/ipv4/tcp_output.c
index f6f7f9b4901bbbd75f94c17c12566b14d2431258,3dbee0d83b15b0cbd2a1008cab5eeb8f9365c878..f4f9793eb0255e62f623303b87e44c1c777e2251
@@@ -357,10 -357,14 +357,10 @@@ static void tcp_ecn_clear_syn(struct so
  }
  
  static void
 -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
 -                  struct sock *sk)
 +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
  {
 -      if (inet_rsk(req)->ecn_ok) {
 +      if (inet_rsk(req)->ecn_ok)
                th->ece = 1;
 -              if (tcp_ca_needs_ecn(sk))
 -                      INET_ECN_xmit(sk);
 -      }
  }
  
  /* Set up ECN state for a packet on a ESTABLISHED socket that is about to
@@@ -608,11 -612,12 +608,11 @@@ static unsigned int tcp_syn_options(str
  }
  
  /* Set up TCP options for SYN-ACKs. */
 -static unsigned int tcp_synack_options(struct sock *sk,
 -                                 struct request_sock *req,
 -                                 unsigned int mss, struct sk_buff *skb,
 -                                 struct tcp_out_options *opts,
 -                                 const struct tcp_md5sig_key *md5,
 -                                 struct tcp_fastopen_cookie *foc)
 +static unsigned int tcp_synack_options(struct request_sock *req,
 +                                     unsigned int mss, struct sk_buff *skb,
 +                                     struct tcp_out_options *opts,
 +                                     const struct tcp_md5sig_key *md5,
 +                                     struct tcp_fastopen_cookie *foc)
  {
        struct inet_request_sock *ireq = inet_rsk(req);
        unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@@ -1822,7 -1827,7 +1822,7 @@@ static bool tcp_tso_should_defer(struc
  
        /* Ok, it looks like it is advisable to defer. */
  
 -      if (cong_win < send_win && cong_win < skb->len)
 +      if (cong_win < send_win && cong_win <= skb->len)
                *is_cwnd_limited = true;
  
        return true;
@@@ -2055,6 -2060,7 +2055,6 @@@ static bool tcp_write_xmit(struct sock 
  
                cwnd_quota = tcp_cwnd_test(tp, skb);
                if (!cwnd_quota) {
 -                      is_cwnd_limited = true;
                        if (push_one == 2)
                                /* Force out a loss probe pkt. */
                                cwnd_quota = 1;
@@@ -2136,7 -2142,6 +2136,7 @@@ repair
                /* Send one loss probe per tail loss episode. */
                if (push_one != 2)
                        tcp_schedule_loss_probe(sk);
 +              is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
                tcp_cwnd_validate(sk, is_cwnd_limited);
                return false;
        }
@@@ -2160,7 -2165,7 +2160,7 @@@ bool tcp_schedule_loss_probe(struct soc
        /* Don't do any loss probe on a Fast Open connection before 3WHS
         * finishes.
         */
 -      if (sk->sk_state == TCP_SYN_RECV)
 +      if (tp->fastopen_rsk)
                return false;
  
        /* TLP is only scheduled when next timer event is RTO. */
        /* Schedule a loss probe in 2*RTT for SACK capable connections
         * in Open state, that are either limited by cwnd or application.
         */
 -      if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
 +      if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
            !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
                return false;
  
                return false;
  
        /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
 -       * for delayed ack when there's one outstanding packet.
 +       * for delayed ack when there's one outstanding packet. If no RTT
 +       * sample is available then probe after TCP_TIMEOUT_INIT.
         */
 -      timeout = rtt << 1;
 +      timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
        if (tp->packets_out == 1)
                timeout = max_t(u32, timeout,
                                (rtt + (rtt >> 1) + TCP_DELACK_MAX));
@@@ -2655,6 -2659,8 +2655,6 @@@ int tcp_retransmit_skb(struct sock *sk
                        net_dbg_ratelimited("retrans_out leaked\n");
                }
  #endif
 -              if (!tp->retrans_out)
 -                      tp->lost_retrans_low = tp->snd_nxt;
                TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
                tp->retrans_out += tcp_skb_pcount(skb);
  
                if (!tp->retrans_stamp)
                        tp->retrans_stamp = tcp_skb_timestamp(skb);
  
 -              /* snd_nxt is stored to detect loss of retransmitted segment,
 -               * see tcp_input.c tcp_sacktag_write_queue().
 -               */
 -              TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
        } else if (err != -EBUSY) {
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
        }
@@@ -2939,22 -2949,20 +2939,22 @@@ int tcp_send_synack(struct sock *sk
   * Allocate one skb and build a SYNACK packet.
   * @dst is consumed : Caller should not use it again.
   */
 -struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 +struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
                                struct request_sock *req,
 -                              struct tcp_fastopen_cookie *foc)
 +                              struct tcp_fastopen_cookie *foc,
 +                              bool attach_req)
  {
 -      struct tcp_out_options opts;
        struct inet_request_sock *ireq = inet_rsk(req);
 -      struct tcp_sock *tp = tcp_sk(sk);
 -      struct tcphdr *th;
 -      struct sk_buff *skb;
 +      const struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_key *md5 = NULL;
 +      struct tcp_out_options opts;
 +      struct sk_buff *skb;
        int tcp_header_size;
 +      struct tcphdr *th;
 +      u16 user_mss;
        int mss;
  
 -      skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
 +      skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (unlikely(!skb)) {
                dst_release(dst);
                return NULL;
        /* Reserve space for headers. */
        skb_reserve(skb, MAX_TCP_HEADER);
  
 +      if (attach_req) {
 +              skb->destructor = sock_edemux;
 +              sock_hold(req_to_sk(req));
 +              skb->sk = req_to_sk(req);
 +      } else {
 +              /* sk is a const pointer, because we want to express multiple
 +               * cpu might call us concurrently.
 +               * sk->sk_wmem_alloc in an atomic, we can promote to rw.
 +               */
 +              skb_set_owner_w(skb, (struct sock *)sk);
 +      }
        skb_dst_set(skb, dst);
  
        mss = dst_metric_advmss(dst);
 -      if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 -              mss = tp->rx_opt.user_mss;
 +      user_mss = READ_ONCE(tp->rx_opt.user_mss);
 +      if (user_mss && user_mss < mss)
 +              mss = user_mss;
  
        memset(&opts, 0, sizeof(opts));
  #ifdef CONFIG_SYN_COOKIES
        rcu_read_lock();
        md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
  #endif
 -      tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
 -                                           foc) + sizeof(*th);
 +      skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
 +      tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
 +                        sizeof(*th);
  
        skb_push(skb, tcp_header_size);
        skb_reset_transport_header(skb);
        memset(th, 0, sizeof(struct tcphdr));
        th->syn = 1;
        th->ack = 1;
 -      tcp_ecn_make_synack(req, th, sk);
 +      tcp_ecn_make_synack(req, th);
        th->source = htons(ireq->ir_num);
        th->dest = ireq->ir_rmt_port;
        /* Setting of flags are superfluous here for callers (and ECE is
        th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
  
        /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 -      th->window = htons(min(req->rcv_wnd, 65535U));
 -      tcp_options_write((__be32 *)(th + 1), tp, &opts);
 +      th->window = htons(min(req->rsk_rcv_wnd, 65535U));
 +      tcp_options_write((__be32 *)(th + 1), NULL, &opts);
        th->doff = (tcp_header_size >> 2);
        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
  
@@@ -3410,7 -3405,7 +3410,7 @@@ static int tcp_xmit_probe_skb(struct so
         */
        tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
        skb_mstamp_get(&skb->skb_mstamp);
-       NET_INC_STATS_BH(sock_net(sk), mib);
+       NET_INC_STATS(sock_net(sk), mib);
        return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
  }
  
@@@ -3505,14 -3500,13 +3505,14 @@@ void tcp_send_probe0(struct sock *sk
                                  TCP_RTO_MAX);
  }
  
 -int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
 +int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
  {
        const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
        struct flowi fl;
        int res;
  
 -      res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
 +      tcp_rsk(req)->txhash = net_tx_rndhash();
 +      res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
        if (!res) {
                TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
diff --combined net/ipv4/xfrm4_output.c
index 9f298d0dc9a1ccc3ac53dd205be8b90e56cc866b,41a261355662eb42fae031fdd30132767469f98e..7ee6518afa86ff785cacda0f115297ff6e5d0fa5
@@@ -30,6 -30,8 +30,8 @@@ static int xfrm4_tunnel_check_size(stru
  
        mtu = dst_mtu(skb_dst(skb));
        if (skb->len > mtu) {
+               skb->protocol = htons(ETH_P_IP);
                if (skb->sk)
                        xfrm_local_error(skb, mtu);
                else
@@@ -80,25 -82,24 +82,25 @@@ int xfrm4_output_finish(struct sock *sk
        return xfrm_output(sk, skb);
  }
  
 -static int __xfrm4_output(struct sock *sk, struct sk_buff *skb)
 +static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        struct xfrm_state *x = skb_dst(skb)->xfrm;
  
  #ifdef CONFIG_NETFILTER
        if (!x) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
 -              return dst_output_sk(sk, skb);
 +              return dst_output(net, sk, skb);
        }
  #endif
  
        return x->outer_mode->afinfo->output_finish(sk, skb);
  }
  
 -int xfrm4_output(struct sock *sk, struct sk_buff *skb)
 +int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
 -      return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
 -                          NULL, skb_dst(skb)->dev, __xfrm4_output,
 +      return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
 +                          net, sk, skb, NULL, skb_dst(skb)->dev,
 +                          __xfrm4_output,
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
  }
  
diff --combined net/ipv6/ip6_fib.c
index 09fddf70cca4ba885e1bfe942807753a9b073bc2,6cedc62b2abb1c3520647b4046c1f027ffe1295b..0c7e276c230e4ab2cd7c7ab0688e84920a41f69b
@@@ -264,7 -264,6 +264,7 @@@ struct fib6_table *fib6_get_table(struc
  
        return NULL;
  }
 +EXPORT_SYMBOL_GPL(fib6_get_table);
  
  static void __net_init fib6_tables_init(struct net *net)
  {
@@@ -286,7 -285,17 +286,17 @@@ struct fib6_table *fib6_get_table(struc
  struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
                                   int flags, pol_lookup_t lookup)
  {
-       return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+       struct rt6_info *rt;
+       rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+       if (rt->rt6i_flags & RTF_REJECT &&
+           rt->dst.error == -EAGAIN) {
+               ip6_rt_put(rt);
+               rt = net->ipv6.ip6_null_entry;
+               dst_hold(&rt->dst);
+       }
+       return &rt->dst;
  }
  
  static void __net_init fib6_tables_init(struct net *net)
diff --combined net/ipv6/ip6_output.c
index 0c89671e0767e5debe909d654cd9d089bcf8fa19,8dddb45c433e53ad35d30cd4263a5a5080cd0da9..c2650688aca757708cb9f8877ed82101036ac5b5
@@@ -28,6 -28,7 +28,7 @@@
  
  #include <linux/errno.h>
  #include <linux/kernel.h>
+ #include <linux/overflow-arith.h>
  #include <linux/string.h>
  #include <linux/socket.h>
  #include <linux/net.h>
@@@ -55,9 -56,8 +56,9 @@@
  #include <net/xfrm.h>
  #include <net/checksum.h>
  #include <linux/mroute6.h>
 +#include <net/l3mdev.h>
  
 -static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
 +static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
@@@ -72,7 -72,7 +73,7 @@@
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
 -                  ((mroute6_socket(dev_net(dev), skb) &&
 +                  ((mroute6_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 -                                      sk, newskb, NULL, newskb->dev,
 +                                      net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);
  
                        if (ipv6_hdr(skb)->hop_limit == 0) {
 -                              IP6_INC_STATS(dev_net(dev), idev,
 +                              IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }
  
 -              IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
 -                              skb->len);
 +              IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  
                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
        }
        rcu_read_unlock_bh();
  
 -      IP6_INC_STATS(dev_net(dst->dev),
 -                    ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 +      IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
  }
  
 -static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
 +static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 -              return ip6_fragment(sk, skb, ip6_finish_output2);
 +              return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
 -              return ip6_finish_output2(sk, skb);
 +              return ip6_finish_output2(net, sk, skb);
  }
  
 -int ip6_output(struct sock *sk, struct sk_buff *skb)
 +int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 +
        if (unlikely(idev->cnf.disable_ipv6)) {
 -              IP6_INC_STATS(dev_net(dev), idev,
 -                            IPSTATS_MIB_OUTDISCARDS);
 +              IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }
  
 -      return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
 -                          NULL, dev,
 +      return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 +                          net, sk, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
  }
  
  /*
 - *    xmit an sk_buff (used by TCP, SCTP and DCCP)
 + * xmit an sk_buff (used by TCP, SCTP and DCCP)
 + * Note : socket lock is not held for SYNACK packets, but might be modified
 + * by calls to skb_set_owner_w() and ipv6_local_error(),
 + * which are using proper atomic operations or spinlocks.
   */
 -
 -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 +int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
  {
        struct net *net = sock_net(sk);
 -      struct ipv6_pinfo *np = inet6_sk(sk);
 +      const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
                        }
                        consume_skb(skb);
                        skb = skb2;
 -                      skb_set_owner_w(skb, sk);
 +                      /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
 +                       * it is safe to call in our context (socket lock not held)
 +                       */
 +                      skb_set_owner_w(skb, (struct sock *)sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);
 -              return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
 -                             NULL, dst->dev, dst_output_sk);
 +              /* hooks should never assume socket lock is held.
 +               * we promote our socket to non const
 +               */
 +              return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 +                             net, (struct sock *)sk, skb, NULL, dst->dev,
 +                             dst_output);
        }
  
        skb->dev = dst->dev;
 -      ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
 +      /* ipv6_local_error() does not require socket lock,
 +       * we promote our socket to non const
 +       */
 +      ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 +
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
@@@ -329,11 -318,10 +330,11 @@@ static int ip6_forward_proxy_check(stru
        return 0;
  }
  
 -static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
 +static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 +                                   struct sk_buff *skb)
  {
        skb_sender_cpu_clear(skb);
 -      return dst_output_sk(sk, skb);
 +      return dst_output(net, sk, skb);
  }
  
  static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@@ -528,8 -516,8 +529,8 @@@ int ip6_forward(struct sk_buff *skb
  
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 -      return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
 -                     skb->dev, dst->dev,
 +      return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 +                     net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);
  
  error:
@@@ -556,8 -544,8 +557,8 @@@ static void ip6_copy_metadata(struct sk
        skb_copy_secmark(to, from);
  }
  
 -int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 -               int (*output)(struct sock *, struct sk_buff *))
 +int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 +               int (*output)(struct net *, struct sock *, struct sk_buff *))
  {
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        __be32 frag_id;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
 -      struct net *net = dev_net(skb_dst(skb)->dev);
  
        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;
                if (np->frag_size)
                        mtu = np->frag_size;
        }
-       mtu -= hlen + sizeof(struct frag_hdr);
+       if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
+           mtu <= 7)
+               goto fail_toobig;
  
        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);
                                ip6_copy_metadata(frag, skb);
                        }
  
 -                      err = output(sk, skb);
 +                      err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);
@@@ -817,7 -809,7 +821,7 @@@ slow_path
                /*
                 *      Put this fragment into the sending queue.
                 */
 -              err = output(sk, frag);
 +              err = output(net, sk, frag);
                if (err)
                        goto fail;
  
@@@ -899,7 -891,7 +903,7 @@@ out
        return dst;
  }
  
 -static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
 +static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
  {
  #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@@ -1030,7 -1022,7 +1034,7 @@@ EXPORT_SYMBOL_GPL(ip6_dst_lookup)
   *    It returns a valid dst pointer on success, or a pointer encoded
   *    error code.
   */
 -struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 +struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
  {
        struct dst_entry *dst = NULL;
        if (final_dst)
                fl6->daddr = *final_dst;
        if (!fl6->flowi6_oif)
 -              fl6->flowi6_oif = dst->dev->ifindex;
 +              fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
  
        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
  }
@@@ -1696,7 -1688,7 +1700,7 @@@ int ip6_send_skb(struct sk_buff *skb
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;
  
 -      err = ip6_local_out(skb);
 +      err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
diff --combined net/ipv6/route.c
index d0619632723a298cf2374874b2f4fb03fa18640f,946880ad48acda725eb66f9e2d0a8fd0f2b4ec40..2701cb3d88e9372cd226d0f5f58fbdc3d9f7582d
@@@ -61,7 -61,6 +61,7 @@@
  #include <net/nexthop.h>
  #include <net/lwtunnel.h>
  #include <net/ip_tunnels.h>
 +#include <net/l3mdev.h>
  
  #include <asm/uaccess.h>
  
@@@ -87,9 -86,9 +87,9 @@@ static void           ip6_dst_ifdown(struct dst_
  static int             ip6_dst_gc(struct dst_ops *ops);
  
  static int            ip6_pkt_discard(struct sk_buff *skb);
 -static int            ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
 +static int            ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  static int            ip6_pkt_prohibit(struct sk_buff *skb);
 -static int            ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
 +static int            ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  static void           ip6_link_failure(struct sk_buff *skb);
  static void           ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                                           struct sk_buff *skb, u32 mtu);
@@@ -304,7 -303,7 +304,7 @@@ static const struct rt6_info ip6_blk_ho
                .obsolete       = DST_OBSOLETE_FORCE_CHK,
                .error          = -EINVAL,
                .input          = dst_discard,
 -              .output         = dst_discard_sk,
 +              .output         = dst_discard_out,
        },
        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        .rt6i_protocol  = RTPROT_KERNEL,
@@@ -422,7 -421,31 +422,7 @@@ static bool rt6_check_expired(const str
  static int rt6_info_hash_nhsfn(unsigned int candidate_count,
                               const struct flowi6 *fl6)
  {
 -      unsigned int val = fl6->flowi6_proto;
 -
 -      val ^= ipv6_addr_hash(&fl6->daddr);
 -      val ^= ipv6_addr_hash(&fl6->saddr);
 -
 -      /* Work only if this not encapsulated */
 -      switch (fl6->flowi6_proto) {
 -      case IPPROTO_UDP:
 -      case IPPROTO_TCP:
 -      case IPPROTO_SCTP:
 -              val ^= (__force u16)fl6->fl6_sport;
 -              val ^= (__force u16)fl6->fl6_dport;
 -              break;
 -
 -      case IPPROTO_ICMPV6:
 -              val ^= (__force u16)fl6->fl6_icmp_type;
 -              val ^= (__force u16)fl6->fl6_icmp_code;
 -              break;
 -      }
 -      /* RFC6438 recommands to use flowlabel */
 -      val ^= (__force u32)fl6->flowlabel;
 -
 -      /* Perhaps, we need to tune, this function? */
 -      val = val ^ (val >> 7) ^ (val >> 12);
 -      return val % candidate_count;
 +      return get_hash_from_flowi6(fl6) % candidate_count;
  }
  
  static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
@@@ -475,10 -498,10 +475,10 @@@ static inline struct rt6_info *rt6_devi
                        if (dev->flags & IFF_LOOPBACK) {
                                if (!sprt->rt6i_idev ||
                                    sprt->rt6i_idev->dev->ifindex != oif) {
 -                                      if (flags & RT6_LOOKUP_F_IFACE && oif)
 +                                      if (flags & RT6_LOOKUP_F_IFACE)
                                                continue;
 -                                      if (local && (!oif ||
 -                                                    local->rt6i_idev->dev->ifindex == oif))
 +                                      if (local &&
 +                                          local->rt6i_idev->dev->ifindex == oif)
                                                continue;
                                }
                                local = sprt;
@@@ -515,7 -538,7 +515,7 @@@ static void rt6_probe_deferred(struct w
                container_of(w, struct __rt6_probe_work, work);
  
        addrconf_addr_solict_mult(&work->target, &mcaddr);
 -      ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
 +      ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
        dev_put(work->dev);
        kfree(work);
  }
@@@ -1145,7 -1168,7 +1145,7 @@@ void ip6_route_input(struct sk_buff *sk
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip_tunnel_info *tun_info;
        struct flowi6 fl6 = {
 -              .flowi6_iif = skb->dev->ifindex,
 +              .flowi6_iif = l3mdev_fib_oif(skb->dev),
                .daddr = iph->daddr,
                .saddr = iph->saddr,
                .flowlabel = ip6_flowinfo(iph),
@@@ -1169,20 -1192,17 +1169,22 @@@ static struct rt6_info *ip6_pol_route_o
  struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
                                    struct flowi6 *fl6)
  {
 +      struct dst_entry *dst;
        int flags = 0;
+       bool any_src;
  
 +      dst = l3mdev_rt6_dst_by_oif(net, fl6);
 +      if (dst)
 +              return dst;
 +
        fl6->flowi6_iif = LOOPBACK_IFINDEX;
  
+       any_src = ipv6_addr_any(&fl6->saddr);
        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
-           fl6->flowi6_oif)
+           (fl6->flowi6_oif && any_src))
                flags |= RT6_LOOKUP_F_IFACE;
  
-       if (!ipv6_addr_any(&fl6->saddr))
+       if (!any_src)
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        else if (sk)
                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
@@@ -1203,7 -1223,7 +1205,7 @@@ struct dst_entry *ip6_blackhole_route(s
                new = &rt->dst;
                new->__use = 1;
                new->input = dst_discard;
 -              new->output = dst_discard_sk;
 +              new->output = dst_discard_out;
  
                dst_copy_metrics(new, &ort->dst);
                rt->rt6i_idev = ort->rt6i_idev;
@@@ -1729,21 -1749,21 +1731,21 @@@ static int ip6_convert_metrics(struct m
        return -EINVAL;
  }
  
 -int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
 +static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
  {
 -      int err;
        struct net *net = cfg->fc_nlinfo.nl_net;
        struct rt6_info *rt = NULL;
        struct net_device *dev = NULL;
        struct inet6_dev *idev = NULL;
        struct fib6_table *table;
        int addr_type;
 +      int err = -EINVAL;
  
        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 -              return -EINVAL;
 +              goto out;
  #ifndef CONFIG_IPV6_SUBTREES
        if (cfg->fc_src_len)
 -              return -EINVAL;
 +              goto out;
  #endif
        if (cfg->fc_ifindex) {
                err = -ENODEV;
                switch (cfg->fc_type) {
                case RTN_BLACKHOLE:
                        rt->dst.error = -EINVAL;
 -                      rt->dst.output = dst_discard_sk;
 +                      rt->dst.output = dst_discard_out;
                        rt->dst.input = dst_discard;
                        break;
                case RTN_PROHIBIT:
@@@ -1963,7 -1983,9 +1965,7 @@@ install_route
  
        cfg->fc_nlinfo.nl_net = dev_net(dev);
  
 -      *rt_ret = rt;
 -
 -      return 0;
 +      return rt;
  out:
        if (dev)
                dev_put(dev);
        if (rt)
                dst_free(&rt->dst);
  
 -      *rt_ret = NULL;
 -
 -      return err;
 +      return ERR_PTR(err);
  }
  
  int ip6_route_add(struct fib6_config *cfg)
  {
        struct mx6_config mxc = { .mx = NULL, };
 -      struct rt6_info *rt = NULL;
 +      struct rt6_info *rt;
        int err;
  
 -      err = ip6_route_info_create(cfg, &rt);
 -      if (err)
 +      rt = ip6_route_info_create(cfg);
 +      if (IS_ERR(rt)) {
 +              err = PTR_ERR(rt);
 +              rt = NULL;
                goto out;
 +      }
  
        err = ip6_convert_metrics(&mxc, cfg);
        if (err)
@@@ -2268,6 -2289,7 +2270,6 @@@ static struct rt6_info *rt6_add_route_i
                                           unsigned int pref)
  {
        struct fib6_config cfg = {
 -              .fc_table       = RT6_TABLE_INFO,
                .fc_metric      = IP6_RT_PRIO_USER,
                .fc_ifindex     = ifindex,
                .fc_dst_len     = prefixlen,
                .fc_nlinfo.nl_net = net,
        };
  
 +      cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
        cfg.fc_dst = *prefix;
        cfg.fc_gateway = *gwaddr;
  
@@@ -2319,7 -2340,7 +2321,7 @@@ struct rt6_info *rt6_add_dflt_router(co
                                     unsigned int pref)
  {
        struct fib6_config cfg = {
 -              .fc_table       = RT6_TABLE_DFLT,
 +              .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
                .fc_metric      = IP6_RT_PRIO_USER,
                .fc_ifindex     = dev->ifindex,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@@ -2366,8 -2387,7 +2368,8 @@@ static void rtmsg_to_fib6_config(struc
  {
        memset(cfg, 0, sizeof(*cfg));
  
 -      cfg->fc_table = RT6_TABLE_MAIN;
 +      cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
 +                       : RT6_TABLE_MAIN;
        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
        cfg->fc_metric = rtmsg->rtmsg_metric;
        cfg->fc_expires = rtmsg->rtmsg_info;
@@@ -2451,7 -2471,7 +2453,7 @@@ static int ip6_pkt_discard(struct sk_bu
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
  }
  
 -static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
 +static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        skb->dev = skb_dst(skb)->dev;
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@@ -2462,7 -2482,7 +2464,7 @@@ static int ip6_pkt_prohibit(struct sk_b
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
  }
  
 -static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
 +static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        skb->dev = skb_dst(skb)->dev;
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@@ -2476,7 -2496,6 +2478,7 @@@ struct rt6_info *addrconf_dst_alloc(str
                                    const struct in6_addr *addr,
                                    bool anycast)
  {
 +      u32 tb_id;
        struct net *net = dev_net(idev->dev);
        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
                                            DST_NOCOUNT);
        rt->rt6i_gateway  = *addr;
        rt->rt6i_dst.addr = *addr;
        rt->rt6i_dst.plen = 128;
 -      rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 +      tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
 +      rt->rt6i_table = fib6_get_table(net, tb_id);
        rt->dst.flags |= DST_NOCACHE;
  
        atomic_set(&rt->dst.__refcnt, 1);
@@@ -2879,12 -2897,9 +2881,12 @@@ static int ip6_route_multipath_add(stru
                                r_cfg.fc_encap_type = nla_get_u16(nla);
                }
  
 -              err = ip6_route_info_create(&r_cfg, &rt);
 -              if (err)
 +              rt = ip6_route_info_create(&r_cfg);
 +              if (IS_ERR(rt)) {
 +                      err = PTR_ERR(rt);
 +                      rt = NULL;
                        goto cleanup;
 +              }
  
                err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
                if (err) {
@@@ -3263,11 -3278,6 +3265,11 @@@ static int inet6_rtm_getroute(struct sk
        } else {
                fl6.flowi6_oif = oif;
  
 +              if (netif_index_is_l3_master(net, oif)) {
 +                      fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
 +                                         FLOWI_FLAG_SKIP_NH_OIF;
 +              }
 +
                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        }
  
diff --combined net/ipv6/xfrm6_output.c
index 9db067a11b525c4bb026fa1d352c05a591e60124,e15feb7b413dd1a93a376f2ab6aabcbc3c3bb944..4d09ce6fa90e666bfdeda09cbdc8c7b0cb8b5824
@@@ -79,6 -79,7 +79,7 @@@ static int xfrm6_tunnel_check_size(stru
  
        if (!skb->ignore_df && skb->len > mtu) {
                skb->dev = dst->dev;
+               skb->protocol = htons(ETH_P_IPV6);
  
                if (xfrm6_local_dontfrag(skb))
                        xfrm6_local_rxpmtu(skb, mtu);
@@@ -131,52 -132,49 +132,57 @@@ int xfrm6_output_finish(struct sock *sk
        return xfrm_output(sk, skb);
  }
  
 -static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
 +static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 +{
 +      struct xfrm_state *x = skb_dst(skb)->xfrm;
 +
 +      return x->outer_mode->afinfo->output_finish(sk, skb);
 +}
 +
 +static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        struct dst_entry *dst = skb_dst(skb);
        struct xfrm_state *x = dst->xfrm;
        int mtu;
+       bool toobig;
  
  #ifdef CONFIG_NETFILTER
        if (!x) {
                IP6CB(skb)->flags |= IP6SKB_REROUTED;
 -              return dst_output_sk(sk, skb);
 +              return dst_output(net, sk, skb);
        }
  #endif
  
+       if (x->props.mode != XFRM_MODE_TUNNEL)
+               goto skip_frag;
        if (skb->protocol == htons(ETH_P_IPV6))
                mtu = ip6_skb_dst_mtu(skb);
        else
                mtu = dst_mtu(skb_dst(skb));
  
-       if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+       toobig = skb->len > mtu && !skb_is_gso(skb);
+       if (toobig && xfrm6_local_dontfrag(skb)) {
                xfrm6_local_rxpmtu(skb, mtu);
                return -EMSGSIZE;
-       } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+       } else if (!skb->ignore_df && toobig && skb->sk) {
                xfrm_local_error(skb, mtu);
                return -EMSGSIZE;
        }
  
-       if (x->props.mode == XFRM_MODE_TUNNEL &&
-           ((skb->len > mtu && !skb_is_gso(skb)) ||
-               dst_allfrag(skb_dst(skb)))) {
+       if (toobig || dst_allfrag(skb_dst(skb)))
 -              return ip6_fragment(sk, skb,
 -                                  x->outer_mode->afinfo->output_finish);
 +              return ip6_fragment(net, sk, skb,
 +                                  __xfrm6_output_finish);
-       }
+ skip_frag:
        return x->outer_mode->afinfo->output_finish(sk, skb);
  }
  
 -int xfrm6_output(struct sock *sk, struct sk_buff *skb)
 +int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
 -      return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
 -                          NULL, skb_dst(skb)->dev, __xfrm6_output,
 +      return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 +                          net, sk, skb,  NULL, skb_dst(skb)->dev,
 +                          __xfrm6_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
  }
diff --combined net/ipv6/xfrm6_policy.c
index 08c9c93f352737ba21eebad04f6e8b3f268dbdde,da55e0c85bb8edca213eae4686c46073e2af1650..2cc5840f943d566ad8d2012d95a8ccac03dd4927
@@@ -20,7 -20,7 +20,7 @@@
  #include <net/ip.h>
  #include <net/ipv6.h>
  #include <net/ip6_route.h>
 -#include <net/vrf.h>
 +#include <net/l3mdev.h>
  #if IS_ENABLED(CONFIG_IPV6_MIP6)
  #include <net/mip6.h>
  #endif
@@@ -133,8 -133,10 +133,8 @@@ _decode_session6(struct sk_buff *skb, s
  
        nexthdr = nh[nhoff];
  
 -      if (skb_dst(skb)) {
 -              oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
 -                      : skb_dst(skb)->dev->ifindex;
 -      }
 +      if (skb_dst(skb))
 +              oif = l3mdev_fib_oif(skb_dst(skb)->dev);
  
        memset(fl6, 0, sizeof(struct flowi6));
        fl6->flowi6_mark = skb->mark;
                        return;
  
                case IPPROTO_ICMPV6:
-                       if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
+                       if (!onlyproto && (nh + offset + 2 < skb->data ||
+                           pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
                                u8 *icmp;
  
                                nh = skb_network_header(skb);
  #if IS_ENABLED(CONFIG_IPV6_MIP6)
                case IPPROTO_MH:
                        offset += ipv6_optlen(exthdr);
-                       if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+                       if (!onlyproto && (nh + offset + 3 < skb->data ||
+                           pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
                                struct ip6_mh *mh;
  
                                nh = skb_network_header(skb);
diff --combined net/netfilter/core.c
index 09e661c3ae58fb2d0ecf4cb29cd9d31fb467df14,21a085686dc1b543439f33e448531bc64a273684..f39276d1c2d76788dfc018f2ebdc07d2855f977d
@@@ -152,6 -152,8 +152,8 @@@ void nf_unregister_net_hook(struct net 
  #endif
        synchronize_net();
        nf_queue_nf_hook_drop(net, &entry->ops);
+       /* other cpu might still process nfqueue verdict that used reg */
+       synchronize_net();
        kfree(entry);
  }
  EXPORT_SYMBOL(nf_unregister_net_hook);
@@@ -269,7 -271,7 +271,7 @@@ unsigned int nf_iterate(struct list_hea
                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
  repeat:
 -              verdict = (*elemp)->hook(*elemp, skb, state);
 +              verdict = (*elemp)->hook((*elemp)->priv, skb, state);
                if (verdict != NF_ACCEPT) {
  #ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely((verdict & NF_VERDICT_MASK)
@@@ -313,6 -315,8 +315,6 @@@ next_hook
                int err = nf_queue(skb, elem, state,
                                   verdict >> NF_VERDICT_QBITS);
                if (err < 0) {
 -                      if (err == -ECANCELED)
 -                              goto next_hook;
                        if (err == -ESRCH &&
                           (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
                                goto next_hook;
@@@ -346,12 -350,6 +348,12 @@@ int skb_make_writable(struct sk_buff *s
  }
  EXPORT_SYMBOL(skb_make_writable);
  
 +/* This needs to be compiled in any case to avoid dependencies between the
 + * nfnetlink_queue code and nf_conntrack.
 + */
 +struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
 +EXPORT_SYMBOL_GPL(nfnl_ct_hook);
 +
  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
  /* This does not belong here, but locally generated errors need it if connection
     tracking in use: without this, connection may not be in hash table, and hence
@@@ -389,6 -387,9 +391,6 @@@ void nf_conntrack_destroy(struct nf_con
  }
  EXPORT_SYMBOL(nf_conntrack_destroy);
  
 -struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
 -EXPORT_SYMBOL_GPL(nfq_ct_hook);
 -
  /* Built-in default zone used e.g. by modules. */
  const struct nf_conntrack_zone nf_ct_zone_dflt = {
        .id     = NF_CT_DEFAULT_ZONE_ID,
index c6087233d7fca456ee6e8db52aabb015d6f67f94,0bf0f406de523908df30d2dd0d6b76aa1c3fc846..221fa8b37a473ea3dd2a5f2987f9b4233ba6987d
@@@ -620,7 -620,7 +620,7 @@@ static int set_sctp(struct sk_buff *skb
        return 0;
  }
  
 -static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
 +static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
        struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
        struct vport *vport = data->vport;
@@@ -679,8 -679,8 +679,8 @@@ static void prepare_frag(struct vport *
        skb_pull(skb, hlen);
  }
  
 -static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
 -                       __be16 ethertype)
 +static void ovs_fragment(struct net *net, struct vport *vport,
 +                       struct sk_buff *skb, u16 mru, __be16 ethertype)
  {
        if (skb_network_offset(skb) > MAX_L2_LEN) {
                OVS_NLERR(1, "L2 header too long to fragment");
                skb_dst_set_noref(skb, &ovs_dst);
                IPCB(skb)->frag_max_size = mru;
  
 -              ip_do_fragment(skb->sk, skb, ovs_vport_output);
 +              ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
                refdst_drop(orig_dst);
        } else if (ethertype == htons(ETH_P_IPV6)) {
                const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
                skb_dst_set_noref(skb, &ovs_rt.dst);
                IP6CB(skb)->frag_max_size = mru;
  
 -              v6ops->fragment(skb->sk, skb, ovs_vport_output);
 +              v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
                refdst_drop(orig_dst);
        } else {
                WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
@@@ -746,7 -746,6 +746,7 @@@ static void do_output(struct datapath *
                if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
                        ovs_vport_send(vport, skb);
                } else if (mru <= vport->dev->mtu) {
 +                      struct net *net = read_pnet(&dp->net);
                        __be16 ethertype = key->eth.type;
  
                        if (!is_flow_key_valid(key)) {
                                        ethertype = vlan_get_protocol(skb);
                        }
  
 -                      ovs_fragment(vport, skb, mru, ethertype);
 +                      ovs_fragment(net, vport, skb, mru, ethertype);
                } else {
                        kfree_skb(skb);
                }
@@@ -769,7 -768,6 +769,6 @@@ static int output_userspace(struct data
                            struct sw_flow_key *key, const struct nlattr *attr,
                            const struct nlattr *actions, int actions_len)
  {
-       struct ip_tunnel_info info;
        struct dp_upcall_info upcall;
        const struct nlattr *a;
        int rem;
                        if (vport) {
                                int err;
  
-                               upcall.egress_tun_info = &info;
-                               err = ovs_vport_get_egress_tun_info(vport, skb,
-                                                                   &upcall);
-                               if (err)
-                                       upcall.egress_tun_info = NULL;
+                               err = dev_fill_metadata_dst(vport->dev, skb);
+                               if (!err)
+                                       upcall.egress_tun_info = skb_tunnel_info(skb);
                        }
  
                        break;
index 9ed833e9bb7db5c149366a4cb351bcd4b45ee4f3,a5ec34f8502f013161bb08e9cb9d9960559a9e67..bd165ee2bb1633d96db7d4fd45180f336a5c20f5
@@@ -151,6 -151,8 +151,8 @@@ static void ovs_ct_update_key(const str
        ct = nf_ct_get(skb, &ctinfo);
        if (ct) {
                state = ovs_ct_get_state(ctinfo);
+               if (!nf_ct_is_confirmed(ct))
+                       state |= OVS_CS_F_NEW;
                if (ct->master)
                        state |= OVS_CS_F_RELATED;
                zone = nf_ct_zone(ct);
@@@ -222,9 -224,6 +224,6 @@@ static int ovs_ct_set_labels(struct sk_
        struct nf_conn *ct;
        int err;
  
-       if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
-               return -ENOTSUPP;
        /* The connection could be invalid, in which case set_label is no-op.*/
        ct = nf_ct_get(skb, &ctinfo);
        if (!ct)
@@@ -304,7 -303,7 +303,7 @@@ static int handle_fragments(struct net 
                int err;
  
                memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 -              err = ip_defrag(skb, user);
 +              err = ip_defrag(net, skb, user);
                if (err)
                        return err;
  
                struct sk_buff *reasm;
  
                memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
 -              reasm = nf_ct_frag6_gather(skb, user);
 +              reasm = nf_ct_frag6_gather(net, skb, user);
                if (!reasm)
                        return -EINPROGRESS;
  
@@@ -347,7 -346,7 +346,7 @@@ ovs_ct_expect_find(struct net *net, con
  {
        struct nf_conntrack_tuple tuple;
  
 -      if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
 +      if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
                return NULL;
        return __nf_ct_expect_find(net, zone, &tuple);
  }
@@@ -377,7 -376,7 +376,7 @@@ static bool skb_nfct_cached(const struc
        return true;
  }
  
- static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
+ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
                           const struct ovs_conntrack_info *info,
                           struct sk_buff *skb)
  {
                }
        }
  
+       ovs_ct_update_key(skb, key, true);
        return 0;
  }
  
@@@ -430,8 -431,6 +431,6 @@@ static int ovs_ct_lookup(struct net *ne
                err = __ovs_ct_lookup(net, key, info, skb);
                if (err)
                        return err;
-               ovs_ct_update_key(skb, key, true);
        }
  
        return 0;
@@@ -460,8 -459,6 +459,6 @@@ static int ovs_ct_commit(struct net *ne
        if (nf_conntrack_confirm(skb) != NF_ACCEPT)
                return -EINVAL;
  
-       ovs_ct_update_key(skb, key, true);
        return 0;
  }
  
@@@ -587,6 -584,10 +584,10 @@@ static int parse_ct(const struct nlatt
                case OVS_CT_ATTR_MARK: {
                        struct md_mark *mark = nla_data(a);
  
+                       if (!mark->mask) {
+                               OVS_NLERR(log, "ct_mark mask cannot be 0");
+                               return -EINVAL;
+                       }
                        info->mark = *mark;
                        break;
                }
                case OVS_CT_ATTR_LABELS: {
                        struct md_labels *labels = nla_data(a);
  
+                       if (!labels_nonzero(&labels->mask)) {
+                               OVS_NLERR(log, "ct_labels mask cannot be 0");
+                               return -EINVAL;
+                       }
                        info->labels = *labels;
                        break;
                }
@@@ -705,11 -710,12 +710,12 @@@ int ovs_ct_action_to_attr(const struct 
        if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
            nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
                return -EMSGSIZE;
-       if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+       if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
            nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
                    &ct_info->mark))
                return -EMSGSIZE;
        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+           labels_nonzero(&ct_info->labels.mask) &&
            nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
                    &ct_info->labels))
                return -EMSGSIZE;
index a75828091e21fc477142d78d25accbd9a7cb5563,c5d08ee377304313e7e320133e46343adff5da95..5633172b791ab98e297ba1605c34bf42e684ccfb
@@@ -91,7 -91,8 +91,7 @@@ static bool ovs_must_notify(struct genl
  static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
  {
 -      genl_notify(family, skb, genl_info_net(info), info->snd_portid,
 -                  0, info->nlhdr, GFP_KERNEL);
 +      genl_notify(family, skb, info, 0, GFP_KERNEL);
  }
  
  /**
@@@ -489,9 -490,8 +489,8 @@@ static int queue_userspace_packet(struc
  
        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
-               err = ovs_nla_put_egress_tunnel_key(user_skb,
-                                                   upcall_info->egress_tun_info,
-                                                   upcall_info->egress_tun_opts);
+               err = ovs_nla_put_tunnel_info(user_skb,
+                                             upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }
index 80e1f09397c0e6c64ab7f8bea5814f3664571cb3,38536c137c54d0d4ebcebcce613fefdb89799b5a..907d6fd28ede695cc1b876570c101883ed0b4b0e
@@@ -262,8 -262,8 +262,8 @@@ size_t ovs_tun_key_attr_size(void
         * updating this function.
         */
        return    nla_total_size(8)    /* OVS_TUNNEL_KEY_ATTR_ID */
 -              + nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
 -              + nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
 +              + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
 +              + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
@@@ -323,8 -323,6 +323,8 @@@ static const struct ovs_len_tbl ovs_tun
        [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
        [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
                                                .next = ovs_vxlan_ext_key_lens },
 +      [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 +      [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
  };
  
  /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
@@@ -544,15 -542,15 +544,15 @@@ static int vxlan_tun_opt_from_nlattr(co
        return 0;
  }
  
 -static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 -                              struct sw_flow_match *match, bool is_mask,
 -                              bool log)
 +static int ip_tun_from_nlattr(const struct nlattr *attr,
 +                            struct sw_flow_match *match, bool is_mask,
 +                            bool log)
  {
 -      struct nlattr *a;
 -      int rem;
 -      bool ttl = false;
 +      bool ttl = false, ipv4 = false, ipv6 = false;
        __be16 tun_flags = 0;
        int opts_type = 0;
 +      struct nlattr *a;
 +      int rem;
  
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
                                        nla_get_in_addr(a), is_mask);
 +                      ipv4 = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
                                        nla_get_in_addr(a), is_mask);
 +                      ipv4 = true;
 +                      break;
 +              case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
 +                      SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
 +                                      nla_get_in6_addr(a), is_mask);
 +                      ipv6 = true;
 +                      break;
 +              case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
 +                      SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
 +                                      nla_get_in6_addr(a), is_mask);
 +                      ipv6 = true;
                        break;
                case OVS_TUNNEL_KEY_ATTR_TOS:
                        SW_FLOW_KEY_PUT(match, tun_key.tos,
                        opts_type = type;
                        break;
                default:
 -                      OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
 +                      OVS_NLERR(log, "Unknown IP tunnel attribute %d",
                                  type);
                        return -EINVAL;
                }
        }
  
        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 +      if (is_mask)
 +              SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
 +      else
 +              SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
 +                              false);
  
        if (rem > 0) {
 -              OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
 +              OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
                          rem);
                return -EINVAL;
        }
  
 +      if (ipv4 && ipv6) {
 +              OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
 +              return -EINVAL;
 +      }
 +
        if (!is_mask) {
 -              if (!match->key->tun_key.u.ipv4.dst) {
 +              if (!ipv4 && !ipv6) {
 +                      OVS_NLERR(log, "IP tunnel dst address not specified");
 +                      return -EINVAL;
 +              }
 +              if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
                        OVS_NLERR(log, "IPv4 tunnel dst address is zero");
                        return -EINVAL;
                }
 +              if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
 +                      OVS_NLERR(log, "IPv6 tunnel dst address is zero");
 +                      return -EINVAL;
 +              }
  
                if (!ttl) {
 -                      OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
 +                      OVS_NLERR(log, "IP tunnel TTL not specified.");
                        return -EINVAL;
                }
        }
@@@ -714,36 -682,21 +714,36 @@@ static int vxlan_opt_to_nlattr(struct s
        return 0;
  }
  
 -static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 -                              const struct ip_tunnel_key *output,
 -                              const void *tun_opts, int swkey_tun_opts_len)
 +static int __ip_tun_to_nlattr(struct sk_buff *skb,
 +                            const struct ip_tunnel_key *output,
 +                            const void *tun_opts, int swkey_tun_opts_len,
 +                            unsigned short tun_proto)
  {
        if (output->tun_flags & TUNNEL_KEY &&
            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
                return -EMSGSIZE;
 -      if (output->u.ipv4.src &&
 -          nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
 -                          output->u.ipv4.src))
 -              return -EMSGSIZE;
 -      if (output->u.ipv4.dst &&
 -          nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
 -                          output->u.ipv4.dst))
 -              return -EMSGSIZE;
 +      switch (tun_proto) {
 +      case AF_INET:
 +              if (output->u.ipv4.src &&
 +                  nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
 +                                  output->u.ipv4.src))
 +                      return -EMSGSIZE;
 +              if (output->u.ipv4.dst &&
 +                  nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
 +                                  output->u.ipv4.dst))
 +                      return -EMSGSIZE;
 +              break;
 +      case AF_INET6:
 +              if (!ipv6_addr_any(&output->u.ipv6.src) &&
 +                  nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
 +                                   &output->u.ipv6.src))
 +                      return -EMSGSIZE;
 +              if (!ipv6_addr_any(&output->u.ipv6.dst) &&
 +                  nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
 +                                   &output->u.ipv6.dst))
 +                      return -EMSGSIZE;
 +              break;
 +      }
        if (output->tos &&
            nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
                return -EMSGSIZE;
        if ((output->tun_flags & TUNNEL_OAM) &&
            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
                return -EMSGSIZE;
-       if (tun_opts) {
+       if (swkey_tun_opts_len) {
                if (output->tun_flags & TUNNEL_GENEVE_OPT &&
                    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
                            swkey_tun_opts_len, tun_opts))
        return 0;
  }
  
 -static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 -                            const struct ip_tunnel_key *output,
 -                            const void *tun_opts, int swkey_tun_opts_len)
 +static int ip_tun_to_nlattr(struct sk_buff *skb,
 +                          const struct ip_tunnel_key *output,
 +                          const void *tun_opts, int swkey_tun_opts_len,
 +                          unsigned short tun_proto)
  {
        struct nlattr *nla;
        int err;
        if (!nla)
                return -EMSGSIZE;
  
 -      err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
 +      err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
 +                               tun_proto);
        if (err)
                return err;
  
        return 0;
  }
  
- int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
-                                 const struct ip_tunnel_info *egress_tun_info,
-                                 const void *egress_tun_opts)
+ int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+                           struct ip_tunnel_info *tun_info)
  {
-       return __ip_tun_to_nlattr(skb, &egress_tun_info->key,
-                                 egress_tun_opts,
-                                 egress_tun_info->options_len,
-                                 ip_tunnel_info_af(egress_tun_info));
 -      return __ipv4_tun_to_nlattr(skb, &tun_info->key,
 -                                  ip_tunnel_info_opts(tun_info),
 -                                  tun_info->options_len);
++      return __ip_tun_to_nlattr(skb, &tun_info->key,
++                                ip_tunnel_info_opts(tun_info),
++                                tun_info->options_len,
++                                ip_tunnel_info_af(tun_info));
  }
  
  static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
        }
        if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
 -              if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
 -                                       is_mask, log) < 0)
 +              if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
 +                                     is_mask, log) < 0)
                        return -EINVAL;
                *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
        }
            ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
                u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
  
-               if (!is_mask && !ovs_ct_state_supported(ct_state)) {
+               if (ct_state & ~CT_SUPPORTED_MASK) {
                        OVS_NLERR(log, "ct_state flags %08x unsupported",
                                  ct_state);
                        return -EINVAL;
@@@ -1149,6 -1098,9 +1148,9 @@@ static void nlattr_set(struct nlattr *a
                } else {
                        memset(nla_data(nla), val, nla_len(nla));
                }
+               if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
+                       *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
        }
  }
  
@@@ -1250,7 -1202,7 +1252,7 @@@ int ovs_nla_get_match(struct net *net, 
                        /* The userspace does not send tunnel attributes that
                         * are 0, but we should not wildcard them nonetheless.
                         */
 -                      if (match->key->tun_key.u.ipv4.dst)
 +                      if (match->key->tun_proto)
                                SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
                                                         0xff, true);
  
@@@ -1423,14 -1375,14 +1425,14 @@@ static int __ovs_nla_put_key(const stru
        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
                goto nla_put_failure;
  
 -      if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
 +      if ((swkey->tun_proto || is_mask)) {
                const void *opts = NULL;
  
                if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
                        opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
  
 -              if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
 -                                     swkey->tun_opts_len))
 +              if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
 +                                   swkey->tun_opts_len, swkey->tun_proto))
                        goto nla_put_failure;
        }
  
@@@ -1933,7 -1885,7 +1935,7 @@@ static int validate_and_copy_set_tun(co
        int err = 0, start, opts_type;
  
        ovs_match_init(&match, &key, NULL);
 -      opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
 +      opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
        if (opts_type < 0)
                return opts_type;
  
  
        tun_info = &tun_dst->u.tun_info;
        tun_info->mode = IP_TUNNEL_INFO_TX;
 +      if (key.tun_proto == AF_INET6)
 +              tun_info->mode |= IP_TUNNEL_INFO_IPV6;
        tun_info->key = key.tun_key;
  
        /* We need to store the options in the action itself since
@@@ -2432,11 -2382,7 +2434,7 @@@ static int set_action_to_attr(const str
                if (!start)
                        return -EMSGSIZE;
  
-               err = ip_tun_to_nlattr(skb, &tun_info->key,
-                                      tun_info->options_len ?
-                                            ip_tunnel_info_opts(tun_info) : NULL,
-                                      tun_info->options_len,
-                                      ip_tunnel_info_af(tun_info));
+               err = ovs_nla_put_tunnel_info(skb, tun_info);
                if (err)
                        return err;
                nla_nest_end(skb, start);
index 7a568ca8da54377cbc08e7dd69a27bc64868130b,5f8aaaaa0785385b89096925718d96b3335c1d32..efb736bb685545a0cb6a323d3eca87fc54eeb9f4
@@@ -52,18 -52,6 +52,6 @@@ static int geneve_get_options(const str
        return 0;
  }
  
- static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-                                     struct dp_upcall_info *upcall)
- {
-       struct geneve_port *geneve_port = geneve_vport(vport);
-       struct net *net = ovs_dp_get_net(vport->dp);
-       __be16 dport = htons(geneve_port->port_no);
-       __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
-       return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
-                                         skb, IPPROTO_UDP, sport, dport);
- }
  static struct vport *geneve_tnl_create(const struct vport_parms *parms)
  {
        struct net *net = ovs_dp_get_net(parms->dp);
@@@ -128,9 -116,8 +116,8 @@@ static struct vport_ops ovs_geneve_vpor
        .create         = geneve_create,
        .destroy        = ovs_netdev_tunnel_destroy,
        .get_options    = geneve_get_options,
 -      .send           = ovs_netdev_send,
 +      .send           = dev_queue_xmit,
        .owner          = THIS_MODULE,
-       .get_egress_tun_info    = geneve_get_egress_tun_info,
  };
  
  static int __init ovs_geneve_tnl_init(void)
index cdb758ab01cfd64254acdd3b4b9a314868faca82,64225bf5eb405f4082547bbc8f09d920de72cdb8..c3257d78d3d28e6ed06e1be9c2e4a4f452c52886
@@@ -84,18 -84,10 +84,10 @@@ static struct vport *gre_create(const s
        return ovs_netdev_link(vport, parms->name);
  }
  
- static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-                                  struct dp_upcall_info *upcall)
- {
-       return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
-                                         skb, IPPROTO_GRE, 0, 0);
- }
  static struct vport_ops ovs_gre_vport_ops = {
        .type           = OVS_VPORT_TYPE_GRE,
        .create         = gre_create,
 -      .send           = ovs_netdev_send,
 +      .send           = dev_queue_xmit,
-       .get_egress_tun_info    = gre_get_egress_tun_info,
        .destroy        = ovs_netdev_tunnel_destroy,
        .owner          = THIS_MODULE,
  };
index 7f0a8bd0885778d94260b239aa40d4a6776cf1c0,b3934126daa894d7bdaf7511ece6ff5319cf2c8a..ec76398a792fbb7451c53b958304a2e001704604
@@@ -106,12 -106,45 +106,45 @@@ static void internal_dev_destructor(str
        free_netdev(dev);
  }
  
+ static struct rtnl_link_stats64 *
+ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+ {
+       int i;
+       memset(stats, 0, sizeof(*stats));
+       stats->rx_errors  = dev->stats.rx_errors;
+       stats->tx_errors  = dev->stats.tx_errors;
+       stats->tx_dropped = dev->stats.tx_dropped;
+       stats->rx_dropped = dev->stats.rx_dropped;
+       for_each_possible_cpu(i) {
+               const struct pcpu_sw_netstats *percpu_stats;
+               struct pcpu_sw_netstats local_stats;
+               unsigned int start;
+               percpu_stats = per_cpu_ptr(dev->tstats, i);
+               do {
+                       start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
+                       local_stats = *percpu_stats;
+               } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
+               stats->rx_bytes         += local_stats.rx_bytes;
+               stats->rx_packets       += local_stats.rx_packets;
+               stats->tx_bytes         += local_stats.tx_bytes;
+               stats->tx_packets       += local_stats.tx_packets;
+       }
+       return stats;
+ }
  static const struct net_device_ops internal_dev_netdev_ops = {
        .ndo_open = internal_dev_open,
        .ndo_stop = internal_dev_stop,
        .ndo_start_xmit = internal_dev_xmit,
        .ndo_set_mac_address = eth_mac_addr,
        .ndo_change_mtu = internal_dev_change_mtu,
+       .ndo_get_stats64 = internal_get_stats,
  };
  
  static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@@ -161,6 -194,11 +194,11 @@@ static struct vport *internal_dev_creat
                err = -ENOMEM;
                goto error_free_vport;
        }
+       vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+       if (!vport->dev->tstats) {
+               err = -ENOMEM;
+               goto error_free_netdev;
+       }
  
        dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
        internal_dev = internal_dev_priv(vport->dev);
        rtnl_lock();
        err = register_netdevice(vport->dev);
        if (err)
-               goto error_free_netdev;
+               goto error_unlock;
  
        dev_set_promiscuity(vport->dev, 1);
        rtnl_unlock();
  
        return vport;
  
- error_free_netdev:
+ error_unlock:
        rtnl_unlock();
+       free_percpu(vport->dev->tstats);
+ error_free_netdev:
        free_netdev(vport->dev);
  error_free_vport:
        ovs_vport_free(vport);
@@@ -198,25 -238,26 +238,25 @@@ static void internal_dev_destroy(struc
  
        /* unregister_netdevice() waits for an RCU grace period. */
        unregister_netdevice(vport->dev);
+       free_percpu(vport->dev->tstats);
        rtnl_unlock();
  }
  
 -static void internal_dev_recv(struct vport *vport, struct sk_buff *skb)
 +static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
  {
 -      struct net_device *netdev = vport->dev;
 +      struct net_device *netdev = skb->dev;
        struct pcpu_sw_netstats *stats;
  
        if (unlikely(!(netdev->flags & IFF_UP))) {
                kfree_skb(skb);
                netdev->stats.rx_dropped++;
 -              return;
 +              return NETDEV_TX_OK;
        }
  
        skb_dst_drop(skb);
        nf_reset(skb);
        secpath_reset(skb);
  
 -      skb->dev = netdev;
        skb->pkt_type = PACKET_HOST;
        skb->protocol = eth_type_trans(skb, netdev);
        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        u64_stats_update_end(&stats->syncp);
  
        netif_rx(skb);
 +      return NETDEV_TX_OK;
  }
  
  static struct vport_ops ovs_internal_vport_ops = {
index 6f700710d4137d074742ac93c5a88f3c77eb9d69,e1c9c08880373276e8430cadd93f03fe4a1e11a0..1605691d94144aee0fc50ffb17be05eca2b59675
@@@ -146,32 -146,12 +146,12 @@@ static struct vport *vxlan_create(cons
        return ovs_netdev_link(vport, parms->name);
  }
  
- static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-                                    struct dp_upcall_info *upcall)
- {
-       struct vxlan_dev *vxlan = netdev_priv(vport->dev);
-       struct net *net = ovs_dp_get_net(vport->dp);
-       unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
-       __be16 dst_port = vxlan_dev_dst_port(vxlan, family);
-       __be16 src_port;
-       int port_min;
-       int port_max;
-       inet_get_local_port_range(net, &port_min, &port_max);
-       src_port = udp_flow_src_port(net, skb, 0, 0, true);
-       return ovs_tunnel_get_egress_info(upcall, net,
-                                         skb, IPPROTO_UDP,
-                                         src_port, dst_port);
- }
  static struct vport_ops ovs_vxlan_netdev_vport_ops = {
        .type                   = OVS_VPORT_TYPE_VXLAN,
        .create                 = vxlan_create,
        .destroy                = ovs_netdev_tunnel_destroy,
        .get_options            = vxlan_get_options,
 -      .send                   = ovs_netdev_send,
 +      .send                   = dev_queue_xmit,
-       .get_egress_tun_info    = vxlan_get_egress_tun_info,
  };
  
  static int __init ovs_vxlan_tnl_init(void)
diff --combined net/openvswitch/vport.c
index ef19d0b77d13fd4f3f1e4bef987dd2adbcca3760,320c765ce44a07e71daedfd457d37c81ea2e4c49..0ac0fd004d7ed885c009560d966da5b29b47f242
@@@ -479,91 -479,3 +479,33 @@@ void ovs_vport_deferred_free(struct vpo
        call_rcu(&vport->rcu, free_vport_rcu);
  }
  EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
- int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
-                              struct net *net,
-                              struct sk_buff *skb,
-                              u8 ipproto,
-                              __be16 tp_src,
-                              __be16 tp_dst)
- {
-       struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
-       const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
-       const struct ip_tunnel_key *tun_key;
-       u32 skb_mark = skb->mark;
-       struct rtable *rt;
-       struct flowi4 fl;
-       if (unlikely(!tun_info))
-               return -EINVAL;
-       if (ip_tunnel_info_af(tun_info) != AF_INET)
-               return -EINVAL;
-       tun_key = &tun_info->key;
-       /* Route lookup to get srouce IP address.
-        * The process may need to be changed if the corresponding process
-        * in vports ops changed.
-        */
-       rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
-       if (IS_ERR(rt))
-               return PTR_ERR(rt);
-       ip_rt_put(rt);
-       /* Generate egress_tun_info based on tun_info,
-        * saddr, tp_src and tp_dst
-        */
-       ip_tunnel_key_init(&egress_tun_info->key,
-                          fl.saddr, tun_key->u.ipv4.dst,
-                          tun_key->tos,
-                          tun_key->ttl,
-                          tp_src, tp_dst,
-                          tun_key->tun_id,
-                          tun_key->tun_flags);
-       egress_tun_info->options_len = tun_info->options_len;
-       egress_tun_info->mode = tun_info->mode;
-       upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
-       return 0;
- }
- EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
- int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-                                 struct dp_upcall_info *upcall)
- {
-       /* get_egress_tun_info() is only implemented on tunnel ports. */
-       if (unlikely(!vport->ops->get_egress_tun_info))
-               return -EINVAL;
-       return vport->ops->get_egress_tun_info(vport, skb, upcall);
- }
 +
 +static unsigned int packet_length(const struct sk_buff *skb)
 +{
 +      unsigned int length = skb->len - ETH_HLEN;
 +
 +      if (skb->protocol == htons(ETH_P_8021Q))
 +              length -= VLAN_HLEN;
 +
 +      return length;
 +}
 +
 +void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
 +{
 +      int mtu = vport->dev->mtu;
 +
 +      if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
 +              net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
 +                                   vport->dev->name,
 +                                   packet_length(skb), mtu);
 +              vport->dev->stats.tx_errors++;
 +              goto drop;
 +      }
 +
 +      skb->dev = vport->dev;
 +      vport->ops->send(skb);
 +      return;
 +
 +drop:
 +      kfree_skb(skb);
 +}
diff --combined net/openvswitch/vport.h
index 885607f28d56a149d5e872c5254c9b3fbddb5dec,d341ad6f3afe5734f587c1df347fd72dc2ba2c38..bdfd82a7c064948dc1dc83acbc85b6534c1bcf9b
@@@ -27,7 -27,6 +27,6 @@@
  #include <linux/skbuff.h>
  #include <linux/spinlock.h>
  #include <linux/u64_stats_sync.h>
- #include <net/route.h>
  
  #include "datapath.h"
  
@@@ -53,16 -52,6 +52,6 @@@ int ovs_vport_set_upcall_portids(struc
  int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
  u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
  
- int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
-                              struct net *net,
-                              struct sk_buff *,
-                              u8 ipproto,
-                              __be16 tp_src,
-                              __be16 tp_dst);
- int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-                                 struct dp_upcall_info *upcall);
  /**
   * struct vport_portids - array of netlink portids of a vport.
   *                        must be protected by rcu.
@@@ -140,8 -129,6 +129,6 @@@ struct vport_parms 
   * have any configuration.
   * @send: Send a packet on the device.
   * zero for dropped packets or negative for error.
-  * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
-  * a packet.
   */
  struct vport_ops {
        enum ovs_vport_type type;
        int (*set_options)(struct vport *, struct nlattr *);
        int (*get_options)(const struct vport *, struct sk_buff *);
  
 -      void (*send)(struct vport *, struct sk_buff *);
 +      netdev_tx_t (*send) (struct sk_buff *skb);
-       int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
-                                  struct dp_upcall_info *upcall);
        struct module *owner;
        struct list_head list;
  };
@@@ -215,25 -199,9 +199,25 @@@ static inline const char *ovs_vport_nam
  int ovs_vport_ops_register(struct vport_ops *ops);
  void ovs_vport_ops_unregister(struct vport_ops *ops);
  
 -static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
 +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
 +                                                   const struct ip_tunnel_key *key,
 +                                                   u32 mark,
 +                                                   struct flowi4 *fl,
 +                                                   u8 protocol)
  {
 -      vport->ops->send(vport, skb);
 +      struct rtable *rt;
 +
 +      memset(fl, 0, sizeof(*fl));
 +      fl->daddr = key->u.ipv4.dst;
 +      fl->saddr = key->u.ipv4.src;
 +      fl->flowi4_tos = RT_TOS(key->tos);
 +      fl->flowi4_mark = mark;
 +      fl->flowi4_proto = protocol;
 +
 +      rt = ip_route_output_key(net, fl);
 +      return rt;
  }
  
 +void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
 +
  #endif /* vport.h */
diff --combined net/tipc/msg.c
index 454f5ec275c8d7af20ce6b2d9b1086f5a5292087,5f73450159df3b7d99349e2b49610b7d9a1d6c47..26d38b3d8760f4c13ba22b222af39906d07d77c6
@@@ -121,7 -121,7 +121,7 @@@ int tipc_buf_append(struct sk_buff **he
  {
        struct sk_buff *head = *headbuf;
        struct sk_buff *frag = *buf;
-       struct sk_buff *tail;
+       struct sk_buff *tail = NULL;
        struct tipc_msg *msg;
        u32 fragid;
        int delta;
                if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
                        goto err;
                head = *headbuf = frag;
-               skb_frag_list_init(head);
-               TIPC_SKB_CB(head)->tail = NULL;
                *buf = NULL;
+               TIPC_SKB_CB(head)->tail = NULL;
+               if (skb_is_nonlinear(head)) {
+                       skb_walk_frags(head, tail) {
+                               TIPC_SKB_CB(head)->tail = tail;
+                       }
+               } else {
+                       skb_frag_list_init(head);
+               }
                return 0;
        }
  
@@@ -590,34 -596,3 +596,34 @@@ error
        kfree_skb(head);
        return NULL;
  }
 +
 +/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
 + * @list: list to be appended to
 + * @seqno: sequence number of buffer to add
 + * @skb: buffer to add
 + */
 +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 +                           struct sk_buff *skb)
 +{
 +      struct sk_buff *_skb, *tmp;
 +
 +      if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
 +              __skb_queue_head(list, skb);
 +              return;
 +      }
 +
 +      if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
 +              __skb_queue_tail(list, skb);
 +              return;
 +      }
 +
 +      skb_queue_walk_safe(list, _skb, tmp) {
 +              if (more(seqno, buf_seqno(_skb)))
 +                      continue;
 +              if (seqno == buf_seqno(_skb))
 +                      break;
 +              __skb_queue_before(list, _skb, skb);
 +              return;
 +      }
 +      kfree_skb(skb);
 +}
diff --combined net/tipc/udp_media.c
index 9bc0b1e515fa3278955a60a31d8298f5ac600336,6e648d90297a9ecc69a23a68183fde658eed63cd..0021c01dec1708de8a492fdf94cd121977acde34
@@@ -52,6 -52,8 +52,8 @@@
  /* IANA assigned UDP port */
  #define UDP_PORT_DEFAULT      6118
  
+ #define UDP_MIN_HEADROOM        28
  static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
        [TIPC_NLA_UDP_UNSPEC]   = {.type = NLA_UNSPEC},
        [TIPC_NLA_UDP_LOCAL]    = {.type = NLA_BINARY,
@@@ -156,6 -158,9 +158,9 @@@ static int tipc_udp_send_msg(struct ne
        struct sk_buff *clone;
        struct rtable *rt;
  
+       if (skb_headroom(skb) < UDP_MIN_HEADROOM)
+               pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
        clone = skb_clone(skb, GFP_ATOMIC);
        skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
        ub = rcu_dereference_rtnl(b->media_ptr);
@@@ -425,6 -430,7 +430,6 @@@ static void tipc_udp_disable(struct tip
        }
        if (ub->ubsock)
                sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
 -      RCU_INIT_POINTER(b->media_ptr, NULL);
        RCU_INIT_POINTER(ub->bearer, NULL);
  
        /* sock_release need to be done outside of rtnl lock */