- dual_emac : Specifies Switch to act as Dual EMAC
- syscon : Phandle to the system control device node, which is
the control module device of the am33x
+- mode-gpios : Should be added if one/multiple gpio lines are
+ required to be driven so that cpsw data lines
+ can be connected to the phy via selective mux.
+ For example in dra72x-evm, pcf gpio has to be
+ driven low so that cpsw slave 0 and phy data
+ lines are connected via mux.
+
Slave Properties:
Required properties:
Optional properties:
- dual_emac_res_vlan : Specifies VID to be used to segregate the ports
- mac-address : See ethernet.txt file in the same directory
+ - phy-handle : See ethernet.txt file in the same directory
Note: "ti,hwmods" field is used to fetch the base address and irq
resources from TI, omap hwmod data base during device registration.
packet->rdesc_count, 1);
/* Make sure ownership is written to the descriptor */
- dma_wmb();
+ wmb();
ring->cur = cur_index + 1;
if (!packet->skb->xmit_more ||
static unsigned int xgbe_calculate_per_queue_fifo(unsigned int fifo_size,
unsigned int queue_count)
{
- unsigned int q_fifo_size = 0;
- enum xgbe_mtl_fifo_size p_fifo = XGMAC_MTL_FIFO_SIZE_256;
+ unsigned int q_fifo_size;
+ unsigned int p_fifo;
- /* Calculate Tx/Rx fifo share per queue */
- switch (fifo_size) {
- case 0:
- q_fifo_size = XGBE_FIFO_SIZE_B(128);
- break;
- case 1:
- q_fifo_size = XGBE_FIFO_SIZE_B(256);
- break;
- case 2:
- q_fifo_size = XGBE_FIFO_SIZE_B(512);
- break;
- case 3:
- q_fifo_size = XGBE_FIFO_SIZE_KB(1);
- break;
- case 4:
- q_fifo_size = XGBE_FIFO_SIZE_KB(2);
- break;
- case 5:
- q_fifo_size = XGBE_FIFO_SIZE_KB(4);
- break;
- case 6:
- q_fifo_size = XGBE_FIFO_SIZE_KB(8);
- break;
- case 7:
- q_fifo_size = XGBE_FIFO_SIZE_KB(16);
- break;
- case 8:
- q_fifo_size = XGBE_FIFO_SIZE_KB(32);
- break;
- case 9:
- q_fifo_size = XGBE_FIFO_SIZE_KB(64);
- break;
- case 10:
- q_fifo_size = XGBE_FIFO_SIZE_KB(128);
- break;
- case 11:
- q_fifo_size = XGBE_FIFO_SIZE_KB(256);
- break;
- }
+ /* Calculate the configured fifo size */
+ q_fifo_size = 1 << (fifo_size + 7);
- /* The configured value is not the actual amount of fifo RAM */
+ /* The configured value may not be the actual amount of fifo RAM */
q_fifo_size = min_t(unsigned int, XGBE_FIFO_MAX, q_fifo_size);
q_fifo_size = q_fifo_size / queue_count;
- /* Set the queue fifo size programmable value */
- if (q_fifo_size >= XGBE_FIFO_SIZE_KB(256))
- p_fifo = XGMAC_MTL_FIFO_SIZE_256K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(128))
- p_fifo = XGMAC_MTL_FIFO_SIZE_128K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(64))
- p_fifo = XGMAC_MTL_FIFO_SIZE_64K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(32))
- p_fifo = XGMAC_MTL_FIFO_SIZE_32K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(16))
- p_fifo = XGMAC_MTL_FIFO_SIZE_16K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(8))
- p_fifo = XGMAC_MTL_FIFO_SIZE_8K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(4))
- p_fifo = XGMAC_MTL_FIFO_SIZE_4K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(2))
- p_fifo = XGMAC_MTL_FIFO_SIZE_2K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(1))
- p_fifo = XGMAC_MTL_FIFO_SIZE_1K;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_B(512))
- p_fifo = XGMAC_MTL_FIFO_SIZE_512;
- else if (q_fifo_size >= XGBE_FIFO_SIZE_B(256))
- p_fifo = XGMAC_MTL_FIFO_SIZE_256;
+ /* Each increment in the queue fifo size represents 256 bytes of
+ * fifo, with 0 representing 256 bytes. Distribute the fifo equally
+ * between the queues.
+ */
+ p_fifo = q_fifo_size / 256;
+ if (p_fifo)
+ p_fifo--;
return p_fifo;
}
static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata)
{
- enum xgbe_mtl_fifo_size fifo_size;
+ unsigned int fifo_size;
unsigned int i;
fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size,
static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata)
{
- enum xgbe_mtl_fifo_size fifo_size;
+ unsigned int fifo_size;
unsigned int i;
fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size,
default:
read_hi = false;
- };
+ }
val = XGMAC_IOREAD(pdata, reg_lo);
}
}
+ if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU))
+ pdata->ext_stats.rx_buffer_unavailable++;
+
/* Restart the device on a Fatal Bus Error */
if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE))
schedule_work(&pdata->restart_work);
/* Read Tx Timestamp to clear interrupt */
pdata->tx_tstamp =
hw_if->get_tx_tstamp(pdata);
- schedule_work(&pdata->tx_tstamp_work);
+ queue_work(pdata->dev_workqueue,
+ &pdata->tx_tstamp_work);
}
}
}
{
struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
- schedule_work(&pdata->service_work);
+ queue_work(pdata->dev_workqueue, &pdata->service_work);
mod_timer(&pdata->service_timer, jiffies + HZ);
}
netif_tx_start_all_queues(netdev);
xgbe_start_timers(pdata);
- schedule_work(&pdata->service_work);
+ queue_work(pdata->dev_workqueue, &pdata->service_work);
DBGPR("<--xgbe_start\n");
struct netdev_queue *txq;
int processed = 0;
unsigned int tx_packets = 0, tx_bytes = 0;
+ unsigned int cur;
DBGPR("-->xgbe_tx_poll\n");
if (!ring)
return 0;
+ cur = ring->cur;
txq = netdev_get_tx_queue(netdev, channel->queue_index);
while ((processed < XGBE_TX_DESC_MAX_PROC) &&
- (ring->dirty != ring->cur)) {
+ (ring->dirty != cur)) {
rdata = XGBE_GET_DESC_DATA(ring, ring->dirty);
rdesc = rdata->rdesc;
sizeof(drvinfo->version));
strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
strlcpy(drvinfo->bus_info, "bcm63xx", sizeof(drvinfo->bus_info));
- drvinfo->n_stats = BCM_ENET_STATS_LEN;
}
static int bcm_enet_get_sset_count(struct net_device *netdev,
for (i = 0; i < priv->num_ports; i++) {
struct bcm63xx_enetsw_port *port;
- int val, j, up, advertise, lpa, lpa2, speed, duplex, media;
+ int val, j, up, advertise, lpa, speed, duplex, media;
int external_phy = bcm_enet_port_is_rgmii(i);
u8 override;
lpa = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id,
MII_LPA);
- lpa2 = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id,
- MII_STAT1000);
-
/* figure out media and duplex from advertise and LPA values */
media = mii_nway_result(lpa & advertise);
duplex = (media & ADVERTISE_FULL) ? 1 : 0;
- if (lpa2 & LPA_1000FULL)
- duplex = 1;
-
- if (lpa2 & (LPA_1000FULL | LPA_1000HALF))
- speed = 1000;
- else {
- if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF))
- speed = 100;
- else
- speed = 10;
+
+ if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF))
+ speed = 100;
+ else
+ speed = 10;
+
+ if (val & BMSR_ESTATEN) {
+ advertise = bcmenet_sw_mdio_read(priv, external_phy,
+ port->phy_id, MII_CTRL1000);
+
+ lpa = bcmenet_sw_mdio_read(priv, external_phy,
+ port->phy_id, MII_STAT1000);
+
+ if (advertise & (ADVERTISE_1000FULL | ADVERTISE_1000HALF)
+ && lpa & (LPA_1000FULL | LPA_1000HALF)) {
+ speed = 1000;
+ duplex = (lpa & LPA_1000FULL);
+ }
}
dev_info(&priv->pdev->dev,
strncpy(drvinfo->version, bcm_enet_driver_version, 32);
strncpy(drvinfo->fw_version, "N/A", 32);
strncpy(drvinfo->bus_info, "bcm63xx", 32);
- drvinfo->n_stats = BCM_ENETSW_STATS_LEN;
}
static void bcm_enetsw_get_ethtool_stats(struct net_device *netdev,
I40E_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+ I40E_VSI_STAT("tx_linearize", tx_linearize),
};
-static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
- struct ethtool_rxnfc *cmd);
-
/* These PF_STATs might look like duplicates of some NETDEV_STATs,
* but they are separate. This device supports Virtualization, and
* as such might have several netdevs supporting VMDq and FCoE going
static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
"NPAR",
+ "LinkPolling",
+ "flow-director-atr",
+ "veb-stats",
};
-#define I40E_PRIV_FLAGS_STR_LEN \
- (sizeof(i40e_priv_flags_strings) / ETH_GSTRING_LEN)
+#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings)
/**
* i40e_partition_setting_complaint - generic complaint for MFP restriction
**/
static void i40e_get_settings_link_up(struct i40e_hw *hw,
struct ethtool_cmd *ecmd,
- struct net_device *netdev)
+ struct net_device *netdev,
+ struct i40e_pf *pf)
{
struct i40e_link_status *hw_link_info = &hw->phy.link_info;
u32 link_speed = hw_link_info->link_speed;
case I40E_PHY_TYPE_40GBASE_AOC:
ecmd->supported = SUPPORTED_40000baseCR4_Full;
break;
- case I40E_PHY_TYPE_40GBASE_KR4:
- ecmd->supported = SUPPORTED_Autoneg |
- SUPPORTED_40000baseKR4_Full;
- ecmd->advertising = ADVERTISED_Autoneg |
- ADVERTISED_40000baseKR4_Full;
- break;
case I40E_PHY_TYPE_40GBASE_SR4:
ecmd->supported = SUPPORTED_40000baseSR4_Full;
break;
case I40E_PHY_TYPE_40GBASE_LR4:
ecmd->supported = SUPPORTED_40000baseLR4_Full;
break;
- case I40E_PHY_TYPE_20GBASE_KR2:
- ecmd->supported = SUPPORTED_Autoneg |
- SUPPORTED_20000baseKR2_Full;
- ecmd->advertising = ADVERTISED_Autoneg |
- ADVERTISED_20000baseKR2_Full;
- break;
- case I40E_PHY_TYPE_10GBASE_KX4:
- ecmd->supported = SUPPORTED_Autoneg |
- SUPPORTED_10000baseKX4_Full;
- ecmd->advertising = ADVERTISED_Autoneg |
- ADVERTISED_10000baseKX4_Full;
- break;
- case I40E_PHY_TYPE_10GBASE_KR:
- ecmd->supported = SUPPORTED_Autoneg |
- SUPPORTED_10000baseKR_Full;
- ecmd->advertising = ADVERTISED_Autoneg |
- ADVERTISED_10000baseKR_Full;
- break;
case I40E_PHY_TYPE_10GBASE_SR:
case I40E_PHY_TYPE_10GBASE_LR:
case I40E_PHY_TYPE_1000BASE_SX:
case I40E_PHY_TYPE_1000BASE_LX:
- ecmd->supported = SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full;
+ ecmd->supported = SUPPORTED_10000baseT_Full;
+ if (hw_link_info->module_type[2] &
+ I40E_MODULE_TYPE_1000BASE_SX ||
+ hw_link_info->module_type[2] &
+ I40E_MODULE_TYPE_1000BASE_LX) {
+ ecmd->supported |= SUPPORTED_1000baseT_Full;
+ if (hw_link_info->requested_speeds &
+ I40E_LINK_SPEED_1GB)
+ ecmd->advertising |= ADVERTISED_1000baseT_Full;
+ }
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
ecmd->advertising |= ADVERTISED_10000baseT_Full;
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- ecmd->advertising |= ADVERTISED_1000baseT_Full;
- break;
- case I40E_PHY_TYPE_1000BASE_KX:
- ecmd->supported = SUPPORTED_Autoneg |
- SUPPORTED_1000baseKX_Full;
- ecmd->advertising = ADVERTISED_Autoneg |
- ADVERTISED_1000baseKX_Full;
break;
case I40E_PHY_TYPE_10GBASE_T:
case I40E_PHY_TYPE_1000BASE_T:
- case I40E_PHY_TYPE_100BASE_TX:
ecmd->supported = SUPPORTED_Autoneg |
SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full;
+ SUPPORTED_1000baseT_Full;
ecmd->advertising = ADVERTISED_Autoneg;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
ecmd->advertising |= ADVERTISED_10000baseT_Full;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
ecmd->advertising |= ADVERTISED_1000baseT_Full;
+ break;
+ case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
+ ecmd->supported = SUPPORTED_Autoneg |
+ SUPPORTED_1000baseT_Full;
+ ecmd->advertising = ADVERTISED_Autoneg |
+ ADVERTISED_1000baseT_Full;
+ break;
+ case I40E_PHY_TYPE_100BASE_TX:
+ ecmd->supported = SUPPORTED_Autoneg |
+ SUPPORTED_100baseT_Full;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
ecmd->advertising |= ADVERTISED_100baseT_Full;
break;
break;
case I40E_PHY_TYPE_SGMII:
ecmd->supported = SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full;
+ SUPPORTED_1000baseT_Full;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
ecmd->advertising |= ADVERTISED_1000baseT_Full;
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
- ecmd->advertising |= ADVERTISED_100baseT_Full;
+ if (pf->hw.mac.type == I40E_MAC_X722) {
+ ecmd->supported |= SUPPORTED_100baseT_Full;
+ if (hw_link_info->requested_speeds &
+ I40E_LINK_SPEED_100MB)
+ ecmd->advertising |= ADVERTISED_100baseT_Full;
+ }
+ break;
+ /* Backplane is set based on supported phy types in get_settings
+ * so don't set anything here but don't warn either
+ */
+ case I40E_PHY_TYPE_40GBASE_KR4:
+ case I40E_PHY_TYPE_20GBASE_KR2:
+ case I40E_PHY_TYPE_10GBASE_KR:
+ case I40E_PHY_TYPE_10GBASE_KX4:
+ case I40E_PHY_TYPE_1000BASE_KX:
break;
default:
/* if we got here and link is up something bad is afoot */
* Reports link settings that can be determined when link is down
**/
static void i40e_get_settings_link_down(struct i40e_hw *hw,
- struct ethtool_cmd *ecmd)
+ struct ethtool_cmd *ecmd,
+ struct i40e_pf *pf)
{
- struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+ enum i40e_aq_capabilities_phy_type phy_types = hw->phy.phy_types;
/* link is down and the driver needs to fall back on
- * device ID to determine what kinds of info to display,
- * it's mostly a guess that may change when link is up
+ * supported phy types to figure out what info to display
*/
- switch (hw->device_id) {
- case I40E_DEV_ID_QSFP_A:
- case I40E_DEV_ID_QSFP_B:
- case I40E_DEV_ID_QSFP_C:
- /* pluggable QSFP */
- ecmd->supported = SUPPORTED_40000baseSR4_Full |
- SUPPORTED_40000baseCR4_Full |
- SUPPORTED_40000baseLR4_Full;
- ecmd->advertising = ADVERTISED_40000baseSR4_Full |
- ADVERTISED_40000baseCR4_Full |
- ADVERTISED_40000baseLR4_Full;
- break;
- case I40E_DEV_ID_KX_B:
- /* backplane 40G */
- ecmd->supported = SUPPORTED_40000baseKR4_Full;
- ecmd->advertising = ADVERTISED_40000baseKR4_Full;
- break;
- case I40E_DEV_ID_KX_C:
- /* backplane 10G */
- ecmd->supported = SUPPORTED_10000baseKR_Full;
- ecmd->advertising = ADVERTISED_10000baseKR_Full;
- break;
- case I40E_DEV_ID_10G_BASE_T:
- ecmd->supported = SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full;
- /* Figure out what has been requested */
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- ecmd->advertising |= ADVERTISED_10000baseT_Full;
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- ecmd->advertising |= ADVERTISED_1000baseT_Full;
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
+ ecmd->supported = 0x0;
+ ecmd->advertising = 0x0;
+ if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
+ ecmd->supported |= SUPPORTED_Autoneg |
+ SUPPORTED_1000baseT_Full;
+ ecmd->advertising |= ADVERTISED_Autoneg |
+ ADVERTISED_1000baseT_Full;
+ if (pf->hw.mac.type == I40E_MAC_X722) {
+ ecmd->supported |= SUPPORTED_100baseT_Full;
ecmd->advertising |= ADVERTISED_100baseT_Full;
- break;
- case I40E_DEV_ID_20G_KR2:
- /* backplane 20G */
- ecmd->supported = SUPPORTED_20000baseKR2_Full;
- ecmd->advertising = ADVERTISED_20000baseKR2_Full;
- break;
- default:
- /* all the rest are 10G/1G */
- ecmd->supported = SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full;
- /* Figure out what has been requested */
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- ecmd->advertising |= ADVERTISED_10000baseT_Full;
- if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- ecmd->advertising |= ADVERTISED_1000baseT_Full;
- break;
+ }
}
+ if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
+ phy_types & I40E_CAP_PHY_TYPE_XFI ||
+ phy_types & I40E_CAP_PHY_TYPE_SFI ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
+ ecmd->supported |= SUPPORTED_10000baseT_Full;
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+ ecmd->supported |= SUPPORTED_Autoneg |
+ SUPPORTED_10000baseT_Full;
+ ecmd->advertising |= ADVERTISED_Autoneg |
+ ADVERTISED_10000baseT_Full;
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
+ phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
+ ecmd->supported |= SUPPORTED_40000baseCR4_Full;
+ if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
+ ecmd->supported |= SUPPORTED_Autoneg |
+ SUPPORTED_40000baseCR4_Full;
+ ecmd->advertising |= ADVERTISED_Autoneg |
+ ADVERTISED_40000baseCR4_Full;
+ }
+ if ((phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) &&
+ !(phy_types & I40E_CAP_PHY_TYPE_1000BASE_T)) {
+ ecmd->supported |= SUPPORTED_Autoneg |
+ SUPPORTED_100baseT_Full;
+ ecmd->advertising |= ADVERTISED_Autoneg |
+ ADVERTISED_100baseT_Full;
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+ ecmd->supported |= SUPPORTED_Autoneg |
+ SUPPORTED_1000baseT_Full;
+ ecmd->advertising |= ADVERTISED_Autoneg |
+ ADVERTISED_1000baseT_Full;
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
+ ecmd->supported |= SUPPORTED_40000baseSR4_Full;
+ if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
+ ecmd->supported |= SUPPORTED_40000baseLR4_Full;
/* With no link speed and duplex are unknown */
ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
if (link_up)
- i40e_get_settings_link_up(hw, ecmd, netdev);
+ i40e_get_settings_link_up(hw, ecmd, netdev, pf);
else
- i40e_get_settings_link_down(hw, ecmd);
+ i40e_get_settings_link_down(hw, ecmd, pf);
/* Now set the settings that don't rely on link being up/down */
+ /* For backplane, supported and advertised are only reliant on the
+ * phy types the NVM specifies are supported.
+ */
+ if (hw->device_id == I40E_DEV_ID_KX_B ||
+ hw->device_id == I40E_DEV_ID_KX_C ||
+ hw->device_id == I40E_DEV_ID_20G_KR2 ||
+ hw->device_id == I40E_DEV_ID_20G_KR2_A) {
+ ecmd->supported = SUPPORTED_Autoneg;
+ ecmd->advertising = ADVERTISED_Autoneg;
+ if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
+ ecmd->supported |= SUPPORTED_40000baseKR4_Full;
+ ecmd->advertising |= ADVERTISED_40000baseKR4_Full;
+ }
+ if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
+ ecmd->supported |= SUPPORTED_20000baseKR2_Full;
+ ecmd->advertising |= ADVERTISED_20000baseKR2_Full;
+ }
+ if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
+ ecmd->supported |= SUPPORTED_10000baseKR_Full;
+ ecmd->advertising |= ADVERTISED_10000baseKR_Full;
+ }
+ if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+ ecmd->supported |= SUPPORTED_10000baseKX4_Full;
+ ecmd->advertising |= ADVERTISED_10000baseKX4_Full;
+ }
+ if (hw->phy.phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
+ ecmd->supported |= SUPPORTED_1000baseKX_Full;
+ ecmd->advertising |= ADVERTISED_1000baseKX_Full;
+ }
+ }
+
/* Set autoneg settings */
ecmd->autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
AUTONEG_ENABLE : AUTONEG_DISABLE);
hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
return -EOPNOTSUPP;
+ if (hw->device_id == I40E_DEV_ID_KX_B ||
+ hw->device_id == I40E_DEV_ID_KX_C ||
+ hw->device_id == I40E_DEV_ID_20G_KR2 ||
+ hw->device_id == I40E_DEV_ID_20G_KR2_A) {
+ netdev_info(netdev, "Changing settings is not supported on backplane.\n");
+ return -EOPNOTSUPP;
+ }
+
/* get our own copy of the bits to check against */
memset(&safe_ecmd, 0, sizeof(struct ethtool_cmd));
i40e_get_settings(netdev, &safe_ecmd);
/* Check autoneg */
if (autoneg == AUTONEG_ENABLE) {
- /* If autoneg is not supported, return error */
- if (!(safe_ecmd.supported & SUPPORTED_Autoneg)) {
- netdev_info(netdev, "Autoneg not supported on this phy\n");
- return -EINVAL;
- }
/* If autoneg was not already enabled */
if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
+ /* If autoneg is not supported, return error */
+ if (!(safe_ecmd.supported & SUPPORTED_Autoneg)) {
+ netdev_info(netdev, "Autoneg not supported on this phy\n");
+ return -EINVAL;
+ }
+ /* Autoneg is allowed to change */
config.abilities = abilities.abilities |
I40E_AQ_PHY_ENABLE_AN;
change = true;
}
} else {
- /* If autoneg is supported 10GBASE_T is the only phy that
- * can disable it, so otherwise return error
- */
- if (safe_ecmd.supported & SUPPORTED_Autoneg &&
- hw->phy.link_info.phy_type != I40E_PHY_TYPE_10GBASE_T) {
- netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
- return -EINVAL;
- }
/* If autoneg is currently enabled */
if (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) {
+ /* If autoneg is supported 10GBASE_T is the only PHY
+ * that can disable it, so otherwise return error
+ */
+ if (safe_ecmd.supported & SUPPORTED_Autoneg &&
+ hw->phy.link_info.phy_type !=
+ I40E_PHY_TYPE_10GBASE_T) {
+ netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
+ return -EINVAL;
+ }
+ /* Autoneg is allowed to change */
config.abilities = abilities.abilities &
~I40E_AQ_PHY_ENABLE_AN;
change = true;
advertise & ADVERTISED_40000baseLR4_Full)
config.link_speed |= I40E_LINK_SPEED_40GB;
+ /* If speed didn't get set, set it to what it currently is.
+ * This is needed because if advertise is 0 (as it is when autoneg
+ * is disabled) then speed won't get set.
+ */
+ if (!config.link_speed)
+ config.link_speed = abilities.link_speed;
+
if (change || (abilities.link_speed != config.link_speed)) {
/* copy over the rest of the abilities */
config.phy_type = abilities.phy_type;
/* Tell the OS link is going down, the link will go
* back up when fw says it is ready asynchronously
*/
- netdev_info(netdev, "PHY settings change requested, NIC Link is going down.\n");
+ i40e_print_link_message(vsi, false);
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
}
return -EAGAIN;
}
- status = i40e_aq_get_link_info(hw, true, NULL, NULL);
+ status = i40e_update_link_info(hw);
if (status)
- netdev_info(netdev, "Updating link info failed with err %s aq_err %s\n",
- i40e_stat_str(hw, status),
- i40e_aq_str(hw, hw->aq.asq_last_status));
+ netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
+ i40e_stat_str(hw, status),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
} else {
netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
/* Tell the OS link is going down, the link will go back up when fw
* says it is ready asynchronously
*/
- netdev_info(netdev, "Flow control settings change requested, NIC Link is going down.\n");
+ i40e_print_link_message(vsi, false);
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
cmd = (struct i40e_nvm_access *)eeprom;
ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno);
- if (ret_val &&
- ((hw->aq.asq_last_status != I40E_AQ_RC_EACCES) ||
- (hw->debug_mask & I40E_DEBUG_NVM)))
+ if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM))
dev_info(&pf->pdev->dev,
"NVMUpdate read failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n",
ret_val, hw->aq.asq_last_status, errno,
cmd = (struct i40e_nvm_access *)eeprom;
ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno);
- if (ret_val &&
- ((hw->aq.asq_last_status != I40E_AQ_RC_EPERM &&
- hw->aq.asq_last_status != I40E_AQ_RC_EBUSY) ||
- (hw->debug_mask & I40E_DEBUG_NVM)))
+ if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM))
dev_info(&pf->pdev->dev,
"NVMUpdate write failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n",
ret_val, hw->aq.asq_last_status, errno,
strlcpy(drvinfo->driver, i40e_driver_name, sizeof(drvinfo->driver));
strlcpy(drvinfo->version, i40e_driver_version_str,
sizeof(drvinfo->version));
- strlcpy(drvinfo->fw_version, i40e_fw_version_str(&pf->hw),
+ strlcpy(drvinfo->fw_version, i40e_nvm_version_str(&pf->hw),
sizeof(drvinfo->fw_version));
strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
sizeof(drvinfo->bus_info));
- drvinfo->n_priv_flags = I40E_PRIV_FLAGS_STR_LEN;
}
static void i40e_get_ringparam(struct net_device *netdev,
/* clone ring and setup updated count */
tx_rings[i] = *vsi->tx_rings[i];
tx_rings[i].count = new_tx_count;
+ /* the desc and bi pointers will be reallocated in the
+ * setup call
+ */
+ tx_rings[i].desc = NULL;
+ tx_rings[i].rx_bi = NULL;
err = i40e_setup_tx_descriptors(&tx_rings[i]);
if (err) {
while (i) {
/* clone ring and setup updated count */
rx_rings[i] = *vsi->rx_rings[i];
rx_rings[i].count = new_rx_count;
+ /* the desc and bi pointers will be reallocated in the
+ * setup call
+ */
+ rx_rings[i].desc = NULL;
+ rx_rings[i].rx_bi = NULL;
err = i40e_setup_rx_descriptors(&rx_rings[i]);
if (err) {
while (i) {
if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
int len = I40E_PF_STATS_LEN(netdev);
- if (pf->lan_veb != I40E_NO_VEB)
+ if ((pf->lan_veb != I40E_NO_VEB) &&
+ (pf->flags & I40E_FLAG_VEB_STATS_ENABLED))
len += I40E_VEB_STATS_TOTAL;
return len;
} else {
if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
return;
- if (pf->lan_veb != I40E_NO_VEB) {
+ if ((pf->lan_veb != I40E_NO_VEB) &&
+ (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
struct i40e_veb *veb = pf->veb[pf->lan_veb];
+
for (j = 0; j < I40E_VEB_STATS_LEN; j++) {
p = (char *)veb;
p += i40e_gstrings_veb_stats[j].stat_offset;
data[i++] = (i40e_gstrings_veb_stats[j].sizeof_stat ==
sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
}
+ for (j = 0; j < I40E_MAX_TRAFFIC_CLASS; j++) {
+ data[i++] = veb->tc_stats.tc_tx_packets[j];
+ data[i++] = veb->tc_stats.tc_tx_bytes[j];
+ data[i++] = veb->tc_stats.tc_rx_packets[j];
+ data[i++] = veb->tc_stats.tc_rx_bytes[j];
+ }
}
for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
return;
- if (pf->lan_veb != I40E_NO_VEB) {
+ if ((pf->lan_veb != I40E_NO_VEB) &&
+ (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
snprintf(p, ETH_GSTRING_LEN, "veb.%s",
i40e_gstrings_veb_stats[i].stat_string);
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_pf *pf = np->vsi->back;
+ i40e_status status;
+ bool link_up = false;
netif_info(pf, hw, netdev, "link test\n");
- if (i40e_get_link_status(&pf->hw))
+ status = i40e_get_link_status(&pf->hw, &link_up);
+ if (status) {
+ netif_err(pf, drv, netdev, "link query timed out, please retry test\n");
+ *data = 1;
+ return *data;
+ }
+
+ if (link_up)
*data = 0;
else
*data = 1;
int i;
for (i = 0; i < pf->num_alloc_vfs; i++)
- if (vfs[i].vf_states & I40E_VF_STAT_ACTIVE)
+ if (test_bit(I40E_VF_STAT_ACTIVE, &vfs[i].vf_states))
return true;
return false;
}
ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ /* we use the _usecs_high to store/set the interrupt rate limit
+ * that the hardware supports, that almost but not quite
+ * fits the original intent of the ethtool variable,
+ * the rx_coalesce_usecs_high limits total interrupts
+ * per second from both tx/rx sources.
+ */
+ ec->rx_coalesce_usecs_high = vsi->int_rate_limit;
+ ec->tx_coalesce_usecs_high = vsi->int_rate_limit;
return 0;
}
if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+ /* tx_coalesce_usecs_high is ignored, use rx-usecs-high instead */
+ if (ec->tx_coalesce_usecs_high != vsi->int_rate_limit) {
+ netif_info(pf, drv, netdev, "tx-usecs-high is not used, please program rx-usecs-high\n");
+ return -EINVAL;
+ }
+
+ if (ec->rx_coalesce_usecs_high >= INTRL_REG_TO_USEC(I40E_MAX_INTRL)) {
+ netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range is 0-235\n");
+ return -EINVAL;
+ }
+
vector = vsi->base_vector;
if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
(ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) {
return -EINVAL;
}
+ vsi->int_rate_limit = ec->rx_coalesce_usecs_high;
+
if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
(ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) {
vsi->tx_itr_setting = ec->tx_coalesce_usecs;
vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+ u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit);
+
q_vector = vsi->q_vectors[i];
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr);
+ wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
i40e_flush(hw);
}
ret_flags |= pf->hw.func_caps.npar_enable ?
I40E_PRIV_FLAGS_NPAR_FLAG : 0;
+ ret_flags |= pf->flags & I40E_FLAG_LINK_POLLING_ENABLED ?
+ I40E_PRIV_FLAGS_LINKPOLL_FLAG : 0;
+ ret_flags |= pf->flags & I40E_FLAG_FD_ATR_ENABLED ?
+ I40E_PRIV_FLAGS_FD_ATR : 0;
+ ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ?
+ I40E_PRIV_FLAGS_VEB_STATS : 0;
return ret_flags;
}
+/**
+ * i40e_set_priv_flags - set private flags
+ * @dev: network interface device structure
+ * @flags: bit flags to be set
+ **/
+static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
+{
+ struct i40e_netdev_priv *np = netdev_priv(dev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+
+ if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG)
+ pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED;
+ else
+ pf->flags &= ~I40E_FLAG_LINK_POLLING_ENABLED;
+
+ /* allow the user to control the state of the Flow
+ * Director ATR (Application Targeted Routing) feature
+ * of the driver
+ */
+ if (flags & I40E_PRIV_FLAGS_FD_ATR) {
+ pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
+ } else {
+ pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+ }
+
+ if (flags & I40E_PRIV_FLAGS_VEB_STATS)
+ pf->flags |= I40E_FLAG_VEB_STATS_ENABLED;
+ else
+ pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
+
+ return 0;
+}
+
static const struct ethtool_ops i40e_ethtool_ops = {
.get_settings = i40e_get_settings,
.set_settings = i40e_set_settings,
.set_channels = i40e_set_channels,
.get_ts_info = i40e_get_ts_info,
.get_priv_flags = i40e_get_priv_flags,
+ .set_priv_flags = i40e_set_priv_flags,
};
void i40e_set_ethtool_ops(struct net_device *netdev)
#define DRV_VERSION_MAJOR 1
#define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 9
+#define DRV_VERSION_BUILD 46
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) DRV_KERN
{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
+ {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
+ {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
+ {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
/* required last entry */
{0, }
};
ret = i;
pile->search_hint = i + j;
break;
- } else {
- /* not enough, so skip over it and continue looking */
- i += j;
}
+
+ /* not enough, so skip over it and continue looking */
+ i += j;
}
return ret;
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
+ struct i40e_ring *tx_ring = NULL;
+ unsigned int i, hung_queue = 0;
+ u32 head, val;
pf->tx_timeout_count++;
+ /* find the stopped queue the same way the stack does */
+ for (i = 0; i < netdev->num_tx_queues; i++) {
+ struct netdev_queue *q;
+ unsigned long trans_start;
+
+ q = netdev_get_tx_queue(netdev, i);
+ trans_start = q->trans_start ? : netdev->trans_start;
+ if (netif_xmit_stopped(q) &&
+ time_after(jiffies,
+ (trans_start + netdev->watchdog_timeo))) {
+ hung_queue = i;
+ break;
+ }
+ }
+
+ if (i == netdev->num_tx_queues) {
+ netdev_info(netdev, "tx_timeout: no netdev hung queue found\n");
+ } else {
+ /* now that we have an index, find the tx_ring struct */
+ for (i = 0; i < vsi->num_queue_pairs; i++) {
+ if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
+ if (hung_queue ==
+ vsi->tx_rings[i]->queue_index) {
+ tx_ring = vsi->tx_rings[i];
+ break;
+ }
+ }
+ }
+ }
+
if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
- pf->tx_timeout_recovery_level = 1;
+ pf->tx_timeout_recovery_level = 1; /* reset after some time */
+ else if (time_before(jiffies,
+ (pf->tx_timeout_last_recovery + netdev->watchdog_timeo)))
+ return; /* don't do any new action before the next timeout */
+
+ if (tx_ring) {
+ head = i40e_get_head(tx_ring);
+ /* Read interrupt register */
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ val = rd32(&pf->hw,
+ I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
+ tx_ring->vsi->base_vector - 1));
+ else
+ val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
+
+ netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n",
+ vsi->seid, hung_queue, tx_ring->next_to_clean,
+ head, tx_ring->next_to_use,
+ readl(tx_ring->tail), val);
+ }
+
pf->tx_timeout_last_recovery = jiffies;
- netdev_info(netdev, "tx_timeout recovery level %d\n",
- pf->tx_timeout_recovery_level);
+ netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n",
+ pf->tx_timeout_recovery_level, hung_queue);
switch (pf->tx_timeout_recovery_level) {
- case 0:
- /* disable and re-enable queues for the VSI */
- if (in_interrupt()) {
- set_bit(__I40E_REINIT_REQUESTED, &pf->state);
- set_bit(__I40E_REINIT_REQUESTED, &vsi->state);
- } else {
- i40e_vsi_reinit_locked(vsi);
- }
- break;
case 1:
set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
break;
break;
default:
netdev_err(netdev, "tx_timeout recovery unsuccessful\n");
- set_bit(__I40E_DOWN_REQUESTED, &pf->state);
- set_bit(__I40E_DOWN_REQUESTED, &vsi->state);
break;
}
+
i40e_service_event_schedule(pf);
pf->tx_timeout_recovery_level++;
}
stats->tx_errors = vsi_stats->tx_errors;
stats->tx_dropped = vsi_stats->tx_dropped;
stats->rx_errors = vsi_stats->rx_errors;
+ stats->rx_dropped = vsi_stats->rx_dropped;
stats->rx_crc_errors = vsi_stats->rx_crc_errors;
stats->rx_length_errors = vsi_stats->rx_length_errors;
memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
if (vsi->rx_rings && vsi->rx_rings[0]) {
for (i = 0; i < vsi->num_queue_pairs; i++) {
- memset(&vsi->rx_rings[i]->stats, 0 ,
+ memset(&vsi->rx_rings[i]->stats, 0,
sizeof(vsi->rx_rings[i]->stats));
- memset(&vsi->rx_rings[i]->rx_stats, 0 ,
+ memset(&vsi->rx_rings[i]->rx_stats, 0,
sizeof(vsi->rx_rings[i]->rx_stats));
- memset(&vsi->tx_rings[i]->stats, 0 ,
+ memset(&vsi->tx_rings[i]->stats, 0,
sizeof(vsi->tx_rings[i]->stats));
memset(&vsi->tx_rings[i]->tx_stats, 0,
sizeof(vsi->tx_rings[i]->tx_stats));
struct i40e_hw_port_stats *nsd = &pf->stats;
struct i40e_hw *hw = &pf->hw;
u64 xoff = 0;
- u16 i, v;
if ((hw->fc.current_mode != I40E_FC_FULL) &&
(hw->fc.current_mode != I40E_FC_RX_PAUSE))
if (!(nsd->link_xoff_rx - xoff))
return;
- /* Clear the __I40E_HANG_CHECK_ARMED bit for all Tx rings */
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- struct i40e_vsi *vsi = pf->vsi[v];
-
- if (!vsi || !vsi->tx_rings[0])
- continue;
-
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- struct i40e_ring *ring = vsi->tx_rings[i];
- clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
- }
- }
}
/**
bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false};
struct i40e_dcbx_config *dcb_cfg;
struct i40e_hw *hw = &pf->hw;
- u16 i, v;
+ u16 i;
u8 tc;
dcb_cfg = &hw->local_dcbx_config;
for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
u64 prio_xoff = nsd->priority_xoff_rx[i];
+
i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
pf->stat_offsets_loaded,
&osd->priority_xoff_rx[i],
tc = dcb_cfg->etscfg.prioritytable[i];
xoff[tc] = true;
}
-
- /* Clear the __I40E_HANG_CHECK_ARMED bit for Tx rings */
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- struct i40e_vsi *vsi = pf->vsi[v];
-
- if (!vsi || !vsi->tx_rings[0])
- continue;
-
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- struct i40e_ring *ring = vsi->tx_rings[i];
-
- tc = ring->dcb_tc;
- if (xoff[tc])
- clear_bit(__I40E_HANG_CHECK_ARMED,
- &ring->state);
- }
- }
}
/**
u32 rx_page, rx_buf;
u64 bytes, packets;
unsigned int start;
+ u64 tx_linearize;
u64 rx_p, rx_b;
u64 tx_p, tx_b;
u16 q;
*/
rx_b = rx_p = 0;
tx_b = tx_p = 0;
- tx_restart = tx_busy = 0;
+ tx_restart = tx_busy = tx_linearize = 0;
rx_page = 0;
rx_buf = 0;
rcu_read_lock();
tx_p += packets;
tx_restart += p->tx_stats.restart_queue;
tx_busy += p->tx_stats.tx_busy;
+ tx_linearize += p->tx_stats.tx_linearize;
/* Rx queue is part of the same block as Tx queue */
p = &p[1];
rcu_read_unlock();
vsi->tx_restart = tx_restart;
vsi->tx_busy = tx_busy;
+ vsi->tx_linearize = tx_linearize;
vsi->rx_page_failed = rx_page;
vsi->rx_buf_failed = rx_buf;
* so we have to go through all the list in order to make sure
*/
list_for_each_entry(f, &vsi->mac_filter_list, list) {
- if (f->vlan >= 0)
+ if (f->vlan >= 0 || vsi->info.pvid)
return true;
}
* @is_netdev: make sure its a netdev filter, else doesn't matter
*
* Returns ptr to the filter object or NULL when no memory available.
+ *
+ * NOTE: This function is expected to be called with mac_filter_list_lock
+ * being held.
**/
struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
u8 *macaddr, s16 vlan,
* @vlan: the vlan
* @is_vf: make sure it's a VF filter, else doesn't matter
* @is_netdev: make sure it's a netdev filter, else doesn't matter
+ *
+ * NOTE: This function is expected to be called with mac_filter_list_lock
+ * being held.
**/
void i40e_del_filter(struct i40e_vsi *vsi,
u8 *macaddr, s16 vlan,
} else {
/* make sure we don't remove a filter in use by VF or netdev */
int min_f = 0;
+
min_f += (f->is_vf ? 1 : 0);
min_f += (f->is_netdev ? 1 : 0);
if (vsi->type == I40E_VSI_MAIN) {
i40e_status ret;
+
ret = i40e_aq_mac_address_write(&vsi->back->hw,
I40E_AQC_WRITE_TYPE_LAA_WOL,
addr->sa_data, NULL);
element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
} else {
+ spin_lock_bh(&vsi->mac_filter_list_lock);
i40e_del_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY,
false, false);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
}
if (ether_addr_equal(addr->sa_data, hw->mac.addr)) {
element.flags = cpu_to_le16(I40E_AQC_MACVLAN_ADD_PERFECT_MATCH);
i40e_aq_add_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
} else {
+ spin_lock_bh(&vsi->mac_filter_list_lock);
f = i40e_add_filter(vsi, addr->sa_data, I40E_VLAN_ANY,
false, false);
if (f)
f->is_laa = true;
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
}
- i40e_sync_vsi_filters(vsi);
+ i40e_sync_vsi_filters(vsi, false);
ether_addr_copy(netdev->dev_addr, addr->sa_data);
return 0;
struct netdev_hw_addr *mca;
struct netdev_hw_addr *ha;
+ spin_lock_bh(&vsi->mac_filter_list_lock);
+
/* add addr if not already in the filter list */
netdev_for_each_uc_addr(uca, netdev) {
if (!i40e_find_mac(vsi, uca->addr, false, true)) {
/* remove filter if not in netdev list */
list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
- bool found = false;
if (!f->is_netdev)
continue;
- if (is_multicast_ether_addr(f->macaddr)) {
- netdev_for_each_mc_addr(mca, netdev) {
- if (ether_addr_equal(mca->addr, f->macaddr)) {
- found = true;
- break;
- }
- }
- } else {
- netdev_for_each_uc_addr(uca, netdev) {
- if (ether_addr_equal(uca->addr, f->macaddr)) {
- found = true;
- break;
- }
- }
+ netdev_for_each_mc_addr(mca, netdev)
+ if (ether_addr_equal(mca->addr, f->macaddr))
+ goto bottom_of_search_loop;
- for_each_dev_addr(netdev, ha) {
- if (ether_addr_equal(ha->addr, f->macaddr)) {
- found = true;
- break;
- }
- }
- }
- if (!found)
- i40e_del_filter(
- vsi, f->macaddr, I40E_VLAN_ANY, false, true);
+ netdev_for_each_uc_addr(uca, netdev)
+ if (ether_addr_equal(uca->addr, f->macaddr))
+ goto bottom_of_search_loop;
+
+ for_each_dev_addr(netdev, ha)
+ if (ether_addr_equal(ha->addr, f->macaddr))
+ goto bottom_of_search_loop;
+
+ /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
+ i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY, false, true);
+
+bottom_of_search_loop:
+ continue;
}
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
/* check for other flag changes */
if (vsi->current_netdev_flags != vsi->netdev->flags) {
}
}
+/**
+ * i40e_mac_filter_entry_clone - Clones a MAC filter entry
+ * @src: source MAC filter entry to be clones
+ *
+ * Returns the pointer to newly cloned MAC filter entry or NULL
+ * in case of error
+ **/
+static struct i40e_mac_filter *i40e_mac_filter_entry_clone(
+ struct i40e_mac_filter *src)
+{
+ struct i40e_mac_filter *f;
+
+ f = kzalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return NULL;
+ *f = *src;
+
+ INIT_LIST_HEAD(&f->list);
+
+ return f;
+}
+
+/**
+ * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
+ * @vsi: pointer to vsi struct
+ * @from: Pointer to list which contains MAC filter entries - changes to
+ * those entries needs to be undone.
+ *
+ * MAC filter entries from list were slated to be removed from device.
+ **/
+static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
+ struct list_head *from)
+{
+ struct i40e_mac_filter *f, *ftmp;
+
+ list_for_each_entry_safe(f, ftmp, from, list) {
+ f->changed = true;
+ /* Move the element back into MAC filter list*/
+ list_move_tail(&f->list, &vsi->mac_filter_list);
+ }
+}
+
+/**
+ * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries
+ * @vsi: pointer to vsi struct
+ *
+ * MAC filter entries from list were slated to be added from device.
+ **/
+static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi)
+{
+ struct i40e_mac_filter *f, *ftmp;
+
+ list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+ if (!f->changed && f->counter)
+ f->changed = true;
+ }
+}
+
+/**
+ * i40e_cleanup_add_list - Deletes the element from add list and release
+ * memory
+ * @add_list: Pointer to list which contains MAC filter entries
+ **/
+static void i40e_cleanup_add_list(struct list_head *add_list)
+{
+ struct i40e_mac_filter *f, *ftmp;
+
+ list_for_each_entry_safe(f, ftmp, add_list, list) {
+ list_del(&f->list);
+ kfree(f);
+ }
+}
+
/**
* i40e_sync_vsi_filters - Update the VSI filter list to the HW
* @vsi: ptr to the VSI
+ * @grab_rtnl: whether RTNL needs to be grabbed
*
* Push any outstanding VSI filter changes through the AdminQ.
*
* Returns 0 or error value
**/
-int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
{
- struct i40e_mac_filter *f, *ftmp;
+ struct list_head tmp_del_list, tmp_add_list;
+ struct i40e_mac_filter *f, *ftmp, *fclone;
bool promisc_forced_on = false;
bool add_happened = false;
int filter_list_len = 0;
u32 changed_flags = 0;
+ bool err_cond = false;
i40e_status ret = 0;
struct i40e_pf *pf;
int num_add = 0;
vsi->current_netdev_flags = vsi->netdev->flags;
}
+ INIT_LIST_HEAD(&tmp_del_list);
+ INIT_LIST_HEAD(&tmp_add_list);
+
if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
- filter_list_len = pf->hw.aq.asq_buf_size /
- sizeof(struct i40e_aqc_remove_macvlan_element_data);
- del_list = kcalloc(filter_list_len,
- sizeof(struct i40e_aqc_remove_macvlan_element_data),
- GFP_KERNEL);
- if (!del_list)
- return -ENOMEM;
-
+ spin_lock_bh(&vsi->mac_filter_list_lock);
list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
if (!f->changed)
continue;
if (f->counter != 0)
continue;
f->changed = false;
+
+ /* Move the element into temporary del_list */
+ list_move_tail(&f->list, &tmp_del_list);
+ }
+
+ list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+ if (!f->changed)
+ continue;
+
+ if (f->counter == 0)
+ continue;
+ f->changed = false;
+
+ /* Clone MAC filter entry and add into temporary list */
+ fclone = i40e_mac_filter_entry_clone(f);
+ if (!fclone) {
+ err_cond = true;
+ break;
+ }
+ list_add_tail(&fclone->list, &tmp_add_list);
+ }
+
+ /* if failed to clone MAC filter entry - undo */
+ if (err_cond) {
+ i40e_undo_del_filter_entries(vsi, &tmp_del_list);
+ i40e_undo_add_filter_entries(vsi);
+ }
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+
+ if (err_cond)
+ i40e_cleanup_add_list(&tmp_add_list);
+ }
+
+ /* Now process 'del_list' outside the lock */
+ if (!list_empty(&tmp_del_list)) {
+ filter_list_len = pf->hw.aq.asq_buf_size /
+ sizeof(struct i40e_aqc_remove_macvlan_element_data);
+ del_list = kcalloc(filter_list_len,
+ sizeof(struct i40e_aqc_remove_macvlan_element_data),
+ GFP_KERNEL);
+ if (!del_list) {
+ i40e_cleanup_add_list(&tmp_add_list);
+
+ /* Undo VSI's MAC filter entry element updates */
+ spin_lock_bh(&vsi->mac_filter_list_lock);
+ i40e_undo_del_filter_entries(vsi, &tmp_del_list);
+ i40e_undo_add_filter_entries(vsi);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+ return -ENOMEM;
+ }
+
+ list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) {
cmd_flags = 0;
/* add to delete list */
del_list[num_del].flags = cmd_flags;
num_del++;
- /* unlink from filter list */
- list_del(&f->list);
- kfree(f);
-
/* flush a full buffer */
if (num_del == filter_list_len) {
ret = i40e_aq_remove_macvlan(&pf->hw,
memset(del_list, 0, sizeof(*del_list));
if (ret && aq_err != I40E_AQ_RC_ENOENT)
- dev_info(&pf->pdev->dev,
- "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n",
- i40e_stat_str(&pf->hw, ret),
- i40e_aq_str(&pf->hw, aq_err));
+ dev_err(&pf->pdev->dev,
+ "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, aq_err));
}
+ /* Release memory for MAC filter entries which were
+ * synced up with HW.
+ */
+ list_del(&f->list);
+ kfree(f);
}
+
if (num_del) {
ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
del_list, num_del, NULL);
kfree(del_list);
del_list = NULL;
+ }
+
+ if (!list_empty(&tmp_add_list)) {
/* do all the adds now */
filter_list_len = pf->hw.aq.asq_buf_size /
add_list = kcalloc(filter_list_len,
sizeof(struct i40e_aqc_add_macvlan_element_data),
GFP_KERNEL);
- if (!add_list)
+ if (!add_list) {
+ /* Purge element from temporary lists */
+ i40e_cleanup_add_list(&tmp_add_list);
+
+ /* Undo add filter entries from VSI MAC filter list */
+ spin_lock_bh(&vsi->mac_filter_list_lock);
+ i40e_undo_add_filter_entries(vsi);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
return -ENOMEM;
+ }
- list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
- if (!f->changed)
- continue;
+ list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) {
- if (f->counter == 0)
- continue;
- f->changed = false;
add_happened = true;
cmd_flags = 0;
break;
memset(add_list, 0, sizeof(*add_list));
}
+ /* Entries from tmp_add_list were cloned from MAC
+ * filter list, hence clean those cloned entries
+ */
+ list_del(&f->list);
+ kfree(f);
}
+
if (num_add) {
ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
add_list, num_add, NULL);
/* check for changes in promiscuous modes */
if (changed_flags & IFF_ALLMULTI) {
bool cur_multipromisc;
+
cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
vsi->seid,
}
if ((changed_flags & IFF_PROMISC) || promisc_forced_on) {
bool cur_promisc;
+
cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
&vsi->state));
*/
if (pf->cur_promisc != cur_promisc) {
pf->cur_promisc = cur_promisc;
- i40e_do_reset_safe(pf,
+ if (grab_rtnl)
+ i40e_do_reset_safe(pf,
+ BIT(__I40E_PF_RESET_REQUESTED));
+ else
+ i40e_do_reset(pf,
BIT(__I40E_PF_RESET_REQUESTED));
}
} else {
for (v = 0; v < pf->num_alloc_vsi; v++) {
if (pf->vsi[v] &&
(pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED))
- i40e_sync_vsi_filters(pf->vsi[v]);
+ i40e_sync_vsi_filters(pf->vsi[v], true);
}
}
is_vf = (vsi->type == I40E_VSI_SRIOV);
is_netdev = !!(vsi->netdev);
+ /* Locked once because all functions invoked below iterates list*/
+ spin_lock_bh(&vsi->mac_filter_list_lock);
+
if (is_netdev) {
add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid,
is_vf, is_netdev);
dev_info(&vsi->back->pdev->dev,
"Could not add vlan filter %d for %pM\n",
vid, vsi->netdev->dev_addr);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
return -ENOMEM;
}
}
dev_info(&vsi->back->pdev->dev,
"Could not add vlan filter %d for %pM\n",
vid, f->macaddr);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
return -ENOMEM;
}
}
dev_info(&vsi->back->pdev->dev,
"Could not add filter 0 for %pM\n",
vsi->netdev->dev_addr);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
return -ENOMEM;
}
}
/* Do not assume that I40E_VLAN_ANY should be reset to VLAN 0 */
if (vid > 0 && !vsi->info.pvid) {
list_for_each_entry(f, &vsi->mac_filter_list, list) {
- if (i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
- is_vf, is_netdev)) {
- i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
- is_vf, is_netdev);
- add_f = i40e_add_filter(vsi, f->macaddr,
- 0, is_vf, is_netdev);
- if (!add_f) {
- dev_info(&vsi->back->pdev->dev,
- "Could not add filter 0 for %pM\n",
- f->macaddr);
- return -ENOMEM;
- }
+ if (!i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
+ is_vf, is_netdev))
+ continue;
+ i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
+ is_vf, is_netdev);
+ add_f = i40e_add_filter(vsi, f->macaddr,
+ 0, is_vf, is_netdev);
+ if (!add_f) {
+ dev_info(&vsi->back->pdev->dev,
+ "Could not add filter 0 for %pM\n",
+ f->macaddr);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+ return -ENOMEM;
}
}
}
+ /* Make sure to release before sync_vsi_filter because that
+ * function will lock/unlock as necessary
+ */
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+
if (test_bit(__I40E_DOWN, &vsi->back->state) ||
test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
return 0;
- return i40e_sync_vsi_filters(vsi);
+ return i40e_sync_vsi_filters(vsi, false);
}
/**
is_vf = (vsi->type == I40E_VSI_SRIOV);
is_netdev = !!(netdev);
+ /* Locked once because all functions invoked below iterates list */
+ spin_lock_bh(&vsi->mac_filter_list_lock);
+
if (is_netdev)
i40e_del_filter(vsi, netdev->dev_addr, vid, is_vf, is_netdev);
dev_info(&vsi->back->pdev->dev,
"Could not add filter %d for %pM\n",
I40E_VLAN_ANY, netdev->dev_addr);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
return -ENOMEM;
}
}
list_for_each_entry(f, &vsi->mac_filter_list, list) {
i40e_del_filter(vsi, f->macaddr, 0, is_vf, is_netdev);
add_f = i40e_add_filter(vsi, f->macaddr, I40E_VLAN_ANY,
- is_vf, is_netdev);
+ is_vf, is_netdev);
if (!add_f) {
dev_info(&vsi->back->pdev->dev,
"Could not add filter %d for %pM\n",
I40E_VLAN_ANY, f->macaddr);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
return -ENOMEM;
}
}
}
+ /* Make sure to release before sync_vsi_filter because that
+ * function with lock/unlock as necessary
+ */
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+
if (test_bit(__I40E_DOWN, &vsi->back->state) ||
test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
return 0;
- return i40e_sync_vsi_filters(vsi);
+ return i40e_sync_vsi_filters(vsi, false);
}
/**
wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
i40e_flush(hw);
- clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
-
/* cache tail off for easier writes later */
ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
- struct i40e_q_vector *q_vector;
struct i40e_hw *hw = &pf->hw;
u16 vector;
int i, q;
- u32 val;
u32 qp;
/* The interrupt indexing is offset by 1 in the PFINT_ITRn
qp = vsi->base_queue;
vector = vsi->base_vector;
for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
- q_vector = vsi->q_vectors[i];
+ struct i40e_q_vector *q_vector = vsi->q_vectors[i];
+
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
q_vector->rx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
q_vector->tx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
q_vector->tx.itr);
+ wr32(hw, I40E_PFINT_RATEN(vector - 1),
+ INTRL_USEC_TO_REG(vsi->int_rate_limit));
/* Linked list for the queuepairs assigned to this vector */
wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
for (q = 0; q < q_vector->num_ringpairs; q++) {
+ u32 val;
+
val = I40E_QINT_RQCTL_CAUSE_ENA_MASK |
(I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
(vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
u32 val;
/* set the ITR configuration */
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
q_vector->rx.latency_range = I40E_LOW_LATENCY;
wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
i40e_flush(hw);
}
-/**
- * i40e_irq_dynamic_enable - Enable default interrupt generation settings
- * @vsi: pointer to a vsi
- * @vector: enable a particular Hw Interrupt vector
- **/
-void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
-{
- struct i40e_pf *pf = vsi->back;
- struct i40e_hw *hw = &pf->hw;
- u32 val;
-
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
- wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
- /* skip the flush */
-}
-
/**
* i40e_irq_dynamic_disable - Disable default interrupt generation settings
* @vsi: pointer to a vsi
if (!q_vector->tx.ring && !q_vector->rx.ring)
return IRQ_HANDLED;
- napi_schedule(&q_vector->napi);
+ napi_schedule_irqoff(&q_vector->napi);
return IRQ_HANDLED;
}
q_vector);
if (err) {
dev_info(&pf->pdev->dev,
- "%s: request_irq failed, error: %d\n",
- __func__, err);
+ "MSIX request_irq failed, error: %d\n", err);
goto free_queue_irqs;
}
/* assign the mask for this irq */
int i;
if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
- for (i = vsi->base_vector;
- i < (vsi->num_q_vectors + vsi->base_vector); i++)
+ for (i = 0; i < vsi->num_q_vectors; i++)
i40e_irq_dynamic_enable(vsi, i);
} else {
i40e_irq_dynamic_enable_icr0(pf);
/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) {
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ struct i40e_q_vector *q_vector = vsi->q_vectors[0];
/* temporarily disable queue cause for NAPI processing */
u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
+
qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
wr32(hw, I40E_QINT_RQCTL(0), qval);
wr32(hw, I40E_QINT_TQCTL(0), qval);
if (!test_bit(__I40E_DOWN, &pf->state))
- napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0]->napi);
+ napi_schedule_irqoff(&q_vector->napi);
}
if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
i += tx_ring->count;
tx_ring->next_to_clean = i;
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
- i40e_irq_dynamic_enable(vsi,
- tx_ring->q_vector->v_idx + vsi->base_vector);
- }
+ if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED)
+ i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
+
return budget > 0;
}
if (test_bit(__I40E_DOWN, &vsi->state))
return;
- pf->flags |= I40E_FLAG_IN_NETPOLL;
if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
for (i = 0; i < vsi->num_q_vectors; i++)
i40e_msix_clean_rings(0, vsi->q_vectors[i]);
} else {
i40e_intr(pf->pdev->irq, netdev);
}
- pf->flags &= ~I40E_FLAG_IN_NETPOLL;
}
#endif
ret = i40e_pf_txq_wait(pf, pf_q, enable);
if (ret) {
dev_info(&pf->pdev->dev,
- "%s: VSI seid %d Tx ring %d %sable timeout\n",
- __func__, vsi->seid, pf_q,
- (enable ? "en" : "dis"));
+ "VSI seid %d Tx ring %d %sable timeout\n",
+ vsi->seid, pf_q, (enable ? "en" : "dis"));
break;
}
}
ret = i40e_pf_rxq_wait(pf, pf_q, enable);
if (ret) {
dev_info(&pf->pdev->dev,
- "%s: VSI seid %d Rx ring %d %sable timeout\n",
- __func__, vsi->seid, pf_q,
- (enable ? "en" : "dis"));
+ "VSI seid %d Rx ring %d %sable timeout\n",
+ vsi->seid, pf_q, (enable ? "en" : "dis"));
break;
}
}
if ((test_bit(__I40E_PORT_TX_SUSPENDED, &vsi->back->state)) &&
vsi->type == I40E_VSI_FCOE) {
dev_dbg(&vsi->back->pdev->dev,
- "%s: VSI seid %d skipping FCoE VSI disable\n",
- __func__, vsi->seid);
+ "VSI seid %d skipping FCoE VSI disable\n", vsi->seid);
return;
}
set_bit(__I40E_NEEDS_RESTART, &vsi->state);
- if (vsi->netdev && netif_running(vsi->netdev)) {
+ if (vsi->netdev && netif_running(vsi->netdev))
vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
- } else {
+ else
i40e_vsi_close(vsi);
- }
}
/**
ret = i40e_pf_txq_wait(pf, pf_q, false);
if (ret) {
dev_info(&pf->pdev->dev,
- "%s: VSI seid %d Tx ring %d disable timeout\n",
- __func__, vsi->seid, pf_q);
+ "VSI seid %d Tx ring %d disable timeout\n",
+ vsi->seid, pf_q);
return ret;
}
}
}
#endif
+
+/**
+ * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue
+ * @q_idx: TX queue number
+ * @vsi: Pointer to VSI struct
+ *
+ * This function checks specified queue for given VSI. Detects hung condition.
+ * Sets hung bit since it is two step process. Before next run of service task
+ * if napi_poll runs, it reset 'hung' bit for respective q_vector. If not,
+ * hung condition remain unchanged and during subsequent run, this function
+ * issues SW interrupt to recover from hung condition.
+ **/
+static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
+{
+ struct i40e_ring *tx_ring = NULL;
+ struct i40e_pf *pf;
+ u32 head, val, tx_pending;
+ int i;
+
+ pf = vsi->back;
+
+ /* now that we have an index, find the tx_ring struct */
+ for (i = 0; i < vsi->num_queue_pairs; i++) {
+ if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
+ if (q_idx == vsi->tx_rings[i]->queue_index) {
+ tx_ring = vsi->tx_rings[i];
+ break;
+ }
+ }
+ }
+
+ if (!tx_ring)
+ return;
+
+ /* Read interrupt register */
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ val = rd32(&pf->hw,
+ I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
+ tx_ring->vsi->base_vector - 1));
+ else
+ val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
+
+ head = i40e_get_head(tx_ring);
+
+ tx_pending = i40e_get_tx_pending(tx_ring);
+
+ /* Interrupts are disabled and TX pending is non-zero,
+ * trigger the SW interrupt (don't wait). Worst case
+ * there will be one extra interrupt which may result
+ * into not cleaning any queues because queues are cleaned.
+ */
+ if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
+ i40e_force_wb(vsi, tx_ring->q_vector);
+}
+
+/**
+ * i40e_detect_recover_hung - Function to detect and recover hung_queues
+ * @pf: pointer to PF struct
+ *
+ * LAN VSI has netdev and netdev has TX queues. This function is to check
+ * each of those TX queues if they are hung, trigger recovery by issuing
+ * SW interrupt.
+ **/
+static void i40e_detect_recover_hung(struct i40e_pf *pf)
+{
+ struct net_device *netdev;
+ struct i40e_vsi *vsi;
+ int i;
+
+ /* Only for LAN VSI */
+ vsi = pf->vsi[pf->lan_vsi];
+
+ if (!vsi)
+ return;
+
+ /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */
+ if (test_bit(__I40E_DOWN, &vsi->back->state) ||
+ test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
+ return;
+
+ /* Make sure type is MAIN VSI */
+ if (vsi->type != I40E_VSI_MAIN)
+ return;
+
+ netdev = vsi->netdev;
+ if (!netdev)
+ return;
+
+ /* Bail out if netif_carrier is not OK */
+ if (!netif_carrier_ok(netdev))
+ return;
+
+ /* Go thru' TX queues for netdev */
+ for (i = 0; i < netdev->num_tx_queues; i++) {
+ struct netdev_queue *q;
+
+ q = netdev_get_tx_queue(netdev, i);
+ if (q)
+ i40e_detect_recover_hung_queue(i, vsi);
+ }
+}
+
/**
* i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
* @pf: pointer to PF
* i40e_print_link_message - print link up or down
* @vsi: the VSI for which link needs a message
*/
-static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
{
- char speed[SPEED_SIZE] = "Unknown";
- char fc[FC_SIZE] = "RX/TX";
+ char *speed = "Unknown";
+ char *fc = "Unknown";
+ if (vsi->current_isup == isup)
+ return;
+ vsi->current_isup = isup;
if (!isup) {
netdev_info(vsi->netdev, "NIC Link is Down\n");
return;
switch (vsi->back->hw.phy.link_info.link_speed) {
case I40E_LINK_SPEED_40GB:
- strlcpy(speed, "40 Gbps", SPEED_SIZE);
+ speed = "40 G";
break;
case I40E_LINK_SPEED_20GB:
- strncpy(speed, "20 Gbps", SPEED_SIZE);
+ speed = "20 G";
break;
case I40E_LINK_SPEED_10GB:
- strlcpy(speed, "10 Gbps", SPEED_SIZE);
+ speed = "10 G";
break;
case I40E_LINK_SPEED_1GB:
- strlcpy(speed, "1000 Mbps", SPEED_SIZE);
+ speed = "1000 M";
break;
case I40E_LINK_SPEED_100MB:
- strncpy(speed, "100 Mbps", SPEED_SIZE);
+ speed = "100 M";
break;
default:
break;
switch (vsi->back->hw.fc.current_mode) {
case I40E_FC_FULL:
- strlcpy(fc, "RX/TX", FC_SIZE);
+ fc = "RX/TX";
break;
case I40E_FC_TX_PAUSE:
- strlcpy(fc, "TX", FC_SIZE);
+ fc = "TX";
break;
case I40E_FC_RX_PAUSE:
- strlcpy(fc, "RX", FC_SIZE);
+ fc = "RX";
break;
default:
- strlcpy(fc, "None", FC_SIZE);
+ fc = "None";
break;
}
- netdev_info(vsi->netdev, "NIC Link is Up %s Full Duplex, Flow Control: %s\n",
+ netdev_info(vsi->netdev, "NIC Link is Up %sbps Full Duplex, Flow Control: %s\n",
speed, fc);
}
"VSI reinit requested\n");
for (v = 0; v < pf->num_alloc_vsi; v++) {
struct i40e_vsi *vsi = pf->vsi[v];
+
if (vsi != NULL &&
test_bit(__I40E_REINIT_REQUESTED, &vsi->state)) {
i40e_vsi_reinit_locked(pf->vsi[v]);
clear_bit(__I40E_REINIT_REQUESTED, &vsi->state);
}
}
-
- /* no further action needed, so return now */
- return;
} else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) {
int v;
dev_info(&pf->pdev->dev, "VSI down requested\n");
for (v = 0; v < pf->num_alloc_vsi; v++) {
struct i40e_vsi *vsi = pf->vsi[v];
+
if (vsi != NULL &&
test_bit(__I40E_DOWN_REQUESTED, &vsi->state)) {
set_bit(__I40E_DOWN, &vsi->state);
clear_bit(__I40E_DOWN_REQUESTED, &vsi->state);
}
}
-
- /* no further action needed, so return now */
- return;
} else {
dev_info(&pf->pdev->dev,
"bad reset request 0x%08x\n", reset_flags);
- return;
}
}
dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
}
- dev_dbg(&pf->pdev->dev, "%s: need_reconfig=%d\n", __func__,
- need_reconfig);
+ dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
return need_reconfig;
}
/* Ignore if event is not for Nearest Bridge */
type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
- dev_dbg(&pf->pdev->dev,
- "%s: LLDP event mib bridge type 0x%x\n", __func__, type);
+ dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type);
if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE)
return ret;
/* Check MIB Type and return if event for Remote MIB update */
type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
dev_dbg(&pf->pdev->dev,
- "%s: LLDP event mib type %s\n", __func__,
- type ? "remote" : "local");
+ "LLDP event mib type %s\n", type ? "remote" : "local");
if (type == I40E_AQ_LLDP_MIB_REMOTE) {
/* Update the remote cached instance and return */
ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
**/
void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
{
+ struct i40e_fdir_filter *filter;
u32 fcnt_prog, fcnt_avail;
+ struct hlist_node *node;
if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
return;
dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n");
}
}
+
+ /* if hw had a problem adding a filter, delete it */
+ if (pf->fd_inv > 0) {
+ hlist_for_each_entry_safe(filter, node,
+ &pf->fdir_filter_list, fdir_node) {
+ if (filter->fd_id == pf->fd_inv) {
+ hlist_del(&filter->fdir_node);
+ kfree(filter);
+ pf->fdir_pf_active_filters--;
+ }
+ }
+ }
}
#define I40E_MIN_FD_FLUSH_INTERVAL 10
if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)))
return;
- if (time_after(jiffies, pf->fd_flush_timestamp +
- (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) {
- /* If the flush is happening too quick and we have mostly
- * SB rules we should not re-enable ATR for some time.
- */
- min_flush_time = pf->fd_flush_timestamp
- + (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
- fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
+ if (!time_after(jiffies, pf->fd_flush_timestamp +
+ (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
+ return;
- if (!(time_after(jiffies, min_flush_time)) &&
- (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
- if (I40E_DEBUG_FD & pf->hw.debug_mask)
- dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
- disable_atr = true;
- }
+ /* If the flush is happening too quick and we have mostly SB rules we
+ * should not re-enable ATR for some time.
+ */
+ min_flush_time = pf->fd_flush_timestamp +
+ (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
+ fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
- pf->fd_flush_timestamp = jiffies;
- pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
- /* flush all filters */
- wr32(&pf->hw, I40E_PFQF_CTL_1,
- I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
- i40e_flush(&pf->hw);
- pf->fd_flush_cnt++;
- pf->fd_add_err = 0;
- do {
- /* Check FD flush status every 5-6msec */
- usleep_range(5000, 6000);
- reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
- if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
- break;
- } while (flush_wait_retry--);
- if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
- dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
- } else {
- /* replay sideband filters */
- i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
- if (!disable_atr)
- pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
- clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
- if (I40E_DEBUG_FD & pf->hw.debug_mask)
- dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
- }
+ if (!(time_after(jiffies, min_flush_time)) &&
+ (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
+ if (I40E_DEBUG_FD & pf->hw.debug_mask)
+ dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
+ disable_atr = true;
+ }
+
+ pf->fd_flush_timestamp = jiffies;
+ pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ /* flush all filters */
+ wr32(&pf->hw, I40E_PFQF_CTL_1,
+ I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
+ i40e_flush(&pf->hw);
+ pf->fd_flush_cnt++;
+ pf->fd_add_err = 0;
+ do {
+ /* Check FD flush status every 5-6msec */
+ usleep_range(5000, 6000);
+ reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
+ if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
+ break;
+ } while (flush_wait_retry--);
+ if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
+ dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
+ } else {
+ /* replay sideband filters */
+ i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
+ if (!disable_atr)
+ pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
+ clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
+ if (I40E_DEBUG_FD & pf->hw.debug_mask)
+ dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
}
+
}
/**
**/
static void i40e_link_event(struct i40e_pf *pf)
{
- bool new_link, old_link;
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
u8 new_link_speed, old_link_speed;
+ i40e_status status;
+ bool new_link, old_link;
/* set this to force the get_link_status call to refresh state */
pf->hw.phy.get_link_info = true;
old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
- new_link = i40e_get_link_status(&pf->hw);
+
+ status = i40e_get_link_status(&pf->hw, &new_link);
+ if (status) {
+ dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
+ status);
+ return;
+ }
+
old_link_speed = pf->hw.phy.link_info_old.link_speed;
new_link_speed = pf->hw.phy.link_info.link_speed;
i40e_ptp_set_increment(pf);
}
-/**
- * i40e_check_hang_subtask - Check for hung queues and dropped interrupts
- * @pf: board private structure
- *
- * Set the per-queue flags to request a check for stuck queues in the irq
- * clean functions, then force interrupts to be sure the irq clean is called.
- **/
-static void i40e_check_hang_subtask(struct i40e_pf *pf)
-{
- int i, v;
-
- /* If we're down or resetting, just bail */
- if (test_bit(__I40E_DOWN, &pf->state) ||
- test_bit(__I40E_CONFIG_BUSY, &pf->state))
- return;
-
- /* for each VSI/netdev
- * for each Tx queue
- * set the check flag
- * for each q_vector
- * force an interrupt
- */
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- struct i40e_vsi *vsi = pf->vsi[v];
- int armed = 0;
-
- if (!pf->vsi[v] ||
- test_bit(__I40E_DOWN, &vsi->state) ||
- (vsi->netdev && !netif_carrier_ok(vsi->netdev)))
- continue;
-
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- set_check_for_tx_hang(vsi->tx_rings[i]);
- if (test_bit(__I40E_HANG_CHECK_ARMED,
- &vsi->tx_rings[i]->state))
- armed++;
- }
-
- if (armed) {
- if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
- wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0,
- (I40E_PFINT_DYN_CTL0_INTENA_MASK |
- I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
- I40E_PFINT_DYN_CTL0_ITR_INDX_MASK |
- I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK |
- I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK));
- } else {
- u16 vec = vsi->base_vector - 1;
- u32 val = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
- I40E_PFINT_DYN_CTLN_ITR_INDX_MASK |
- I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
- I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK);
- for (i = 0; i < vsi->num_q_vectors; i++, vec++)
- wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(vec), val);
- }
- i40e_flush(&vsi->back->hw);
- }
- }
-}
-
/**
* i40e_watchdog_subtask - periodic checks not using event driven response
* @pf: board private structure
return;
pf->service_timer_previous = jiffies;
- i40e_check_hang_subtask(pf);
- i40e_link_event(pf);
+ if (pf->flags & I40E_FLAG_LINK_POLLING_ENABLED)
+ i40e_link_event(pf);
/* Update the stats for active netdevs so the network stack
* can look at updated numbers whenever it cares to
if (pf->vsi[i] && pf->vsi[i]->netdev)
i40e_update_stats(pf->vsi[i]);
- /* Update the stats for the active switching components */
- for (i = 0; i < I40E_MAX_VEB; i++)
- if (pf->veb[i])
- i40e_update_veb_stats(pf->veb[i]);
+ if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) {
+ /* Update the stats for the active switching components */
+ for (i = 0; i < I40E_MAX_VEB; i++)
+ if (pf->veb[i])
+ i40e_update_veb_stats(pf->veb[i]);
+ }
i40e_ptp_rx_hang(pf->vsi[pf->lan_vsi]);
}
{
struct i40e_pf *pf = veb->pf;
- dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
- veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+ if (pf->hw.debug_mask & I40E_DEBUG_LAN)
+ dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
+ veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
if (veb->bridge_mode & BRIDGE_MODE_VEPA)
i40e_disable_pf_switch_lb(pf);
else
if (pf->vsi[v]->veb_idx == veb->idx) {
struct i40e_vsi *vsi = pf->vsi[v];
+
vsi->uplink_seid = veb->seid;
ret = i40e_add_vsi(vsi);
if (ret) {
}
} while (err);
- if (((pf->hw.aq.fw_maj_ver == 2) && (pf->hw.aq.fw_min_ver < 22)) ||
- (pf->hw.aq.fw_maj_ver < 2)) {
- pf->hw.func_caps.num_msix_vectors++;
- pf->hw.func_caps.num_msix_vectors_vf++;
- }
-
if (pf->hw.debug_mask & I40E_DEBUG_USER)
dev_info(&pf->pdev->dev,
"pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
}
#endif /* CONFIG_I40E_DCB */
#ifdef I40E_FCOE
- ret = i40e_init_pf_fcoe(pf);
- if (ret)
- dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", ret);
+ i40e_init_pf_fcoe(pf);
#endif
/* do basic switch setup */
/* make sure our flow control settings are restored */
ret = i40e_set_fc(&pf->hw, &set_fc_aq_fail, true);
if (ret)
- dev_info(&pf->pdev->dev, "set fc fail, err %s aq_err %s\n",
- i40e_stat_str(&pf->hw, ret),
- i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ dev_dbg(&pf->pdev->dev, "setting flow control: ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
/* Rebuild the VSIs and VEBs that existed before reset.
* They are still in our local switch element arrays, so only
if (pf->flags & I40E_FLAG_MSIX_ENABLED)
ret = i40e_setup_misc_vector(pf);
+ /* Add a filter to drop all Flow control frames from any VSI from being
+ * transmitted. By doing so we stop a malicious VF from sending out
+ * PAUSE or PFC frames and potentially controlling traffic for other
+ * PF/VF VSIs.
+ * The FW can still send Flow control frames if enabled.
+ */
+ i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
+ pf->main_vsi_seid);
+
/* restart the VSIs that were rebuilt and running before the reset */
i40e_pf_unquiesce_all_vsi(pf);
return;
}
+ i40e_detect_recover_hung(pf);
i40e_reset_subtask(pf);
i40e_handle_mdd_event(pf);
i40e_vc_process_vflr_event(pf);
vsi->idx = vsi_idx;
vsi->rx_itr_setting = pf->rx_itr_default;
vsi->tx_itr_setting = pf->tx_itr_default;
+ vsi->int_rate_limit = 0;
vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
pf->rss_table_size : 64;
vsi->netdev_registered = false;
/* Setup default MSIX irq handler for VSI */
i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
+ /* Initialize VSI lock */
+ spin_lock_init(&vsi->mac_filter_list_lock);
pf->vsi[vsi_idx] = vsi;
ret = vsi_idx;
goto unlock_pf;
"Cannot set RSS key, err %s aq_err %s\n",
i40e_stat_str(&pf->hw, ret),
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
- return ret;
+ goto config_rss_aq_out;
}
if (vsi->type == I40E_VSI_MAIN)
i40e_stat_str(&pf->hw, ret),
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+config_rss_aq_out:
+ kfree(rss_lut);
return ret;
}
/* Set default capability flags */
pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
I40E_FLAG_MSI_ENABLED |
+ I40E_FLAG_LINK_POLLING_ENABLED |
I40E_FLAG_MSIX_ENABLED;
if (iommu_present(&pci_bus_type))
(pf->hw.func_caps.fd_filters_best_effort > 0)) {
pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
- if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) {
- pf->flags |= I40E_FLAG_FD_SB_ENABLED;
- } else {
+ if (pf->flags & I40E_FLAG_MFP_ENABLED &&
+ pf->hw.num_partitions > 1)
dev_info(&pf->pdev->dev,
"Flow Director Sideband mode Disabled in MFP mode\n");
- }
+ else
+ pf->flags |= I40E_FLAG_FD_SB_ENABLED;
pf->fdir_pf_filter_count =
pf->hw.func_caps.fd_filters_guaranteed;
pf->hw.fdir_shared_filter_count =
if (pf->hw.func_caps.vmdq) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
pf->flags |= I40E_FLAG_VMDQ_ENABLED;
+ pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
}
#ifdef I40E_FCOE
- err = i40e_init_pf_fcoe(pf);
- if (err)
- dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", err);
+ i40e_init_pf_fcoe(pf);
#endif /* I40E_FCOE */
#ifdef CONFIG_PCI_IOV
pf->lan_veb = I40E_NO_VEB;
pf->lan_vsi = I40E_NO_VSI;
+ /* By default FW has this off for performance reasons */
+ pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
+
/* set up queue assignment tracking */
size = sizeof(struct i40e_lump_tracking)
+ (sizeof(u16) * pf->hw.func_caps.num_tx_qp);
pf->vxlan_ports[idx] = 0;
pf->pending_vxlan_bitmap |= BIT_ULL(idx);
pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC;
-
- dev_info(&pf->pdev->dev, "deleting vxlan port %d\n",
- ntohs(port));
} else {
netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
ntohs(port));
* @seq: RTNL message seq #
* @dev: the netdev being configured
* @filter_mask: unused
+ * @nlflags: netlink flags passed in
*
* Return the mode in which the hardware bridge is operating in
* i.e VEB or VEPA.
**/
static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev,
- u32 filter_mask, int nlflags)
+ u32 __always_unused filter_mask,
+ int nlflags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
/**
* i40e_features_check - Validate encapsulated packet conforms to limits
* @skb: skb buff
- * @netdev: This physical port's netdev
+ * @dev: This physical port's netdev
* @features: Offload features that the stack believes apply
**/
static netdev_features_t i40e_features_check(struct sk_buff *skb,
* default a MAC-VLAN filter that accepts any tagged packet
* which must be replaced by a normal filter.
*/
- if (!i40e_rm_default_mac_filter(vsi, mac_addr))
+ if (!i40e_rm_default_mac_filter(vsi, mac_addr)) {
+ spin_lock_bh(&vsi->mac_filter_list_lock);
i40e_add_filter(vsi, mac_addr,
I40E_VLAN_ANY, false, true);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+ }
} else {
/* relate the VSI_VMDQ name to the VSI_MAIN name */
snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
pf->vsi[pf->lan_vsi]->netdev->name);
random_ether_addr(mac_addr);
+
+ spin_lock_bh(&vsi->mac_filter_list_lock);
i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
}
+
+ spin_lock_bh(&vsi->mac_filter_list_lock);
i40e_add_filter(vsi, brdcast, I40E_VLAN_ANY, false, false);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
ether_addr_copy(netdev->dev_addr, mac_addr);
ether_addr_copy(netdev->perm_addr, mac_addr);
return 1;
veb = pf->veb[vsi->veb_idx];
+ if (!veb) {
+ dev_info(&pf->pdev->dev,
+ "There is no veb associated with the bridge\n");
+ return -ENOENT;
+ }
+
/* Uplink is a bridge in VEPA mode */
- if (veb && (veb->bridge_mode & BRIDGE_MODE_VEPA))
+ if (veb->bridge_mode & BRIDGE_MODE_VEPA) {
return 0;
+ } else {
+ /* Uplink is a bridge in VEB mode */
+ return 1;
+ }
- /* Uplink is a bridge in VEB mode */
- return 1;
+ /* VEPA is now default bridge, so return 0 */
+ return 0;
}
/**
static int i40e_add_vsi(struct i40e_vsi *vsi)
{
int ret = -ENODEV;
- struct i40e_mac_filter *f, *ftmp;
+ u8 laa_macaddr[ETH_ALEN];
+ bool found_laa_mac_filter = false;
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_vsi_context ctxt;
+ struct i40e_mac_filter *f, *ftmp;
+
u8 enabled_tc = 0x1; /* TC0 enabled */
int f_count = 0;
vsi->id = ctxt.vsi_number;
}
+ spin_lock_bh(&vsi->mac_filter_list_lock);
/* If macvlan filters already exist, force them to get loaded */
list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
f->changed = true;
f_count++;
+ /* Expected to have only one MAC filter entry for LAA in list */
if (f->is_laa && vsi->type == I40E_VSI_MAIN) {
- struct i40e_aqc_remove_macvlan_element_data element;
+ ether_addr_copy(laa_macaddr, f->macaddr);
+ found_laa_mac_filter = true;
+ }
+ }
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
- memset(&element, 0, sizeof(element));
- ether_addr_copy(element.mac_addr, f->macaddr);
- element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
- ret = i40e_aq_remove_macvlan(hw, vsi->seid,
- &element, 1, NULL);
- if (ret) {
- /* some older FW has a different default */
- element.flags |=
- I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
- i40e_aq_remove_macvlan(hw, vsi->seid,
- &element, 1, NULL);
- }
+ if (found_laa_mac_filter) {
+ struct i40e_aqc_remove_macvlan_element_data element;
- i40e_aq_mac_address_write(hw,
- I40E_AQC_WRITE_TYPE_LAA_WOL,
- f->macaddr, NULL);
+ memset(&element, 0, sizeof(element));
+ ether_addr_copy(element.mac_addr, laa_macaddr);
+ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+ ret = i40e_aq_remove_macvlan(hw, vsi->seid,
+ &element, 1, NULL);
+ if (ret) {
+ /* some older FW has a different default */
+ element.flags |=
+ I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+ i40e_aq_remove_macvlan(hw, vsi->seid,
+ &element, 1, NULL);
}
+
+ i40e_aq_mac_address_write(hw,
+ I40E_AQC_WRITE_TYPE_LAA_WOL,
+ laa_macaddr, NULL);
}
+
if (f_count) {
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
pf->flags |= I40E_FLAG_FILTER_SYNC;
i40e_vsi_disable_irq(vsi);
}
+ spin_lock_bh(&vsi->mac_filter_list_lock);
list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
i40e_del_filter(vsi, f->macaddr, f->vlan,
f->is_vf, f->is_netdev);
- i40e_sync_vsi_filters(vsi);
+ spin_unlock_bh(&vsi->mac_filter_list_lock);
+
+ i40e_sync_vsi_filters(vsi, false);
i40e_vsi_delete(vsi);
i40e_vsi_free_q_vectors(vsi);
if (veb) {
if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) {
dev_info(&vsi->back->pdev->dev,
- "%s: New VSI creation error, uplink seid of LAN VSI expected.\n",
- __func__);
+ "New VSI creation error, uplink seid of LAN VSI expected.\n");
return NULL;
}
/* We come up by default in VEPA mode if SRIOV is not
} else {
/* force a reset of TC and queue layout configurations */
u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
+
pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
i40e_config_rss(pf);
/* fill in link information and enable LSE reporting */
- i40e_aq_get_link_info(&pf->hw, true, NULL, NULL);
+ i40e_update_link_info(&pf->hw);
i40e_link_event(pf);
/* Initialize user-specific link properties */
}
pf->queues_left = queues_left;
+ dev_dbg(&pf->pdev->dev,
+ "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
+ pf->hw.func_caps.num_tx_qp,
+ !!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
+ pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps,
+ pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left);
#ifdef I40E_FCOE
- dev_info(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
+ dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
#endif
}
}
if (pf->flags & I40E_FLAG_DCB_CAPABLE)
buf += sprintf(buf, "DCB ");
+#if IS_ENABLED(CONFIG_VXLAN)
+ buf += sprintf(buf, "VxLAN ");
+#endif
if (pf->flags & I40E_FLAG_PTP)
buf += sprintf(buf, "PTP ");
#ifdef I40E_FCOE
if (pf->flags & I40E_FLAG_FCOE_ENABLED)
buf += sprintf(buf, "FCOE ");
#endif
+ if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+ buf += sprintf(buf, "VEB ");
+ else
+ buf += sprintf(buf, "VEPA ");
BUG_ON(buf > (string + INFO_STRING_LEN));
dev_info(&pf->pdev->dev, "%s\n", string);
static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct i40e_aq_get_phy_abilities_resp abilities;
- unsigned long ioremap_len;
struct i40e_pf *pf;
struct i40e_hw *hw;
static u16 pfs_found;
+ u16 wol_nvm_bits;
u16 link_status;
- int err = 0;
+ int err;
u32 len;
u32 i;
+ u8 set_fc_aq_fail;
err = pci_enable_device_mem(pdev);
if (err)
hw = &pf->hw;
hw->back = pf;
- ioremap_len = min_t(unsigned long, pci_resource_len(pdev, 0),
- I40E_MAX_CSR_SPACE);
+ pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
+ I40E_MAX_CSR_SPACE);
- hw->hw_addr = ioremap(pci_resource_start(pdev, 0), ioremap_len);
+ hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
if (!hw->hw_addr) {
err = -EIO;
dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
(unsigned int)pci_resource_start(pdev, 0),
- (unsigned int)pci_resource_len(pdev, 0), err);
+ pf->ioremap_len, err);
goto err_ioremap;
}
hw->vendor_id = pdev->vendor;
pf->hw.fc.requested_mode = I40E_FC_NONE;
err = i40e_init_adminq(hw);
- dev_info(&pdev->dev, "%s\n", i40e_fw_version_str(hw));
+
+ /* provide nvm, fw, api versions */
+ dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n",
+ hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
+ hw->aq.api_maj_ver, hw->aq.api_min_ver,
+ i40e_nvm_version_str(hw));
+
if (err) {
dev_info(&pdev->dev,
"The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
INIT_WORK(&pf->service_task, i40e_service_task);
clear_bit(__I40E_SERVICE_SCHED, &pf->state);
pf->flags |= I40E_FLAG_NEED_LINK_UPDATE;
- pf->link_check_timeout = jiffies;
- /* WoL defaults to disabled */
- pf->wol_en = false;
+ /* NVM bit on means WoL disabled for the port */
+ i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
+ if ((1 << hw->port) & wol_nvm_bits || hw->partition_id != 1)
+ pf->wol_en = false;
+ else
+ pf->wol_en = true;
device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
/* set up the main switch operations */
dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
goto err_vsis;
}
+
+ /* Make sure flow control is set according to current settings */
+ err = i40e_set_fc(hw, &set_fc_aq_fail, true);
+ if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_GET)
+ dev_dbg(&pf->pdev->dev,
+ "Set fc with err %s aq_err %s on get_phy_cap\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_SET)
+ dev_dbg(&pf->pdev->dev,
+ "Set fc with err %s aq_err %s on set_phy_config\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_UPDATE)
+ dev_dbg(&pf->pdev->dev,
+ "Set fc with err %s aq_err %s on get_link_info\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+
/* if FDIR VSI was set up, start it now */
for (i = 0; i < pf->num_alloc_vsi; i++) {
if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
i40e_fcoe_vsi_setup(pf);
#endif
- /* Get the negotiated link width and speed from PCI config space */
- pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA, &link_status);
+#define PCI_SPEED_SIZE 8
+#define PCI_WIDTH_SIZE 8
+ /* Devices on the IOSF bus do not have this information
+ * and will report PCI Gen 1 x 1 by default so don't bother
+ * checking them.
+ */
+ if (!(pf->flags & I40E_FLAG_NO_PCI_LINK_CHECK)) {
+ char speed[PCI_SPEED_SIZE] = "Unknown";
+ char width[PCI_WIDTH_SIZE] = "Unknown";
- i40e_set_pci_config_data(hw, link_status);
+ /* Get the negotiated link width and speed from PCI config
+ * space
+ */
+ pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA,
+ &link_status);
+
+ i40e_set_pci_config_data(hw, link_status);
+
+ switch (hw->bus.speed) {
+ case i40e_bus_speed_8000:
+ strncpy(speed, "8.0", PCI_SPEED_SIZE); break;
+ case i40e_bus_speed_5000:
+ strncpy(speed, "5.0", PCI_SPEED_SIZE); break;
+ case i40e_bus_speed_2500:
+ strncpy(speed, "2.5", PCI_SPEED_SIZE); break;
+ default:
+ break;
+ }
+ switch (hw->bus.width) {
+ case i40e_bus_width_pcie_x8:
+ strncpy(width, "8", PCI_WIDTH_SIZE); break;
+ case i40e_bus_width_pcie_x4:
+ strncpy(width, "4", PCI_WIDTH_SIZE); break;
+ case i40e_bus_width_pcie_x2:
+ strncpy(width, "2", PCI_WIDTH_SIZE); break;
+ case i40e_bus_width_pcie_x1:
+ strncpy(width, "1", PCI_WIDTH_SIZE); break;
+ default:
+ break;
+ }
- dev_info(&pdev->dev, "PCI-Express: %s %s\n",
- (hw->bus.speed == i40e_bus_speed_8000 ? "Speed 8.0GT/s" :
- hw->bus.speed == i40e_bus_speed_5000 ? "Speed 5.0GT/s" :
- hw->bus.speed == i40e_bus_speed_2500 ? "Speed 2.5GT/s" :
- "Unknown"),
- (hw->bus.width == i40e_bus_width_pcie_x8 ? "Width x8" :
- hw->bus.width == i40e_bus_width_pcie_x4 ? "Width x4" :
- hw->bus.width == i40e_bus_width_pcie_x2 ? "Width x2" :
- hw->bus.width == i40e_bus_width_pcie_x1 ? "Width x1" :
- "Unknown"));
+ dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n",
+ speed, width);
- if (hw->bus.width < i40e_bus_width_pcie_x8 ||
- hw->bus.speed < i40e_bus_speed_8000) {
- dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
- dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
+ if (hw->bus.width < i40e_bus_width_pcie_x8 ||
+ hw->bus.speed < i40e_bus_speed_8000) {
+ dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
+ dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
+ }
}
/* get the requested speeds from the fw */
err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
if (err)
- dev_info(&pf->pdev->dev,
- "get phy capabilities failed, err %s aq_err %s, advertised speed settings may not be correct\n",
- i40e_stat_str(&pf->hw, err),
- i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ dev_dbg(&pf->pdev->dev, "get requested speeds ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
+ /* get the supported phy types from the fw */
+ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
+ if (err)
+ dev_dbg(&pf->pdev->dev, "get supported phy types ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ pf->hw.phy.phy_types = le32_to_cpu(abilities.phy_type);
+
+ /* Add a filter to drop all Flow control frames from any VSI from being
+ * transmitted. By doing so we stop a malicious VF from sending out
+ * PAUSE or PFC frames and potentially controlling traffic for other
+ * PF/VF VSIs.
+ * The FW can still send Flow control frames if enabled.
+ */
+ i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
+ pf->main_vsi_seid);
+
/* print a string summarizing features */
i40e_print_features(pf);
static void i40e_remove(struct pci_dev *pdev)
{
struct i40e_pf *pf = pci_get_drvdata(pdev);
+ struct i40e_hw *hw = &pf->hw;
i40e_status ret_code;
int i;
i40e_ptp_stop(pf);
+ /* Disable RSS in hw */
+ wr32(hw, I40E_PFQF_HENA(0), 0);
+ wr32(hw, I40E_PFQF_HENA(1), 0);
+
/* no more scheduling of any task */
set_bit(__I40E_DOWN, &pf->state);
del_timer_sync(&pf->service_timer);
int err;
u32 reg;
- dev_info(&pdev->dev, "%s\n", __func__);
+ dev_dbg(&pdev->dev, "%s\n", __func__);
if (pci_enable_device_mem(pdev)) {
dev_info(&pdev->dev,
"Cannot re-enable PCI device after reset.\n");
{
struct i40e_pf *pf = pci_get_drvdata(pdev);
- dev_info(&pdev->dev, "%s\n", __func__);
+ dev_dbg(&pdev->dev, "%s\n", __func__);
if (test_bit(__I40E_SUSPENDED, &pf->state))
return;
rtnl_lock();
i40e_handle_reset_warning(pf);
- rtnl_lock();
+ rtnl_unlock();
}
/**
err = pci_enable_device_mem(pdev);
if (err) {
- dev_err(&pdev->dev,
- "%s: Cannot enable PCI device from suspend\n",
- __func__);
+ dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
return err;
}
pci_set_master(pdev);
desc->l4i_chk = 0;
desc->byte_cnt = length;
- desc->buf_ptr = dma_map_single(dev->dev.parent, data,
- length, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) {
- WARN(1, "dma_map_single failed!\n");
- return -ENOMEM;
+
+ if (length <= 8 && (uintptr_t)data & 0x7) {
+ /* Copy unaligned small data fragment to TSO header data area */
+ memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
+ data, length);
+ desc->buf_ptr = txq->tso_hdrs_dma
+ + txq->tx_curr_desc * TSO_HEADER_SIZE;
+ } else {
+ /* Alignment is okay, map buffer and hand off to hardware */
+ txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
+ desc->buf_ptr = dma_map_single(dev->dev.parent, data,
+ length, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev->dev.parent,
+ desc->buf_ptr))) {
+ WARN(1, "dma_map_single failed!\n");
+ return -ENOMEM;
+ }
}
cmd_sts = BUFFER_OWNED_BY_DMA;
}
static inline void
- txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
+ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length,
+ u32 *first_cmd_sts, bool first_desc)
{
struct mv643xx_eth_private *mp = txq_to_mp(txq);
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int ret;
u32 cmd_csum = 0;
u16 l4i_chk = 0;
+ u32 cmd_sts;
tx_index = txq->tx_curr_desc;
desc = &txq->tx_desc_area[tx_index];
desc->byte_cnt = hdr_len;
desc->buf_ptr = txq->tso_hdrs_dma +
txq->tx_curr_desc * TSO_HEADER_SIZE;
- desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC |
+ cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC |
GEN_CRC;
+ /* Defer updating the first command descriptor until all
+ * following descriptors have been written.
+ */
+ if (first_desc)
+ *first_cmd_sts = cmd_sts;
+ else
+ desc->cmd_sts = cmd_sts;
+
txq->tx_curr_desc++;
if (txq->tx_curr_desc == txq->tx_ring_size)
txq->tx_curr_desc = 0;
int desc_count = 0;
struct tso_t tso;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ struct tx_desc *first_tx_desc;
+ u32 first_cmd_sts = 0;
/* Count needed descriptors */
if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) {
return -EBUSY;
}
+ first_tx_desc = &txq->tx_desc_area[txq->tx_curr_desc];
+
/* Initialize the TSO handler, and prepare the first payload */
tso_start(skb, &tso);
total_len = skb->len - hdr_len;
while (total_len > 0) {
+ bool first_desc = (desc_count == 0);
char *hdr;
data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
/* prepare packet headers: MAC + IP + TCP */
hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE;
tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
- txq_put_hdr_tso(skb, txq, data_left);
+ txq_put_hdr_tso(skb, txq, data_left, &first_cmd_sts,
+ first_desc);
while (data_left > 0) {
int size;
__skb_queue_tail(&txq->tx_skb, skb);
skb_tx_timestamp(skb);
+ /* ensure all other descriptors are written before first cmd_sts */
+ wmb();
+ first_tx_desc->cmd_sts = first_cmd_sts;
+
/* clear TX_END status */
mp->work_tx_end &= ~(1 << txq->index);
sizeof(drvinfo->version));
strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
- drvinfo->n_stats = ARRAY_SIZE(mv643xx_eth_stats);
}
static int mv643xx_eth_nway_reset(struct net_device *dev)
struct netdev_hw_addr *ha;
int i;
- if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
- int port_num;
- u32 accept;
+ if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI))
+ goto promiscuous;
-oom:
- port_num = mp->port_num;
- accept = 0x01010101;
- for (i = 0; i < 0x100; i += 4) {
- wrl(mp, SPECIAL_MCAST_TABLE(port_num) + i, accept);
- wrl(mp, OTHER_MCAST_TABLE(port_num) + i, accept);
- }
- return;
- }
-
- mc_spec = kzalloc(0x200, GFP_ATOMIC);
- if (mc_spec == NULL)
- goto oom;
- mc_other = mc_spec + (0x100 >> 2);
+ /* Allocate both mc_spec and mc_other tables */
+ mc_spec = kcalloc(128, sizeof(u32), GFP_ATOMIC);
+ if (!mc_spec)
+ goto promiscuous;
+ mc_other = &mc_spec[64];
netdev_for_each_mc_addr(ha, dev) {
u8 *a = ha->addr;
u32 *table;
- int entry;
+ u8 entry;
if (memcmp(a, "\x01\x00\x5e\x00\x00", 5) == 0) {
table = mc_spec;
table[entry >> 2] |= 1 << (8 * (entry & 3));
}
- for (i = 0; i < 0x100; i += 4) {
- wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i, mc_spec[i >> 2]);
- wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i, mc_other[i >> 2]);
+ for (i = 0; i < 64; i++) {
+ wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
+ mc_spec[i]);
+ wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
+ mc_other[i]);
}
kfree(mc_spec);
+ return;
+
+promiscuous:
+ for (i = 0; i < 64; i++) {
+ wrl(mp, SPECIAL_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
+ 0x01010101u);
+ wrl(mp, OTHER_MCAST_TABLE(mp->port_num) + i * sizeof(u32),
+ 0x01010101u);
+ }
}
static void mv643xx_eth_set_rx_mode(struct net_device *dev)
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/pm_runtime.h>
+#include <linux/gpio.h>
#include <linux/of.h>
+ #include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/of_device.h>
#include <linux/if_vlan.h>
spinlock_t lock;
struct platform_device *pdev;
struct net_device *ndev;
+ struct device_node *phy_node;
struct napi_struct napi_rx;
struct napi_struct napi_tx;
struct device *dev;
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
- slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
+ if (priv->phy_node)
+ slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
+ &cpsw_adjust_link, 0, slave->data->phy_if);
+ else
+ slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
&cpsw_adjust_link, slave->data->phy_if);
if (IS_ERR(slave->phy)) {
dev_err(priv->dev, "phy %s not found on slave %d\n",
strlcpy(info->driver, "cpsw", sizeof(info->driver));
strlcpy(info->version, "1.0", sizeof(info->version));
strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info));
- info->regdump_len = cpsw_get_regs_len(ndev);
}
static u32 cpsw_get_msglevel(struct net_device *ndev)
slave->port_vlan = data->dual_emac_res_vlan;
}
- static int cpsw_probe_dt(struct cpsw_platform_data *data,
+ static int cpsw_probe_dt(struct cpsw_priv *priv,
struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
struct device_node *slave_node;
+ struct cpsw_platform_data *data = &priv->data;
int i = 0, ret;
u32 prop;
if (strcmp(slave_node->name, "slave"))
continue;
+ priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) {
dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i);
}
snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
PHY_ID_FMT, mdio->name, phyid);
-
slave_data->phy_if = of_get_phy_mode(slave_node);
if (slave_data->phy_if < 0) {
dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
if (mac_addr) {
memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
} else {
- if (of_machine_is_compatible("ti,am33xx")) {
- ret = cpsw_am33xx_cm_get_macid(&pdev->dev,
- 0x630, i,
- slave_data->mac_addr);
- if (ret)
- return ret;
- }
+ ret = ti_cm_get_macid(&pdev->dev, i,
+ slave_data->mac_addr);
+ if (ret)
+ return ret;
}
if (data->dual_emac) {
if (of_property_read_u32(slave_node, "dual_emac_res_vlan",
void __iomem *ss_regs;
struct resource *res, *ss_res;
const struct of_device_id *of_id;
+ struct gpio_descs *mode;
u32 slave_offset, sliver_offset, slave_size;
int ret = 0, i;
int irq;
goto clean_ndev_ret;
}
+ mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
+ if (IS_ERR(mode)) {
+ ret = PTR_ERR(mode);
+ dev_err(&pdev->dev, "gpio request failed, ret %d\n", ret);
+ goto clean_ndev_ret;
+ }
+
/*
* This may be required here for child devices.
*/
/* Select default pin state */
pinctrl_pm_select_default_state(&pdev->dev);
- if (cpsw_probe_dt(&priv->data, pdev)) {
+ if (cpsw_probe_dt(priv, pdev)) {
dev_err(&pdev->dev, "cpsw: platform data missing\n");
ret = -ENODEV;
goto clean_runtime_disable_ret;
.remove = cpsw_remove,
};
-static int __init cpsw_init(void)
-{
- return platform_driver_register(&cpsw_driver);
-}
-late_initcall(cpsw_init);
-
-static void __exit cpsw_exit(void)
-{
- platform_driver_unregister(&cpsw_driver);
-}
-module_exit(cpsw_exit);
+module_platform_driver(cpsw_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cyril Chemparathy <cyril@ti.com>");
---help---
Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
+config BCM_NET_PHYLIB
+ tristate
+
config BROADCOM_PHY
tristate "Drivers for Broadcom PHYs"
+ select BCM_NET_PHYLIB
---help---
Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
BCM5481 and BCM5482 PHYs.
+config BCM_CYGNUS_PHY
+ tristate "Drivers for Broadcom Cygnus SoC internal PHY"
+ depends on ARCH_BCM_CYGNUS || COMPILE_TEST
+ depends on MDIO_BCM_IPROC
+ select BCM_NET_PHYLIB
+ ---help---
+ This PHY driver is for the 1G internal PHYs of the Broadcom
+ Cygnus Family SoC.
+
+ Currently supports internal PHY's used in the BCM11300,
+ BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
+ BCM58303 & BCM58305 Broadcom Cygnus SoCs.
+
config BCM63XX_PHY
tristate "Drivers for Broadcom 63xx SOCs internal PHY"
depends on BCM63XX
+ select BCM_NET_PHYLIB
---help---
Currently supports the 6348 and 6358 PHYs.
config BCM7XXX_PHY
tristate "Drivers for Broadcom 7xxx SOCs internal PHYs"
+ select BCM_NET_PHYLIB
---help---
Currently supports the BCM7366, BCM7439, BCM7445, and
40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
---help---
Supports the KSZ9021, VSC8201, KS8001 PHYs.
+ config DP83848_PHY
+ tristate "Driver for Texas Instruments DP83848 PHY"
+ ---help---
+ Supports the DP83848 PHY.
+
config DP83867_PHY
tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
---help---
This hardware can be found in the Broadcom GENET Ethernet MAC
controllers as well as some Broadcom Ethernet switches such as the
Starfighter 2 switches.
+
+config MDIO_BCM_IPROC
+ tristate "Broadcom iProc MDIO bus controller"
+ depends on ARCH_BCM_IPROC || COMPILE_TEST
+ depends on HAS_IOMEM && OF_MDIO
+ help
+ This module provides a driver for the MDIO busses found in the
+ Broadcom iProc SoC's.
+
endif # PHYLIB
config MICREL_KS8995MA
obj-$(CONFIG_SMSC_PHY) += smsc.o
obj-$(CONFIG_TERANETICS_PHY) += teranetics.o
obj-$(CONFIG_VITESSE_PHY) += vitesse.o
+obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o
obj-$(CONFIG_BROADCOM_PHY) += broadcom.o
obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o
obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o
obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o
+obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o
obj-$(CONFIG_ICPLUS_PHY) += icplus.o
obj-$(CONFIG_REALTEK_PHY) += realtek.o
obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o
obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o
obj-$(CONFIG_NATIONAL_PHY) += national.o
obj-$(CONFIG_DP83640_PHY) += dp83640.o
+ obj-$(CONFIG_DP83848_PHY) += dp83848.o
obj-$(CONFIG_DP83867_PHY) += dp83867.o
obj-$(CONFIG_STE10XP) += ste10Xp.o
obj-$(CONFIG_MICREL_PHY) += micrel.o
obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o
obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o
obj-$(CONFIG_MICROCHIP_PHY) += microchip.o
+obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o
u8 *buf = intf->cur_altsetting->extra;
int len = intf->cur_altsetting->extralen;
struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
- struct usb_cdc_union_desc *cdc_union = NULL;
- struct usb_cdc_ether_desc *cdc_ether = NULL;
- u32 found = 0;
+ struct usb_cdc_union_desc *cdc_union;
+ struct usb_cdc_ether_desc *cdc_ether;
struct usb_driver *driver = driver_of(intf);
struct qmi_wwan_state *info = (void *)&dev->data;
+ struct usb_cdc_parsed_header hdr;
BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) <
sizeof(struct qmi_wwan_state)));
info->data = intf;
/* and a number of CDC descriptors */
- while (len > 3) {
- struct usb_descriptor_header *h = (void *)buf;
-
- /* ignore any misplaced descriptors */
- if (h->bDescriptorType != USB_DT_CS_INTERFACE)
- goto next_desc;
-
- /* buf[2] is CDC descriptor subtype */
- switch (buf[2]) {
- case USB_CDC_HEADER_TYPE:
- if (found & 1 << USB_CDC_HEADER_TYPE) {
- dev_dbg(&intf->dev, "extra CDC header\n");
- goto err;
- }
- if (h->bLength != sizeof(struct usb_cdc_header_desc)) {
- dev_dbg(&intf->dev, "CDC header len %u\n",
- h->bLength);
- goto err;
- }
- break;
- case USB_CDC_UNION_TYPE:
- if (found & 1 << USB_CDC_UNION_TYPE) {
- dev_dbg(&intf->dev, "extra CDC union\n");
- goto err;
- }
- if (h->bLength != sizeof(struct usb_cdc_union_desc)) {
- dev_dbg(&intf->dev, "CDC union len %u\n",
- h->bLength);
- goto err;
- }
- cdc_union = (struct usb_cdc_union_desc *)buf;
- break;
- case USB_CDC_ETHERNET_TYPE:
- if (found & 1 << USB_CDC_ETHERNET_TYPE) {
- dev_dbg(&intf->dev, "extra CDC ether\n");
- goto err;
- }
- if (h->bLength != sizeof(struct usb_cdc_ether_desc)) {
- dev_dbg(&intf->dev, "CDC ether len %u\n",
- h->bLength);
- goto err;
- }
- cdc_ether = (struct usb_cdc_ether_desc *)buf;
- break;
- }
-
- /* Remember which CDC functional descriptors we've seen. Works
- * for all types we care about, of which USB_CDC_ETHERNET_TYPE
- * (0x0f) is the highest numbered
- */
- if (buf[2] < 32)
- found |= 1 << buf[2];
-
-next_desc:
- len -= h->bLength;
- buf += h->bLength;
- }
+ cdc_parse_cdc_header(&hdr, intf, buf, len);
+ cdc_union = hdr.usb_cdc_union_desc;
+ cdc_ether = hdr.usb_cdc_ether_desc;
/* Use separate control and data interfaces if we found a CDC Union */
if (cdc_union) {
{QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */
{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
{QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */
+ {QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
static const u8 all_zeros_mac[ETH_ALEN];
-static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
- bool no_share, u32 flags);
+static int vxlan_sock_add(struct vxlan_dev *vxlan);
/* per-network namespace private data for this module */
struct vxlan_net {
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
{
struct vxlan_dev *vxlan;
+ unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
/* The vxlan_sock is only used by dev, leaving group has
* no effect on other vxlan devices.
*/
- if (atomic_read(&dev->vn_sock->refcnt) == 1)
+ if (family == AF_INET && dev->vn4_sock &&
+ atomic_read(&dev->vn4_sock->refcnt) == 1)
return false;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6 && dev->vn6_sock &&
+ atomic_read(&dev->vn6_sock->refcnt) == 1)
+ return false;
+#endif
list_for_each_entry(vxlan, &vn->vxlan_list, next) {
if (!netif_running(vxlan->dev) || vxlan == dev)
continue;
- if (vxlan->vn_sock != dev->vn_sock)
+ if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
continue;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
+ continue;
+#endif
if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
&dev->default_dst.remote_ip))
return false;
}
-static void vxlan_sock_release(struct vxlan_sock *vs)
+static void __vxlan_sock_release(struct vxlan_sock *vs)
{
- struct sock *sk = vs->sock->sk;
- struct net *net = sock_net(sk);
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_net *vn;
+ if (!vs)
+ return;
if (!atomic_dec_and_test(&vs->refcnt))
return;
+ vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
vxlan_notify_del_rx_port(vs);
queue_work(vxlan_wq, &vs->del_work);
}
+static void vxlan_sock_release(struct vxlan_dev *vxlan)
+{
+ __vxlan_sock_release(vxlan->vn4_sock);
+#if IS_ENABLED(CONFIG_IPV6)
+ __vxlan_sock_release(vxlan->vn6_sock);
+#endif
+}
+
/* Update multicast group membership when first VNI on
* multicast address is brought up
*/
static int vxlan_igmp_join(struct vxlan_dev *vxlan)
{
- struct vxlan_sock *vs = vxlan->vn_sock;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
int ret = -EINVAL;
- lock_sock(sk);
if (ip->sa.sa_family == AF_INET) {
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
.imr_ifindex = ifindex,
};
+ sk = vxlan->vn4_sock->sock->sk;
+ lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
+ release_sock(sk);
#if IS_ENABLED(CONFIG_IPV6)
} else {
+ sk = vxlan->vn6_sock->sock->sk;
+ lock_sock(sk);
ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
&ip->sin6.sin6_addr);
+ release_sock(sk);
#endif
}
- release_sock(sk);
return ret;
}
/* Inverse of vxlan_igmp_join when last VNI is brought down */
static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
{
- struct vxlan_sock *vs = vxlan->vn_sock;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
int ret = -EINVAL;
- lock_sock(sk);
if (ip->sa.sa_family == AF_INET) {
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
.imr_ifindex = ifindex,
};
+ sk = vxlan->vn4_sock->sock->sk;
+ lock_sock(sk);
ret = ip_mc_leave_group(sk, &mreq);
+ release_sock(sk);
#if IS_ENABLED(CONFIG_IPV6)
} else {
+ sk = vxlan->vn6_sock->sock->sk;
+ lock_sock(sk);
ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
&ip->sin6.sin6_addr);
+ release_sock(sk);
#endif
}
- release_sock(sk);
return ret;
}
{
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct sock *sk = vxlan->vn_sock->sock->sk;
- unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
+ struct sock *sk;
struct rtable *rt = NULL;
const struct iphdr *old_iph;
struct flowi4 fl4;
dev->name);
goto drop;
}
- if (family != ip_tunnel_info_af(info))
- goto drop;
-
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = be64_to_cpu(info->key.tun_id);
- remote_ip.sa.sa_family = family;
- if (family == AF_INET)
+ remote_ip.sa.sa_family = ip_tunnel_info_af(info);
+ if (remote_ip.sa.sa_family == AF_INET)
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
}
if (dst->sa.sa_family == AF_INET) {
+ if (!vxlan->vn4_sock)
+ goto drop;
+ sk = vxlan->vn4_sock->sock->sk;
+
if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
df = htons(IP_DF);
struct flowi6 fl6;
u32 rt6i_flags;
+ if (!vxlan->vn6_sock)
+ goto drop;
+ sk = vxlan->vn6_sock->sock->sk;
+
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
fl6.daddr = dst->sin6.sin6_addr;
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
__u32 vni = vxlan->default_dst.remote_vni;
- vxlan->vn_sock = vs;
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
spin_unlock(&vn->sock_lock);
static int vxlan_open(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_sock *vs;
- int ret = 0;
+ int ret;
- vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port,
- vxlan->cfg.no_share, vxlan->flags);
- if (IS_ERR(vs))
- return PTR_ERR(vs);
-
- vxlan_vs_add_dev(vs, vxlan);
+ ret = vxlan_sock_add(vxlan);
+ if (ret < 0)
+ return ret;
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
ret = vxlan_igmp_join(vxlan);
if (ret == -EADDRINUSE)
ret = 0;
if (ret) {
- vxlan_sock_release(vs);
+ vxlan_sock_release(vxlan);
return ret;
}
}
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- struct vxlan_sock *vs = vxlan->vn_sock;
int ret = 0;
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
del_timer_sync(&vxlan->age_timer);
vxlan_flush(vxlan);
- vxlan_sock_release(vs);
+ vxlan_sock_release(vxlan);
return ret;
}
return 0;
}
+ static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
+ struct ip_tunnel_info *info,
+ __be16 sport, __be16 dport)
+ {
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_tos = RT_TOS(info->key.tos);
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.daddr = info->key.u.ipv4.dst;
+
+ rt = ip_route_output_key(vxlan->net, &fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+ ip_rt_put(rt);
+
+ info->key.u.ipv4.src = fl4.saddr;
+ info->key.tp_src = sport;
+ info->key.tp_dst = dport;
+ return 0;
+ }
+
+ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ {
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ __be16 sport, dport;
+
+ sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+ vxlan->cfg.port_max, true);
+ dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
+
+ if (ip_tunnel_info_af(info) == AF_INET)
+ return egress_ipv4_tun_info(dev, skb, info, sport, dport);
+ return -EINVAL;
+ }
+
static const struct net_device_ops vxlan_netdev_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
+ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
/* Info for udev, that this is a virtual tunnel endpoint */
}
/* Create new listen socket if needed */
-static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
- u32 flags)
+static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
+ __be16 port, u32 flags)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
unsigned int h;
- bool ipv6 = !!(flags & VXLAN_F_IPV6);
struct udp_tunnel_sock_cfg tunnel_cfg;
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
return vs;
}
-static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
- bool no_share, u32 flags)
+static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- struct vxlan_sock *vs;
- bool ipv6 = flags & VXLAN_F_IPV6;
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_sock *vs = NULL;
- if (!no_share) {
+ if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
- flags);
- if (vs) {
- if (!atomic_add_unless(&vs->refcnt, 1, 0))
- vs = ERR_PTR(-EBUSY);
+ vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
+ vxlan->cfg.dst_port, vxlan->flags);
+ if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) {
spin_unlock(&vn->sock_lock);
- return vs;
+ return -EBUSY;
}
spin_unlock(&vn->sock_lock);
}
+ if (!vs)
+ vs = vxlan_socket_create(vxlan->net, ipv6,
+ vxlan->cfg.dst_port, vxlan->flags);
+ if (IS_ERR(vs))
+ return PTR_ERR(vs);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6)
+ vxlan->vn6_sock = vs;
+ else
+#endif
+ vxlan->vn4_sock = vs;
+ vxlan_vs_add_dev(vs, vxlan);
+ return 0;
+}
- return vxlan_socket_create(net, port, flags);
+static int vxlan_sock_add(struct vxlan_dev *vxlan)
+{
+ bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
+ bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
+ int ret = 0;
+
+ vxlan->vn4_sock = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ vxlan->vn6_sock = NULL;
+ if (ipv6 || metadata)
+ ret = __vxlan_sock_add(vxlan, true);
+#endif
+ if (!ret && (!ipv6 || metadata))
+ ret = __vxlan_sock_add(vxlan, false);
+ if (ret < 0)
+ vxlan_sock_release(vxlan);
+ return ret;
}
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
+ unsigned short needed_headroom = ETH_HLEN;
int err;
bool use_ipv6 = false;
__be16 default_port = vxlan->cfg.dst_port;
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
use_ipv6 = true;
+ vxlan->flags |= VXLAN_F_IPV6;
}
if (conf->remote_ifindex) {
pr_info("IPv6 is disabled via sysctl\n");
return -EPERM;
}
- vxlan->flags |= VXLAN_F_IPV6;
}
#endif
if (!conf->mtu)
dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
- dev->needed_headroom = lowerdev->hard_header_len +
- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
- } else if (use_ipv6) {
- vxlan->flags |= VXLAN_F_IPV6;
- dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
- } else {
- dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
+ needed_headroom = lowerdev->hard_header_len;
}
+ if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
+ needed_headroom += VXLAN6_HEADROOM;
+ else
+ needed_headroom += VXLAN_HEADROOM;
+ dev->needed_headroom = needed_headroom;
+
memcpy(&vxlan->cfg, conf, sizeof(*conf));
if (!vxlan->cfg.dst_port)
vxlan->cfg.dst_port = default_port;
}
static int xennet_create_queues(struct netfront_info *info,
- unsigned int num_queues)
+ unsigned int *num_queues)
{
unsigned int i;
int ret;
- info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
+ info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
GFP_KERNEL);
if (!info->queues)
return -ENOMEM;
rtnl_lock();
- for (i = 0; i < num_queues; i++) {
+ for (i = 0; i < *num_queues; i++) {
struct netfront_queue *queue = &info->queues[i];
queue->id = i;
if (ret < 0) {
dev_warn(&info->netdev->dev,
"only created %d queues\n", i);
- num_queues = i;
+ *num_queues = i;
break;
}
napi_enable(&queue->napi);
}
- netif_set_real_num_tx_queues(info->netdev, num_queues);
+ netif_set_real_num_tx_queues(info->netdev, *num_queues);
rtnl_unlock();
- if (num_queues == 0) {
+ if (*num_queues == 0) {
dev_err(&info->netdev->dev, "no queues\n");
return -EINVAL;
}
if (info->queues)
xennet_destroy_queues(info);
- err = xennet_create_queues(info, num_queues);
+ err = xennet_create_queues(info, &num_queues);
if (err < 0)
goto destroy_ring;
goto destroy_ring;
}
- if (num_queues == 1) {
- err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
- if (err)
- goto abort_transaction_no_dev_fatal;
- } else {
+ if (xenbus_exists(XBT_NIL,
+ info->xbdev->otherend, "multi-queue-max-queues")) {
/* Write the number of queues */
- err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues",
- "%u", num_queues);
+ err = xenbus_printf(xbt, dev->nodename,
+ "multi-queue-num-queues", "%u", num_queues);
if (err) {
message = "writing multi-queue-num-queues";
goto abort_transaction_no_dev_fatal;
}
+ }
+ if (num_queues == 1) {
+ err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
+ if (err)
+ goto abort_transaction_no_dev_fatal;
+ } else {
/* Write the keys for each queue */
for (i = 0; i < num_queues; ++i) {
queue = &info->queues[i];
* int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
* int max_tx_rate);
* int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
+ * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting);
* int (*ndo_get_vf_config)(struct net_device *dev,
* int vf, struct ifla_vf_info *ivf);
* int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
* This function is used to pass protocol port error state information
* to the switch driver. The switch driver can react to the proto_down
* by doing a phys down on the associated switch port.
+ * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
+ * This function is used to get egress tunnel information for given skb.
+ * This is useful for retrieving outer tunnel header parameters while
+ * sampling packet.
*
*/
struct net_device_ops {
int max_tx_rate);
int (*ndo_set_vf_spoofchk)(struct net_device *dev,
int vf, bool setting);
+ int (*ndo_set_vf_trust)(struct net_device *dev,
+ int vf, bool setting);
int (*ndo_get_vf_config)(struct net_device *dev,
int vf,
struct ifla_vf_info *ivf);
int (*ndo_get_iflink)(const struct net_device *dev);
int (*ndo_change_proto_down)(struct net_device *dev,
bool proto_down);
+ int (*ndo_fill_metadata_dst)(struct net_device *dev,
+ struct sk_buff *skb);
};
/**
* @IFF_LIVE_ADDR_CHANGE: device supports hardware address
* change when it's running
* @IFF_MACVLAN: Macvlan device
- * @IFF_VRF_MASTER: device is a VRF master
+ * @IFF_L3MDEV_MASTER: device is an L3 master device
* @IFF_NO_QUEUE: device can run without qdisc attached
* @IFF_OPENVSWITCH: device is a Open vSwitch master
+ * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
IFF_XMIT_DST_RELEASE_PERM = 1<<17,
IFF_IPVLAN_MASTER = 1<<18,
IFF_IPVLAN_SLAVE = 1<<19,
- IFF_VRF_MASTER = 1<<20,
+ IFF_L3MDEV_MASTER = 1<<20,
IFF_NO_QUEUE = 1<<21,
IFF_OPENVSWITCH = 1<<22,
+ IFF_L3MDEV_SLAVE = 1<<23,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER
#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE
-#define IFF_VRF_MASTER IFF_VRF_MASTER
+#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER
#define IFF_NO_QUEUE IFF_NO_QUEUE
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
* @dn_ptr: DECnet specific data
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
- * @vrf_ptr: VRF specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
*
* @last_rx: Time of last Rx
#ifdef CONFIG_NET_SWITCHDEV
const struct switchdev_ops *switchdev_ops;
#endif
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ const struct l3mdev_ops *l3mdev_ops;
+#endif
const struct header_ops *header_ops;
struct dn_dev __rcu *dn_ptr;
struct inet6_dev __rcu *ip6_ptr;
void *ax25_ptr;
- struct net_vrf_dev __rcu *vrf_ptr;
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
#define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */
#define NETDEV_CHANGEINFODATA 0x0018
#define NETDEV_BONDING_INFO 0x0019
+#define NETDEV_PRECHANGEUPPER 0x001A
int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
void dev_remove_offload(struct packet_offload *po);
int dev_get_iflink(const struct net_device *dev);
+ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
int dev_close(struct net_device *dev);
int dev_close_many(struct list_head *head, bool unlink);
void dev_disable_lro(struct net_device *dev);
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb);
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb);
-static inline int dev_queue_xmit(struct sk_buff *skb)
-{
- return dev_queue_xmit_sk(skb->sk, skb);
-}
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
+int dev_queue_xmit(struct sk_buff *skb);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
int register_netdevice(struct net_device *dev);
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
int netif_rx(struct sk_buff *skb);
int netif_rx_ni(struct sk_buff *skb);
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb);
-static inline int netif_receive_skb(struct sk_buff *skb)
-{
- return netif_receive_skb_sk(skb->sk, skb);
-}
+int netif_receive_skb(struct sk_buff *skb);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
return dev->priv_flags & IFF_SUPP_NOFCS;
}
-static inline bool netif_is_vrf(const struct net_device *dev)
+static inline bool netif_is_l3_master(const struct net_device *dev)
{
- return dev->priv_flags & IFF_VRF_MASTER;
+ return dev->priv_flags & IFF_L3MDEV_MASTER;
+}
+
+static inline bool netif_is_l3_slave(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_L3MDEV_SLAVE;
}
static inline bool netif_is_bridge_master(const struct net_device *dev)
return dev->priv_flags & IFF_OPENVSWITCH;
}
-static inline bool netif_index_is_vrf(struct net *net, int ifindex)
-{
- bool rc = false;
-
-#if IS_ENABLED(CONFIG_NET_VRF)
- struct net_device *dev;
-
- if (ifindex == 0)
- return false;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- rc = netif_is_vrf(dev);
-
- rcu_read_unlock();
-#endif
- return rc;
-}
-
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev)
{
OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */
OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */
+ OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */
+ OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */
__OVS_TUNNEL_KEY_ATTR_MAX
};
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
* @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack
* table. This allows future packets for the same connection to be identified
- * as 'established' or 'related'.
+ * as 'established' or 'related'. The flow key for the current packet will
+ * retain the pre-commit connection state.
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
* @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
* mask, the corresponding bit in the value is copied to the connection
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
+ #include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
}
EXPORT_SYMBOL(dev_get_iflink);
+ /**
+ * dev_fill_metadata_dst - Retrieve tunnel egress information.
+ * @dev: targeted interface
+ * @skb: The packet.
+ *
+ * For better visibility of tunnel traffic OVS needs to retrieve
+ * egress tunnel information for a packet. Following API allows
+ * user to get this info.
+ */
+ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ {
+ struct ip_tunnel_info *info;
+
+ if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+ return -EINVAL;
+
+ info = skb_tunnel_info_unclone(skb);
+ if (!info)
+ return -ENOMEM;
+ if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+ return -EINVAL;
+
+ return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+ }
+ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
/**
* dev_loopback_xmit - loop back @skb
+ * @net: network namespace this loopback is happening in
+ * @sk: sk needed to be a netfilter okfn
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
new_index = skb_tx_hash(dev, skb);
if (queue_index != new_index && sk &&
+ sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
sk_tx_queue_set(sk, new_index);
return rc;
}
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit_sk);
+EXPORT_SYMBOL(dev_queue_xmit);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by cls/act_bpf, so
+ * we can safely push the L2 header back before
+ * redirecting to another netdev
+ */
+ __skb_push(skb, skb->mac_len);
+ skb_do_redirect(skb);
+ return NULL;
default:
break;
}
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
+int netif_receive_skb(struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb_sk);
+EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
struct rcu_head rcu;
};
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
- struct net_device *adj_dev,
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
struct list_head *adj_list)
{
struct netdev_adjacent *adj;
{
ASSERT_RTNL();
- return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
+ return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
struct netdev_adjacent *adj;
int ret;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
adj->ref_nr++;
{
struct netdev_adjacent *adj;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (!adj) {
pr_err("tried to remove device %s from %s\n",
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
+ if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
changeupper_info.master = master;
changeupper_info.linking = true;
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ &changeupper_info.info);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ return ret;
+
ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
master);
if (ret)
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
changeupper_info.linking = false;
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ &changeupper_info.info);
+
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
if (!lower_dev)
return NULL;
- lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+ lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
if (!lower)
return NULL;
return addr;
}
-static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
const struct net_device *dev, u8 flags)
{
struct fib_result res;
bool dev_match;
- struct net *net = dev_net(dev);
int ret __maybe_unused;
if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
if (FIB_RES_DEV(res) == dev)
dev_match = true;
#endif
- if (dev_match || flags & XT_RPFILTER_LOOSE)
- return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
- return dev_match;
+ return dev_match || flags & XT_RPFILTER_LOOSE;
}
static bool rpfilter_is_local(const struct sk_buff *skb)
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
- return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+ return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
}
static int rpfilter_check(const struct xt_mtchk_param *par)
}
static void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
- struct sock *sk)
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
- if (inet_rsk(req)->ecn_ok) {
+ if (inet_rsk(req)->ecn_ok)
th->ece = 1;
- if (tcp_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
- }
}
/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
}
/* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct sock *sk,
- struct request_sock *req,
- unsigned int mss, struct sk_buff *skb,
- struct tcp_out_options *opts,
- const struct tcp_md5sig_key *md5,
- struct tcp_fastopen_cookie *foc)
+static unsigned int tcp_synack_options(struct request_sock *req,
+ unsigned int mss, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ const struct tcp_md5sig_key *md5,
+ struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
/* Ok, it looks like it is advisable to defer. */
- if (cong_win < send_win && cong_win < skb->len)
+ if (cong_win < send_win && cong_win <= skb->len)
*is_cwnd_limited = true;
return true;
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
- is_cwnd_limited = true;
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
*/
- if (sk->sk_state == TCP_SYN_RECV)
+ if (tp->fastopen_rsk)
return false;
/* TLP is only scheduled when next timer event is RTO. */
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
+ if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
return false;
return false;
/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
- * for delayed ack when there's one outstanding packet.
+ * for delayed ack when there's one outstanding packet. If no RTT
+ * sample is available then probe after TCP_TIMEOUT_INIT.
*/
- timeout = rtt << 1;
+ timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
if (tp->packets_out == 1)
timeout = max_t(u32, timeout,
(rtt + (rtt >> 1) + TCP_DELACK_MAX));
net_dbg_ratelimited("retrans_out leaked\n");
}
#endif
- if (!tp->retrans_out)
- tp->lost_retrans_low = tp->snd_nxt;
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
tp->retrans_out += tcp_skb_pcount(skb);
if (!tp->retrans_stamp)
tp->retrans_stamp = tcp_skb_timestamp(skb);
- /* snd_nxt is stored to detect loss of retransmitted segment,
- * see tcp_input.c tcp_sacktag_write_queue().
- */
- TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
} else if (err != -EBUSY) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
* Allocate one skb and build a SYNACK packet.
* @dst is consumed : Caller should not use it again.
*/
-struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ bool attach_req)
{
- struct tcp_out_options opts;
struct inet_request_sock *ireq = inet_rsk(req);
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcphdr *th;
- struct sk_buff *skb;
+ const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *md5 = NULL;
+ struct tcp_out_options opts;
+ struct sk_buff *skb;
int tcp_header_size;
+ struct tcphdr *th;
+ u16 user_mss;
int mss;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
+ skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, MAX_TCP_HEADER);
+ if (attach_req) {
+ skb->destructor = sock_edemux;
+ sock_hold(req_to_sk(req));
+ skb->sk = req_to_sk(req);
+ } else {
+ /* sk is a const pointer, because we want to express multiple
+ * cpu might call us concurrently.
+ * sk->sk_wmem_alloc in an atomic, we can promote to rw.
+ */
+ skb_set_owner_w(skb, (struct sock *)sk);
+ }
skb_dst_set(skb, dst);
mss = dst_metric_advmss(dst);
- if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
- mss = tp->rx_opt.user_mss;
+ user_mss = READ_ONCE(tp->rx_opt.user_mss);
+ if (user_mss && user_mss < mss)
+ mss = user_mss;
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
rcu_read_lock();
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
- tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
- foc) + sizeof(*th);
+ skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+ tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
+ sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
- tcp_ecn_make_synack(req, th, sk);
+ tcp_ecn_make_synack(req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
- th->window = htons(min(req->rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ th->window = htons(min(req->rsk_rcv_wnd, 65535U));
+ tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
skb_mstamp_get(&skb->skb_mstamp);
- NET_INC_STATS_BH(sock_net(sk), mib);
+ NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
TCP_RTO_MAX);
}
-int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
+int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
{
const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
struct flowi fl;
int res;
- res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
+ tcp_rsk(req)->txhash = net_tx_rndhash();
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
mtu = dst_mtu(skb_dst(skb));
if (skb->len > mtu) {
+ skb->protocol = htons(ETH_P_IP);
+
if (skb->sk)
xfrm_local_error(skb, mtu);
else
return xfrm_output(sk, skb);
}
-static int __xfrm4_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
#ifdef CONFIG_NETFILTER
if (!x) {
IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
#endif
return x->outer_mode->afinfo->output_finish(sk, skb);
}
-int xfrm4_output(struct sock *sk, struct sk_buff *skb)
+int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
- NULL, skb_dst(skb)->dev, __xfrm4_output,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ __xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
return NULL;
}
+EXPORT_SYMBOL_GPL(fib6_get_table);
static void __net_init fib6_tables_init(struct net *net)
{
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
- return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ struct rt6_info *rt;
+
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ if (rt->rt6i_flags & RTF_REJECT &&
+ rt->dst.error == -EAGAIN) {
+ ip6_rt_put(rt);
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+
+ return &rt->dst;
}
static void __net_init fib6_tables_init(struct net *net)
#include <linux/errno.h>
#include <linux/kernel.h>
+ #include <linux/overflow-arith.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
+#include <net/l3mdev.h>
-static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(dev_net(dev), skb) &&
+ ((mroute6_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
*/
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- sk, newskb, NULL, newskb->dev,
+ net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
- IP6_INC_STATS(dev_net(dev), idev,
+ IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
}
- IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
- skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
IPV6_ADDR_SCOPE_NODELOCAL &&
}
rcu_read_unlock_bh();
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
-static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
- return ip6_fragment(sk, skb, ip6_finish_output2);
+ return ip6_fragment(net, sk, skb, ip6_finish_output2);
else
- return ip6_finish_output2(sk, skb);
+ return ip6_finish_output2(net, sk, skb);
}
-int ip6_output(struct sock *sk, struct sk_buff *skb)
+int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
if (unlikely(idev->cnf.disable_ipv6)) {
- IP6_INC_STATS(dev_net(dev), idev,
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
- NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
/*
- * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * Note : socket lock is not held for SYNACK packets, but might be modified
+ * by calls to skb_set_owner_w() and ipv6_local_error(),
+ * which are using proper atomic operations or spinlocks.
*/
-
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
struct ipv6_txoptions *opt, int tclass)
{
struct net *net = sock_net(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr;
}
consume_skb(skb);
skb = skb2;
- skb_set_owner_w(skb, sk);
+ /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
+ * it is safe to call in our context (socket lock not held)
+ */
+ skb_set_owner_w(skb, (struct sock *)sk);
}
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, dst->dev, dst_output_sk);
+ /* hooks should never assume socket lock is held.
+ * we promote our socket to non const
+ */
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, (struct sock *)sk, skb, NULL, dst->dev,
+ dst_output);
}
skb->dev = dst->dev;
- ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
+ /* ipv6_local_error() does not require socket lock,
+ * we promote our socket to non const
+ */
+ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
+
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
return 0;
}
-static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6_forward_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
skb_sender_cpu_clear(skb);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
- skb->dev, dst->dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
error:
skb_copy_secmark(to, from);
}
-int ip6_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
__be32 frag_id;
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
- struct net *net = dev_net(skb_dst(skb)->dev);
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
if (np->frag_size)
mtu = np->frag_size;
}
- mtu -= hlen + sizeof(struct frag_hdr);
+
+ if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
+ mtu <= 7)
+ goto fail_toobig;
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr);
ip6_copy_metadata(frag, skb);
}
- err = output(sk, skb);
+ err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
/*
* Put this fragment into the sending queue.
*/
- err = output(sk, frag);
+ err = output(net, sk, frag);
if (err)
goto fail;
return dst;
}
-static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
+static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
-struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
if (final_dst)
fl6->daddr = *final_dst;
if (!fl6->flowi6_oif)
- fl6->flowi6_oif = dst->dev->ifindex;
+ fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
int err;
- err = ip6_local_out(skb);
+ err = ip6_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
err = net_xmit_errno(err);
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard_sk,
+ .output = dst_discard_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
const struct flowi6 *fl6)
{
- unsigned int val = fl6->flowi6_proto;
-
- val ^= ipv6_addr_hash(&fl6->daddr);
- val ^= ipv6_addr_hash(&fl6->saddr);
-
- /* Work only if this not encapsulated */
- switch (fl6->flowi6_proto) {
- case IPPROTO_UDP:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- val ^= (__force u16)fl6->fl6_sport;
- val ^= (__force u16)fl6->fl6_dport;
- break;
-
- case IPPROTO_ICMPV6:
- val ^= (__force u16)fl6->fl6_icmp_type;
- val ^= (__force u16)fl6->fl6_icmp_code;
- break;
- }
- /* RFC6438 recommands to use flowlabel */
- val ^= (__force u32)fl6->flowlabel;
-
- /* Perhaps, we need to tune, this function? */
- val = val ^ (val >> 7) ^ (val >> 12);
- return val % candidate_count;
+ return get_hash_from_flowi6(fl6) % candidate_count;
}
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
if (dev->flags & IFF_LOOPBACK) {
if (!sprt->rt6i_idev ||
sprt->rt6i_idev->dev->ifindex != oif) {
- if (flags & RT6_LOOKUP_F_IFACE && oif)
+ if (flags & RT6_LOOKUP_F_IFACE)
continue;
- if (local && (!oif ||
- local->rt6i_idev->dev->ifindex == oif))
+ if (local &&
+ local->rt6i_idev->dev->ifindex == oif)
continue;
}
local = sprt;
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
dev_put(work->dev);
kfree(work);
}
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = skb->dev->ifindex,
+ .flowi6_iif = l3mdev_fib_oif(skb->dev),
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
+ struct dst_entry *dst;
int flags = 0;
+ bool any_src;
+ dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ if (dst)
+ return dst;
+
fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
- fl6->flowi6_oif)
+ (fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl6->saddr))
+ if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
new = &rt->dst;
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard_sk;
+ new->output = dst_discard_out;
dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
return -EINVAL;
}
-int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
- int err;
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
int addr_type;
+ int err = -EINVAL;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
- return -EINVAL;
+ goto out;
#ifndef CONFIG_IPV6_SUBTREES
if (cfg->fc_src_len)
- return -EINVAL;
+ goto out;
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard_sk;
+ rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
cfg->fc_nlinfo.nl_net = dev_net(dev);
- *rt_ret = rt;
-
- return 0;
+ return rt;
out:
if (dev)
dev_put(dev);
if (rt)
dst_free(&rt->dst);
- *rt_ret = NULL;
-
- return err;
+ return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg)
{
struct mx6_config mxc = { .mx = NULL, };
- struct rt6_info *rt = NULL;
+ struct rt6_info *rt;
int err;
- err = ip6_route_info_create(cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto out;
+ }
err = ip6_convert_metrics(&mxc, cfg);
if (err)
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_INFO,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = ifindex,
.fc_dst_len = prefixlen,
.fc_nlinfo.nl_net = net,
};
+ cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_DFLT,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
{
memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN;
cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
cfg->fc_metric = rtmsg->rtmsg_metric;
cfg->fc_expires = rtmsg->rtmsg_info;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
const struct in6_addr *addr,
bool anycast)
{
+ u32 tb_id;
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
DST_NOCOUNT);
rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
- rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+ rt->rt6i_table = fib6_get_table(net, tb_id);
rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- err = ip6_route_info_create(&r_cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(&r_cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto cleanup;
+ }
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
} else {
fl6.flowi6_oif = oif;
+ if (netif_index_is_l3_master(net, oif)) {
+ fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
+ FLOWI_FLAG_SKIP_NH_OIF;
+ }
+
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
if (xfrm6_local_dontfrag(skb))
xfrm6_local_rxpmtu(skb, mtu);
return xfrm_output(sk, skb);
}
-static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ return x->outer_mode->afinfo->output_finish(sk, skb);
+}
+
+static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
int mtu;
+ bool toobig;
#ifdef CONFIG_NETFILTER
if (!x) {
IP6CB(skb)->flags |= IP6SKB_REROUTED;
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
#endif
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ goto skip_frag;
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
mtu = dst_mtu(skb_dst(skb));
- if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+ toobig = skb->len > mtu && !skb_is_gso(skb);
+
+ if (toobig && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
- } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+ } else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
- if (x->props.mode == XFRM_MODE_TUNNEL &&
- ((skb->len > mtu && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)))) {
+ if (toobig || dst_allfrag(skb_dst(skb)))
- return ip6_fragment(sk, skb,
- x->outer_mode->afinfo->output_finish);
+ return ip6_fragment(net, sk, skb,
+ __xfrm6_output_finish);
- }
+
+ skip_frag:
return x->outer_mode->afinfo->output_finish(sk, skb);
}
-int xfrm6_output(struct sock *sk, struct sk_buff *skb)
+int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
- NULL, skb_dst(skb)->dev, __xfrm6_output,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ __xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
nexthdr = nh[nhoff];
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
return;
case IPPROTO_ICMPV6:
- if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
+ if (!onlyproto && (nh + offset + 2 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
u8 *icmp;
nh = skb_network_header(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
offset += ipv6_optlen(exthdr);
- if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+ if (!onlyproto && (nh + offset + 3 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
struct ip6_mh *mh;
nh = skb_network_header(skb);
#endif
synchronize_net();
nf_queue_nf_hook_drop(net, &entry->ops);
+ /* other cpu might still process nfqueue verdict that used reg */
+ synchronize_net();
kfree(entry);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(*elemp, skb, state);
+ verdict = (*elemp)->hook((*elemp)->priv, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
int err = nf_queue(skb, elem, state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
- if (err == -ECANCELED)
- goto next_hook;
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
}
EXPORT_SYMBOL(skb_make_writable);
+/* This needs to be compiled in any case to avoid dependencies between the
+ * nfnetlink_queue code and nf_conntrack.
+ */
+struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nfnl_ct_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
}
EXPORT_SYMBOL(nf_conntrack_destroy);
-struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
-EXPORT_SYMBOL_GPL(nfq_ct_hook);
-
/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
.id = NF_CT_DEFAULT_ZONE_ID,
return 0;
}
-static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
+static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
struct vport *vport = data->vport;
skb_pull(skb, hlen);
}
-static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
- __be16 ethertype)
+static void ovs_fragment(struct net *net, struct vport *vport,
+ struct sk_buff *skb, u16 mru, __be16 ethertype)
{
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
skb_dst_set_noref(skb, &ovs_dst);
IPCB(skb)->frag_max_size = mru;
- ip_do_fragment(skb->sk, skb, ovs_vport_output);
+ ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else if (ethertype == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
skb_dst_set_noref(skb, &ovs_rt.dst);
IP6CB(skb)->frag_max_size = mru;
- v6ops->fragment(skb->sk, skb, ovs_vport_output);
+ v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb);
} else if (mru <= vport->dev->mtu) {
+ struct net *net = read_pnet(&dp->net);
__be16 ethertype = key->eth.type;
if (!is_flow_key_valid(key)) {
ethertype = vlan_get_protocol(skb);
}
- ovs_fragment(vport, skb, mru, ethertype);
+ ovs_fragment(net, vport, skb, mru, ethertype);
} else {
kfree_skb(skb);
}
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
- struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
if (vport) {
int err;
- upcall.egress_tun_info = &info;
- err = ovs_vport_get_egress_tun_info(vport, skb,
- &upcall);
- if (err)
- upcall.egress_tun_info = NULL;
+ err = dev_fill_metadata_dst(vport->dev, skb);
+ if (!err)
+ upcall.egress_tun_info = skb_tunnel_info(skb);
}
break;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
state = ovs_ct_get_state(ctinfo);
+ if (!nf_ct_is_confirmed(ct))
+ state |= OVS_CS_F_NEW;
if (ct->master)
state |= OVS_CS_F_RELATED;
zone = nf_ct_zone(ct);
struct nf_conn *ct;
int err;
- if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
- return -ENOTSUPP;
-
/* The connection could be invalid, in which case set_label is no-op.*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
int err;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(skb, user);
+ err = ip_defrag(net, skb, user);
if (err)
return err;
struct sk_buff *reasm;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- reasm = nf_ct_frag6_gather(skb, user);
+ reasm = nf_ct_frag6_gather(net, skb, user);
if (!reasm)
return -EINPROGRESS;
{
struct nf_conntrack_tuple tuple;
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
return __nf_ct_expect_find(net, zone, &tuple);
}
return true;
}
- static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
+ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
}
}
+ ovs_ct_update_key(skb, key, true);
+
return 0;
}
err = __ovs_ct_lookup(net, key, info, skb);
if (err)
return err;
-
- ovs_ct_update_key(skb, key, true);
}
return 0;
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
return -EINVAL;
- ovs_ct_update_key(skb, key, true);
-
return 0;
}
case OVS_CT_ATTR_MARK: {
struct md_mark *mark = nla_data(a);
+ if (!mark->mask) {
+ OVS_NLERR(log, "ct_mark mask cannot be 0");
+ return -EINVAL;
+ }
info->mark = *mark;
break;
}
case OVS_CT_ATTR_LABELS: {
struct md_labels *labels = nla_data(a);
+ if (!labels_nonzero(&labels->mask)) {
+ OVS_NLERR(log, "ct_labels mask cannot be 0");
+ return -EINVAL;
+ }
info->labels = *labels;
break;
}
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
return -EMSGSIZE;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
&ct_info->mark))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ labels_nonzero(&ct_info->labels.mask) &&
nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
&ct_info->labels))
return -EMSGSIZE;
static void ovs_notify(struct genl_family *family,
struct sk_buff *skb, struct genl_info *info)
{
- genl_notify(family, skb, genl_info_net(info), info->snd_portid,
- 0, info->nlhdr, GFP_KERNEL);
+ genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
- err = ovs_nla_put_egress_tunnel_key(user_skb,
- upcall_info->egress_tun_info,
- upcall_info->egress_tun_opts);
+ err = ovs_nla_put_tunnel_info(user_skb,
+ upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
* updating this function.
*/
return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
+ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
.next = ovs_vxlan_ext_key_lens },
+ [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
return 0;
}
-static int ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool log)
+static int ip_tun_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
{
- struct nlattr *a;
- int rem;
- bool ttl = false;
+ bool ttl = false, ipv4 = false, ipv6 = false;
__be16 tun_flags = 0;
int opts_type = 0;
+ struct nlattr *a;
+ int rem;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
nla_get_in_addr(a), is_mask);
+ ipv4 = true;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
nla_get_in_addr(a), is_mask);
+ ipv4 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
SW_FLOW_KEY_PUT(match, tun_key.tos,
opts_type = type;
break;
default:
- OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
+ OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
return -EINVAL;
}
}
SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ if (is_mask)
+ SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
+ else
+ SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
+ false);
if (rem > 0) {
- OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
+ OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
rem);
return -EINVAL;
}
+ if (ipv4 && ipv6) {
+ OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
+ return -EINVAL;
+ }
+
if (!is_mask) {
- if (!match->key->tun_key.u.ipv4.dst) {
+ if (!ipv4 && !ipv6) {
+ OVS_NLERR(log, "IP tunnel dst address not specified");
+ return -EINVAL;
+ }
+ if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
+ if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
+ OVS_NLERR(log, "IPv6 tunnel dst address is zero");
+ return -EINVAL;
+ }
if (!ttl) {
- OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
+ OVS_NLERR(log, "IP tunnel TTL not specified.");
return -EINVAL;
}
}
return 0;
}
-static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len)
+static int __ip_tun_to_nlattr(struct sk_buff *skb,
+ const struct ip_tunnel_key *output,
+ const void *tun_opts, int swkey_tun_opts_len,
+ unsigned short tun_proto)
{
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->u.ipv4.src &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->u.ipv4.src))
- return -EMSGSIZE;
- if (output->u.ipv4.dst &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->u.ipv4.dst))
- return -EMSGSIZE;
+ switch (tun_proto) {
+ case AF_INET:
+ if (output->u.ipv4.src &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
+ output->u.ipv4.src))
+ return -EMSGSIZE;
+ if (output->u.ipv4.dst &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
+ output->u.ipv4.dst))
+ return -EMSGSIZE;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_any(&output->u.ipv6.src) &&
+ nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
+ &output->u.ipv6.src))
+ return -EMSGSIZE;
+ if (!ipv6_addr_any(&output->u.ipv6.dst) &&
+ nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
+ &output->u.ipv6.dst))
+ return -EMSGSIZE;
+ break;
+ }
if (output->tos &&
nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
- if (tun_opts) {
+ if (swkey_tun_opts_len) {
if (output->tun_flags & TUNNEL_GENEVE_OPT &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return 0;
}
-static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len)
+static int ip_tun_to_nlattr(struct sk_buff *skb,
+ const struct ip_tunnel_key *output,
+ const void *tun_opts, int swkey_tun_opts_len,
+ unsigned short tun_proto)
{
struct nlattr *nla;
int err;
if (!nla)
return -EMSGSIZE;
- err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
+ err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
+ tun_proto);
if (err)
return err;
return 0;
}
- int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
- const struct ip_tunnel_info *egress_tun_info,
- const void *egress_tun_opts)
+ int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
{
- return __ip_tun_to_nlattr(skb, &egress_tun_info->key,
- egress_tun_opts,
- egress_tun_info->options_len,
- ip_tunnel_info_af(egress_tun_info));
- return __ipv4_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
- tun_info->options_len);
++ return __ip_tun_to_nlattr(skb, &tun_info->key,
++ ip_tunnel_info_opts(tun_info),
++ tun_info->options_len,
++ ip_tunnel_info_af(tun_info));
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
}
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
- if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask, log) < 0)
+ if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+ is_mask, log) < 0)
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
- if (!is_mask && !ovs_ct_state_supported(ct_state)) {
+ if (ct_state & ~CT_SUPPORTED_MASK) {
OVS_NLERR(log, "ct_state flags %08x unsupported",
ct_state);
return -EINVAL;
} else {
memset(nla_data(nla), val, nla_len(nla));
}
+
+ if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
+ *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
}
}
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.u.ipv4.dst)
+ if (match->key->tun_proto)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
+ if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
- if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
- swkey->tun_opts_len))
+ if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
+ swkey->tun_opts_len, swkey->tun_proto))
goto nla_put_failure;
}
int err = 0, start, opts_type;
ovs_match_init(&match, &key, NULL);
- opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+ opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
return opts_type;
tun_info = &tun_dst->u.tun_info;
tun_info->mode = IP_TUNNEL_INFO_TX;
+ if (key.tun_proto == AF_INET6)
+ tun_info->mode |= IP_TUNNEL_INFO_IPV6;
tun_info->key = key.tun_key;
/* We need to store the options in the action itself since
if (!start)
return -EMSGSIZE;
- err = ip_tun_to_nlattr(skb, &tun_info->key,
- tun_info->options_len ?
- ip_tunnel_info_opts(tun_info) : NULL,
- tun_info->options_len,
- ip_tunnel_info_af(tun_info));
+ err = ovs_nla_put_tunnel_info(skb, tun_info);
if (err)
return err;
nla_nest_end(skb, start);
return 0;
}
- static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
- {
- struct geneve_port *geneve_port = geneve_vport(vport);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dport = htons(geneve_port->port_no);
- __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
-
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_UDP, sport, dport);
- }
-
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
.create = geneve_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
- .send = ovs_netdev_send,
+ .send = dev_queue_xmit,
.owner = THIS_MODULE,
- .get_egress_tun_info = geneve_get_egress_tun_info,
};
static int __init ovs_geneve_tnl_init(void)
return ovs_netdev_link(vport, parms->name);
}
- static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
- {
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_GRE, 0, 0);
- }
-
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
- .send = ovs_netdev_send,
+ .send = dev_queue_xmit,
- .get_egress_tun_info = gre_get_egress_tun_info,
.destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
free_netdev(dev);
}
+ static struct rtnl_link_stats64 *
+ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+ {
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ stats->rx_errors = dev->stats.rx_errors;
+ stats->tx_errors = dev->stats.tx_errors;
+ stats->tx_dropped = dev->stats.tx_dropped;
+ stats->rx_dropped = dev->stats.rx_dropped;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *percpu_stats;
+ struct pcpu_sw_netstats local_stats;
+ unsigned int start;
+
+ percpu_stats = per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
+ local_stats = *percpu_stats;
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
+
+ stats->rx_bytes += local_stats.rx_bytes;
+ stats->rx_packets += local_stats.rx_packets;
+ stats->tx_bytes += local_stats.tx_bytes;
+ stats->tx_packets += local_stats.tx_packets;
+ }
+
+ return stats;
+ }
+
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
+ .ndo_get_stats64 = internal_get_stats,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
err = -ENOMEM;
goto error_free_vport;
}
+ vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!vport->dev->tstats) {
+ err = -ENOMEM;
+ goto error_free_netdev;
+ }
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
rtnl_lock();
err = register_netdevice(vport->dev);
if (err)
- goto error_free_netdev;
+ goto error_unlock;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
return vport;
- error_free_netdev:
+ error_unlock:
rtnl_unlock();
+ free_percpu(vport->dev->tstats);
+ error_free_netdev:
free_netdev(vport->dev);
error_free_vport:
ovs_vport_free(vport);
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
-
+ free_percpu(vport->dev->tstats);
rtnl_unlock();
}
-static void internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
{
- struct net_device *netdev = vport->dev;
+ struct net_device *netdev = skb->dev;
struct pcpu_sw_netstats *stats;
if (unlikely(!(netdev->flags & IFF_UP))) {
kfree_skb(skb);
netdev->stats.rx_dropped++;
- return;
+ return NETDEV_TX_OK;
}
skb_dst_drop(skb);
nf_reset(skb);
secpath_reset(skb);
- skb->dev = netdev;
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
u64_stats_update_end(&stats->syncp);
netif_rx(skb);
+ return NETDEV_TX_OK;
}
static struct vport_ops ovs_internal_vport_ops = {
return ovs_netdev_link(vport, parms->name);
}
- static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
- {
- struct vxlan_dev *vxlan = netdev_priv(vport->dev);
- struct net *net = ovs_dp_get_net(vport->dp);
- unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
- __be16 dst_port = vxlan_dev_dst_port(vxlan, family);
- __be16 src_port;
- int port_min;
- int port_max;
-
- inet_get_local_port_range(net, &port_min, &port_max);
- src_port = udp_flow_src_port(net, skb, 0, 0, true);
-
- return ovs_tunnel_get_egress_info(upcall, net,
- skb, IPPROTO_UDP,
- src_port, dst_port);
- }
-
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options,
- .send = ovs_netdev_send,
+ .send = dev_queue_xmit,
- .get_egress_tun_info = vxlan_get_egress_tun_info,
};
static int __init ovs_vxlan_tnl_init(void)
call_rcu(&vport->rcu, free_vport_rcu);
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
- int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *skb,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst)
- {
- struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
- const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
- const struct ip_tunnel_key *tun_key;
- u32 skb_mark = skb->mark;
- struct rtable *rt;
- struct flowi4 fl;
-
- if (unlikely(!tun_info))
- return -EINVAL;
- if (ip_tunnel_info_af(tun_info) != AF_INET)
- return -EINVAL;
-
- tun_key = &tun_info->key;
-
- /* Route lookup to get srouce IP address.
- * The process may need to be changed if the corresponding process
- * in vports ops changed.
- */
- rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
-
- /* Generate egress_tun_info based on tun_info,
- * saddr, tp_src and tp_dst
- */
- ip_tunnel_key_init(&egress_tun_info->key,
- fl.saddr, tun_key->u.ipv4.dst,
- tun_key->tos,
- tun_key->ttl,
- tp_src, tp_dst,
- tun_key->tun_id,
- tun_key->tun_flags);
- egress_tun_info->options_len = tun_info->options_len;
- egress_tun_info->mode = tun_info->mode;
- upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
- return 0;
- }
- EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
-
- int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
- {
- /* get_egress_tun_info() is only implemented on tunnel ports. */
- if (unlikely(!vport->ops->get_egress_tun_info))
- return -EINVAL;
-
- return vport->ops->get_egress_tun_info(vport, skb, upcall);
- }
-
+
+static unsigned int packet_length(const struct sk_buff *skb)
+{
+ unsigned int length = skb->len - ETH_HLEN;
+
+ if (skb->protocol == htons(ETH_P_8021Q))
+ length -= VLAN_HLEN;
+
+ return length;
+}
+
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+{
+ int mtu = vport->dev->mtu;
+
+ if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+ net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
+ vport->dev->name,
+ packet_length(skb), mtu);
+ vport->dev->stats.tx_errors++;
+ goto drop;
+ }
+
+ skb->dev = vport->dev;
+ vport->ops->send(skb);
+ return;
+
+drop:
+ kfree_skb(skb);
+}
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
- #include <net/route.h>
#include "datapath.h"
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
- int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst);
-
- int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall);
-
/**
* struct vport_portids - array of netlink portids of a vport.
* must be protected by rcu.
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
- * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
- * a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
int (*set_options)(struct vport *, struct nlattr *);
int (*get_options)(const struct vport *, struct sk_buff *);
- void (*send)(struct vport *, struct sk_buff *);
+ netdev_tx_t (*send) (struct sk_buff *skb);
- int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
- struct dp_upcall_info *upcall);
-
struct module *owner;
struct list_head list;
};
int ovs_vport_ops_register(struct vport_ops *ops);
void ovs_vport_ops_unregister(struct vport_ops *ops);
-static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
+ const struct ip_tunnel_key *key,
+ u32 mark,
+ struct flowi4 *fl,
+ u8 protocol)
{
- vport->ops->send(vport, skb);
+ struct rtable *rt;
+
+ memset(fl, 0, sizeof(*fl));
+ fl->daddr = key->u.ipv4.dst;
+ fl->saddr = key->u.ipv4.src;
+ fl->flowi4_tos = RT_TOS(key->tos);
+ fl->flowi4_mark = mark;
+ fl->flowi4_proto = protocol;
+
+ rt = ip_route_output_key(net, fl);
+ return rt;
}
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
+
#endif /* vport.h */
{
struct sk_buff *head = *headbuf;
struct sk_buff *frag = *buf;
- struct sk_buff *tail;
+ struct sk_buff *tail = NULL;
struct tipc_msg *msg;
u32 fragid;
int delta;
if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
goto err;
head = *headbuf = frag;
- skb_frag_list_init(head);
- TIPC_SKB_CB(head)->tail = NULL;
*buf = NULL;
+ TIPC_SKB_CB(head)->tail = NULL;
+ if (skb_is_nonlinear(head)) {
+ skb_walk_frags(head, tail) {
+ TIPC_SKB_CB(head)->tail = tail;
+ }
+ } else {
+ skb_frag_list_init(head);
+ }
return 0;
}
kfree_skb(head);
return NULL;
}
+
+/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
+ * @list: list to be appended to
+ * @seqno: sequence number of buffer to add
+ * @skb: buffer to add
+ */
+void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+ struct sk_buff *skb)
+{
+ struct sk_buff *_skb, *tmp;
+
+ if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
+ __skb_queue_head(list, skb);
+ return;
+ }
+
+ if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
+ __skb_queue_tail(list, skb);
+ return;
+ }
+
+ skb_queue_walk_safe(list, _skb, tmp) {
+ if (more(seqno, buf_seqno(_skb)))
+ continue;
+ if (seqno == buf_seqno(_skb))
+ break;
+ __skb_queue_before(list, _skb, skb);
+ return;
+ }
+ kfree_skb(skb);
+}
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
+ #define UDP_MIN_HEADROOM 28
+
static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
[TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
[TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
struct sk_buff *clone;
struct rtable *rt;
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM)
+ pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+
clone = skb_clone(skb, GFP_ATOMIC);
skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
}
if (ub->ubsock)
sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
- RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(ub->bearer, NULL);
/* sock_release need to be done outside of rtnl lock */