git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge tag 'batman-adv-for-davem' of git://git.open-mesh.org/linux-merge
authorDavid S. Miller <davem@davemloft.net>
Wed, 16 Dec 2015 16:09:40 +0000 (11:09 -0500)
committerDavid S. Miller <davem@davemloft.net>
Wed, 16 Dec 2015 16:09:40 +0000 (11:09 -0500)
Antonio Quartulli says:

====================
Included changes:
- change my email in MAINTAINERS and Doc files
- create and export list of single hop neighs per interface
- protect CRC in the BLA code by means of its own lock
- minor fixes and code cleanups
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
107 files changed:
Documentation/devicetree/bindings/net/socfpga-dwmac.txt
Documentation/devicetree/bindings/net/stmmac.txt
Documentation/networking/ip-sysctl.txt
drivers/net/bonding/bond_main.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/intel/Kconfig
drivers/net/ethernet/intel/e1000e/defines.h
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/igb/e1000_82575.c
drivers/net/ethernet/intel/igb/e1000_defines.h
drivers/net/ethernet/intel/igb/e1000_hw.h
drivers/net/ethernet/intel/igb/e1000_i210.c
drivers/net/ethernet/intel/igb/e1000_i210.h
drivers/net/ethernet/intel/igb/e1000_phy.c
drivers/net/ethernet/intel/igb/e1000_phy.h
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/jme.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/geneve.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/loopback.c
drivers/net/macvlan.c
drivers/net/macvtap.c
drivers/net/team/team.c
drivers/net/usb/r8152.c
drivers/scsi/fcoe/fcoe.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
include/linux/if_vlan.h
include/linux/inet_diag.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/netlink.h
include/linux/rhashtable.h
include/linux/skbuff.h
include/linux/sock_diag.h
include/net/genetlink.h
include/net/geneve.h
include/net/ila.h [new file with mode: 0644]
include/net/netns/sctp.h
include/net/sock.h
include/net/switchdev.h
include/net/tcp.h
include/net/vxlan.h
include/uapi/linux/ila.h
include/uapi/linux/sock_diag.h
net/8021q/vlan_dev.c
net/bridge/br_fdb.c
net/bridge/br_stp.c
net/bridge/br_stp_if.c
net/bridge/br_vlan.c
net/core/dev.c
net/core/ethtool.c
net/core/net-sysfs.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/sock_diag.c
net/ipv4/Kconfig
net/ipv4/inet_diag.c
net/ipv4/ip_output.c
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_diag.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv4/udp_offload.c
net/ipv6/Makefile
net/ipv6/ila/Makefile [new file with mode: 0644]
net/ipv6/ila/ila.h [new file with mode: 0644]
net/ipv6/ila/ila_common.c [new file with mode: 0644]
net/ipv6/ila/ila_lwt.c [moved from net/ipv6/ila.c with 62% similarity]
net/ipv6/ila/ila_xlat.c [new file with mode: 0644]
net/ipv6/ip6_output.c
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
net/ipv6/tcp_ipv6.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netlink/af_netlink.c
net/netlink/genetlink.c
net/sctp/output.c
net/sctp/protocol.c
net/sctp/sm_sideeffect.c
net/sctp/sysctl.c
net/switchdev/switchdev.c

index 3a9d6795160654080ec4aaa114304b62d5f0754f..72d82d684342b116efdbe2f7e05246ee854a1103 100644 (file)
@@ -11,6 +11,8 @@ Required properties:
                  designware version numbers documented in stmmac.txt
  - altr,sysmgr-syscon : Should be the phandle to the system manager node that
    encompasses the glue register, the register offset, and the register shift.
+ - altr,f2h_ptp_ref_clk: use f2h_ptp_ref_clk instead of default eosc1 clock
+   for ptp ref clk. This affects all emacs as the clock is common.
 
 Optional properties:
 altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if
index f34fc3c81a755c8a70573c50fa0a5b6dfae96a3c..e862a922bd3f957daca0989d1a94ae246afa48ca 100644 (file)
@@ -35,18 +35,18 @@ Optional properties:
 - reset-names: Should contain the reset signal name "stmmaceth", if a
        reset phandle is given
 - max-frame-size: See ethernet.txt file in the same directory
-- clocks: If present, the first clock should be the GMAC main clock and
-  the second clock should be peripheral's register interface clock. Further
-  clocks may be specified in derived bindings.
-- clock-names: One name for each entry in the clocks property, the
-  first one should be "stmmaceth" and the second one should be "pclk".
-- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is
-  available this clock is used for programming the Timestamp Addend Register.
-  If not passed then the system clock will be used and this is fine on some
-  platforms.
+- clocks: If present, the first clock should be the GMAC main clock.
+  The optional second clock should be peripheral's register interface clock.
+  The third optional clock should be the ptp reference clock.
+  Further clocks may be specified in derived bindings.
+- clock-names: One name for each entry in the clocks property.
+  The first one should be "stmmaceth".
+  The optional second one should be "pclk".
+  The optional third one should be "clk_ptp_ref".
 - snps,burst_len: The AXI burst lenth value of the AXI BUS MODE register.
 - tx-fifo-depth: See ethernet.txt file in the same directory
 - rx-fifo-depth: See ethernet.txt file in the same directory
+- mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus.
 
 Examples:
 
@@ -65,4 +65,11 @@ Examples:
                tx-fifo-depth = <16384>;
                clocks = <&clock>;
                clock-names = "stmmaceth";
+               mdio0 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "snps,dwmac-mdio";
+                       phy1: ethernet-phy@0 {
+                       };
+               };
        };
index 2ea4c45cf1c8736ccd577eff75058bb32ed0ca95..5de632ed0ec05f1dc79ba4abcdedca3c288211b6 100644 (file)
@@ -1723,6 +1723,25 @@ addip_enable - BOOLEAN
 
        Default: 0
 
+pf_enable - INTEGER
+       Enable or disable pf (pf is short for potentially failed) state. A value
+       of pf_retrans > path_max_retrans also disables pf state. That is, either
+       disabling pf_enable or setting pf_retrans > path_max_retrans can disable
+       pf state.
+       Since pf_retrans and path_max_retrans can be changed by userspace
+       application, the user may expect pf state to stay disabled because
+       pf_retrans > path_max_retrans, but if either value is later changed by
+       the application, pf state becomes enabled again. As such, pf_enable is
+       necessary to dynamically enable
+       and disable pf state. See:
+       https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for
+       details.
+
+       1: Enable pf.
+
+       0: Disable pf.
+
+       Default: 1
+
 addip_noauth_enable - BOOLEAN
        Dynamic Address Reconfiguration (ADD-IP) requires the use of
        authentication to protect the operations of adding or removing new
@@ -1799,7 +1818,9 @@ pf_retrans - INTEGER
        having to reduce path_max_retrans to a very low value.  See:
        http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
        for details.  Note also that a value of pf_retrans > path_max_retrans
-       disables this feature
+       disables this feature. Since both pf_retrans and path_max_retrans can
+       be changed by userspace application, a variable pf_enable is used to
+       disable pf state.
 
        Default: 0
 
index fe0e7a6f4d726d9271ec8e849bf72ea9f22915df..cab99fd44c8ef8ea1b559b217f0cb2062d8d4edd 100644 (file)
@@ -1067,12 +1067,12 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
        return features;
 }
 
-#define BOND_VLAN_FEATURES     (NETIF_F_ALL_CSUM | NETIF_F_SG | \
+#define BOND_VLAN_FEATURES     (NETIF_F_HW_CSUM | NETIF_F_SG | \
                                 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
                                 NETIF_F_HIGHDMA | NETIF_F_LRO)
 
-#define BOND_ENC_FEATURES      (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\
-                                NETIF_F_ALL_TSO)
+#define BOND_ENC_FEATURES      (NETIF_F_HW_CSUM | NETIF_F_SG | \
+                                NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
 
 static void bond_compute_features(struct bonding *bond)
 {
@@ -4182,7 +4182,6 @@ void bond_setup(struct net_device *bond_dev)
                                NETIF_F_HW_VLAN_CTAG_RX |
                                NETIF_F_HW_VLAN_CTAG_FILTER;
 
-       bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM);
        bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
        bond_dev->features |= bond_dev->hw_features;
 }
index 4cab8879f5ae4f0b3f64c9f7eb03c217f8e2ad19..34e324f20d8047d987e7a93c9974f8c2482e3df4 100644 (file)
@@ -5289,7 +5289,7 @@ static netdev_features_t be_features_check(struct sk_buff *skb,
            skb->inner_protocol != htons(ETH_P_TEB) ||
            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
            sizeof(struct udphdr) + sizeof(struct vxlanhdr))
-               return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+               return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 
        return features;
 }
index 7af870a3c549592803a55a5cfea89ea1b15044d3..6691b5a45b9d9be2ba99ec4d5ee15edac1148d5d 100644 (file)
@@ -763,7 +763,7 @@ static netdev_features_t ibmveth_fix_features(struct net_device *dev,
         */
 
        if (!(features & NETIF_F_RXCSUM))
-               features &= ~NETIF_F_ALL_CSUM;
+               features &= ~NETIF_F_CSUM_MASK;
 
        return features;
 }
@@ -928,7 +928,8 @@ static int ibmveth_set_features(struct net_device *dev,
                rc1 = ibmveth_set_csum_offload(dev, rx_csum);
                if (rc1 && !adapter->rx_csum)
                        dev->features =
-                               features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM);
+                               features & ~(NETIF_F_CSUM_MASK |
+                                            NETIF_F_RXCSUM);
        }
 
        if (large_send != adapter->large_send) {
index 4163b16489b35f75cf4d6b6aa695c21e25ac9520..fa593dd3efe122daed9de5801457470931ebe41c 100644 (file)
@@ -280,6 +280,16 @@ config I40E_VXLAN
          Say Y here if you want to use Virtual eXtensible Local Area Network
          (VXLAN) in the driver.
 
+config I40E_GENEVE
+       bool "Generic Network Virtualization Encapsulation (GENEVE) Support"
+       depends on I40E && GENEVE && !(I40E=y && GENEVE=m)
+       default n
+       ---help---
+         This allows one to create GENEVE virtual interfaces that provide
+         Layer 2 Networks over Layer 3 Networks. GENEVE is often used
+         to tunnel virtual network infrastructure in virtualized environments.
+         Say Y here if you want to use GENEVE in the driver.
+
 config I40E_DCB
        bool "Data Center Bridging (DCB) Support"
        default n
index 133d4074dbe48d8fd148662b2c57d0d41afd4dbe..f7c7804d79e57776492fee070601536994bae800 100644 (file)
 #define E1000_IMS_RXQ1      E1000_ICR_RXQ1      /* Rx Queue 1 Interrupt */
 #define E1000_IMS_TXQ0      E1000_ICR_TXQ0      /* Tx Queue 0 Interrupt */
 #define E1000_IMS_TXQ1      E1000_ICR_TXQ1      /* Tx Queue 1 Interrupt */
-#define E1000_IMS_OTHER     E1000_ICR_OTHER     /* Other Interrupts */
+#define E1000_IMS_OTHER     E1000_ICR_OTHER     /* Other Interrupt */
 
 /* Interrupt Cause Set */
 #define E1000_ICS_LSC       E1000_ICR_LSC       /* Link Status Change */
 #define E1000_ICS_RXSEQ     E1000_ICR_RXSEQ     /* Rx sequence error */
 #define E1000_ICS_RXDMT0    E1000_ICR_RXDMT0    /* Rx desc min. threshold */
+#define E1000_ICS_OTHER     E1000_ICR_OTHER     /* Other Interrupt */
 
 /* Transmit Descriptor Control */
 #define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */
index 0b748d1959d93162b79dea9f38bc2335bf666b92..1dc293bad87b5fb902f556028476b4199f1f85ee 100644 (file)
@@ -480,7 +480,7 @@ extern const char e1000e_driver_version[];
 void e1000e_check_options(struct e1000_adapter *adapter);
 void e1000e_set_ethtool_ops(struct net_device *netdev);
 
-int e1000e_up(struct e1000_adapter *adapter);
+void e1000e_up(struct e1000_adapter *adapter);
 void e1000e_down(struct e1000_adapter *adapter, bool reset);
 void e1000e_reinit_locked(struct e1000_adapter *adapter);
 void e1000e_reset(struct e1000_adapter *adapter);
index 775e38910681a617eee363309fd97e90a445afe9..c71ba1bfc1ec1dbafacfe5fa829a0464e178d9ef 100644 (file)
@@ -1905,30 +1905,15 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
        struct net_device *netdev = data;
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       u32 icr = er32(ICR);
-
-       if (!(icr & E1000_ICR_INT_ASSERTED)) {
-               if (!test_bit(__E1000_DOWN, &adapter->state))
-                       ew32(IMS, E1000_IMS_OTHER);
-               return IRQ_NONE;
-       }
 
-       if (icr & adapter->eiac_mask)
-               ew32(ICS, (icr & adapter->eiac_mask));
+       hw->mac.get_link_status = true;
 
-       if (icr & E1000_ICR_OTHER) {
-               if (!(icr & E1000_ICR_LSC))
-                       goto no_link_interrupt;
-               hw->mac.get_link_status = true;
-               /* guard against interrupt when we're going down */
-               if (!test_bit(__E1000_DOWN, &adapter->state))
-                       mod_timer(&adapter->watchdog_timer, jiffies + 1);
+       /* guard against interrupt when we're going down */
+       if (!test_bit(__E1000_DOWN, &adapter->state)) {
+               mod_timer(&adapter->watchdog_timer, jiffies + 1);
+               ew32(IMS, E1000_IMS_OTHER);
        }
 
-no_link_interrupt:
-       if (!test_bit(__E1000_DOWN, &adapter->state))
-               ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
-
        return IRQ_HANDLED;
 }
 
@@ -1946,6 +1931,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data)
                /* Ring was not completely cleaned, so fire another interrupt */
                ew32(ICS, tx_ring->ims_val);
 
+       if (!test_bit(__E1000_DOWN, &adapter->state))
+               ew32(IMS, adapter->tx_ring->ims_val);
+
        return IRQ_HANDLED;
 }
 
@@ -2027,6 +2015,7 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
                       hw->hw_addr + E1000_EITR_82574(vector));
        else
                writel(1, hw->hw_addr + E1000_EITR_82574(vector));
+       adapter->eiac_mask |= E1000_IMS_OTHER;
 
        /* Cause Tx interrupts on every write back */
        ivar |= (1 << 31);
@@ -2034,12 +2023,8 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
        ew32(IVAR, ivar);
 
        /* enable MSI-X PBA support */
-       ctrl_ext = er32(CTRL_EXT);
-       ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
-
-       /* Auto-Mask Other interrupts upon ICR read */
-       ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
-       ctrl_ext |= E1000_CTRL_EXT_EIAME;
+       ctrl_ext = er32(CTRL_EXT) & ~E1000_CTRL_EXT_IAME;
+       ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
        ew32(CTRL_EXT, ctrl_ext);
        e1e_flush();
 }
@@ -2255,7 +2240,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter)
 
        if (adapter->msix_entries) {
                ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-               ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
+               ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC);
        } else if ((hw->mac.type == e1000_pch_lpt) ||
                   (hw->mac.type == e1000_pch_spt)) {
                ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
@@ -4146,10 +4131,24 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 }
 
-int e1000e_up(struct e1000_adapter *adapter)
+/**
+ * e1000e_trigger_lsc - trigger an LSC interrupt
+ * @adapter: board private structure
+ *
+ * Fire a link status change interrupt to start the watchdog.
+ **/
+static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
 
+       if (adapter->msix_entries)
+               ew32(ICS, E1000_ICS_OTHER);
+       else
+               ew32(ICS, E1000_ICS_LSC);
+}
+
+void e1000e_up(struct e1000_adapter *adapter)
+{
        /* hardware has been reset, we need to reload some things */
        e1000_configure(adapter);
 
@@ -4161,13 +4160,7 @@ int e1000e_up(struct e1000_adapter *adapter)
 
        netif_start_queue(adapter->netdev);
 
-       /* fire a link change interrupt to start the watchdog */
-       if (adapter->msix_entries)
-               ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-       else
-               ew32(ICS, E1000_ICS_LSC);
-
-       return 0;
+       e1000e_trigger_lsc(adapter);
 }
 
 static void e1000e_flush_descriptors(struct e1000_adapter *adapter)
@@ -4592,11 +4585,7 @@ static int e1000_open(struct net_device *netdev)
        hw->mac.get_link_status = true;
        pm_runtime_put(&pdev->dev);
 
-       /* fire a link status change interrupt to start the watchdog */
-       if (adapter->msix_entries)
-               ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-       else
-               ew32(ICS, E1000_ICS_LSC);
+       e1000e_trigger_lsc(adapter);
 
        return 0;
 
@@ -6633,7 +6622,7 @@ static int e1000e_pm_runtime_resume(struct device *dev)
                return rc;
 
        if (netdev->flags & IFF_UP)
-               rc = e1000e_up(adapter);
+               e1000e_up(adapter);
 
        return rc;
 }
@@ -6824,13 +6813,8 @@ static void e1000_io_resume(struct pci_dev *pdev)
 
        e1000_init_manageability_pt(adapter);
 
-       if (netif_running(netdev)) {
-               if (e1000e_up(adapter)) {
-                       dev_err(&pdev->dev,
-                               "can't bring device back up after reset\n");
-                       return;
-               }
-       }
+       if (netif_running(netdev))
+               e1000e_up(adapter);
 
        netif_device_attach(netdev);
 
index d9854d39576d46281a8fa60f2584d7cad46f890e..83ddf362ea7762bde034e9048cd541e8bb537d9c 100644 (file)
@@ -1357,7 +1357,7 @@ static netdev_features_t fm10k_features_check(struct sk_buff *skb,
        if (!skb->encapsulation || fm10k_tx_encap_offload(skb))
                return features;
 
-       return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+       return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
 static const struct net_device_ops fm10k_netdev_ops = {
index b7bc014ae00b7b9e89a36c74c357618178ef1e16..c202f9b9386a1a4c2753c9af1dd432c868184932 100644 (file)
@@ -245,6 +245,11 @@ struct i40e_tc_configuration {
        struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
 };
 
+struct i40e_udp_port_config {
+       __be16 index;
+       u8 type;
+};
+
 /* struct that defines the Ethernet device */
 struct i40e_pf {
        struct pci_dev *pdev;
@@ -281,11 +286,9 @@ struct i40e_pf {
        u32 fd_atr_cnt;
        u32 fd_tcp_rule;
 
-#ifdef CONFIG_I40E_VXLAN
-       __be16  vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
-       u16 pending_vxlan_bitmap;
+       struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
+       u16 pending_udp_bitmap;
 
-#endif
        enum i40e_interrupt_policy int_policy;
        u16 rx_itr_default;
        u16 tx_itr_default;
@@ -322,9 +325,7 @@ struct i40e_pf {
 #define I40E_FLAG_FD_ATR_ENABLED               BIT_ULL(22)
 #define I40E_FLAG_PTP                          BIT_ULL(25)
 #define I40E_FLAG_MFP_ENABLED                  BIT_ULL(26)
-#ifdef CONFIG_I40E_VXLAN
-#define I40E_FLAG_VXLAN_FILTER_SYNC            BIT_ULL(27)
-#endif
+#define I40E_FLAG_UDP_FILTER_SYNC              BIT_ULL(27)
 #define I40E_FLAG_PORT_ID_VALID                        BIT_ULL(28)
 #define I40E_FLAG_DCB_CAPABLE                  BIT_ULL(29)
 #define I40E_FLAG_RSS_AQ_CAPABLE               BIT_ULL(31)
@@ -336,6 +337,7 @@ struct i40e_pf {
 #define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE  BIT_ULL(38)
 #define I40E_FLAG_LINK_POLLING_ENABLED         BIT_ULL(39)
 #define I40E_FLAG_VEB_MODE_ENABLED             BIT_ULL(40)
+#define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE       BIT_ULL(41)
 #define I40E_FLAG_NO_PCI_LINK_CHECK            BIT_ULL(42)
 
        /* tracks features that get auto disabled by errors */
index b118deb08ce60f48f6a306d7c86bc7621c5786e8..23211e08eecb870c784203e48fc83effc305c7b9 100644 (file)
 /* Local includes */
 #include "i40e.h"
 #include "i40e_diag.h"
-#ifdef CONFIG_I40E_VXLAN
+#if IS_ENABLED(CONFIG_VXLAN)
 #include <net/vxlan.h>
 #endif
+#if IS_ENABLED(CONFIG_GENEVE)
+#include <net/geneve.h>
+#endif
 
 const char i40e_driver_name[] = "i40e";
 static const char i40e_driver_string[] =
@@ -5336,6 +5339,9 @@ int i40e_open(struct net_device *netdev)
 #ifdef CONFIG_I40E_VXLAN
        vxlan_get_rx_port(netdev);
 #endif
+#ifdef CONFIG_I40E_GENEVE
+       geneve_get_rx_port(netdev);
+#endif
 
        return 0;
 }
@@ -7036,30 +7042,30 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
        i40e_flush(hw);
 }
 
-#ifdef CONFIG_I40E_VXLAN
 /**
- * i40e_sync_vxlan_filters_subtask - Sync the VSI filter list with HW
+ * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW
  * @pf: board private structure
  **/
-static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf)
+static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
 {
+#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
        struct i40e_hw *hw = &pf->hw;
        i40e_status ret;
        __be16 port;
        int i;
 
-       if (!(pf->flags & I40E_FLAG_VXLAN_FILTER_SYNC))
+       if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC))
                return;
 
-       pf->flags &= ~I40E_FLAG_VXLAN_FILTER_SYNC;
+       pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC;
 
        for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
-               if (pf->pending_vxlan_bitmap & BIT_ULL(i)) {
-                       pf->pending_vxlan_bitmap &= ~BIT_ULL(i);
-                       port = pf->vxlan_ports[i];
+               if (pf->pending_udp_bitmap & BIT_ULL(i)) {
+                       pf->pending_udp_bitmap &= ~BIT_ULL(i);
+                       port = pf->udp_ports[i].index;
                        if (port)
                                ret = i40e_aq_add_udp_tunnel(hw, ntohs(port),
-                                                    I40E_AQC_TUNNEL_TYPE_VXLAN,
+                                                    pf->udp_ports[i].type,
                                                     NULL, NULL);
                        else
                                ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
@@ -7072,13 +7078,13 @@ static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf)
                                         i40e_stat_str(&pf->hw, ret),
                                         i40e_aq_str(&pf->hw,
                                                    pf->hw.aq.asq_last_status));
-                               pf->vxlan_ports[i] = 0;
+                               pf->udp_ports[i].index = 0;
                        }
                }
        }
+#endif
 }
 
-#endif
 /**
  * i40e_service_task - Run the driver's async subtasks
  * @work: pointer to work_struct containing our data
@@ -7103,8 +7109,8 @@ static void i40e_service_task(struct work_struct *work)
        i40e_watchdog_subtask(pf);
        i40e_fdir_reinit_subtask(pf);
        i40e_sync_filters_subtask(pf);
-#ifdef CONFIG_I40E_VXLAN
-       i40e_sync_vxlan_filters_subtask(pf);
+#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
+       i40e_sync_udp_filters_subtask(pf);
 #endif
        i40e_clean_adminq_subtask(pf);
 
@@ -8380,7 +8386,8 @@ static int i40e_sw_init(struct i40e_pf *pf)
                             I40E_FLAG_HW_ATR_EVICT_CAPABLE |
                             I40E_FLAG_OUTER_UDP_CSUM_CAPABLE |
                             I40E_FLAG_WB_ON_ITR_CAPABLE |
-                            I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE;
+                            I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE |
+                            I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
        }
        pf->eeprom_version = 0xDEAD;
        pf->lan_veb = I40E_NO_VEB;
@@ -8479,26 +8486,27 @@ static int i40e_set_features(struct net_device *netdev,
        return 0;
 }
 
-#ifdef CONFIG_I40E_VXLAN
+#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
 /**
- * i40e_get_vxlan_port_idx - Lookup a possibly offloaded for Rx UDP port
+ * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port
  * @pf: board private structure
  * @port: The UDP port to look up
  *
  * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found
  **/
-static u8 i40e_get_vxlan_port_idx(struct i40e_pf *pf, __be16 port)
+static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 {
        u8 i;
 
        for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
-               if (pf->vxlan_ports[i] == port)
+               if (pf->udp_ports[i].index == port)
                        return i;
        }
 
        return i;
 }
 
+#endif
 /**
  * i40e_add_vxlan_port - Get notifications about VXLAN ports that come up
  * @netdev: This physical port's netdev
@@ -8508,6 +8516,7 @@ static u8 i40e_get_vxlan_port_idx(struct i40e_pf *pf, __be16 port)
 static void i40e_add_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
+#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8517,7 +8526,7 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
        if (sa_family == AF_INET6)
                return;
 
-       idx = i40e_get_vxlan_port_idx(pf, port);
+       idx = i40e_get_udp_port_idx(pf, port);
 
        /* Check if port already exists */
        if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
@@ -8527,7 +8536,7 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
        }
 
        /* Now check if there is space to add the new port */
-       next_idx = i40e_get_vxlan_port_idx(pf, 0);
+       next_idx = i40e_get_udp_port_idx(pf, 0);
 
        if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
                netdev_info(netdev, "maximum number of vxlan UDP ports reached, not adding port %d\n",
@@ -8536,9 +8545,11 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
        }
 
        /* New port: add it and mark its index in the bitmap */
-       pf->vxlan_ports[next_idx] = port;
-       pf->pending_vxlan_bitmap |= BIT_ULL(next_idx);
-       pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC;
+       pf->udp_ports[next_idx].index = port;
+       pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
+       pf->pending_udp_bitmap |= BIT_ULL(next_idx);
+       pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+#endif
 }
 
 /**
@@ -8550,6 +8561,7 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
 static void i40e_del_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
+#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8558,23 +8570,108 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
        if (sa_family == AF_INET6)
                return;
 
-       idx = i40e_get_vxlan_port_idx(pf, port);
+       idx = i40e_get_udp_port_idx(pf, port);
 
        /* Check if port already exists */
        if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
                /* if port exists, set it to 0 (mark for deletion)
                 * and make it pending
                 */
-               pf->vxlan_ports[idx] = 0;
-               pf->pending_vxlan_bitmap |= BIT_ULL(idx);
-               pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC;
+               pf->udp_ports[idx].index = 0;
+               pf->pending_udp_bitmap |= BIT_ULL(idx);
+               pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
        } else {
                netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
                            ntohs(port));
        }
+#endif
 }
 
+/**
+ * i40e_add_geneve_port - Get notifications about GENEVE ports that come up
+ * @netdev: This physical port's netdev
+ * @sa_family: Socket Family that GENEVE is notifying us about
+ * @port: New UDP port number that GENEVE started listening to
+ **/
+static void i40e_add_geneve_port(struct net_device *netdev,
+                                sa_family_t sa_family, __be16 port)
+{
+#if IS_ENABLED(CONFIG_GENEVE)
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       u8 next_idx;
+       u8 idx;
+
+       if (sa_family == AF_INET6)
+               return;
+
+       idx = i40e_get_udp_port_idx(pf, port);
+
+       /* Check if port already exists */
+       if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
+               netdev_info(netdev, "udp port %d already offloaded\n",
+                           ntohs(port));
+               return;
+       }
+
+       /* Now check if there is space to add the new port */
+       next_idx = i40e_get_udp_port_idx(pf, 0);
+
+       if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
+               netdev_info(netdev, "maximum number of UDP ports reached, not adding port %d\n",
+                           ntohs(port));
+               return;
+       }
+
+       /* New port: add it and mark its index in the bitmap */
+       pf->udp_ports[next_idx].index = port;
+       pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE;
+       pf->pending_udp_bitmap |= BIT_ULL(next_idx);
+       pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+
+       dev_info(&pf->pdev->dev, "adding geneve port %d\n", ntohs(port));
 #endif
+}
+
+/**
+ * i40e_del_geneve_port - Get notifications about GENEVE ports that go away
+ * @netdev: This physical port's netdev
+ * @sa_family: Socket Family that GENEVE is notifying us about
+ * @port: UDP port number that GENEVE stopped listening to
+ **/
+static void i40e_del_geneve_port(struct net_device *netdev,
+                                sa_family_t sa_family, __be16 port)
+{
+#if IS_ENABLED(CONFIG_GENEVE)
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       u8 idx;
+
+       if (sa_family == AF_INET6)
+               return;
+
+       idx = i40e_get_udp_port_idx(pf, port);
+
+       /* Check if port already exists */
+       if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
+               /* if port exists, set it to 0 (mark for deletion)
+                * and make it pending
+                */
+               pf->udp_ports[idx].index = 0;
+               pf->pending_udp_bitmap |= BIT_ULL(idx);
+               pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+
+               dev_info(&pf->pdev->dev, "deleting geneve port %d\n",
+                        ntohs(port));
+       } else {
+               netdev_warn(netdev, "geneve port %d was not found, not deleting\n",
+                           ntohs(port));
+       }
+#endif
+}
+
 static int i40e_get_phys_port_id(struct net_device *netdev,
                                 struct netdev_phys_item_id *ppid)
 {
@@ -8752,7 +8849,10 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                       nlflags, 0, 0, filter_mask, NULL);
 }
 
-#define I40E_MAX_TUNNEL_HDR_LEN 80
+/* Hardware supports L4 tunnel length of 128B (=2^7) which includes
+ * inner mac plus all inner ethertypes.
+ */
+#define I40E_MAX_TUNNEL_HDR_LEN 128
 /**
  * i40e_features_check - Validate encapsulated packet conforms to limits
  * @skb: skb buff
@@ -8764,9 +8864,9 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
                                             netdev_features_t features)
 {
        if (skb->encapsulation &&
-           (skb_inner_mac_header(skb) - skb_transport_header(skb) >
+           ((skb_inner_network_header(skb) - skb_transport_header(skb)) >
             I40E_MAX_TUNNEL_HDR_LEN))
-               return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+               return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 
        return features;
 }
@@ -8799,9 +8899,13 @@ static const struct net_device_ops i40e_netdev_ops = {
        .ndo_get_vf_config      = i40e_ndo_get_vf_config,
        .ndo_set_vf_link_state  = i40e_ndo_set_vf_link_state,
        .ndo_set_vf_spoofchk    = i40e_ndo_set_vf_spoofchk,
-#ifdef CONFIG_I40E_VXLAN
+#if IS_ENABLED(CONFIG_VXLAN)
        .ndo_add_vxlan_port     = i40e_add_vxlan_port,
        .ndo_del_vxlan_port     = i40e_del_vxlan_port,
+#endif
+#if IS_ENABLED(CONFIG_GENEVE)
+       .ndo_add_geneve_port    = i40e_add_geneve_port,
+       .ndo_del_geneve_port    = i40e_del_geneve_port,
 #endif
        .ndo_get_phys_port_id   = i40e_get_phys_port_id,
        .ndo_fdb_add            = i40e_ndo_fdb_add,
@@ -8836,13 +8940,14 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
        np->vsi = vsi;
 
        netdev->hw_enc_features |= NETIF_F_IP_CSUM       |
+                                 NETIF_F_RXCSUM         |
                                  NETIF_F_GSO_UDP_TUNNEL |
                                  NETIF_F_GSO_GRE        |
                                  NETIF_F_TSO;
 
        netdev->features = NETIF_F_SG                  |
                           NETIF_F_IP_CSUM             |
-                          NETIF_F_SCTP_CSUM           |
+                          NETIF_F_SCTP_CRC            |
                           NETIF_F_HIGHDMA             |
                           NETIF_F_GSO_UDP_TUNNEL      |
                           NETIF_F_GSO_GRE             |
@@ -10348,6 +10453,9 @@ static void i40e_print_features(struct i40e_pf *pf)
                i += snprintf(&buf[i], REMAIN(i), " DCB");
 #if IS_ENABLED(CONFIG_VXLAN)
        i += snprintf(&buf[i], REMAIN(i), " VxLAN");
+#endif
+#if IS_ENABLED(CONFIG_GENEVE)
+       i += snprintf(&buf[i], REMAIN(i), " Geneve");
 #endif
        if (pf->flags & I40E_FLAG_PTP)
                i += snprintf(&buf[i], REMAIN(i), " PTP");
index b0ae3e695783982edf37875c7d5b3491f7b21299..e9e9a37ee274cf146a6fe748e675ee892fed3179 100644 (file)
@@ -1380,7 +1380,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
        if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
                return;
 
-       /* If VXLAN traffic has an outer UDPv4 checksum we need to check
+       /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check
         * it in the driver, hardware does not do it for us.
         * Since L3L4P bit was set we assume a valid IHL value (>=5)
         * so the total length of IPv4 header is IHL*4 bytes
@@ -2001,7 +2001,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
        if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
                return;
 
-       if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
+       if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) {
                /* snag network header to get L4 type and address */
                hdr.network = skb_network_header(skb);
 
@@ -2086,7 +2086,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
                     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
 
        dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
-       if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
+       if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
                dtype_cmd |=
                        ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
@@ -2319,7 +2319,7 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
                        oudph = udp_hdr(skb);
                        oiph = ip_hdr(skb);
                        l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
-                       *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
+                       *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
                        break;
                case IPPROTO_GRE:
                        l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
index dccc1eb576f251060376e6eb09bca71c3a4f807f..3f081e25e097aa34cca027e60c9bb720e310e9d6 100644 (file)
@@ -163,7 +163,7 @@ enum i40e_dyn_idx_t {
 #define I40E_TX_FLAGS_FSO              BIT(7)
 #define I40E_TX_FLAGS_TSYN             BIT(8)
 #define I40E_TX_FLAGS_FD_SB            BIT(9)
-#define I40E_TX_FLAGS_VXLAN_TUNNEL     BIT(10)
+#define I40E_TX_FLAGS_UDP_TUNNEL       BIT(10)
 #define I40E_TX_FLAGS_VLAN_MASK                0xffff0000
 #define I40E_TX_FLAGS_VLAN_PRIO_MASK   0xe0000000
 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT  29
index 455394cf7f80c3199117587ca896510cb23bb622..4d05ff6f0423143ae463b89678383765ab6ae3a8 100644 (file)
@@ -2321,7 +2321,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
        netdev->features |= NETIF_F_HIGHDMA |
                            NETIF_F_SG |
                            NETIF_F_IP_CSUM |
-                           NETIF_F_SCTP_CSUM |
+                           NETIF_F_SCTP_CRC |
                            NETIF_F_IPV6_CSUM |
                            NETIF_F_TSO |
                            NETIF_F_TSO6 |
index 362911d024b5d8237070413cc722d65adab58451..adb33e2a0137600e841cd2b289f7f3bb4f28362a 100644 (file)
@@ -45,8 +45,6 @@ static s32  igb_get_cfg_done_82575(struct e1000_hw *);
 static s32  igb_init_hw_82575(struct e1000_hw *);
 static s32  igb_phy_hw_reset_sgmii_82575(struct e1000_hw *);
 static s32  igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *);
-static s32  igb_read_phy_reg_82580(struct e1000_hw *, u32, u16 *);
-static s32  igb_write_phy_reg_82580(struct e1000_hw *, u32, u16);
 static s32  igb_reset_hw_82575(struct e1000_hw *);
 static s32  igb_reset_hw_82580(struct e1000_hw *);
 static s32  igb_set_d0_lplu_state_82575(struct e1000_hw *, bool);
@@ -205,13 +203,10 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
                case e1000_82580:
                case e1000_i350:
                case e1000_i354:
-                       phy->ops.read_reg = igb_read_phy_reg_82580;
-                       phy->ops.write_reg = igb_write_phy_reg_82580;
-                       break;
                case e1000_i210:
                case e1000_i211:
-                       phy->ops.read_reg = igb_read_phy_reg_gs40g;
-                       phy->ops.write_reg = igb_write_phy_reg_gs40g;
+                       phy->ops.read_reg = igb_read_phy_reg_82580;
+                       phy->ops.write_reg = igb_write_phy_reg_82580;
                        break;
                default:
                        phy->ops.read_reg = igb_read_phy_reg_igp;
@@ -2153,7 +2148,7 @@ void igb_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable)
  *  Reads the MDI control register in the PHY at offset and stores the
  *  information read to data.
  **/
-static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
+s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
 {
        s32 ret_val;
 
@@ -2177,7 +2172,7 @@ out:
  *
  *  Writes data to MDI control register in the PHY at offset.
  **/
-static s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
+s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
 {
        s32 ret_val;
 
index a61ee9462dd48583129d839c79a9628d8480d780..c3c598c347a951455d9e2626e023159b81f68136 100644 (file)
 
 /* Intel i347-AT4 Registers */
 
-#define I347AT4_PCDL                   0x10 /* PHY Cable Diagnostics Length */
+#define I347AT4_PCDL0                  0x10 /* Pair 0 PHY Cable Diagnostics Length */
+#define I347AT4_PCDL1                  0x11 /* Pair 1 PHY Cable Diagnostics Length */
+#define I347AT4_PCDL2                  0x12 /* Pair 2 PHY Cable Diagnostics Length */
+#define I347AT4_PCDL3                  0x13 /* Pair 3 PHY Cable Diagnostics Length */
 #define I347AT4_PCDC                   0x15 /* PHY Cable Diagnostics Control */
 #define I347AT4_PAGE_SELECT            0x16
 
index 2003b3756ba2d7321f8735b302f467852ed85fdc..4034207eb5ccb434a9ae4af124b10223f9c71395 100644 (file)
@@ -441,6 +441,7 @@ struct e1000_phy_info {
        u16 cable_length;
        u16 max_cable_length;
        u16 min_cable_length;
+       u16 pair_length[4];
 
        u8 mdix;
 
index 29f59c76878a59a61e4e7d61a084b25db33e8726..8aa798737d4d392f4e240948755daa0fe5decca2 100644 (file)
@@ -861,10 +861,10 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
        if (ret_val)
                nvm_word = E1000_INVM_DEFAULT_AL;
        tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL;
+       igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE);
        for (i = 0; i < E1000_MAX_PLL_TRIES; i++) {
                /* check current state directly from internal PHY */
-               igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE |
-                                        E1000_PHY_PLL_FREQ_REG), &phy_word);
+               igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word);
                if ((phy_word & E1000_PHY_PLL_UNCONF)
                    != E1000_PHY_PLL_UNCONF) {
                        ret_val = 0;
@@ -896,6 +896,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
                /* restore WUC register */
                wr32(E1000_WUC, wuc);
        }
+       igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0);
        /* restore MDICNFG setting */
        wr32(E1000_MDICNFG, mdicnfg);
        return ret_val;
index eaa68a50cb3b7e7bda4db83c666725b224b80bf0..b2964a2a60b1ac2df4cb4465c47ea7c80b018353 100644 (file)
@@ -85,7 +85,7 @@ enum E1000_INVM_STRUCTURE_TYPE {
 #define E1000_PCI_PMCSR_D3             0x03
 #define E1000_MAX_PLL_TRIES            5
 #define E1000_PHY_PLL_UNCONF           0xFF
-#define E1000_PHY_PLL_FREQ_PAGE                0xFC0000
+#define E1000_PHY_PLL_FREQ_PAGE                0xFC
 #define E1000_PHY_PLL_FREQ_REG         0x000E
 #define E1000_INVM_DEFAULT_AL          0x202F
 #define E1000_INVM_AUTOLOAD            0x0A
index c0df40f2b29585fc52d2edc23ec10b4ed192e262..5b54254aed4f35e4529b7c33df8442ef32d8b0dc 100644 (file)
@@ -1717,59 +1717,76 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
        struct e1000_phy_info *phy = &hw->phy;
        s32 ret_val;
        u16 phy_data, phy_data2, index, default_page, is_cm;
+       int len_tot = 0;
+       u16 len_min;
+       u16 len_max;
 
        switch (hw->phy.id) {
+       case M88E1543_E_PHY_ID:
+       case M88E1512_E_PHY_ID:
+       case I347AT4_E_PHY_ID:
        case I210_I_PHY_ID:
-               /* Get cable length from PHY Cable Diagnostics Control Reg */
-               ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
-                                           (I347AT4_PCDL + phy->addr),
-                                           &phy_data);
+               /* Remember the original page select and set it to 7 */
+               ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+                                           &default_page);
                if (ret_val)
-                       return ret_val;
+                       goto out;
+
+               ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07);
+               if (ret_val)
+                       goto out;
 
                /* Check if the unit of cable length is meters or cm */
-               ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
-                                           I347AT4_PCDC, &phy_data2);
+               ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2);
                if (ret_val)
-                       return ret_val;
+                       goto out;
 
                is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
 
-               /* Populate the phy structure with cable length in meters */
-               phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
-               phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
-               phy->cable_length = phy_data / (is_cm ? 100 : 1);
-               break;
-       case M88E1543_E_PHY_ID:
-       case M88E1512_E_PHY_ID:
-       case I347AT4_E_PHY_ID:
-               /* Remember the original page select and set it to 7 */
-               ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
-                                           &default_page);
+               /* Get cable length from Pair 0 length Regs */
+               ret_val = phy->ops.read_reg(hw, I347AT4_PCDL0, &phy_data);
                if (ret_val)
                        goto out;
 
-               ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07);
+               phy->pair_length[0] = phy_data / (is_cm ? 100 : 1);
+               len_tot = phy->pair_length[0];
+               len_min = phy->pair_length[0];
+               len_max = phy->pair_length[0];
+
+               /* Get cable length from Pair 1 length Regs */
+               ret_val = phy->ops.read_reg(hw, I347AT4_PCDL1, &phy_data);
                if (ret_val)
                        goto out;
 
-               /* Get cable length from PHY Cable Diagnostics Control Reg */
-               ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr),
-                                           &phy_data);
+               phy->pair_length[1] = phy_data / (is_cm ? 100 : 1);
+               len_tot += phy->pair_length[1];
+               len_min = min(len_min, phy->pair_length[1]);
+               len_max = max(len_max, phy->pair_length[1]);
+
+               /* Get cable length from Pair 2 length Regs */
+               ret_val = phy->ops.read_reg(hw, I347AT4_PCDL2, &phy_data);
                if (ret_val)
                        goto out;
 
-               /* Check if the unit of cable length is meters or cm */
-               ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2);
+               phy->pair_length[2] = phy_data / (is_cm ? 100 : 1);
+               len_tot += phy->pair_length[2];
+               len_min = min(len_min, phy->pair_length[2]);
+               len_max = max(len_max, phy->pair_length[2]);
+
+               /* Get cable length from Pair 3 length Regs */
+               ret_val = phy->ops.read_reg(hw, I347AT4_PCDL3, &phy_data);
                if (ret_val)
                        goto out;
 
-               is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+               phy->pair_length[3] = phy_data / (is_cm ? 100 : 1);
+               len_tot += phy->pair_length[3];
+               len_min = min(len_min, phy->pair_length[3]);
+               len_max = max(len_max, phy->pair_length[3]);
 
                /* Populate the phy structure with cable length in meters */
-               phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
-               phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
-               phy->cable_length = phy_data / (is_cm ? 100 : 1);
+               phy->min_cable_length = len_min;
+               phy->max_cable_length = len_max;
+               phy->cable_length = len_tot / 4;
 
                /* Reset the page selec to its original value */
                ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
@@ -2587,66 +2604,6 @@ out:
        return ret_val;
 }
 
-/**
- *  igb_write_phy_reg_gs40g - Write GS40G PHY register
- *  @hw: pointer to the HW structure
- *  @offset: lower half is register offset to write to
- *     upper half is page to use.
- *  @data: data to write at register offset
- *
- *  Acquires semaphore, if necessary, then writes the data to PHY register
- *  at the offset.  Release any acquired semaphores before exiting.
- **/
-s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data)
-{
-       s32 ret_val;
-       u16 page = offset >> GS40G_PAGE_SHIFT;
-
-       offset = offset & GS40G_OFFSET_MASK;
-       ret_val = hw->phy.ops.acquire(hw);
-       if (ret_val)
-               return ret_val;
-
-       ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
-       if (ret_val)
-               goto release;
-       ret_val = igb_write_phy_reg_mdic(hw, offset, data);
-
-release:
-       hw->phy.ops.release(hw);
-       return ret_val;
-}
-
-/**
- *  igb_read_phy_reg_gs40g - Read GS40G  PHY register
- *  @hw: pointer to the HW structure
- *  @offset: lower half is register offset to read to
- *     upper half is page to use.
- *  @data: data to read at register offset
- *
- *  Acquires semaphore, if necessary, then reads the data in the PHY register
- *  at the offset.  Release any acquired semaphores before exiting.
- **/
-s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data)
-{
-       s32 ret_val;
-       u16 page = offset >> GS40G_PAGE_SHIFT;
-
-       offset = offset & GS40G_OFFSET_MASK;
-       ret_val = hw->phy.ops.acquire(hw);
-       if (ret_val)
-               return ret_val;
-
-       ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
-       if (ret_val)
-               goto release;
-       ret_val = igb_read_phy_reg_mdic(hw, offset, data);
-
-release:
-       hw->phy.ops.release(hw);
-       return ret_val;
-}
-
 /**
  *  igb_set_master_slave_mode - Setup PHY for Master/slave mode
  *  @hw: pointer to the HW structure
index aa1ae61a61d86378273d21f4b072df8e210652c8..969a6ddafa3bc294adf1387c8e498ed6d031e157 100644 (file)
@@ -72,8 +72,8 @@ s32  igb_copper_link_setup_82580(struct e1000_hw *hw);
 s32  igb_get_phy_info_82580(struct e1000_hw *hw);
 s32  igb_phy_force_speed_duplex_82580(struct e1000_hw *hw);
 s32  igb_get_cable_length_82580(struct e1000_hw *hw);
-s32  igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data);
-s32  igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data);
+s32  igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data);
 s32  igb_check_polarity_m88(struct e1000_hw *hw);
 
 /* IGP01E1000 Specific Registers */
@@ -144,17 +144,6 @@ s32  igb_check_polarity_m88(struct e1000_hw *hw);
 
 #define E1000_CABLE_LENGTH_UNDEFINED      0xFF
 
-/* GS40G - I210 PHY defines */
-#define GS40G_PAGE_SELECT              0x16
-#define GS40G_PAGE_SHIFT               16
-#define GS40G_OFFSET_MASK              0xFFFF
-#define GS40G_PAGE_2                   0x20000
-#define GS40G_MAC_REG2                 0x15
-#define GS40G_MAC_LB                   0x4140
-#define GS40G_MAC_SPEED_1G             0X0006
-#define GS40G_COPPER_SPEC              0x0010
-#define GS40G_LINE_LB                  0x4000
-
 /* SFP modules ID memory locations */
 #define E1000_SFF_IDENTIFIER_OFFSET    0x00
 #define E1000_SFF_IDENTIFIER_SFF       0x02
index 2529bc625de4532e78407500d4c43ba92714f06a..1d329f1d047be8604f0e11f33bb09a04d49cfd9d 100644 (file)
@@ -127,10 +127,20 @@ static const struct igb_stats igb_gstrings_net_stats[] = {
 #define IGB_STATS_LEN \
        (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN)
 
+enum igb_diagnostics_results {
+       TEST_REG = 0,
+       TEST_EEP,
+       TEST_IRQ,
+       TEST_LOOP,
+       TEST_LINK
+};
+
 static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
-       "Register test  (offline)", "Eeprom test    (offline)",
-       "Interrupt test (offline)", "Loopback test  (offline)",
-       "Link test   (on/offline)"
+       [TEST_REG]  = "Register test  (offline)",
+       [TEST_EEP]  = "Eeprom test    (offline)",
+       [TEST_IRQ]  = "Interrupt test (offline)",
+       [TEST_LOOP] = "Loopback test  (offline)",
+       [TEST_LINK] = "Link test   (on/offline)"
 };
 #define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
 
@@ -2002,7 +2012,7 @@ static void igb_diag_test(struct net_device *netdev,
                /* Link test performed before hardware reset so autoneg doesn't
                 * interfere with test result
                 */
-               if (igb_link_test(adapter, &data[4]))
+               if (igb_link_test(adapter, &data[TEST_LINK]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
 
                if (if_running)
@@ -2011,21 +2021,21 @@ static void igb_diag_test(struct net_device *netdev,
                else
                        igb_reset(adapter);
 
-               if (igb_reg_test(adapter, &data[0]))
+               if (igb_reg_test(adapter, &data[TEST_REG]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
 
                igb_reset(adapter);
-               if (igb_eeprom_test(adapter, &data[1]))
+               if (igb_eeprom_test(adapter, &data[TEST_EEP]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
 
                igb_reset(adapter);
-               if (igb_intr_test(adapter, &data[2]))
+               if (igb_intr_test(adapter, &data[TEST_IRQ]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
 
                igb_reset(adapter);
                /* power up link for loopback test */
                igb_power_up_link(adapter);
-               if (igb_loopback_test(adapter, &data[3]))
+               if (igb_loopback_test(adapter, &data[TEST_LOOP]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
 
                /* restore speed, duplex, autoneg settings */
@@ -2045,16 +2055,16 @@ static void igb_diag_test(struct net_device *netdev,
                dev_info(&adapter->pdev->dev, "online testing starting\n");
 
                /* PHY is powered down when interface is down */
-               if (if_running && igb_link_test(adapter, &data[4]))
+               if (if_running && igb_link_test(adapter, &data[TEST_LINK]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
                else
-                       data[4] = 0;
+                       data[TEST_LINK] = 0;
 
                /* Online tests aren't run; pass by default */
-               data[0] = 0;
-               data[1] = 0;
-               data[2] = 0;
-               data[3] = 0;
+               data[TEST_REG] = 0;
+               data[TEST_EEP] = 0;
+               data[TEST_IRQ] = 0;
+               data[TEST_LOOP] = 0;
 
                clear_bit(__IGB_TESTING, &adapter->state);
        }
index 7afde455326d741ad5959741519b9e22b7fac6d2..31e5f39428393818257853b44b0b4915f4817eac 100644 (file)
@@ -2379,8 +2379,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
 
        if (hw->mac.type >= e1000_82576) {
-               netdev->hw_features |= NETIF_F_SCTP_CSUM;
-               netdev->features |= NETIF_F_SCTP_CSUM;
+               netdev->hw_features |= NETIF_F_SCTP_CRC;
+               netdev->features |= NETIF_F_SCTP_CRC;
        }
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
index 66c64a3767198d78f49efe2b52bf03a99de0dc04..fca35aa90d0ff54c0ded4f751c82405f40ea6970 100644 (file)
@@ -8598,7 +8598,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 
        if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) >
                     IXGBE_MAX_TUNNEL_HDR_LEN))
-               return features & ~NETIF_F_ALL_CSUM;
+               return features & ~NETIF_F_CSUM_MASK;
 
        return features;
 }
@@ -8995,8 +8995,8 @@ skip_sriov:
        case ixgbe_mac_X540:
        case ixgbe_mac_X550:
        case ixgbe_mac_X550EM_x:
-               netdev->features |= NETIF_F_SCTP_CSUM;
-               netdev->hw_features |= NETIF_F_SCTP_CSUM |
+               netdev->features |= NETIF_F_SCTP_CRC;
+               netdev->hw_features |= NETIF_F_SCTP_CRC |
                                       NETIF_F_NTUPLE;
                break;
        default:
index 060dd39229747c4b1f43e4f9f374777bea4d95ec..b1de7afd41166cc9ef67ed7e43191aaff897ec4e 100644 (file)
@@ -2753,7 +2753,7 @@ static netdev_features_t
 jme_fix_features(struct net_device *netdev, netdev_features_t features)
 {
        if (netdev->mtu > 1900)
-               features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM);
+               features &= ~(NETIF_F_ALL_TSO | NETIF_F_CSUM_MASK);
        return features;
 }
 
index 5606a043063e3a83c5532ea0ebccbaf6fca77259..ec0a22119e09e2a49b54c9a23995e200a2d780a9 100644 (file)
@@ -4380,7 +4380,7 @@ static netdev_features_t sky2_fix_features(struct net_device *dev,
         */
        if (dev->mtu > ETH_DATA_LEN && hw->chip_id == CHIP_ID_YUKON_EC_U) {
                netdev_info(dev, "checksum offload not possible with jumbo frames\n");
-               features &= ~(NETIF_F_TSO|NETIF_F_SG|NETIF_F_ALL_CSUM);
+               features &= ~(NETIF_F_TSO | NETIF_F_SG | NETIF_F_CSUM_MASK);
        }
 
        /* Some hardware requires receive checksum for RSS to work. */
index ec8caf8fedc63b728f810ce0de9ab7a0d4e8d4ec..ce26adcb4988171fc5e1c81712fb5c6055fe689c 100644 (file)
@@ -41,7 +41,7 @@ config MLXSW_SWITCHX2
 
 config MLXSW_SPECTRUM
        tristate "Mellanox Technologies Spectrum support"
-       depends on MLXSW_CORE && NET_SWITCHDEV
+       depends on MLXSW_CORE && NET_SWITCHDEV && VLAN_8021Q
        default m
        ---help---
          This driver supports Mellanox Technologies Spectrum Ethernet
index af631df4603a5f7f3199a556080fb1b2b554e794..66d851d4dfb4daf473f6fd3a45cf797ac84834c6 100644 (file)
@@ -396,7 +396,7 @@ static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index,
 
 static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
                                         enum mlxsw_reg_sfd_rec_policy policy,
-                                        const char *mac, u16 vid,
+                                        const char *mac, u16 fid_vid,
                                         enum mlxsw_reg_sfd_rec_action action,
                                         u8 local_port)
 {
@@ -404,16 +404,16 @@ static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
                               MLXSW_REG_SFD_REC_TYPE_UNICAST,
                               policy, mac, action);
        mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0);
-       mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid);
+       mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, fid_vid);
        mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port);
 }
 
 static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index,
-                                          char *mac, u16 *p_vid,
+                                          char *mac, u16 *p_fid_vid,
                                           u8 *p_local_port)
 {
        mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac);
-       *p_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index);
+       *p_fid_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index);
        *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index);
 }
 
@@ -438,6 +438,13 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
 MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
                     MLXSW_REG_SFD_REC_LEN, 0x08, false);
 
+/* reg_sfd_uc_lag_lag_vid
+ * Indicates VID in case of vFIDs. Reserved for FIDs.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12,
+                    MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
 /* reg_sfd_uc_lag_lag_id
  * LAG Identifier - pointer into the LAG descriptor table.
  * Access: RW
@@ -448,15 +455,16 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10,
 static inline void
 mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index,
                          enum mlxsw_reg_sfd_rec_policy policy,
-                         const char *mac, u16 vid,
-                         enum mlxsw_reg_sfd_rec_action action,
+                         const char *mac, u16 fid_vid,
+                         enum mlxsw_reg_sfd_rec_action action, u16 lag_vid,
                          u16 lag_id)
 {
        mlxsw_reg_sfd_rec_pack(payload, rec_index,
                               MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG,
                               policy, mac, action);
        mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0);
-       mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid);
+       mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, fid_vid);
+       mlxsw_reg_sfd_uc_lag_lag_vid_set(payload, rec_index, lag_vid);
        mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id);
 }
 
index 322ed544348f09a75073400ec5eefbc5f6b4270f..c588c65e91f516ee44c6b59d5c3c7bfe71b77acc 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
 #include <linux/bitops.h>
+#include <linux/list.h>
 #include <net/switchdev.h>
 #include <generated/utsrelease.h>
 
@@ -186,33 +187,6 @@ static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port,
        return 0;
 }
 
-static int mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid)
-{
-       char sfmr_pl[MLXSW_REG_SFMR_LEN];
-       int err;
-
-       mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID,
-                           MLXSW_SP_VFID_BASE + vfid, 0);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
-
-       if (err)
-               return err;
-
-       set_bit(vfid, mlxsw_sp->active_vfids);
-       return 0;
-}
-
-static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid)
-{
-       char sfmr_pl[MLXSW_REG_SFMR_LEN];
-
-       clear_bit(vfid, mlxsw_sp->active_vfids);
-
-       mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID,
-                           MLXSW_SP_VFID_BASE + vfid, 0);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
-}
-
 static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                      unsigned char *addr)
 {
@@ -549,12 +523,132 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
        return 0;
 }
 
+static struct mlxsw_sp_vfid *
+mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid)
+{
+       struct mlxsw_sp_vfid *vfid;
+
+       list_for_each_entry(vfid, &mlxsw_sp->port_vfids.list, list) {
+               if (vfid->vid == vid)
+                       return vfid;
+       }
+
+       return NULL;
+}
+
+static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp)
+{
+       return find_first_zero_bit(mlxsw_sp->port_vfids.mapped,
+                                  MLXSW_SP_VFID_PORT_MAX);
+}
+
+static int __mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid)
+{
+       u16 fid = mlxsw_sp_vfid_to_fid(vfid);
+       char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+       mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, 0);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
+static void __mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid)
+{
+       u16 fid = mlxsw_sp_vfid_to_fid(vfid);
+       char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+       mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, fid, 0);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
+static struct mlxsw_sp_vfid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp,
+                                                 u16 vid)
+{
+       struct device *dev = mlxsw_sp->bus_info->dev;
+       struct mlxsw_sp_vfid *vfid;
+       u16 n_vfid;
+       int err;
+
+       n_vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp);
+       if (n_vfid == MLXSW_SP_VFID_PORT_MAX) {
+               dev_err(dev, "No available vFIDs\n");
+               return ERR_PTR(-ERANGE);
+       }
+
+       err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid);
+       if (err) {
+               dev_err(dev, "Failed to create vFID=%d\n", n_vfid);
+               return ERR_PTR(err);
+       }
+
+       vfid = kzalloc(sizeof(*vfid), GFP_KERNEL);
+       if (!vfid)
+               goto err_allocate_vfid;
+
+       vfid->vfid = n_vfid;
+       vfid->vid = vid;
+
+       list_add(&vfid->list, &mlxsw_sp->port_vfids.list);
+       set_bit(n_vfid, mlxsw_sp->port_vfids.mapped);
+
+       return vfid;
+
+err_allocate_vfid:
+       __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid);
+       return ERR_PTR(-ENOMEM);
+}
+
+static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_vfid *vfid)
+{
+       clear_bit(vfid->vfid, mlxsw_sp->port_vfids.mapped);
+       list_del(&vfid->list);
+
+       __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid);
+
+       kfree(vfid);
+}
+
+static struct mlxsw_sp_port *
+mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port,
+                          struct mlxsw_sp_vfid *vfid)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       mlxsw_sp_vport = kzalloc(sizeof(*mlxsw_sp_vport), GFP_KERNEL);
+       if (!mlxsw_sp_vport)
+               return NULL;
+
+       /* dev will be set correctly after the VLAN device is linked
+        * with the real device. In case of bridge SELF invocation, dev
+        * will remain as is.
+        */
+       mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
+       mlxsw_sp_vport->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       mlxsw_sp_vport->local_port = mlxsw_sp_port->local_port;
+       mlxsw_sp_vport->stp_state = BR_STATE_FORWARDING;
+       mlxsw_sp_vport->lagged = mlxsw_sp_port->lagged;
+       mlxsw_sp_vport->lag_id = mlxsw_sp_port->lag_id;
+       mlxsw_sp_vport->vport.vfid = vfid;
+       mlxsw_sp_vport->vport.vid = vfid->vid;
+
+       list_add(&mlxsw_sp_vport->vport.list, &mlxsw_sp_port->vports_list);
+
+       return mlxsw_sp_vport;
+}
+
+static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       list_del(&mlxsw_sp_vport->vport.list);
+       kfree(mlxsw_sp_vport);
+}
+
 int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
                          u16 vid)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       char *sftr_pl;
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+       struct mlxsw_sp_vfid *vfid;
        int err;
 
        /* VLAN 0 is added to HW filter when device goes up, but it is
@@ -563,100 +657,105 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
        if (!vid)
                return 0;
 
-       if (test_bit(vid, mlxsw_sp_port->active_vfids)) {
+       if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) {
                netdev_warn(dev, "VID=%d already configured\n", vid);
                return 0;
        }
 
-       if (!test_bit(vid, mlxsw_sp->active_vfids)) {
-               err = mlxsw_sp_vfid_create(mlxsw_sp, vid);
-               if (err) {
-                       netdev_err(dev, "Failed to create vFID=%d\n",
-                                  MLXSW_SP_VFID_BASE + vid);
-                       return err;
+       vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid);
+       if (!vfid) {
+               vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid);
+               if (IS_ERR(vfid)) {
+                       netdev_err(dev, "Failed to create vFID for VID=%d\n",
+                                  vid);
+                       return PTR_ERR(vfid);
                }
+       }
 
-               sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
-               if (!sftr_pl) {
-                       err = -ENOMEM;
-                       goto err_flood_table_alloc;
-               }
-               mlxsw_reg_sftr_pack(sftr_pl, 0, vid,
-                                   MLXSW_REG_SFGC_TABLE_TYPE_FID, 0,
-                                   MLXSW_PORT_CPU_PORT, true);
-               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
-               kfree(sftr_pl);
+       mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vfid);
+       if (!mlxsw_sp_vport) {
+               netdev_err(dev, "Failed to create vPort for VID=%d\n", vid);
+               err = -ENOMEM;
+               goto err_port_vport_create;
+       }
+
+       if (!vfid->nr_vports) {
+               err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid,
+                                              true, false);
                if (err) {
-                       netdev_err(dev, "Failed to configure flood table\n");
-                       goto err_flood_table_config;
+                       netdev_err(dev, "Failed to setup flooding for vFID=%d\n",
+                                  vfid->vfid);
+                       goto err_vport_flood_set;
                }
        }
 
-       /* In case we fail in the following steps, we intentionally do not
-        * destroy the associated vFID.
-        */
-
        /* When adding the first VLAN interface on a bridged port we need to
         * transition all the active 802.1Q bridge VLANs to use explicit
         * {Port, VID} to FID mappings and set the port's mode to Virtual mode.
         */
-       if (!mlxsw_sp_port->nr_vfids) {
+       if (list_is_singular(&mlxsw_sp_port->vports_list)) {
                err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
                if (err) {
                        netdev_err(dev, "Failed to set to Virtual mode\n");
-                       return err;
+                       goto err_port_vp_mode_trans;
                }
        }
 
-       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port,
+       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
                                           MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
-                                          true, MLXSW_SP_VFID_BASE + vid, vid);
+                                          true,
+                                          mlxsw_sp_vfid_to_fid(vfid->vfid),
+                                          vid);
        if (err) {
                netdev_err(dev, "Failed to map {Port, VID=%d} to vFID=%d\n",
-                          vid, MLXSW_SP_VFID_BASE + vid);
+                          vid, vfid->vfid);
                goto err_port_vid_to_fid_set;
        }
 
-       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
+       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
        if (err) {
                netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
                goto err_port_vid_learning_set;
        }
 
-       err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, false);
+       err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false);
        if (err) {
                netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
                           vid);
                goto err_port_add_vid;
        }
 
-       err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid,
+       err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
                                          MLXSW_REG_SPMS_STATE_FORWARDING);
        if (err) {
                netdev_err(dev, "Failed to set STP state for VID=%d\n", vid);
                goto err_port_stp_state_set;
        }
 
-       mlxsw_sp_port->nr_vfids++;
-       set_bit(vid, mlxsw_sp_port->active_vfids);
+       vfid->nr_vports++;
 
        return 0;
 
-err_flood_table_config:
-err_flood_table_alloc:
-       mlxsw_sp_vfid_destroy(mlxsw_sp, vid);
-       return err;
-
 err_port_stp_state_set:
-       mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
+       mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 err_port_add_vid:
-       mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
+       mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
 err_port_vid_learning_set:
-       mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port,
+       mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
                                     MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false,
-                                    MLXSW_SP_VFID_BASE + vid, vid);
+                                    mlxsw_sp_vfid_to_fid(vfid->vfid), vid);
 err_port_vid_to_fid_set:
-       mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
+       if (list_is_singular(&mlxsw_sp_port->vports_list))
+               mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
+err_port_vp_mode_trans:
+       if (!vfid->nr_vports)
+               mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false,
+                                        false);
+err_vport_flood_set:
+       mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
+err_port_vport_create:
+       if (!vfid->nr_vports)
+               mlxsw_sp_vfid_destroy(mlxsw_sp, vfid);
        return err;
 }
 
@@ -664,6 +763,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
                           __be16 __always_unused proto, u16 vid)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+       struct mlxsw_sp_vfid *vfid;
        int err;
 
        /* VLAN 0 is removed from HW filter when device goes down, but
@@ -672,38 +773,42 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
        if (!vid)
                return 0;
 
-       if (!test_bit(vid, mlxsw_sp_port->active_vfids)) {
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+       if (!mlxsw_sp_vport) {
                netdev_warn(dev, "VID=%d does not exist\n", vid);
                return 0;
        }
 
-       err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid,
+       vfid = mlxsw_sp_vport->vport.vfid;
+
+       err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
                                          MLXSW_REG_SPMS_STATE_DISCARDING);
        if (err) {
                netdev_err(dev, "Failed to set STP state for VID=%d\n", vid);
                return err;
        }
 
-       err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
+       err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
        if (err) {
                netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
                           vid);
                return err;
        }
 
-       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
+       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
        if (err) {
                netdev_err(dev, "Failed to enable learning for VID=%d\n", vid);
                return err;
        }
 
-       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port,
+       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
                                           MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
-                                          false, MLXSW_SP_VFID_BASE + vid,
+                                          false,
+                                          mlxsw_sp_vfid_to_fid(vfid->vfid),
                                           vid);
        if (err) {
                netdev_err(dev, "Failed to invalidate {Port, VID=%d} to vFID=%d mapping\n",
-                          vid, MLXSW_SP_VFID_BASE + vid);
+                          vid, vfid->vfid);
                return err;
        }
 
@@ -711,7 +816,7 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
         * transition all active 802.1Q bridge VLANs to use VID to FID
         * mappings and set port's mode to VLAN mode.
         */
-       if (mlxsw_sp_port->nr_vfids == 1) {
+       if (list_is_singular(&mlxsw_sp_port->vports_list)) {
                err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
                if (err) {
                        netdev_err(dev, "Failed to set to VLAN mode\n");
@@ -719,8 +824,12 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
                }
        }
 
-       mlxsw_sp_port->nr_vfids--;
-       clear_bit(vid, mlxsw_sp_port->active_vfids);
+       vfid->nr_vports--;
+       mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
+
+       /* Destroy the vFID if no vPorts are assigned to it anymore. */
+       if (!vfid->nr_vports)
+               mlxsw_sp_vfid_destroy(mlxsw_sp_port->mlxsw_sp, vfid);
 
        return 0;
 }
@@ -1245,6 +1354,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port)
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct net_device *dev;
        bool usable;
+       size_t bytes;
        int err;
 
        dev = alloc_etherdev(sizeof(struct mlxsw_sp_port));
@@ -1258,6 +1368,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port)
        mlxsw_sp_port->learning_sync = 1;
        mlxsw_sp_port->uc_flood = 1;
        mlxsw_sp_port->pvid = 1;
+       bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE);
+       mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL);
+       if (!mlxsw_sp_port->active_vlans) {
+               err = -ENOMEM;
+               goto err_port_active_vlans_alloc;
+       }
+       INIT_LIST_HEAD(&mlxsw_sp_port->vports_list);
 
        mlxsw_sp_port->pcpu_stats =
                netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats);
@@ -1359,16 +1476,27 @@ err_port_module_check:
 err_dev_addr_init:
        free_percpu(mlxsw_sp_port->pcpu_stats);
 err_alloc_stats:
+       kfree(mlxsw_sp_port->active_vlans);
+err_port_active_vlans_alloc:
        free_netdev(dev);
        return err;
 }
 
-static void mlxsw_sp_vfids_fini(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-       u16 vfid;
+       struct net_device *dev = mlxsw_sp_port->dev;
+       struct mlxsw_sp_port *mlxsw_sp_vport, *tmp;
 
-       for_each_set_bit(vfid, mlxsw_sp->active_vfids, VLAN_N_VID)
-               mlxsw_sp_vfid_destroy(mlxsw_sp, vfid);
+       list_for_each_entry_safe(mlxsw_sp_vport, tmp,
+                                &mlxsw_sp_port->vports_list, vport.list) {
+               u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+
+               /* vPorts created for VLAN devices should already be gone
+                * by now, since we unregistered the port netdev.
+                */
+               WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev));
+               mlxsw_sp_port_kill_vid(dev, 0, vid);
+       }
 }
 
 static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
@@ -1377,10 +1505,11 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 
        if (!mlxsw_sp_port)
                return;
-       mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
        unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
+       mlxsw_sp_port_vports_fini(mlxsw_sp_port);
        mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
        free_percpu(mlxsw_sp_port->pcpu_stats);
+       kfree(mlxsw_sp_port->active_vlans);
        free_netdev(mlxsw_sp_port->dev);
 }
 
@@ -1662,16 +1791,15 @@ static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core,
        enum mlxsw_sp_flood_table flood_table;
        char sfgc_pl[MLXSW_REG_SFGC_LEN];
 
-       if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) {
+       if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID)
                table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID;
-               flood_table = 0;
-       } else {
+       else
                table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
-               if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST)
-                       flood_table = MLXSW_SP_FLOOD_TABLE_UC;
-               else
-                       flood_table = MLXSW_SP_FLOOD_TABLE_BM;
-       }
+
+       if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST)
+               flood_table = MLXSW_SP_FLOOD_TABLE_UC;
+       else
+               flood_table = MLXSW_SP_FLOOD_TABLE_BM;
 
        mlxsw_reg_sfgc_pack(sfgc_pl, type, bridge_type, table_type,
                            flood_table);
@@ -1682,9 +1810,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp)
 {
        int type, err;
 
-       /* For non-offloaded netdevs, flood all traffic types to CPU
-        * port.
-        */
        for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) {
                if (type == MLXSW_REG_SFGC_TYPE_RESERVED)
                        continue;
@@ -1693,15 +1818,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp)
                                            MLXSW_REG_SFGC_BRIDGE_TYPE_VFID);
                if (err)
                        return err;
-       }
-
-       /* For bridged ports, use one flooding table for unknown unicast
-        * traffic and a second table for unregistered multicast and
-        * broadcast.
-        */
-       for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) {
-               if (type == MLXSW_REG_SFGC_TYPE_RESERVED)
-                       continue;
 
                err = __mlxsw_sp_flood_init(mlxsw_sp->core, type,
                                            MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID);
@@ -1736,6 +1852,8 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
 
        mlxsw_sp->core = mlxsw_core;
        mlxsw_sp->bus_info = mlxsw_bus_info;
+       INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list);
+       INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list);
 
        err = mlxsw_sp_base_mac_get(mlxsw_sp);
        if (err) {
@@ -1746,7 +1864,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
        err = mlxsw_sp_ports_create(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
-               goto err_ports_create;
+               return err;
        }
 
        err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
@@ -1796,8 +1914,6 @@ err_rx_listener_register:
        mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
 err_event_register:
        mlxsw_sp_ports_remove(mlxsw_sp);
-err_ports_create:
-       mlxsw_sp_vfids_fini(mlxsw_sp);
        return err;
 }
 
@@ -1809,7 +1925,6 @@ static void mlxsw_sp_fini(void *priv)
        mlxsw_sp_traps_fini(mlxsw_sp);
        mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
        mlxsw_sp_ports_remove(mlxsw_sp);
-       mlxsw_sp_vfids_fini(mlxsw_sp);
 }
 
 static struct mlxsw_config_profile mlxsw_sp_config_profile = {
@@ -1834,8 +1949,8 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
        .flood_mode                     = 3,
        .max_fid_offset_flood_tables    = 2,
        .fid_offset_flood_table_size    = VLAN_N_VID - 1,
-       .max_fid_flood_tables           = 1,
-       .fid_flood_table_size           = VLAN_N_VID,
+       .max_fid_flood_tables           = 2,
+       .fid_flood_table_size           = MLXSW_SP_VFID_MAX,
        .used_max_ib_mc                 = 1,
        .max_ib_mc                      = 0,
        .used_max_pkey                  = 1,
@@ -2147,6 +2262,54 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
 }
 
+static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                      struct net_device *br_dev);
+
+static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port,
+                                  struct net_device *vlan_dev)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+       u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+       if (!mlxsw_sp_vport) {
+               WARN_ON(!mlxsw_sp_vport);
+               return -EINVAL;
+       }
+
+       mlxsw_sp_vport->dev = vlan_dev;
+
+       return 0;
+}
+
+static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port,
+                                    struct net_device *vlan_dev)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+       u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+       if (!mlxsw_sp_vport) {
+               WARN_ON(!mlxsw_sp_vport);
+               return -EINVAL;
+       }
+
+       /* When removing a VLAN device while still bridged we should first
+        * remove it from the bridge, as we receive the bridge's notification
+        * when the vPort is already gone.
+        */
+       if (mlxsw_sp_vport->bridged) {
+               struct net_device *br_dev;
+
+               br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev);
+       }
+
+       mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
+
+       return 0;
+}
+
 static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                                               unsigned long event, void *ptr)
 {
@@ -2176,9 +2339,23 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                break;
        case NETDEV_CHANGEUPPER:
                upper_dev = info->upper_dev;
-               if (!info->master)
-                       break;
-               if (netif_is_bridge_master(upper_dev)) {
+               if (is_vlan_dev(upper_dev)) {
+                       if (info->linking) {
+                               err = mlxsw_sp_port_vlan_link(mlxsw_sp_port,
+                                                             upper_dev);
+                               if (err) {
+                                       netdev_err(dev, "Failed to link VLAN device\n");
+                                       return NOTIFY_BAD;
+                               }
+                       } else {
+                               err = mlxsw_sp_port_vlan_unlink(mlxsw_sp_port,
+                                                               upper_dev);
+                               if (err) {
+                                       netdev_err(dev, "Failed to unlink VLAN device\n");
+                                       return NOTIFY_BAD;
+                               }
+                       }
+               } else if (netif_is_bridge_master(upper_dev)) {
                        if (info->linking) {
                                err = mlxsw_sp_port_bridge_join(mlxsw_sp_port);
                                if (err)
@@ -2271,6 +2448,370 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
        return NOTIFY_DONE;
 }
 
+static struct mlxsw_sp_vfid *
+mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp,
+                     const struct net_device *br_dev)
+{
+       struct mlxsw_sp_vfid *vfid;
+
+       list_for_each_entry(vfid, &mlxsw_sp->br_vfids.list, list) {
+               if (vfid->br_dev == br_dev)
+                       return vfid;
+       }
+
+       return NULL;
+}
+
+static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid)
+{
+       return vfid - MLXSW_SP_VFID_PORT_MAX;
+}
+
+static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid)
+{
+       return MLXSW_SP_VFID_PORT_MAX + br_vfid;
+}
+
+static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp)
+{
+       return find_first_zero_bit(mlxsw_sp->br_vfids.mapped,
+                                  MLXSW_SP_VFID_BR_MAX);
+}
+
+static struct mlxsw_sp_vfid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp,
+                                                    struct net_device *br_dev)
+{
+       struct device *dev = mlxsw_sp->bus_info->dev;
+       struct mlxsw_sp_vfid *vfid;
+       u16 n_vfid;
+       int err;
+
+       n_vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp));
+       if (n_vfid == MLXSW_SP_VFID_MAX) {
+               dev_err(dev, "No available vFIDs\n");
+               return ERR_PTR(-ERANGE);
+       }
+
+       err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid);
+       if (err) {
+               dev_err(dev, "Failed to create vFID=%d\n", n_vfid);
+               return ERR_PTR(err);
+       }
+
+       vfid = kzalloc(sizeof(*vfid), GFP_KERNEL);
+       if (!vfid)
+               goto err_allocate_vfid;
+
+       vfid->vfid = n_vfid;
+       vfid->br_dev = br_dev;
+
+       list_add(&vfid->list, &mlxsw_sp->br_vfids.list);
+       set_bit(mlxsw_sp_vfid_to_br_vfid(n_vfid), mlxsw_sp->br_vfids.mapped);
+
+       return vfid;
+
+err_allocate_vfid:
+       __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid);
+       return ERR_PTR(-ENOMEM);
+}
+
+static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp_vfid *vfid)
+{
+       u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid->vfid);
+
+       clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped);
+       list_del(&vfid->list);
+
+       __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid);
+
+       kfree(vfid);
+}
+
+static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                      struct net_device *br_dev)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+       u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+       struct net_device *dev = mlxsw_sp_vport->dev;
+       struct mlxsw_sp_vfid *vfid, *new_vfid;
+       int err;
+
+       vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev);
+       if (!vfid) {
+               WARN_ON(!vfid);
+               return -EINVAL;
+       }
+
+       /* We need a vFID to go back to after leaving the bridge's vFID. */
+       new_vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid);
+       if (!new_vfid) {
+               new_vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid);
+               if (IS_ERR(new_vfid)) {
+                       netdev_err(dev, "Failed to create vFID for VID=%d\n",
+                                  vid);
+                       return PTR_ERR(new_vfid);
+               }
+       }
+
+       /* Invalidate existing {Port, VID} to vFID mapping and create a new
+        * one for the new vFID.
+        */
+       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
+                                          MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
+                                          false,
+                                          mlxsw_sp_vfid_to_fid(vfid->vfid),
+                                          vid);
+       if (err) {
+               netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n",
+                          vfid->vfid);
+               goto err_port_vid_to_fid_invalidate;
+       }
+
+       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
+                                          MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
+                                          true,
+                                          mlxsw_sp_vfid_to_fid(new_vfid->vfid),
+                                          vid);
+       if (err) {
+               netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n",
+                          new_vfid->vfid);
+               goto err_port_vid_to_fid_validate;
+       }
+
+       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
+       if (err) {
+               netdev_err(dev, "Failed to disable learning\n");
+               goto err_port_vid_learning_set;
+       }
+
+       err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false,
+                                      false);
+       if (err) {
+               netdev_err(dev, "Failed clear to clear flooding\n");
+               goto err_vport_flood_set;
+       }
+
+       /* Switch between the vFIDs and destroy the old one if needed. */
+       new_vfid->nr_vports++;
+       mlxsw_sp_vport->vport.vfid = new_vfid;
+       vfid->nr_vports--;
+       if (!vfid->nr_vports)
+               mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid);
+
+       mlxsw_sp_vport->learning = 0;
+       mlxsw_sp_vport->learning_sync = 0;
+       mlxsw_sp_vport->uc_flood = 0;
+       mlxsw_sp_vport->bridged = 0;
+
+       return 0;
+
+err_vport_flood_set:
+err_port_vid_learning_set:
+err_port_vid_to_fid_validate:
+err_port_vid_to_fid_invalidate:
+       /* Rollback vFID only if new. */
+       if (!new_vfid->nr_vports)
+               mlxsw_sp_vfid_destroy(mlxsw_sp, new_vfid);
+       return err;
+}
+
+static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                     struct net_device *br_dev)
+{
+       struct mlxsw_sp_vfid *old_vfid = mlxsw_sp_vport->vport.vfid;
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+       u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+       struct net_device *dev = mlxsw_sp_vport->dev;
+       struct mlxsw_sp_vfid *vfid;
+       int err;
+
+       vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev);
+       if (!vfid) {
+               vfid = mlxsw_sp_br_vfid_create(mlxsw_sp, br_dev);
+               if (IS_ERR(vfid)) {
+                       netdev_err(dev, "Failed to create bridge vFID\n");
+                       return PTR_ERR(vfid);
+               }
+       }
+
+       err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, true, false);
+       if (err) {
+               netdev_err(dev, "Failed to setup flooding for vFID=%d\n",
+                          vfid->vfid);
+               goto err_port_flood_set;
+       }
+
+       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
+       if (err) {
+               netdev_err(dev, "Failed to enable learning\n");
+               goto err_port_vid_learning_set;
+       }
+
+       /* We need to invalidate existing {Port, VID} to vFID mapping and
+        * create a new one for the bridge's vFID.
+        */
+       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
+                                          MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
+                                          false,
+                                          mlxsw_sp_vfid_to_fid(old_vfid->vfid),
+                                          vid);
+       if (err) {
+               netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n",
+                          old_vfid->vfid);
+               goto err_port_vid_to_fid_invalidate;
+       }
+
+       err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
+                                          MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
+                                          true,
+                                          mlxsw_sp_vfid_to_fid(vfid->vfid),
+                                          vid);
+       if (err) {
+               netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n",
+                          vfid->vfid);
+               goto err_port_vid_to_fid_validate;
+       }
+
+       /* Switch between the vFIDs and destroy the old one if needed. */
+       vfid->nr_vports++;
+       mlxsw_sp_vport->vport.vfid = vfid;
+       old_vfid->nr_vports--;
+       if (!old_vfid->nr_vports)
+               mlxsw_sp_vfid_destroy(mlxsw_sp, old_vfid);
+
+       mlxsw_sp_vport->learning = 1;
+       mlxsw_sp_vport->learning_sync = 1;
+       mlxsw_sp_vport->uc_flood = 1;
+       mlxsw_sp_vport->bridged = 1;
+
+       return 0;
+
+err_port_vid_to_fid_validate:
+       mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport,
+                                    MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false,
+                                    mlxsw_sp_vfid_to_fid(old_vfid->vfid), vid);
+err_port_vid_to_fid_invalidate:
+       mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
+err_port_vid_learning_set:
+       mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, false);
+err_port_flood_set:
+       if (!vfid->nr_vports)
+               mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid);
+       return err;
+}
+
+/* Return true if the physical port may become a member of bridge
+ * @br_dev.  The check fails when any of the port's VLAN uppers
+ * (vPorts) is already a member of that same bridge, since a port and
+ * one of its VLAN interfaces must not be in one bridge together.
+ */
+static bool
+mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                 const struct net_device *br_dev)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+                           vport.list) {
+               if (mlxsw_sp_vport_br_get(mlxsw_sp_vport) == br_dev)
+                       return false;
+       }
+
+       return true;
+}
+
+/* Handle a netdev notifier event for the vPort identified by @vid on
+ * physical port @dev.  On PRECHANGEUPPER, invalid topologies are
+ * vetoed; on CHANGEUPPER, the vPort joins or leaves the bridge master
+ * it was linked to / unlinked from.
+ *
+ * Returns NOTIFY_BAD to veto or report failure, NOTIFY_DONE otherwise.
+ */
+static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
+                                         unsigned long event, void *ptr,
+                                         u16 vid)
+{
+       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       struct netdev_notifier_changeupper_info *info = ptr;
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+       struct net_device *upper_dev;
+       int err;
+
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+
+       switch (event) {
+       case NETDEV_PRECHANGEUPPER:
+               upper_dev = info->upper_dev;
+               if (!info->master || !info->linking)
+                       break;
+               /* Only bridge masters are supported as uppers of a vPort. */
+               if (!netif_is_bridge_master(upper_dev))
+                       return NOTIFY_BAD;
+               /* We can't have multiple VLAN interfaces configured on
+                * the same port and being members in the same bridge.
+                */
+               if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port,
+                                                      upper_dev))
+                       return NOTIFY_BAD;
+               break;
+       case NETDEV_CHANGEUPPER:
+               upper_dev = info->upper_dev;
+               if (!info->master)
+                       break;
+               if (info->linking) {
+                       /* The vPort is expected to exist by the time the
+                        * VLAN device is linked to a bridge - TODO confirm
+                        * where it is created relative to this notifier.
+                        */
+                       if (!mlxsw_sp_vport) {
+                               WARN_ON(!mlxsw_sp_vport);
+                               return NOTIFY_BAD;
+                       }
+                       err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport,
+                                                        upper_dev);
+                       if (err) {
+                               netdev_err(dev, "Failed to join bridge\n");
+                               return NOTIFY_BAD;
+                       }
+               } else {
+                       /* We ignore bridge's unlinking notifications if vPort
+                        * is gone, since we already left the bridge when the
+                        * VLAN device was unlinked from the real device.
+                        */
+                       if (!mlxsw_sp_vport)
+                               return NOTIFY_DONE;
+                       err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport,
+                                                         upper_dev);
+                       if (err) {
+                               netdev_err(dev, "Failed to leave bridge\n");
+                               return NOTIFY_BAD;
+                       }
+               }
+       }
+
+       return NOTIFY_DONE;
+}
+
+/* Replicate a VLAN-device notifier event to every mlxsw port that is a
+ * lower device of LAG device @lag_dev.  Propagation stops at the first
+ * NOTIFY_BAD; otherwise NOTIFY_DONE is returned.
+ */
+static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev,
+                                             unsigned long event, void *ptr,
+                                             u16 vid)
+{
+       struct net_device *dev;
+       struct list_head *iter;
+       int ret;
+
+       netdev_for_each_lower_dev(lag_dev, dev, iter) {
+               /* Skip lowers that are not mlxsw ports (e.g. other slaves). */
+               if (mlxsw_sp_port_dev_check(dev)) {
+                       ret = mlxsw_sp_netdevice_vport_event(dev, event, ptr,
+                                                            vid);
+                       if (ret == NOTIFY_BAD)
+                               return ret;
+               }
+       }
+
+       return NOTIFY_DONE;
+}
+
+/* Dispatch a notifier event for VLAN device @vlan_dev according to its
+ * real device: handled directly when the real device is an mlxsw port,
+ * fanned out over LAG members when it is a LAG master, ignored
+ * otherwise.
+ */
+static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
+                                        unsigned long event, void *ptr)
+{
+       struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
+       u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+       if (mlxsw_sp_port_dev_check(real_dev))
+               return mlxsw_sp_netdevice_vport_event(real_dev, event, ptr,
+                                                     vid);
+       else if (netif_is_lag_master(real_dev))
+               return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr,
+                                                         vid);
+
+       return NOTIFY_DONE;
+}
+
 static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
                                    unsigned long event, void *ptr)
 {
@@ -2282,6 +2823,9 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
        if (netif_is_lag_master(dev))
                return mlxsw_sp_netdevice_lag_event(dev, event, ptr);
 
+       if (is_vlan_dev(dev))
+               return mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
+
        return NOTIFY_DONE;
 }
 
index 48be5a63b9b5b25780e7709e24c8a4670348d34e..463ed6dcc7099f684f57b6d703688a7cff71b8dd 100644 (file)
 #include <linux/netdevice.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
+#include <linux/list.h>
 #include <net/switchdev.h>
 
 #include "core.h"
 
 #define MLXSW_SP_VFID_BASE VLAN_N_VID
+#define MLXSW_SP_VFID_PORT_MAX 512     /* Non-bridged VLAN interfaces */
+#define MLXSW_SP_VFID_BR_MAX 8192      /* Bridged VLAN interfaces */
+#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX)
+
 #define MLXSW_SP_LAG_MAX 64
 #define MLXSW_SP_PORT_PER_LAG_MAX 16
 
@@ -56,8 +61,38 @@ struct mlxsw_sp_upper {
        unsigned int ref_count;
 };
 
+/* A virtual FID (vFID), backing VLAN interfaces (vPorts): apparently
+ * one per VLAN interface in the non-bridged case and one shared per
+ * bridge device in the bridged case (see port_vfids / br_vfids in
+ * struct mlxsw_sp) - confirm against the vFID create/find helpers.
+ */
+struct mlxsw_sp_vfid {
+       struct list_head list;  /* Node in port_vfids.list or br_vfids.list */
+       u16 nr_vports;          /* Number of vPorts using this vFID */
+       u16 vfid;       /* Starting at 0 */
+       struct net_device *br_dev;      /* Bridge device; presumably NULL for
+                                        * non-bridged vFIDs - TODO confirm
+                                        */
+       u16 vid;        /* NOTE(review): VID the vFID was created for;
+                        * verify which users rely on it
+                        */
+};
+
+/* vFIDs are mapped into the device FID space right after the 4K VLAN
+ * FIDs (MLXSW_SP_VFID_BASE == VLAN_N_VID).
+ */
+static inline u16 mlxsw_sp_vfid_to_fid(u16 vfid)
+{
+       return MLXSW_SP_VFID_BASE + vfid;
+}
+
+/* Inverse of mlxsw_sp_vfid_to_fid(); only valid for FIDs in the vFID
+ * range (see mlxsw_sp_fid_is_vfid()).
+ */
+static inline u16 mlxsw_sp_fid_to_vfid(u16 fid)
+{
+       return fid - MLXSW_SP_VFID_BASE;
+}
+
+/* FIDs below MLXSW_SP_VFID_BASE are regular VLAN FIDs; anything at or
+ * above it belongs to the vFID range.
+ */
+static inline bool mlxsw_sp_fid_is_vfid(u16 fid)
+{
+       return fid >= MLXSW_SP_VFID_BASE;
+}
+
 struct mlxsw_sp {
-       unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)];
+       struct {
+               struct list_head list;
+               unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_PORT_MAX)];
+       } port_vfids;
+       struct {
+               struct list_head list;
+               unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_BR_MAX)];
+       } br_vfids;
        unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)];
        struct mlxsw_sp_port **ports;
        struct mlxsw_core *core;
@@ -102,11 +137,15 @@ struct mlxsw_sp_port {
           lagged:1;
        u16 pvid;
        u16 lag_id;
+       struct {
+               struct list_head list;
+               struct mlxsw_sp_vfid *vfid;
+               u16 vid;
+       } vport;
        /* 802.1Q bridge VLANs */
-       unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+       unsigned long *active_vlans;
        /* VLAN interfaces */
-       unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)];
-       u16 nr_vfids;
+       struct list_head vports_list;
 };
 
 static inline struct mlxsw_sp_port *
@@ -121,6 +160,59 @@ mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index)
        return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL;
 }
 
+/* A struct mlxsw_sp_port represents a vPort (a VLAN upper of a
+ * physical port) iff it has a vFID assigned.
+ */
+static inline bool
+mlxsw_sp_port_is_vport(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       return mlxsw_sp_port->vport.vfid;
+}
+
+/* Bridge device the vPort is a member of, via its vFID.  Presumably
+ * NULL when the vPort is not bridged - TODO confirm.
+ */
+static inline struct net_device *
+mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       return mlxsw_sp_vport->vport.vfid->br_dev;
+}
+
+/* VID of the VLAN interface this vPort represents. */
+static inline u16
+mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       return mlxsw_sp_vport->vport.vid;
+}
+
+/* vFID number (0-based, not a device FID) assigned to this vPort. */
+static inline u16
+mlxsw_sp_vport_vfid_get(const struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       return mlxsw_sp_vport->vport.vfid->vfid;
+}
+
+/* Linear walk of the port's vPort list looking for the vPort with the
+ * given VID; returns NULL if none exists.
+ */
+static inline struct mlxsw_sp_port *
+mlxsw_sp_port_vport_find(const struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+                           vport.list) {
+               if (mlxsw_sp_vport_vid_get(mlxsw_sp_vport) == vid)
+                       return mlxsw_sp_vport;
+       }
+
+       return NULL;
+}
+
+/* Like mlxsw_sp_port_vport_find(), but keyed by vFID instead of VID;
+ * returns NULL if no vPort on this port uses the given vFID.
+ */
+static inline struct mlxsw_sp_port *
+mlxsw_sp_port_vport_find_by_vfid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                u16 vfid)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+
+       list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+                           vport.list) {
+               if (mlxsw_sp_vport_vfid_get(mlxsw_sp_vport) == vfid)
+                       return mlxsw_sp_vport;
+       }
+
+       return NULL;
+}
+
 enum mlxsw_sp_flood_table {
        MLXSW_SP_FLOOD_TABLE_UC,
        MLXSW_SP_FLOOD_TABLE_BM,
@@ -143,5 +235,7 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
                          u16 vid);
 int mlxsw_sp_port_kill_vid(struct net_device *dev,
                           __be16 __always_unused proto, u16 vid);
+int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
+                            bool set, bool only_uc);
 
 #endif
index 406dab2f6b17cee9413f9f6682f585efe9b4eaab..9476ff9237ae93b576360e01bd3e9cb5be5979bc 100644 (file)
 #include "core.h"
 #include "reg.h"
 
+/* switchdev ops can be invoked with an orig_dev that is a VLAN upper
+ * of the port; in that case translate the physical port to the
+ * matching vPort.  May return NULL (after the WARN) if the vPort is
+ * missing - callers check for this and bail out with an error.
+ */
+static struct mlxsw_sp_port *
+mlxsw_sp_port_orig_get(struct net_device *dev,
+                      struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp_port *mlxsw_sp_vport;
+       u16 vid;
+
+       /* Non-VLAN devices need no translation. */
+       if (!is_vlan_dev(dev))
+               return mlxsw_sp_port;
+
+       vid = vlan_dev_vlan_id(dev);
+       mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+       WARN_ON(!mlxsw_sp_vport);
+
+       return mlxsw_sp_vport;
+}
+
 static int mlxsw_sp_port_attr_get(struct net_device *dev,
                                  struct switchdev_attr *attr)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 
+       mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port);
+       if (!mlxsw_sp_port)
+               return -EINVAL;
+
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
                attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac);
@@ -105,8 +126,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
        if (!spms_pl)
                return -ENOMEM;
        mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port);
-       for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
                mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state);
+       } else {
+               for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+                       mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state);
+       }
 
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl);
        kfree(spms_pl);
@@ -124,22 +151,38 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state);
 }
 
+/* The first MLXSW_SP_VFID_PORT_MAX vFIDs back non-bridged VLAN
+ * interfaces; vFIDs at or above that boundary back bridged ones
+ * (see the vFID range split in spectrum.h).
+ */
+static bool mlxsw_sp_vfid_is_vport_br(u16 vfid)
+{
+       return vfid >= MLXSW_SP_VFID_PORT_MAX;
+}
+
 static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
-                                    u16 fid_begin, u16 fid_end, bool set,
+                                    u16 idx_begin, u16 idx_end, bool set,
                                     bool only_uc)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       u16 range = fid_end - fid_begin + 1;
+       u16 local_port = mlxsw_sp_port->local_port;
+       enum mlxsw_flood_table_type table_type;
+       u16 range = idx_end - idx_begin + 1;
        char *sftr_pl;
        int err;
 
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID;
+               if (mlxsw_sp_vfid_is_vport_br(idx_begin))
+                       local_port = mlxsw_sp_port->local_port;
+               else
+                       local_port = MLXSW_PORT_CPU_PORT;
+       } else {
+               table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
+       }
+
        sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
        if (!sftr_pl)
                return -ENOMEM;
 
-       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, fid_begin,
-                           MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range,
-                           mlxsw_sp_port->local_port, set);
+       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
+                           table_type, range, local_port, set);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
        if (err)
                goto buffer_out;
@@ -150,9 +193,8 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
        if (only_uc)
                goto buffer_out;
 
-       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, fid_begin,
-                           MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range,
-                           mlxsw_sp_port->local_port, set);
+       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin,
+                           table_type, range, local_port, set);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
 
 buffer_out:
@@ -167,6 +209,13 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
        u16 vid, last_visited_vid;
        int err;
 
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
+
+               return  __mlxsw_sp_port_flood_set(mlxsw_sp_port, vfid, vfid,
+                                                 set, true);
+       }
+
        for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) {
                err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, set,
                                                true);
@@ -185,6 +234,16 @@ err_port_flood_set:
        return err;
 }
 
+/* Enable/disable flooding for a vPort's vFID.  When @only_uc is true
+ * only the unicast flood table is updated (the BM table is skipped by
+ * __mlxsw_sp_port_flood_set()).
+ */
+int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
+                            bool set, bool only_uc)
+{
+       /* In case of vFIDs, index into the flooding table is relative to
+        * the start of the vFIDs range.
+        */
+       return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set,
+                                        only_uc);
+}
+
 static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                           struct switchdev_trans *trans,
                                           unsigned long brport_flags)
@@ -244,6 +303,10 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        int err = 0;
 
+       mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port);
+       if (!mlxsw_sp_port)
+               return -EINVAL;
+
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
                err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans,
@@ -304,7 +367,7 @@ static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid)
 {
        enum mlxsw_reg_svfa_mt mt;
 
-       if (mlxsw_sp_port->nr_vfids)
+       if (!list_empty(&mlxsw_sp_port->vports_list))
                mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
        else
                mt = MLXSW_REG_SVFA_MT_VID_TO_FID;
@@ -316,7 +379,7 @@ static int mlxsw_sp_port_fid_unmap(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid)
 {
        enum mlxsw_reg_svfa_mt mt;
 
-       if (!mlxsw_sp_port->nr_vfids)
+       if (list_empty(&mlxsw_sp_port->vports_list))
                return 0;
 
        mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
@@ -503,7 +566,7 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
 }
 
 static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port,
-                                  const char *mac, u16 vid, bool adding,
+                                  const char *mac, u16 fid, bool adding,
                                   bool dynamic)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -516,7 +579,7 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port,
 
        mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
        mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
-                             mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP,
+                             mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
                              mlxsw_sp_port->local_port);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
        kfree(sfd_pl);
@@ -525,8 +588,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
-                                      const char *mac, u16 vid, bool adding,
-                                      bool dynamic)
+                                      const char *mac, u16 fid, u16 lag_vid,
+                                      bool adding, bool dynamic)
 {
        char *sfd_pl;
        int err;
@@ -537,8 +600,8 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
 
        mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
        mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
-                                 mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP,
-                                 lag_id);
+                                 mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
+                                 lag_vid, lag_id);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
        kfree(sfd_pl);
 
@@ -550,21 +613,30 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port,
                             const struct switchdev_obj_port_fdb *fdb,
                             struct switchdev_trans *trans)
 {
-       u16 vid = fdb->vid;
+       u16 fid = fdb->vid;
+       u16 lag_vid = 0;
 
        if (switchdev_trans_ph_prepare(trans))
                return 0;
 
-       if (!vid)
-               vid = mlxsw_sp_port->pvid;
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
+
+               fid = mlxsw_sp_vfid_to_fid(vfid);
+               lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+       }
+
+       if (!fid)
+               fid = mlxsw_sp_port->pvid;
 
        if (!mlxsw_sp_port->lagged)
                return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port,
-                                              fdb->addr, vid, true, false);
+                                              fdb->addr, fid, true, false);
        else
                return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp,
                                                   mlxsw_sp_port->lag_id,
-                                                  fdb->addr, vid, true, false);
+                                                  fdb->addr, fid, lag_vid,
+                                                  true, false);
 }
 
 static int mlxsw_sp_port_obj_add(struct net_device *dev,
@@ -574,8 +646,15 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        int err = 0;
 
+       mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port);
+       if (!mlxsw_sp_port)
+               return -EINVAL;
+
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               if (mlxsw_sp_port_is_vport(mlxsw_sp_port))
+                       return 0;
+
                err = mlxsw_sp_port_vlans_add(mlxsw_sp_port,
                                              SWITCHDEV_OBJ_PORT_VLAN(obj),
                                              trans);
@@ -679,14 +758,24 @@ static int
 mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
                             const struct switchdev_obj_port_fdb *fdb)
 {
+       u16 fid = fdb->vid;
+       u16 lag_vid = 0;
+
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
+
+               fid = mlxsw_sp_vfid_to_fid(vfid);
+               lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+       }
+
        if (!mlxsw_sp_port->lagged)
                return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port,
-                                              fdb->addr, fdb->vid,
+                                              fdb->addr, fid,
                                               false, false);
        else
                return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp,
                                                   mlxsw_sp_port->lag_id,
-                                                  fdb->addr, fdb->vid,
+                                                  fdb->addr, fid, lag_vid,
                                                   false, false);
 }
 
@@ -696,8 +785,15 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        int err = 0;
 
+       mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port);
+       if (!mlxsw_sp_port)
+               return -EINVAL;
+
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               if (mlxsw_sp_port_is_vport(mlxsw_sp_port))
+                       return 0;
+
                err = mlxsw_sp_port_vlans_del(mlxsw_sp_port,
                                              SWITCHDEV_OBJ_PORT_VLAN(obj));
                break;
@@ -732,9 +828,10 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                  switchdev_obj_dump_cb_t *cb)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       u16 vport_vid = 0, vport_fid = 0;
        char *sfd_pl;
        char mac[ETH_ALEN];
-       u16 vid;
+       u16 fid;
        u8 local_port;
        u16 lag_id;
        u8 num_rec;
@@ -746,6 +843,14 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        if (!sfd_pl)
                return -ENOMEM;
 
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               u16 tmp;
+
+               tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
+               vport_fid = mlxsw_sp_vfid_to_fid(tmp);
+               vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+       }
+
        mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
        do {
                mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT);
@@ -764,12 +869,17 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                for (i = 0; i < num_rec; i++) {
                        switch (mlxsw_reg_sfd_rec_type_get(sfd_pl, i)) {
                        case MLXSW_REG_SFD_REC_TYPE_UNICAST:
-                               mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &vid,
+                               mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid,
                                                        &local_port);
                                if (local_port == mlxsw_sp_port->local_port) {
+                                       if (vport_fid && vport_fid != fid)
+                                               continue;
+                                       else if (vport_fid)
+                                               fdb->vid = vport_vid;
+                                       else
+                                               fdb->vid = fid;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
-                                       fdb->vid = vid;
                                        err = cb(&fdb->obj);
                                        if (err)
                                                stored_err = err;
@@ -777,12 +887,17 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                break;
                        case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
                                mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
-                                                           mac, &vid, &lag_id);
+                                                           mac, &fid, &lag_id);
                                if (mlxsw_sp_port ==
                                    mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
+                                       if (vport_fid && vport_fid != fid)
+                                               continue;
+                                       else if (vport_fid)
+                                               fdb->vid = vport_vid;
+                                       else
+                                               fdb->vid = fid;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
-                                       fdb->vid = vid;
                                        err = cb(&fdb->obj);
                                        if (err)
                                                stored_err = err;
@@ -804,6 +919,13 @@ static int mlxsw_sp_port_vlan_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        u16 vid;
        int err = 0;
 
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               vlan->flags = 0;
+               vlan->vid_begin = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+               vlan->vid_end = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+               return cb(&vlan->obj);
+       }
+
        for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) {
                vlan->flags = 0;
                if (vid == mlxsw_sp_port->pvid)
@@ -824,6 +946,10 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        int err = 0;
 
+       mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port);
+       if (!mlxsw_sp_port)
+               return -EINVAL;
+
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                err = mlxsw_sp_port_vlan_dump(mlxsw_sp_port,
@@ -871,17 +997,35 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp_port *mlxsw_sp_port;
        char mac[ETH_ALEN];
        u8 local_port;
-       u16 vid;
+       u16 vid, fid;
        int err;
 
-       mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &vid, &local_port);
+       mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port);
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port) {
                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n");
                return;
        }
 
-       err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid,
+       if (mlxsw_sp_fid_is_vfid(fid)) {
+               u16 vfid = mlxsw_sp_fid_to_vfid(fid);
+               struct mlxsw_sp_port *mlxsw_sp_vport;
+
+               mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port,
+                                                                 vfid);
+               if (!mlxsw_sp_vport) {
+                       netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n");
+                       return;
+               }
+
+               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               /* Override the physical port with the vPort. */
+               mlxsw_sp_port = mlxsw_sp_vport;
+       } else {
+               vid = fid;
+       }
+
+       err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, fid,
                                      adding && mlxsw_sp_port->learning, true);
        if (err) {
                if (net_ratelimit())
@@ -900,18 +1044,38 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
        char mac[ETH_ALEN];
+       u16 lag_vid = 0;
        u16 lag_id;
-       u16 vid;
+       u16 vid, fid;
        int err;
 
-       mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id);
+       mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &fid, &lag_id);
        mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
        if (!mlxsw_sp_port) {
                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n");
                return;
        }
 
-       err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid,
+       if (mlxsw_sp_fid_is_vfid(fid)) {
+               u16 vfid = mlxsw_sp_fid_to_vfid(fid);
+               struct mlxsw_sp_port *mlxsw_sp_vport;
+
+               mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port,
+                                                                 vfid);
+               if (!mlxsw_sp_vport) {
+                       netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n");
+                       return;
+               }
+
+               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               lag_vid = vid;
+               /* Override the physical port with the vPort. */
+               mlxsw_sp_port = mlxsw_sp_vport;
+       } else {
+               vid = fid;
+       }
+
+       err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
                                          adding && mlxsw_sp_port->learning,
                                          true);
        if (err) {
index 038ac6b14a603994c69410d163d281448dccf2d6..7060539d276aa17a9538ebfec527d5d202645f7c 100644 (file)
@@ -2071,7 +2071,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
                l4_hdr = ipv6_hdr(skb)->nexthdr;
                break;
        default:
-               return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+               return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
        }
 
        if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
@@ -2080,7 +2080,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
            (l4_hdr == IPPROTO_UDP &&
             (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
              sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
-               return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+               return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 
        return features;
 }
index 08d4be61606480d645f5e772f998b335b508ed65..e097e6baaac431247981a5cce40989e86100a3fc 100644 (file)
@@ -500,7 +500,7 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter)
                val = XsumTX;
                pch_gbe_validate_option(&val, &opt, adapter);
                if (!val)
-                       dev->features &= ~NETIF_F_ALL_CSUM;
+                       dev->features &= ~NETIF_F_CSUM_MASK;
        }
        { /* Flow Control */
                static const struct pch_gbe_option opt = {
index 120cc2565d16c44b39245b8ea62680890dce824b..3448eb0f8a4aeedf7b56a2ee76d03ecd7b38d107 100644 (file)
@@ -882,6 +882,7 @@ static int ravb_phy_init(struct net_device *ndev)
        struct ravb_private *priv = netdev_priv(ndev);
        struct phy_device *phydev;
        struct device_node *pn;
+       int err;
 
        priv->link = 0;
        priv->speed = 0;
@@ -889,6 +890,17 @@ static int ravb_phy_init(struct net_device *ndev)
 
        /* Try connecting to PHY */
        pn = of_parse_phandle(np, "phy-handle", 0);
+       if (!pn) {
+               /* In the case of a fixed PHY, the DT node associated
+                * to the PHY is the Ethernet MAC DT node.
+                */
+               if (of_phy_is_fixed_link(np)) {
+                       err = of_phy_register_fixed_link(np);
+                       if (err)
+                               return err;
+               }
+               pn = of_node_get(np);
+       }
        phydev = of_phy_connect(ndev, pn, ravb_adjust_link, 0,
                                priv->phy_interface);
        if (!phydev) {
index b405349a570c284226d98109927183274fbe0b31..6f697438545d9404b71b796456f86c5e1d45c493 100644 (file)
@@ -3128,10 +3128,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
        net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
                              NETIF_F_HIGHDMA | NETIF_F_TSO |
                              NETIF_F_RXCSUM);
-       if (efx->type->offload_features & NETIF_F_V6_CSUM)
+       if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                net_dev->features |= NETIF_F_TSO6;
        /* Mask for features that also apply to VLAN devices */
-       net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG |
+       net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
                                   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
                                   NETIF_F_RXCSUM);
        /* All offloads can be toggled */
index f4518bc2cd288b68726b87b7e062df423e57684c..1e19c8fd8b823d0419d537be27a2058727c6d30a 100644 (file)
@@ -439,7 +439,7 @@ struct stmmac_ops {
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
        void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
-       void (*config_sub_second_increment) (void __iomem *ioaddr);
+       u32 (*config_sub_second_increment) (void __iomem *ioaddr, u32 clk_rate);
        int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec);
        int (*config_addend) (void __iomem *ioaddr, u32 addend);
        int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
index 401383b252a8f079aba4688afd6af1a53726b962..f0d797ab74d8f2ea927c6956525b728b9c20ddca 100644 (file)
@@ -32,6 +32,7 @@
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003
+#define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010
 
 #define EMAC_SPLITTER_CTRL_REG                 0x0
 #define EMAC_SPLITTER_CTRL_SPEED_MASK          0x3
@@ -47,6 +48,7 @@ struct socfpga_dwmac {
        struct regmap *sys_mgr_base_addr;
        struct reset_control *stmmac_rst;
        void __iomem *splitter_base;
+       bool f2h_ptp_ref_clk;
 };
 
 static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
@@ -116,6 +118,8 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
                return -EINVAL;
        }
 
+       dwmac->f2h_ptp_ref_clk = of_property_read_bool(np, "altr,f2h_ptp_ref_clk");
+
        np_splitter = of_parse_phandle(np, "altr,emac-splitter", 0);
        if (np_splitter) {
                if (of_address_to_resource(np_splitter, 0, &res_splitter)) {
@@ -171,6 +175,11 @@ static int socfpga_dwmac_setup(struct socfpga_dwmac *dwmac)
        ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift);
        ctrl |= val << reg_shift;
 
+       if (dwmac->f2h_ptp_ref_clk)
+               ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2);
+       else
+               ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2));
+
        regmap_write(sys_mgr_base_addr, reg_offset, ctrl);
        return 0;
 }
index 76ad214b403694fa56790c478dc310251cdc4288..a77f68918010d3a511c4a97cd3b2c83671f9f542 100644 (file)
@@ -33,22 +33,25 @@ static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data)
        writel(data, ioaddr + PTP_TCR);
 }
 
-static void stmmac_config_sub_second_increment(void __iomem *ioaddr)
+static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
+                                             u32 ptp_clock)
 {
        u32 value = readl(ioaddr + PTP_TCR);
        unsigned long data;
 
        /* Convert the ptp_clock to nano second
-        * formula = (1/ptp_clock) * 1000000000
+        * formula = (2/ptp_clock) * 1000000000
         * where, ptp_clock = 50MHz.
         */
-       data = (1000000000ULL / 50000000);
+       data = (2000000000ULL / ptp_clock);
 
        /* 0.465ns accuracy */
        if (!(value & PTP_TCR_TSCTRLSSR))
                data = (data * 1000) / 465;
 
        writel(data, ioaddr + PTP_SSIR);
+
+       return data;
 }
 
 static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
index 3c6549aee11dee5cf13c92c90c5b4b7a8618f375..6d4c33a7f0b41ce0b1d6528c7a659a84e08b26e7 100644 (file)
@@ -53,6 +53,7 @@
 #include "stmmac.h"
 #include <linux/reset.h>
 #include <linux/of_mdio.h>
+#include "dwmac1000.h"
 
 #define STMMAC_ALIGN(x)        L1_CACHE_ALIGN(x)
 
@@ -185,7 +186,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv)
                        priv->clk_csr = STMMAC_CSR_100_150M;
                else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M))
                        priv->clk_csr = STMMAC_CSR_150_250M;
-               else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M))
+               else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M))
                        priv->clk_csr = STMMAC_CSR_250_300M;
        }
 }
@@ -435,6 +436,7 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
        u32 ts_master_en = 0;
        u32 ts_event_en = 0;
        u32 value = 0;
+       u32 sec_inc;
 
        if (!(priv->dma_cap.time_stamp || priv->adv_ts)) {
                netdev_alert(priv->dev, "No support for HW time stamping\n");
@@ -598,24 +600,19 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                         tstamp_all | ptp_v2 | ptp_over_ethernet |
                         ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
                         ts_master_en | snap_type_sel);
-
                priv->hw->ptp->config_hw_tstamping(priv->ioaddr, value);
 
                /* program Sub Second Increment reg */
-               priv->hw->ptp->config_sub_second_increment(priv->ioaddr);
+               sec_inc = priv->hw->ptp->config_sub_second_increment(
+                       priv->ioaddr, priv->clk_ptp_rate);
+               temp = div_u64(1000000000ULL, sec_inc);
 
                /* calculate default added value:
                 * formula is :
                 * addend = (2^32)/freq_div_ratio;
-                * where, freq_div_ratio = clk_ptp_ref_i/50MHz
-                * hence, addend = ((2^32) * 50MHz)/clk_ptp_ref_i;
-                * NOTE: clk_ptp_ref_i should be >= 50MHz to
-                *       achieve 20ns accuracy.
-                *
-                * 2^x * y == (y << x), hence
-                * 2^32 * 50000000 ==> (50000000 << 32)
+                * where, freq_div_ratio = 1e9ns/sec_inc
                 */
-               temp = (u64) (50000000ULL << 32);
+               temp = (u64)(temp << 32);
                priv->default_addend = div_u64(temp, priv->clk_ptp_rate);
                priv->hw->ptp->config_addend(priv->ioaddr,
                                             priv->default_addend);
@@ -2402,7 +2399,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
                features &= ~NETIF_F_RXCSUM;
 
        if (!priv->plat->tx_coe)
-               features &= ~NETIF_F_ALL_CSUM;
+               features &= ~NETIF_F_CSUM_MASK;
 
        /* Some GMAC devices have a bugged Jumbo frame support that
         * needs to have the Tx COE disabled for oversized frames
@@ -2410,7 +2407,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev,
         * the TX csum insertion in the TDES and not use SF.
         */
        if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN))
-               features &= ~NETIF_F_ALL_CSUM;
+               features &= ~NETIF_F_CSUM_MASK;
 
        return features;
 }
index bba670c42e3749483bf2218ad2da6b3b4c8d0587..16c85ccd1762c00d55c21fc47ab398ec069367ea 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
-
+#include <linux/of_mdio.h>
 #include <asm/io.h>
 
 #include "stmmac.h"
@@ -200,10 +200,29 @@ int stmmac_mdio_register(struct net_device *ndev)
        struct stmmac_priv *priv = netdev_priv(ndev);
        struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
        int addr, found;
+       struct device_node *mdio_node = NULL;
+       struct device_node *child_node = NULL;
 
        if (!mdio_bus_data)
                return 0;
 
+       if (IS_ENABLED(CONFIG_OF)) {
+               for_each_child_of_node(priv->device->of_node, child_node) {
+                       if (of_device_is_compatible(child_node,
+                                                   "snps,dwmac-mdio")) {
+                               mdio_node = child_node;
+                               break;
+                       }
+               }
+
+               if (mdio_node) {
+                       netdev_dbg(ndev, "FOUND MDIO subnode\n");
+               } else {
+                       netdev_err(ndev, "NO MDIO subnode\n");
+                       return 0;
+               }
+       }
+
        new_bus = mdiobus_alloc();
        if (new_bus == NULL)
                return -ENOMEM;
@@ -231,7 +250,8 @@ int stmmac_mdio_register(struct net_device *ndev)
        new_bus->irq = irqlist;
        new_bus->phy_mask = mdio_bus_data->phy_mask;
        new_bus->parent = priv->device;
-       err = mdiobus_register(new_bus);
+
+       err = of_mdiobus_register(new_bus, mdio_node);
        if (err != 0) {
                pr_err("%s: Cannot register as MDIO bus\n", new_bus->name);
                goto bus_register_fail;
@@ -284,7 +304,7 @@ int stmmac_mdio_register(struct net_device *ndev)
                }
        }
 
-       if (!found) {
+       if (!found && !mdio_node) {
                pr_warn("%s: No PHY found\n", ndev->name);
                mdiobus_unregister(new_bus);
                mdiobus_free(new_bus);
index d02691ba3d7feb15ec7e783db7d9086924a6f7fc..6a52fa18cbf2e94958bdfe44c4db258ababcdd02 100644 (file)
@@ -146,7 +146,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
        if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0)
                dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
 
-       if (plat->phy_node || plat->phy_bus_name)
+       if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name)
                plat->mdio_bus_data = NULL;
        else
                plat->mdio_bus_data =
index 0750d7a938787bf44e101fbdbe4939ed96925400..31b19fdf659d45d16ea5e7afb3836b86e70a1640 100644 (file)
@@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
 
 static void geneve_notify_add_rx_port(struct geneve_sock *gs)
 {
+       struct net_device *dev;
        struct sock *sk = gs->sock->sk;
+       struct net *net = sock_net(sk);
        sa_family_t sa_family = sk->sk_family;
+       __be16 port = inet_sk(sk)->inet_sport;
        int err;
 
        if (sa_family == AF_INET) {
@@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
                        pr_warn("geneve: udp_add_offload failed with status %d\n",
                                err);
        }
+
+       rcu_read_lock();
+       for_each_netdev_rcu(net, dev) {
+               if (dev->netdev_ops->ndo_add_geneve_port)
+                       dev->netdev_ops->ndo_add_geneve_port(dev, sa_family,
+                                                            port);
+       }
+       rcu_read_unlock();
 }
 
 static int geneve_hlen(struct genevehdr *gh)
@@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 
 static void geneve_notify_del_rx_port(struct geneve_sock *gs)
 {
+       struct net_device *dev;
        struct sock *sk = gs->sock->sk;
+       struct net *net = sock_net(sk);
        sa_family_t sa_family = sk->sk_family;
+       __be16 port = inet_sk(sk)->inet_sport;
+
+       rcu_read_lock();
+       for_each_netdev_rcu(net, dev) {
+               if (dev->netdev_ops->ndo_del_geneve_port)
+                       dev->netdev_ops->ndo_del_geneve_port(dev, sa_family,
+                                                            port);
+       }
+
+       rcu_read_unlock();
 
        if (sa_family == AF_INET)
                udp_del_offload(&gs->udp_offloads);
@@ -1086,6 +1109,30 @@ static struct device_type geneve_type = {
        .name = "geneve",
 };
 
+/* Calls the ndo_add_geneve_port of the caller in order to
+ * supply the listening GENEVE udp ports. Callers are expected
+ * to implement the ndo_add_geneve_port.
+ */
+void geneve_get_rx_port(struct net_device *dev)
+{
+       struct net *net = dev_net(dev);
+       struct geneve_net *gn = net_generic(net, geneve_net_id);
+       struct geneve_sock *gs;
+       sa_family_t sa_family;
+       struct sock *sk;
+       __be16 port;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(gs, &gn->sock_list, list) {
+               sk = gs->sock->sk;
+               sa_family = sk->sk_family;
+               port = inet_sk(sk)->inet_sport;
+               dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port);
+       }
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(geneve_get_rx_port);
+
 /* Initialize the device structure. */
 static void geneve_setup(struct net_device *dev)
 {
index a9268db4e349fc2e131be0010ecd67e8bb779939..f94392d07126c12fe7a60e0018b6214e9f72a4e2 100644 (file)
@@ -88,7 +88,7 @@ static struct lock_class_key ipvlan_netdev_xmit_lock_key;
 static struct lock_class_key ipvlan_netdev_addr_lock_key;
 
 #define IPVLAN_FEATURES \
-       (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
+       (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
         NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
         NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
index dc7d970bd1c0baaf1f707df87001da8f59f3d74a..a400288cb37b9bfb6190f1bd7c64d02e97713956 100644 (file)
@@ -175,7 +175,7 @@ static void loopback_setup(struct net_device *dev)
                | NETIF_F_UFO
                | NETIF_F_HW_CSUM
                | NETIF_F_RXCSUM
-               | NETIF_F_SCTP_CSUM
+               | NETIF_F_SCTP_CRC
                | NETIF_F_HIGHDMA
                | NETIF_F_LLTX
                | NETIF_F_NETNS_LOCAL
index 06c8bfeaccd6c11cf15a8f05d87ce23c0f1e4405..6a57a005e0ca8162d2a0b9334440d1b9cb7d75db 100644 (file)
@@ -758,11 +758,11 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key;
 static struct lock_class_key macvlan_netdev_addr_lock_key;
 
 #define ALWAYS_ON_FEATURES \
-       (NETIF_F_SG | NETIF_F_GEN_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \
+       (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \
         NETIF_F_GSO_ROBUST)
 
 #define MACVLAN_FEATURES \
-       (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
+       (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
         NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \
         NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
index 0fc521941c718dbcdea487a18a900984ad176ea5..d636d051fac8317ca6d94dd1bdf9c34cac4a5b4b 100644 (file)
@@ -388,7 +388,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
                 *        check, we either support them all or none.
                 */
                if (skb->ip_summed == CHECKSUM_PARTIAL &&
-                   !(features & NETIF_F_ALL_CSUM) &&
+                   !(features & NETIF_F_CSUM_MASK) &&
                    skb_checksum_help(skb))
                        goto drop;
                skb_queue_tail(&q->sk.sk_receive_queue, skb);
index 059c0f60a2b28f8447d78c8876aac29618551b0f..915f60fce186352c9253aca3763cc3bcdd5e15ef 100644 (file)
@@ -981,7 +981,7 @@ static void team_port_disable(struct team *team,
        team_lower_state_changed(port);
 }
 
-#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
+#define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
                            NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
                            NETIF_F_HIGHDMA | NETIF_F_LRO)
 
@@ -2091,7 +2091,6 @@ static void team_setup(struct net_device *dev)
                           NETIF_F_HW_VLAN_CTAG_RX |
                           NETIF_F_HW_VLAN_CTAG_FILTER;
 
-       dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM);
        dev->features |= dev->hw_features;
 }
 
index d9427ca3dba79628f402867b83e735bee78fac3b..34642a9583e0de11ac53df4223b445a5f12078e6 100644 (file)
@@ -1986,7 +1986,7 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev,
        int offset = skb_transport_offset(skb);
 
        if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset)
-               features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+               features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
        else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz)
                features &= ~NETIF_F_GSO_MASK;
 
index f4424063b860177c6501f4b8796c384b4fed1884..0efe7112fc1f2157bd157e5b75b4c9ef06500d85 100644 (file)
@@ -1625,7 +1625,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp)
 
        /* crc offload */
        if (likely(lport->crc_offload)) {
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum_start = skb_headroom(skb);
                skb->csum_offset = skb->len;
                crc = 0;
index 679785b0209cf6a5f9c40c58a10979a77cd3912f..9de4f23910d8bc2f447c1487a5f0622735173b34 100644 (file)
@@ -69,7 +69,7 @@ ksocknal_lib_zc_capable(ksock_conn_t *conn)
 
        /* ZC if the socket supports scatter/gather and doesn't need software
         * checksums */
-       return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
+       return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_CSUM_MASK) != 0);
 }
 
 int
index 05f5879821b8146e79cb0561bb56400961cae714..a5f6ce6b578c70188a36adb24d3b7a5afd7165e2 100644 (file)
@@ -621,7 +621,7 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
                                                     NETIF_F_SG |
                                                     NETIF_F_HIGHDMA |
                                                     NETIF_F_FRAGLIST |
-                                                    NETIF_F_GEN_CSUM |
+                                                    NETIF_F_HW_CSUM |
                                                     NETIF_F_HW_VLAN_CTAG_TX |
                                                     NETIF_F_HW_VLAN_STAG_TX);
 
index 0e707f0c1a3ed1e747bf11ec9477fbae95468bff..7c27fa1030e873d1841f3e291e9fc806992967e3 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -23,6 +24,10 @@ struct inet_diag_handler {
        void            (*idiag_get_info)(struct sock *sk,
                                          struct inet_diag_msg *r,
                                          void *info);
+
+       int             (*destroy)(struct sk_buff *in_skb,
+                                  const struct inet_diag_req_v2 *req);
+
        __u16           idiag_type;
        __u16           idiag_info_size;
 };
@@ -41,6 +46,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
                            struct sk_buff *in_skb, const struct nlmsghdr *nlh,
                            const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+                                    struct inet_hashinfo *hashinfo,
+                                    const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
index f0d87347df19edd25d84d61dab931ebf8112117d..d9654f0eecb3519383441afa6b131ff9a5898485 100644 (file)
@@ -52,7 +52,7 @@ enum {
                NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
 
        NETIF_F_FCOE_CRC_BIT,           /* FCoE CRC32 */
-       NETIF_F_SCTP_CSUM_BIT,          /* SCTP checksum offload */
+       NETIF_F_SCTP_CRC_BIT,           /* SCTP checksum offload */
        NETIF_F_FCOE_MTU_BIT,           /* Supports max FCoE MTU, 2158 bytes*/
        NETIF_F_NTUPLE_BIT,             /* N-tuple filters supported */
        NETIF_F_RXHASH_BIT,             /* Receive hashing offload */
@@ -103,7 +103,7 @@ enum {
 #define NETIF_F_NTUPLE         __NETIF_F(NTUPLE)
 #define NETIF_F_RXCSUM         __NETIF_F(RXCSUM)
 #define NETIF_F_RXHASH         __NETIF_F(RXHASH)
-#define NETIF_F_SCTP_CSUM      __NETIF_F(SCTP_CSUM)
+#define NETIF_F_SCTP_CRC       __NETIF_F(SCTP_CRC)
 #define NETIF_F_SG             __NETIF_F(SG)
 #define NETIF_F_TSO6           __NETIF_F(TSO6)
 #define NETIF_F_TSO_ECN                __NETIF_F(TSO_ECN)
@@ -146,10 +146,12 @@ enum {
 #define NETIF_F_GSO_SOFTWARE   (NETIF_F_TSO | NETIF_F_TSO_ECN | \
                                 NETIF_F_TSO6 | NETIF_F_UFO)
 
-#define NETIF_F_GEN_CSUM       NETIF_F_HW_CSUM
-#define NETIF_F_V4_CSUM                (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
-#define NETIF_F_V6_CSUM                (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
-#define NETIF_F_ALL_CSUM       (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
+/* List of IP checksum features. Note that NETIF_F_HW_CSUM should not be
+ * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set --
+ * this would be contradictory.
+ */
+#define NETIF_F_CSUM_MASK      (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
+                                NETIF_F_HW_CSUM)
 
 #define NETIF_F_ALL_TSO        (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
 
index 1bb21ff0fa6414ae3a6def9ff4c66fa02217795b..81b26a543a3cda9a97a5d1d8b6d8837f622d0320 100644 (file)
@@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *     a new port starts listening. The operation is protected by the
  *     vxlan_net->sock_lock.
  *
+ * void (*ndo_add_geneve_port)(struct net_device *dev,
+ *                           sa_family_t sa_family, __be16 port);
+ *     Called by geneve to notify a driver about the UDP port and socket
+ *     address family that geneve is listening to. It is called only when
+ *     a new port starts listening. The operation is protected by the
+ *     geneve_net->sock_lock.
+ *
+ * void (*ndo_del_geneve_port)(struct net_device *dev,
+ *                           sa_family_t sa_family, __be16 port);
+ *     Called by geneve to notify the driver about a UDP port and socket
+ *     address family that geneve is not listening to anymore. The operation
+ *     is protected by the geneve_net->sock_lock.
+ *
  * void (*ndo_del_vxlan_port)(struct  net_device *dev,
  *                           sa_family_t sa_family, __be16 port);
  *     Called by vxlan to notify the driver about a UDP port and socket
@@ -1217,7 +1230,12 @@ struct net_device_ops {
        void                    (*ndo_del_vxlan_port)(struct  net_device *dev,
                                                      sa_family_t sa_family,
                                                      __be16 port);
-
+       void                    (*ndo_add_geneve_port)(struct  net_device *dev,
+                                                      sa_family_t sa_family,
+                                                      __be16 port);
+       void                    (*ndo_del_geneve_port)(struct  net_device *dev,
+                                                      sa_family_t sa_family,
+                                                      __be16 port);
        void*                   (*ndo_dfwd_add_station)(struct net_device *pdev,
                                                        struct net_device *dev);
        void                    (*ndo_dfwd_del_station)(struct net_device *pdev,
@@ -2522,6 +2540,71 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
        remcsum_unadjust((__sum16 *)ptr, grc->delta);
 }
 
+struct skb_csum_offl_spec {
+       __u16           ipv4_okay:1,
+                       ipv6_okay:1,
+                       encap_okay:1,
+                       ip_options_okay:1,
+                       ext_hdrs_okay:1,
+                       tcp_okay:1,
+                       udp_okay:1,
+                       sctp_okay:1,
+                       vlan_okay:1,
+                       no_encapped_ipv6:1,
+                       no_not_encapped:1;
+};
+
+bool __skb_csum_offload_chk(struct sk_buff *skb,
+                           const struct skb_csum_offl_spec *spec,
+                           bool *csum_encapped,
+                           bool csum_help);
+
+static inline bool skb_csum_offload_chk(struct sk_buff *skb,
+                                       const struct skb_csum_offl_spec *spec,
+                                       bool *csum_encapped,
+                                       bool csum_help)
+{
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return false;
+
+       return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help);
+}
+
+static inline bool skb_csum_offload_chk_help(struct sk_buff *skb,
+                                            const struct skb_csum_offl_spec *spec)
+{
+       bool csum_encapped;
+
+       return skb_csum_offload_chk(skb, spec, &csum_encapped, true);
+}
+
+static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb)
+{
+       static const struct skb_csum_offl_spec csum_offl_spec = {
+               .ipv4_okay = 1,
+               .ip_options_okay = 1,
+               .ipv6_okay = 1,
+               .vlan_okay = 1,
+               .tcp_okay = 1,
+               .udp_okay = 1,
+       };
+
+       return skb_csum_offload_chk_help(skb, &csum_offl_spec);
+}
+
+static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb)
+{
+       static const struct skb_csum_offl_spec csum_offl_spec = {
+               .ipv4_okay = 1,
+               .ip_options_okay = 1,
+               .tcp_okay = 1,
+               .udp_okay = 1,
+               .vlan_okay = 1,
+       };
+
+       return skb_csum_offload_chk_help(skb, &csum_offl_spec);
+}
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
                                  unsigned short type,
                                  const void *daddr, const void *saddr,
@@ -3691,13 +3774,37 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth);
 static inline bool can_checksum_protocol(netdev_features_t features,
                                         __be16 protocol)
 {
-       return ((features & NETIF_F_GEN_CSUM) ||
-               ((features & NETIF_F_V4_CSUM) &&
-                protocol == htons(ETH_P_IP)) ||
-               ((features & NETIF_F_V6_CSUM) &&
-                protocol == htons(ETH_P_IPV6)) ||
-               ((features & NETIF_F_FCOE_CRC) &&
-                protocol == htons(ETH_P_FCOE)));
+       if (protocol == htons(ETH_P_FCOE))
+               return !!(features & NETIF_F_FCOE_CRC);
+
+       /* Assume this is an IP checksum (not SCTP CRC) */
+
+       if (features & NETIF_F_HW_CSUM) {
+               /* Can checksum everything */
+               return true;
+       }
+
+       switch (protocol) {
+       case htons(ETH_P_IP):
+               return !!(features & NETIF_F_IP_CSUM);
+       case htons(ETH_P_IPV6):
+               return !!(features & NETIF_F_IPV6_CSUM);
+       default:
+               return false;
+       }
+}
+
+/* Map an ethertype into IP protocol if possible */
+static inline int eproto_to_ipproto(int eproto)
+{
+       switch (eproto) {
+       case htons(ETH_P_IP):
+               return IPPROTO_IP;
+       case htons(ETH_P_IPV6):
+               return IPPROTO_IPV6;
+       default:
+               return -1;
+       }
 }
 
 #ifdef CONFIG_BUG
@@ -3762,15 +3869,14 @@ void linkwatch_run_queue(void);
 static inline netdev_features_t netdev_intersect_features(netdev_features_t f1,
                                                          netdev_features_t f2)
 {
-       if (f1 & NETIF_F_GEN_CSUM)
-               f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
-       if (f2 & NETIF_F_GEN_CSUM)
-               f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
-       f1 &= f2;
-       if (f1 & NETIF_F_GEN_CSUM)
-               f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+       if ((f1 ^ f2) & NETIF_F_HW_CSUM) {
+               if (f1 & NETIF_F_HW_CSUM)
+                       f1 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+               else
+                       f2 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+       }
 
-       return f1;
+       return f1 & f2;
 }
 
 static inline netdev_features_t netdev_get_wanted_features(
index 639e9b8b0e4d9ff2c9b10ce5b44a8a328abe2a31..0b41959aab9f8fa4c5901b3ad054e63746c4b74a 100644 (file)
@@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 struct netlink_callback {
        struct sk_buff          *skb;
        const struct nlmsghdr   *nlh;
+       int                     (*start)(struct netlink_callback *);
        int                     (*dump)(struct sk_buff * skb,
                                        struct netlink_callback *cb);
        int                     (*done)(struct netlink_callback *cb);
@@ -153,6 +154,7 @@ struct nlmsghdr *
 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
 
 struct netlink_dump_control {
+       int (*start)(struct netlink_callback *);
        int (*dump)(struct sk_buff *skb, struct netlink_callback *);
        int (*done)(struct netlink_callback *);
        void *data;
index 843ceca9a21e5f1327fa5c82fa5f3089c5ebab23..77deece15fb36e51c769b5a1c764079a5e416b05 100644 (file)
@@ -819,4 +819,86 @@ out:
        return err;
 }
 
+/* Internal function, please use rhashtable_replace_fast() instead */
+static inline int __rhashtable_replace_fast(
+       struct rhashtable *ht, struct bucket_table *tbl,
+       struct rhash_head *obj_old, struct rhash_head *obj_new,
+       const struct rhashtable_params params)
+{
+       struct rhash_head __rcu **pprev;
+       struct rhash_head *he;
+       spinlock_t *lock;
+       unsigned int hash;
+       int err = -ENOENT;
+
+       /* Minimally, the old and new objects must have same hash
+        * (which should mean identifiers are the same).
+        */
+       hash = rht_head_hashfn(ht, tbl, obj_old, params);
+       if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
+               return -EINVAL;
+
+       lock = rht_bucket_lock(tbl, hash);
+
+       spin_lock_bh(lock);
+
+       pprev = &tbl->buckets[hash];
+       rht_for_each(he, tbl, hash) {
+               if (he != obj_old) {
+                       pprev = &he->next;
+                       continue;
+               }
+
+               rcu_assign_pointer(obj_new->next, obj_old->next);
+               rcu_assign_pointer(*pprev, obj_new);
+               err = 0;
+               break;
+       }
+
+       spin_unlock_bh(lock);
+
+       return err;
+}
+
+/**
+ * rhashtable_replace_fast - replace an object in hash table
+ * @ht:                hash table
+ * @obj_old:   pointer to hash head inside object being replaced
+ * @obj_new:   pointer to hash head inside object which is new
+ * @params:    hash table parameters
+ *
+ * Replacing an object doesn't affect the number of elements in the hash table
+ * or bucket, so we don't need to worry about shrinking or expanding the
+ * table here.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found,
+ * -EINVAL if hash is not the same for the old and new objects.
+ */
+static inline int rhashtable_replace_fast(
+       struct rhashtable *ht, struct rhash_head *obj_old,
+       struct rhash_head *obj_new,
+       const struct rhashtable_params params)
+{
+       struct bucket_table *tbl;
+       int err;
+
+       rcu_read_lock();
+
+       tbl = rht_dereference_rcu(ht->tbl, ht);
+
+       /* Because we have already taken (and released) the bucket
+        * lock in old_tbl, if we find that future_tbl is not yet
+        * visible then that guarantees the entry to still be in
+        * the old tbl if it exists.
+        */
+       while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
+                                               obj_new, params)) &&
+              (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+               ;
+
+       rcu_read_unlock();
+
+       return err;
+}
+
 #endif /* _LINUX_RHASHTABLE_H */
index af4f6ac025b6b360be51363d4e0fc3652b76040f..6b6bd42d6134a3bfccb36f0bad4447c8c35ab3ff 100644 (file)
 #include <linux/in6.h>
 #include <net/flow.h>
 
-/* A. Checksumming of received packets by device.
+/* The interface for checksum offload between the stack and networking drivers
+ * is as follows...
+ *
+ * A. IP checksum related features
+ *
+ * Drivers advertise checksum offload capabilities in the features of a device.
+ * From the stack's point of view these are capabilities offered by the driver,
+ * a driver typically only advertises features that it is capable of offloading
+ * to its device.
+ *
+ * The checksum related features are:
+ *
+ *     NETIF_F_HW_CSUM - The driver (or its device) is able to compute one
+ *                       IP (one's complement) checksum for any combination
+ *                       of protocols or protocol layering. The checksum is
+ *                       computed and set in a packet per the CHECKSUM_PARTIAL
+ *                       interface (see below).
+ *
+ *     NETIF_F_IP_CSUM - Driver (device) is only able to checksum plain
+ *                       TCP or UDP packets over IPv4. These are specifically
+ *                       unencapsulated packets of the form IPv4|TCP or
+ *                       IPv4|UDP where the Protocol field in the IPv4 header
+ *                       is TCP or UDP. The IPv4 header may contain IP options.
+ *                       This feature cannot be set in features for a device
+ *                       with NETIF_F_HW_CSUM also set. This feature is being
+ *                       DEPRECATED (see below).
+ *
+ *     NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain
+ *                       TCP or UDP packets over IPv6. These are specifically
+ *                       unencapsulated packets of the form IPv6|TCP or
+ *                       IPv6|UDP where the Next Header field in the IPv6
+ *                       header is either TCP or UDP. IPv6 extension headers
+ *                       are not supported with this feature. This feature
+ *                       cannot be set in features for a device with
+ *                       NETIF_F_HW_CSUM also set. This feature is being
+ *                       DEPRECATED (see below).
+ *
+ *     NETIF_F_RXCSUM - Driver (device) performs receive checksum offload.
+ *                      This flag is only used to disable the RX checksum
+ *                      feature for a device. The stack will accept receive
+ *                      checksum indication in packets received on a device
+ *                      regardless of whether NETIF_F_RXCSUM is set.
+ *
+ * B. Checksumming of received packets by device. Indication of checksum
+ *    verification is in set skb->ip_summed. Possible values are:
  *
  * CHECKSUM_NONE:
  *
- *   Device failed to checksum this packet e.g. due to lack of capabilities.
+ *   Device did not checksum this packet e.g. due to lack of capabilities.
  *   The packet contains full (though not verified) checksum in packet but
  *   not in skb->csum. Thus, skb->csum is undefined in this case.
  *
@@ -53,9 +97,8 @@
  *   (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
  *   for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
  *   if their checksums are okay. skb->csum is still undefined in this case
- *   though. It is a bad option, but, unfortunately, nowadays most vendors do
- *   this. Apparently with the secret goal to sell you new devices, when you
- *   will add new protocol to your host, f.e. IPv6 8)
+ *   though. A driver or device must never modify the checksum field in the
+ *   packet even if checksum is verified.
  *
  *   CHECKSUM_UNNECESSARY is applicable to following protocols:
  *     TCP: IPv6 and IPv4.
  *   packet that are after the checksum being offloaded are not considered to
  *   be verified.
  *
- * B. Checksumming on output.
- *
- * CHECKSUM_NONE:
- *
- *   The skb was already checksummed by the protocol, or a checksum is not
- *   required.
+ * C. Checksumming on transmit for non-GSO. The stack requests checksum offload
+ *    in the skb->ip_summed for a packet. Values are:
  *
  * CHECKSUM_PARTIAL:
  *
- *   The device is required to checksum the packet as seen by hard_start_xmit()
+ *   The driver is required to checksum the packet as seen by hard_start_xmit()
  *   from skb->csum_start up to the end, and to record/write the checksum at
- *   offset skb->csum_start + skb->csum_offset.
+ *   offset skb->csum_start + skb->csum_offset. A driver may verify that the
+ *   csum_start and csum_offset values are valid values given the length and
+ *   offset of the packet, however they should not attempt to validate that the
+ *   checksum refers to a legitimate transport layer checksum-- it is the
+ *   purview of the stack to validate that csum_start and csum_offset are set
+ *   correctly.
+ *
+ *   When the stack requests checksum offload for a packet, the driver MUST
+ *   ensure that the checksum is set correctly. A driver can either offload the
+ *   checksum calculation to the device, or call skb_checksum_help (in the case
+ *   that the device does not support offload for a particular checksum).
+ *
+ *   NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of
+ *   NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate
+ *   checksum offload capability. If a device has limited checksum capabilities
+ *   (for instance can only perform NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM as
+ *   described above) a helper function can be called to resolve
+ *   CHECKSUM_PARTIAL. The helper functions are skb_csum_off_chk*. The helper
+ *   function takes a spec argument that describes the protocol layer that is
+ *   supported for checksum offload and can be called for each packet. If a
+ *   packet does not match the specification for offload, skb_checksum_help
+ *   is called to resolve the checksum.
  *
- *   The device must show its capabilities in dev->features, set up at device
- *   setup time, e.g. netdev_features.h:
+ * CHECKSUM_NONE:
  *
- *     NETIF_F_HW_CSUM - It's a clever device, it's able to checksum everything.
- *     NETIF_F_IP_CSUM - Device is dumb, it's able to checksum only TCP/UDP over
- *                       IPv4. Sigh. Vendors like this way for an unknown reason.
- *                       Though, see comment above about CHECKSUM_UNNECESSARY. 8)
- *     NETIF_F_IPV6_CSUM - About as dumb as the last one but does IPv6 instead.
- *     NETIF_F_...     - Well, you get the picture.
+ *   The skb was already checksummed by the protocol, or a checksum is not
+ *   required.
  *
  * CHECKSUM_UNNECESSARY:
  *
- *   Normally, the device will do per protocol specific checksumming. Protocol
- *   implementations that do not want the NIC to perform the checksum
- *   calculation should use this flag in their outgoing skbs.
- *
- *     NETIF_F_FCOE_CRC - This indicates that the device can do FCoE FC CRC
- *                        offload. Correspondingly, the FCoE protocol driver
- *                        stack should use CHECKSUM_UNNECESSARY.
+ *   This has the same meaning as CHECKSUM_NONE for checksum offload on
+ *   output.
  *
- * Any questions? No questions, good.          --ANK
+ * CHECKSUM_COMPLETE:
+ *   Not used in checksum output. If a driver observes a packet with this value
+ *   set in skbuff, it should treat the skb as if CHECKSUM_NONE were set.
+ *
+ * D. Non-IP checksum (CRC) offloads
+ *
+ *   NETIF_F_SCTP_CRC - This feature indicates that a device is capable of
+ *     offloading the SCTP CRC in a packet. To perform this offload the stack
+ *     will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset
+ *     accordingly. Note that there is no indication in the skbuff that the
+ *     CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports
+ *     both IP checksum offload and SCTP CRC offload must verify which offload
+ *     is configured for a packet presumably by inspecting packet headers.
+ *
+ *   NETIF_F_FCOE_CRC - This feature indicates that a device is capable of
+ *     offloading the FCOE CRC in a packet. To perform this offload the stack
+ *     will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset
+ *     accordingly. Note that there is no indication in the skbuff that the
+ *     CHECKSUM_PARTIAL refers to an FCOE checksum, a driver that supports
+ *     both IP checksum offload and FCOE CRC offload must verify which offload
+ *     is configured for a packet presumably by inspecting packet headers.
+ *
+ * E. Checksumming on output with GSO.
+ *
+ * In the case of a GSO packet (skb_is_gso(skb) is true), checksum offload
+ * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the
+ * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as
+ * part of the GSO operation is implied. If a checksum is being offloaded
+ * with GSO then ip_summed is CHECKSUM_PARTIAL, csum_start and csum_offset
+ * are set to refer to the outermost checksum being offload (two offloaded
+ * checksums are possible with UDP encapsulation).
  */
 
 /* Don't change this without changing skb_csum_unnecessary! */
@@ -1939,6 +2019,11 @@ static inline unsigned char *skb_inner_transport_header(const struct sk_buff
        return skb->head + skb->inner_transport_header;
 }
 
+static inline int skb_inner_transport_offset(const struct sk_buff *skb)
+{
+       return skb_inner_transport_header(skb) - skb->data;
+}
+
 static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
 {
        skb->inner_transport_header = skb->data - skb->head;
index fddebc6174697a311be8aa06d1e30ed90135d41b..4018b48f2b3b4f115802fba8b67fcc3cd113c773 100644 (file)
@@ -15,6 +15,7 @@ struct sock_diag_handler {
        __u8 family;
        int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
        int (*get_info)(struct sk_buff *skb, struct sock *sk);
+       int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk)
 }
 void sock_diag_broadcast_destroy(struct sock *sk);
 
+int sock_diag_destroy(struct sock *sk, int err);
 #endif
index 1b6b6dcb018def094a36786bc5cd3d03e26fa453..43c0e771f417ae3c36f4fb7a0390f9569f7c9433 100644 (file)
@@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
  * @flags: flags
  * @policy: attribute validation policy
  * @doit: standard command callback
+ * @start: start callback for dumps
  * @dumpit: callback for dumpers
  * @done: completion callback for dumps
  * @ops_list: operations list
@@ -122,6 +123,7 @@ struct genl_ops {
        const struct nla_policy *policy;
        int                    (*doit)(struct sk_buff *skb,
                                       struct genl_info *info);
+       int                    (*start)(struct netlink_callback *cb);
        int                    (*dumpit)(struct sk_buff *skb,
                                         struct netlink_callback *cb);
        int                    (*done)(struct netlink_callback *cb);
index 3106ed6eae0d1b908544e5aa044df3704fd2c46a..e6c23dc765f7ecae795e9c6acb72b5ac017e04f7 100644 (file)
@@ -62,6 +62,14 @@ struct genevehdr {
        struct geneve_opt options[];
 };
 
+#if IS_ENABLED(CONFIG_GENEVE)
+void geneve_get_rx_port(struct net_device *netdev);
+#else
+static inline void geneve_get_rx_port(struct net_device *netdev)
+{
+}
+#endif
+
 #ifdef CONFIG_INET
 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
                                        u8 name_assign_type, u16 dst_port);
diff --git a/include/net/ila.h b/include/net/ila.h
new file mode 100644 (file)
index 0000000..9f4f43e
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * ILA kernel interface
+ *
+ * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_ILA_H
+#define _NET_ILA_H
+
+int ila_xlat_outgoing(struct sk_buff *skb);
+int ila_xlat_incoming(struct sk_buff *skb);
+
+#endif /* _NET_ILA_H */
index 8ba379f9e4678d7f00209a6b2ac12d41d82f4b25..c501d67172b125d55108ca407f4c50a158da0e14 100644 (file)
@@ -88,6 +88,13 @@ struct netns_sctp {
         */
        int pf_retrans;
 
+       /*
+        * Disable Potentially-Failed feature, the feature is enabled by default
+        * pf_enable    -  0  : disable pf
+        *              - >0  : enable pf
+        */
+       int pf_enable;
+
        /*
         * Policy for preforming sctp/socket accounting
         * 0   - do socket level accounting, all assocs share sk_sndbuf
index 0ca22b014de1a0a31e539a89e35fa5cc14875384..6e6e8a25d9978303a882b40010c94d3ead0e1a67 100644 (file)
@@ -1060,6 +1060,7 @@ struct proto {
        void                    (*destroy_cgroup)(struct mem_cgroup *memcg);
        struct cg_proto         *(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
+       int                     (*diag_destroy)(struct sock *sk, int err);
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
@@ -1791,6 +1792,15 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
        sk->sk_route_caps &= ~flags;
 }
 
+static inline bool sk_check_csum_caps(struct sock *sk)
+{
+       return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
+              (sk->sk_family == PF_INET &&
+               (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
+              (sk->sk_family == PF_INET6 &&
+               (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
+}
+
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
                                           struct iov_iter *from, char *to,
                                           int copy, int offset)
index 1d22ce9f352e2e7610095c4b1774164aa6af4516..6612946167feb1d85f2a74a493b73ad94ab0ac20 100644 (file)
@@ -50,6 +50,7 @@ enum switchdev_attr_id {
 };
 
 struct switchdev_attr {
+       struct net_device *orig_dev;
        enum switchdev_attr_id id;
        u32 flags;
        union {
@@ -68,6 +69,7 @@ enum switchdev_obj_id {
 };
 
 struct switchdev_obj {
+       struct net_device *orig_dev;
        enum switchdev_obj_id id;
        u32 flags;
 };
index f80e74c5ad18b22c274ecd7e75b6a23ffe7268b4..3077735b348d072b47f63a47fbc55cc548a1fc77 100644 (file)
@@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state);
 
 void tcp_done(struct sock *sk);
 
+int tcp_abort(struct sock *sk, int err);
+
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
        rx_opt->dsack = 0;
index c1c899c3a51be42f680fff5ce0f5c5e997652daa..b5a1aec1a167c26b593b33648cdeb7f14ef56cf4 100644 (file)
@@ -232,7 +232,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
             skb->inner_protocol != htons(ETH_P_TEB) ||
             (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
              sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
-               return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+               return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 
        return features;
 }
index 7ed9e670814ed2f519a54aa67f4f8f71b55d48e2..abde7bbd6f3b7c2e59b9b7f52baa758731ccde85 100644 (file)
@@ -3,13 +3,35 @@
 #ifndef _UAPI_LINUX_ILA_H
 #define _UAPI_LINUX_ILA_H
 
+/* NETLINK_GENERIC related info */
+#define ILA_GENL_NAME          "ila"
+#define ILA_GENL_VERSION       0x1
+
 enum {
        ILA_ATTR_UNSPEC,
        ILA_ATTR_LOCATOR,                       /* u64 */
+       ILA_ATTR_IDENTIFIER,                    /* u64 */
+       ILA_ATTR_LOCATOR_MATCH,                 /* u64 */
+       ILA_ATTR_IFINDEX,                       /* s32 */
+       ILA_ATTR_DIR,                           /* u32 */
 
        __ILA_ATTR_MAX,
 };
 
 #define ILA_ATTR_MAX           (__ILA_ATTR_MAX - 1)
 
+enum {
+       ILA_CMD_UNSPEC,
+       ILA_CMD_ADD,
+       ILA_CMD_DEL,
+       ILA_CMD_GET,
+
+       __ILA_CMD_MAX,
+};
+
+#define ILA_CMD_MAX    (__ILA_CMD_MAX - 1)
+
+#define ILA_DIR_IN     (1 << 0)
+#define ILA_DIR_OUT    (1 << 1)
+
 #endif /* _UAPI_LINUX_ILA_H */
index 49230d36f9ce783011bd23edb19ba8be0fb751f1..bae2d80034d4a2e7b9d2e63172c64b312d82ddf1 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
 
 struct sock_diag_req {
        __u8    sdiag_family;
index fded86508117dad4d81aad327b287aee991a55be..ad5e2fd1012c4c1fdfedf010055acfae003646e4 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <net/arp.h>
+#include <net/switchdev.h>
 
 #include "vlan.h"
 #include "vlanproc.h"
@@ -542,9 +543,9 @@ static int vlan_dev_init(struct net_device *dev)
                                          (1<<__LINK_STATE_DORMANT))) |
                      (1<<__LINK_STATE_PRESENT);
 
-       dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
+       dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
                           NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
-                          NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM |
+                          NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
                           NETIF_F_ALL_FCOE;
 
        dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
@@ -774,6 +775,12 @@ static const struct net_device_ops vlan_netdev_ops = {
        .ndo_netpoll_cleanup    = vlan_dev_netpoll_cleanup,
 #endif
        .ndo_fix_features       = vlan_dev_fix_features,
+       .ndo_fdb_add            = switchdev_port_fdb_add,
+       .ndo_fdb_del            = switchdev_port_fdb_del,
+       .ndo_fdb_dump           = switchdev_port_fdb_dump,
+       .ndo_bridge_setlink     = switchdev_port_bridge_setlink,
+       .ndo_bridge_getlink     = switchdev_port_bridge_getlink,
+       .ndo_bridge_dellink     = switchdev_port_bridge_dellink,
        .ndo_get_lock_subclass  = vlan_dev_get_lock_subclass,
        .ndo_get_iflink         = vlan_dev_get_iflink,
 };
index a642bb829d09cfd473d5ad17a64b2ba6b76d912e..82e3e97050173542b5d6094ebc4db81324198b18 100644 (file)
@@ -135,6 +135,7 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
 {
        struct switchdev_obj_port_fdb fdb = {
                .obj = {
+                       .orig_dev = f->dst->dev,
                        .id = SWITCHDEV_OBJ_ID_PORT_FDB,
                        .flags = SWITCHDEV_F_DEFER,
                },
index 5f3f6455317924e0e74281d23075d2af68d72d73..b3cca126b103d24ddaee0b13d7eb73f4e8617f3a 100644 (file)
@@ -40,6 +40,7 @@ void br_log_state(const struct net_bridge_port *p)
 void br_set_state(struct net_bridge_port *p, unsigned int state)
 {
        struct switchdev_attr attr = {
+               .orig_dev = p->dev,
                .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
                .flags = SWITCHDEV_F_DEFER,
                .u.stp_state = state,
@@ -570,6 +571,7 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
 int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
 {
        struct switchdev_attr attr = {
+               .orig_dev = br->dev,
                .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
                .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
                .u.ageing_time = ageing_time,
index 5396ff08af3215d1a532b853c8859fb64dfc6a1f..775e00fbeb1ef52a4367f118c2a3dbb6410c025a 100644 (file)
@@ -37,6 +37,7 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
 void br_init_port(struct net_bridge_port *p)
 {
        struct switchdev_attr attr = {
+               .orig_dev = p->dev,
                .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
                .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
                .u.ageing_time = p->br->ageing_time,
index 1394da63614a1d8a325ef44d5fa17849b5d82294..66c4549efbbb3505831587baaeaf7c0d55be1607 100644 (file)
@@ -73,6 +73,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
                          u16 vid, u16 flags)
 {
        struct switchdev_obj_port_vlan v = {
+               .obj.orig_dev = dev,
                .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
                .flags = flags,
                .vid_begin = vid,
@@ -120,6 +121,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
                          u16 vid)
 {
        struct switchdev_obj_port_vlan v = {
+               .obj.orig_dev = dev,
                .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
                .vid_begin = vid,
                .vid_end = vid,
index 8f705fcedb94b0e4ef56cdf70b363fdd396a14ee..914b4a24c654361c3604bf8d96225d587db2f013 100644 (file)
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
+#include <linux/sctp.h>
 
 #include "net-sysfs.h"
 
@@ -2471,6 +2472,141 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+/* skb_csum_offload_check - Driver helper function to determine if a device
+ * with limited checksum offload capabilities is able to offload the checksum
+ * for a given packet.
+ *
+ * Arguments:
+ *   skb - sk_buff for the packet in question
+ *   spec - contains the description of what device can offload
+ *   csum_encapped - returns true if the checksum being offloaded is
+ *           encpasulated. That is it is checksum for the transport header
+ *           in the inner headers.
+ *   checksum_help - when set indicates that helper function should
+ *           call skb_checksum_help if offload checks fail
+ *
+ * Returns:
+ *   true: Packet has passed the checksum checks and should be offloadable to
+ *        the device (a driver may still need to check for additional
+ *        restrictions of its device)
+ *   false: Checksum is not offloadable. If checksum_help was set then
+ *        skb_checksum_help was called to resolve checksum for non-GSO
+ *        packets and when IP protocol is not SCTP
+ */
+bool __skb_csum_offload_chk(struct sk_buff *skb,
+                           const struct skb_csum_offl_spec *spec,
+                           bool *csum_encapped,
+                           bool csum_help)
+{
+       struct iphdr *iph;
+       struct ipv6hdr *ipv6;
+       void *nhdr;
+       int protocol;
+       u8 ip_proto;
+
+       if (skb->protocol == htons(ETH_P_8021Q) ||
+           skb->protocol == htons(ETH_P_8021AD)) {
+               if (!spec->vlan_okay)
+                       goto need_help;
+       }
+
+       /* We check whether the checksum refers to a transport layer checksum in
+        * the outermost header or an encapsulated transport layer checksum that
+        * corresponds to the inner headers of the skb. If the checksum is for
+        * something else in the packet we need help.
+        */
+       if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
+               /* Non-encapsulated checksum */
+               protocol = eproto_to_ipproto(vlan_get_protocol(skb));
+               nhdr = skb_network_header(skb);
+               *csum_encapped = false;
+               if (spec->no_not_encapped)
+                       goto need_help;
+       } else if (skb->encapsulation && spec->encap_okay &&
+                  skb_checksum_start_offset(skb) ==
+                  skb_inner_transport_offset(skb)) {
+               /* Encapsulated checksum */
+               *csum_encapped = true;
+               switch (skb->inner_protocol_type) {
+               case ENCAP_TYPE_ETHER:
+                       protocol = eproto_to_ipproto(skb->inner_protocol);
+                       break;
+               case ENCAP_TYPE_IPPROTO:
+                       protocol = skb->inner_protocol;
+                       break;
+               }
+               nhdr = skb_inner_network_header(skb);
+       } else {
+               goto need_help;
+       }
+
+       switch (protocol) {
+       case IPPROTO_IP:
+               if (!spec->ipv4_okay)
+                       goto need_help;
+               iph = nhdr;
+               ip_proto = iph->protocol;
+               if (iph->ihl != 5 && !spec->ip_options_okay)
+                       goto need_help;
+               break;
+       case IPPROTO_IPV6:
+               if (!spec->ipv6_okay)
+                       goto need_help;
+               if (spec->no_encapped_ipv6 && *csum_encapped)
+                       goto need_help;
+               ipv6 = nhdr;
+               nhdr += sizeof(*ipv6);
+               ip_proto = ipv6->nexthdr;
+               break;
+       default:
+               goto need_help;
+       }
+
+ip_proto_again:
+       switch (ip_proto) {
+       case IPPROTO_TCP:
+               if (!spec->tcp_okay ||
+                   skb->csum_offset != offsetof(struct tcphdr, check))
+                       goto need_help;
+               break;
+       case IPPROTO_UDP:
+               if (!spec->udp_okay ||
+                   skb->csum_offset != offsetof(struct udphdr, check))
+                       goto need_help;
+               break;
+       case IPPROTO_SCTP:
+               if (!spec->sctp_okay ||
+                   skb->csum_offset != offsetof(struct sctphdr, checksum))
+                       goto cant_help;
+               break;
+       case NEXTHDR_HOP:
+       case NEXTHDR_ROUTING:
+       case NEXTHDR_DEST: {
+               u8 *opthdr = nhdr;
+
+               if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
+                       goto need_help;
+
+               ip_proto = opthdr[0];
+               nhdr += (opthdr[1] + 1) << 3;
+
+               goto ip_proto_again;
+       }
+       default:
+               goto need_help;
+       }
+
+       /* Passed the tests for offloading checksum */
+       return true;
+
+need_help:
+       if (csum_help && !skb_shinfo(skb)->gso_size)
+               skb_checksum_help(skb);
+cant_help:
+       return false;
+}
+EXPORT_SYMBOL(__skb_csum_offload_chk);
+
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
        __be16 type = skb->protocol;
@@ -2645,7 +2781,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, type)) {
-               features &= ~NETIF_F_ALL_CSUM;
+               features &= ~NETIF_F_CSUM_MASK;
        } else if (illegal_highdma(skb->dev, skb)) {
                features &= ~NETIF_F_SG;
        }
@@ -2792,7 +2928,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
                        else
                                skb_set_transport_header(skb,
                                                         skb_checksum_start_offset(skb));
-                       if (!(features & NETIF_F_ALL_CSUM) &&
+                       if (!(features & NETIF_F_CSUM_MASK) &&
                            skb_checksum_help(skb))
                                goto out_kfree_skb;
                }
@@ -6467,9 +6603,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
        /* UFO needs SG and checksumming */
        if (features & NETIF_F_UFO) {
                /* maybe split UFO into V4 and V6? */
-               if (!((features & NETIF_F_GEN_CSUM) ||
-                   (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
-                           == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+               if (!(features & NETIF_F_HW_CSUM) &&
+                   ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
+                    (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
                        netdev_dbg(dev,
                                "Dropping NETIF_F_UFO since no checksum offload features.\n");
                        features &= ~NETIF_F_UFO;
@@ -7571,16 +7707,16 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 netdev_features_t netdev_increment_features(netdev_features_t all,
        netdev_features_t one, netdev_features_t mask)
 {
-       if (mask & NETIF_F_GEN_CSUM)
-               mask |= NETIF_F_ALL_CSUM;
+       if (mask & NETIF_F_HW_CSUM)
+               mask |= NETIF_F_CSUM_MASK;
        mask |= NETIF_F_VLAN_CHALLENGED;
 
-       all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
+       all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
        all &= one | ~NETIF_F_ALL_FOR_ALL;
 
        /* If one device supports hw checksumming, set for all. */
-       if (all & NETIF_F_GEN_CSUM)
-               all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+       if (all & NETIF_F_HW_CSUM)
+               all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
 
        return all;
 }
index 29edf74846fc9cfef49f3fc35b4ba41de6c254af..09948a726347cf06b31565a8d85cca1cdee590cf 100644 (file)
@@ -87,7 +87,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_GSO_UDP_TUNNEL_BIT] =   "tx-udp_tnl-segmentation",
 
        [NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
-       [NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
+       [NETIF_F_SCTP_CRC_BIT] =         "tx-checksum-sctp",
        [NETIF_F_FCOE_MTU_BIT] =         "fcoe-mtu",
        [NETIF_F_NTUPLE_BIT] =           "rx-ntuple-filter",
        [NETIF_F_RXHASH_BIT] =           "rx-hashing",
@@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
        switch (eth_cmd) {
        case ETHTOOL_GTXCSUM:
        case ETHTOOL_STXCSUM:
-               return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+               return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC;
        case ETHTOOL_GRXCSUM:
        case ETHTOOL_SRXCSUM:
                return NETIF_F_RXCSUM;
index f88a62ab019d25097efc67ee8e5239c9b39e8ca3..bca8c350e7f30e44ea4743adcd606234aa44627c 100644 (file)
@@ -471,6 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
 
        if (dev_isalive(netdev)) {
                struct switchdev_attr attr = {
+                       .orig_dev = netdev,
                        .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
                        .flags = SWITCHDEV_F_NO_RECURSE,
                };
index de8d5cc5eb240555e2c8b30da7858b3ff9eadffb..2be144498bcf0b570b15a94222407a2344dc8ffd 100644 (file)
@@ -2898,7 +2898,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
        if (!(pkt_dev->flags & F_UDPCSUM)) {
                skb->ip_summed = CHECKSUM_NONE;
-       } else if (odev->features & NETIF_F_V4_CSUM) {
+       } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) {
                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                udp4_hwcsum(skb, iph->saddr, iph->daddr);
@@ -3032,7 +3032,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 
        if (!(pkt_dev->flags & F_UDPCSUM)) {
                skb->ip_summed = CHECKSUM_NONE;
-       } else if (odev->features & NETIF_F_V6_CSUM) {
+       } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) {
                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum_start = skb_transport_header(skb) - skb->head;
                skb->csum_offset = offsetof(struct udphdr, check);
index 34ba7a08876de74409ea5d2ad3af6f78124ff0a7..d8b0113d3eec83f1e8b46b34db2efea9e93c8fb8 100644 (file)
@@ -1027,6 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
        int err;
        struct switchdev_attr attr = {
+               .orig_dev = dev,
                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
                .flags = SWITCHDEV_F_NO_RECURSE,
        };
index 0c1d58d43f67c46c7a11081d0ecdd30cb5878529..a996ce8c8fb25e016a674f6ce3e42c6d4de36485 100644 (file)
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
        int err;
        struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
        hndl = sock_diag_handlers[req->sdiag_family];
        if (hndl == NULL)
                err = -ENOENT;
-       else
+       else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
                err = hndl->dump(skb, nlh);
+       else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+               err = hndl->destroy(skb, nlh);
+       else
+               err = -EOPNOTSUPP;
        mutex_unlock(&sock_diag_table_mutex);
 
        return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
                return ret;
        case SOCK_DIAG_BY_FAMILY:
-               return __sock_diag_rcv_msg(skb, nlh);
+       case SOCK_DESTROY:
+               return __sock_diag_cmd(skb, nlh);
        default:
                return -EINVAL;
        }
@@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
        return 0;
 }
 
+int sock_diag_destroy(struct sock *sk, int err)
+{
+       if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (!sk->sk_prot->diag_destroy)
+               return -EOPNOTSUPP;
+
+       return sk->sk_prot->diag_destroy(sk, err);
+}
+EXPORT_SYMBOL_GPL(sock_diag_destroy);
+
 static int __net_init diag_net_init(struct net *net)
 {
        struct netlink_kernel_cfg cfg = {
index 416dfa004cfb1b6b7c176096b69e33b104499cf7..c22920525e5d844bd7e4210b233440b883c2681e 100644 (file)
@@ -436,6 +436,19 @@ config INET_UDP_DIAG
          Support for UDP socket monitoring interface used by the ss tool.
          If unsure, say Y.
 
+config INET_DIAG_DESTROY
+       bool "INET: allow privileged process to administratively close sockets"
+       depends on INET_DIAG
+       default n
+       ---help---
+         Provides a SOCK_DESTROY operation that allows privileged processes
+         (e.g., a connection manager or a network administration tool such as
+         ss) to close sockets opened by other processes. Closing a socket in
+         this way interrupts any blocking read/write/connect operations on
+         the socket and causes future socket calls to behave as if the socket
+         had been disconnected.
+         If unsure, say N.
+
 menuconfig TCP_CONG_ADVANCED
        bool "TCP: advanced congestion control"
        ---help---
index ab9f8a66615d0872b586a0c2745196c58f6026be..8bb8e7ad85483234d8a852782515ca11c93af68d 100644 (file)
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
                                  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-                           struct sk_buff *in_skb,
-                           const struct nlmsghdr *nlh,
-                           const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+                                    struct inet_hashinfo *hashinfo,
+                                    const struct inet_diag_req_v2 *req)
 {
-       struct net *net = sock_net(in_skb->sk);
-       struct sk_buff *rep;
        struct sock *sk;
-       int err;
 
-       err = -EINVAL;
        if (req->sdiag_family == AF_INET)
                sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
                                 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
                                  req->id.idiag_if);
 #endif
        else
-               goto out_nosk;
+               return ERR_PTR(-EINVAL);
 
-       err = -ENOENT;
        if (!sk)
-               goto out_nosk;
+               return ERR_PTR(-ENOENT);
 
-       err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-       if (err)
-               goto out;
+       if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+               sock_gen_put(sk);
+               return ERR_PTR(-ENOENT);
+       }
+
+       return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+                           struct sk_buff *in_skb,
+                           const struct nlmsghdr *nlh,
+                           const struct inet_diag_req_v2 *req)
+{
+       struct net *net = sock_net(in_skb->sk);
+       struct sk_buff *rep;
+       struct sock *sk;
+       int err;
+
+       sk = inet_diag_find_one_icsk(net, hashinfo, req);
+       if (IS_ERR(sk))
+               return PTR_ERR(sk);
 
        rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
        if (!rep) {
@@ -409,12 +422,11 @@ out:
        if (sk)
                sock_gen_put(sk);
 
-out_nosk:
        return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
                               const struct nlmsghdr *nlh,
                               const struct inet_diag_req_v2 *req)
 {
@@ -424,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
        handler = inet_diag_lock_handler(req->sdiag_protocol);
        if (IS_ERR(handler))
                err = PTR_ERR(handler);
-       else
+       else if (cmd == SOCK_DIAG_BY_FAMILY)
                err = handler->dump_one(in_skb, nlh, req);
+       else if (cmd == SOCK_DESTROY && handler->destroy)
+               err = handler->destroy(in_skb, req);
+       else
+               err = -EOPNOTSUPP;
        inet_diag_unlock_handler(handler);
 
        return err;
@@ -938,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
        req.idiag_states = rc->idiag_states;
        req.id = rc->id;
 
-       return inet_diag_get_exact(in_skb, nlh, &req);
+       return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -972,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
        return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
        int hdrlen = sizeof(struct inet_diag_req_v2);
        struct net *net = sock_net(skb->sk);
@@ -980,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
        if (nlmsg_len(h) < hdrlen)
                return -EINVAL;
 
-       if (h->nlmsg_flags & NLM_F_DUMP) {
+       if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+           h->nlmsg_flags & NLM_F_DUMP) {
                if (nlmsg_attrlen(h, hdrlen)) {
                        struct nlattr *attr;
 
@@ -999,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
                }
        }
 
-       return inet_diag_get_exact(skb, h, nlmsg_data(h));
+       return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1050,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
        .family = AF_INET,
-       .dump = inet_diag_handler_dump,
+       .dump = inet_diag_handler_cmd,
        .get_info = inet_diag_handler_get_info,
+       .destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
        .family = AF_INET6,
-       .dump = inet_diag_handler_dump,
+       .dump = inet_diag_handler_cmd,
        .get_info = inet_diag_handler_get_info,
+       .destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
index e0b94cd843d7f8a546b5d00852a2aa704e1104e9..568e2bc0d93d1862784da62e236c6ab2560e8926 100644 (file)
@@ -911,7 +911,7 @@ static int __ip_append_data(struct sock *sk,
         */
        if (transhdrlen &&
            length + fragheaderlen <= mtu &&
-           rt->dst.dev->features & NETIF_F_V4_CSUM &&
+           rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
            !(flags & MSG_MORE) &&
            !exthdrlen)
                csummode = CHECKSUM_PARTIAL;
index 5075b7ecd26d4a323f0549930d23bad0a5df8ab5..61c7cc22ea684ffe289f3eb8373584dee94c182c 100644 (file)
@@ -132,7 +132,8 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 
        if (skb->ip_summed != CHECKSUM_PARTIAL) {
                if (!(rt->rt_flags & RTCF_LOCAL) &&
-                   (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+                   (!skb->dev || skb->dev->features &
+                    (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
                        skb->ip_summed = CHECKSUM_PARTIAL;
                        skb->csum_start = skb_headroom(skb) +
                                          skb_network_offset(skb) +
index c82cca18c90fbd67c2daf71c6769ee5fef21d2a9..2c0e340518d2a1902248973d918aa58f59027eb8 100644 (file)
@@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
        ssize_t res;
 
        if (!(sk->sk_route_caps & NETIF_F_SG) ||
-           !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
+           !sk_check_csum_caps(sk))
                return sock_no_sendpage(sk->sk_socket, page, offset, size,
                                        flags);
 
@@ -1175,7 +1175,7 @@ new_segment:
                        /*
                         * Check whether we can use HW checksum.
                         */
-                       if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
+                       if (sk_check_csum_caps(sk))
                                skb->ip_summed = CHECKSUM_PARTIAL;
 
                        skb_entail(sk, skb);
@@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_done);
 
+int tcp_abort(struct sock *sk, int err)
+{
+       if (!sk_fullsock(sk)) {
+               sock_gen_put(sk);
+               return -EOPNOTSUPP;
+       }
+
+       /* Don't race with userspace socket closes such as tcp_close. */
+       lock_sock(sk);
+
+       /* Don't race with BH socket closes such as inet_csk_listen_stop. */
+       local_bh_disable();
+       bh_lock_sock(sk);
+
+       if (!sock_flag(sk, SOCK_DEAD)) {
+               sk->sk_err = err;
+               /* This barrier is coupled with smp_rmb() in tcp_poll() */
+               smp_wmb();
+               sk->sk_error_report(sk);
+               if (tcp_need_reset(sk->sk_state))
+                       tcp_send_active_reset(sk, GFP_ATOMIC);
+               tcp_done(sk);
+       }
+
+       bh_unlock_sock(sk);
+       local_bh_enable();
+       release_sock(sk);
+       sock_put(sk);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_abort);
+
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
index b31604086edd6b8aec2eaaf67c755b63caf75743..4d610934fb391c111d822a4d0544334a7b4b858f 100644 (file)
@@ -10,6 +10,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sock_diag.h>
 #include <linux/inet_diag.h>
 
 #include <linux/tcp.h>
@@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
        return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+                           const struct inet_diag_req_v2 *req)
+{
+       struct net *net = sock_net(in_skb->sk);
+       struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+
+       if (IS_ERR(sk))
+               return PTR_ERR(sk);
+
+       return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
 static const struct inet_diag_handler tcp_diag_handler = {
        .dump            = tcp_diag_dump,
        .dump_one        = tcp_diag_dump_one,
        .idiag_get_info  = tcp_diag_get_info,
        .idiag_type      = IPPROTO_TCP,
        .idiag_info_size = sizeof(struct tcp_info),
+#ifdef CONFIG_INET_DIAG_DESTROY
+       .destroy         = tcp_diag_destroy,
+#endif
 };
 
 static int __init tcp_diag_init(void)
index db003438aaf5f6a2b27319c4669be1b1f62c7ae3..7aa13bd3de2961e721a8086b35009dcae8b209b9 100644 (file)
@@ -2342,6 +2342,7 @@ struct proto tcp_prot = {
        .destroy_cgroup         = tcp_destroy_cgroup,
        .proto_cgroup           = tcp_proto_cgroup,
 #endif
+       .diag_destroy           = tcp_abort,
 };
 EXPORT_SYMBOL(tcp_prot);
 
index 0c7b0e61b917158af7e431f9e7781f7bce313c83..8841e984f8bff631891e20f3cb6d0e686829daf6 100644 (file)
@@ -772,7 +772,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
        else if (skb_is_gso(skb))
                uh->check = ~udp_v4_check(len, saddr, daddr, 0);
        else if (skb_dst(skb) && skb_dst(skb)->dev &&
-                (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+                (skb_dst(skb)->dev->features &
+                 (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
 
                BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
 
index f9386160cbee0288e294ea2cd8ba3b5be65cdbf6..130042660181da595a496f45da92520a0aed9690 100644 (file)
@@ -60,8 +60,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 
        /* Try to offload checksum if possible */
        offload_csum = !!(need_csum &&
-                         (skb->dev->features &
-                          (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM)));
+                         ((skb->dev->features & NETIF_F_HW_CSUM) ||
+                          (skb->dev->features & (is_ipv6 ?
+                           NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));
 
        /* segment inner packet. */
        enc_features = skb->dev->hw_enc_features & features;
index 2c900c7b7eb1c45b1619a42275552861bdfcda44..2fbd90bf8d33df9bc8851e54597a4e028d0d903f 100644 (file)
@@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_IPV6_MIP6) += mip6.o
-obj-$(CONFIG_IPV6_ILA) += ila.o
+obj-$(CONFIG_IPV6_ILA) += ila/
 obj-$(CONFIG_NETFILTER)        += netfilter/
 
 obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile
new file mode 100644 (file)
index 0000000..4b32e59
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# Makefile for ILA module
+#
+
+obj-$(CONFIG_IPV6_ILA) += ila.o
+
+ila-objs := ila_common.o ila_lwt.o ila_xlat.o
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
new file mode 100644 (file)
index 0000000..28542cb
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef __ILA_H
+#define __ILA_H
+
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+
+struct ila_params {
+       __be64 locator;
+       __be64 locator_match;
+       __wsum csum_diff;
+};
+
+static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
+{
+       __be32 diff[] = {
+               ~from[0], ~from[1], to[0], to[1],
+       };
+
+       return csum_partial(diff, sizeof(diff), 0);
+}
+
+void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+
+int ila_lwt_init(void);
+void ila_lwt_fini(void);
+int ila_xlat_init(void);
+void ila_xlat_fini(void);
+
+#endif /* __ILA_H */
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
new file mode 100644 (file)
index 0000000..32dc9aa
--- /dev/null
@@ -0,0 +1,103 @@
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/ip6_fib.h>
+#include <net/lwtunnel.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+#include "ila.h"
+
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+       if (*(__be64 *)&ip6h->daddr == p->locator_match)
+               return p->csum_diff;
+       else
+               return compute_csum_diff8((__be32 *)&ip6h->daddr,
+                                         (__be32 *)&p->locator);
+}
+
+void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+       __wsum diff;
+       struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       size_t nhoff = sizeof(struct ipv6hdr);
+
+       /* First update checksum */
+       switch (ip6h->nexthdr) {
+       case NEXTHDR_TCP:
+               if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
+                       struct tcphdr *th = (struct tcphdr *)
+                                       (skb_network_header(skb) + nhoff);
+
+                       diff = get_csum_diff(ip6h, p);
+                       inet_proto_csum_replace_by_diff(&th->check, skb,
+                                                       diff, true);
+               }
+               break;
+       case NEXTHDR_UDP:
+               if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
+                       struct udphdr *uh = (struct udphdr *)
+                                       (skb_network_header(skb) + nhoff);
+
+                       if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+                               diff = get_csum_diff(ip6h, p);
+                               inet_proto_csum_replace_by_diff(&uh->check, skb,
+                                                               diff, true);
+                               if (!uh->check)
+                                       uh->check = CSUM_MANGLED_0;
+                       }
+               }
+               break;
+       case NEXTHDR_ICMP:
+               if (likely(pskb_may_pull(skb,
+                                        nhoff + sizeof(struct icmp6hdr)))) {
+                       struct icmp6hdr *ih = (struct icmp6hdr *)
+                                       (skb_network_header(skb) + nhoff);
+
+                       diff = get_csum_diff(ip6h, p);
+                       inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+                                                       diff, true);
+               }
+               break;
+       }
+
+       /* Now change destination address */
+       *(__be64 *)&ip6h->daddr = p->locator;
+}
+
+static int __init ila_init(void)
+{
+       int ret;
+
+       ret = ila_lwt_init();
+
+       if (ret)
+               goto fail_lwt;
+
+       ret = ila_xlat_init();
+       if (ret)
+               goto fail_xlat;
+
+       return 0;
+fail_xlat:
+       ila_lwt_fini();
+fail_lwt:
+       return ret;
+}
+
+static void __exit ila_fini(void)
+{
+       ila_xlat_fini();
+       ila_lwt_fini();
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
similarity index 62%
rename from net/ipv6/ila.c
rename to net/ipv6/ila/ila_lwt.c
index 1a6852e1ac69e408de22e8df9131188542ad649a..2ae3c4fd8aabc65a7206a73480b3dca4b975f7e3 100644 (file)
 #include <net/lwtunnel.h>
 #include <net/protocol.h>
 #include <uapi/linux/ila.h>
-
-struct ila_params {
-       __be64 locator;
-       __be64 locator_match;
-       __wsum csum_diff;
-};
+#include "ila.h"
 
 static inline struct ila_params *ila_params_lwtunnel(
        struct lwtunnel_state *lwstate)
@@ -24,73 +19,6 @@ static inline struct ila_params *ila_params_lwtunnel(
        return (struct ila_params *)lwstate->data;
 }
 
-static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
-{
-       __be32 diff[] = {
-               ~from[0], ~from[1], to[0], to[1],
-       };
-
-       return csum_partial(diff, sizeof(diff), 0);
-}
-
-static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
-{
-       if (*(__be64 *)&ip6h->daddr == p->locator_match)
-               return p->csum_diff;
-       else
-               return compute_csum_diff8((__be32 *)&ip6h->daddr,
-                                         (__be32 *)&p->locator);
-}
-
-static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
-{
-       __wsum diff;
-       struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       size_t nhoff = sizeof(struct ipv6hdr);
-
-       /* First update checksum */
-       switch (ip6h->nexthdr) {
-       case NEXTHDR_TCP:
-               if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
-                       struct tcphdr *th = (struct tcphdr *)
-                                       (skb_network_header(skb) + nhoff);
-
-                       diff = get_csum_diff(ip6h, p);
-                       inet_proto_csum_replace_by_diff(&th->check, skb,
-                                                       diff, true);
-               }
-               break;
-       case NEXTHDR_UDP:
-               if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
-                       struct udphdr *uh = (struct udphdr *)
-                                       (skb_network_header(skb) + nhoff);
-
-                       if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-                               diff = get_csum_diff(ip6h, p);
-                               inet_proto_csum_replace_by_diff(&uh->check, skb,
-                                                               diff, true);
-                               if (!uh->check)
-                                       uh->check = CSUM_MANGLED_0;
-                       }
-               }
-               break;
-       case NEXTHDR_ICMP:
-               if (likely(pskb_may_pull(skb,
-                                        nhoff + sizeof(struct icmp6hdr)))) {
-                       struct icmp6hdr *ih = (struct icmp6hdr *)
-                                       (skb_network_header(skb) + nhoff);
-
-                       diff = get_csum_diff(ip6h, p);
-                       inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
-                                                       diff, true);
-               }
-               break;
-       }
-
-       /* Now change destination address */
-       *(__be64 *)&ip6h->daddr = p->locator;
-}
-
 static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
@@ -213,17 +141,12 @@ static const struct lwtunnel_encap_ops ila_encap_ops = {
        .cmp_encap = ila_encap_cmp,
 };
 
-static int __init ila_init(void)
+int ila_lwt_init(void)
 {
        return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
 }
 
-static void __exit ila_fini(void)
+void ila_lwt_fini(void)
 {
        lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
 }
-
-module_init(ila_init);
-module_exit(ila_fini);
-MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
new file mode 100644 (file)
index 0000000..295ca29
--- /dev/null
@@ -0,0 +1,680 @@
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+#include <linux/rcupdate.h>
+#include <linux/rhashtable.h>
+#include <linux/vmalloc.h>
+#include <net/genetlink.h>
+#include <net/ila.h>
+#include <net/netns/generic.h>
+#include <uapi/linux/genetlink.h>
+#include "ila.h"
+
+struct ila_xlat_params {
+       struct ila_params ip;
+       __be64 identifier;
+       int ifindex;
+       unsigned int dir;
+};
+
+struct ila_map {
+       struct ila_xlat_params p;
+       struct rhash_head node;
+       struct ila_map __rcu *next;
+       struct rcu_head rcu;
+};
+
+static unsigned int ila_net_id;
+
+struct ila_net {
+       struct rhashtable rhash_table;
+       spinlock_t *locks; /* Bucket locks for entry manipulation */
+       unsigned int locks_mask;
+       bool hooks_registered;
+};
+
+#define        LOCKS_PER_CPU 10
+
+static int alloc_ila_locks(struct ila_net *ilan)
+{
+       unsigned int i, size;
+       unsigned int nr_pcpus = num_possible_cpus();
+
+       nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+       size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
+
+       if (sizeof(spinlock_t) != 0) {
+#ifdef CONFIG_NUMA
+               if (size * sizeof(spinlock_t) > PAGE_SIZE)
+                       ilan->locks = vmalloc(size * sizeof(spinlock_t));
+               else
+#endif
+               ilan->locks = kmalloc_array(size, sizeof(spinlock_t),
+                                           GFP_KERNEL);
+               if (!ilan->locks)
+                       return -ENOMEM;
+               for (i = 0; i < size; i++)
+                       spin_lock_init(&ilan->locks[i]);
+       }
+       ilan->locks_mask = size - 1;
+
+       return 0;
+}
+
+static u32 hashrnd __read_mostly;
+static __always_inline void __ila_hash_secret_init(void)
+{
+       net_get_random_once(&hashrnd, sizeof(hashrnd));
+}
+
+static inline u32 ila_identifier_hash(__be64 identifier)
+{
+       u32 *v = (u32 *)&identifier;
+
+       return jhash_2words(v[0], v[1], hashrnd);
+}
+
+static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier)
+{
+       return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask];
+}
+
+static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc,
+                                   int ifindex, unsigned int dir)
+{
+       return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) ||
+              (ila->p.ifindex && ila->p.ifindex != ifindex) ||
+              !(ila->p.dir & dir);
+}
+
+static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p)
+{
+       return (ila->p.ip.locator_match != p->ip.locator_match) ||
+              (ila->p.ifindex != p->ifindex) ||
+              (ila->p.dir != p->dir);
+}
+
+static int ila_cmpfn(struct rhashtable_compare_arg *arg,
+                    const void *obj)
+{
+       const struct ila_map *ila = obj;
+
+       return (ila->p.identifier != *(__be64 *)arg->key);
+}
+
+static inline int ila_order(struct ila_map *ila)
+{
+       int score = 0;
+
+       if (ila->p.ip.locator_match)
+               score += 1 << 0;
+
+       if (ila->p.ifindex)
+               score += 1 << 1;
+
+       return score;
+}
+
+static const struct rhashtable_params rht_params = {
+       .nelem_hint = 1024,
+       .head_offset = offsetof(struct ila_map, node),
+       .key_offset = offsetof(struct ila_map, p.identifier),
+       .key_len = sizeof(u64), /* identifier */
+       .max_size = 1048576,
+       .min_size = 256,
+       .automatic_shrinking = true,
+       .obj_cmpfn = ila_cmpfn,
+};
+
+static struct genl_family ila_nl_family = {
+       .id             = GENL_ID_GENERATE,
+       .hdrsize        = 0,
+       .name           = ILA_GENL_NAME,
+       .version        = ILA_GENL_VERSION,
+       .maxattr        = ILA_ATTR_MAX,
+       .netnsok        = true,
+       .parallel_ops   = true,
+};
+
+static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+       [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, },
+       [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+       [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
+       [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
+       [ILA_ATTR_DIR] = { .type = NLA_U32, },
+};
+
+static int parse_nl_config(struct genl_info *info,
+                          struct ila_xlat_params *p)
+{
+       memset(p, 0, sizeof(*p));
+
+       if (info->attrs[ILA_ATTR_IDENTIFIER])
+               p->identifier = (__force __be64)nla_get_u64(
+                       info->attrs[ILA_ATTR_IDENTIFIER]);
+
+       if (info->attrs[ILA_ATTR_LOCATOR])
+               p->ip.locator = (__force __be64)nla_get_u64(
+                       info->attrs[ILA_ATTR_LOCATOR]);
+
+       if (info->attrs[ILA_ATTR_LOCATOR_MATCH])
+               p->ip.locator_match = (__force __be64)nla_get_u64(
+                       info->attrs[ILA_ATTR_LOCATOR_MATCH]);
+
+       if (info->attrs[ILA_ATTR_IFINDEX])
+               p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
+
+       if (info->attrs[ILA_ATTR_DIR])
+               p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
+
+       return 0;
+}
+
+/* Must be called with rcu readlock */
+static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
+                                                  int ifindex,
+                                                  unsigned int dir,
+                                                  struct ila_net *ilan)
+{
+       struct ila_map *ila;
+
+       ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params);
+       while (ila) {
+               if (!ila_cmp_wildcards(ila, loc, ifindex, dir))
+                       return ila;
+               ila = rcu_access_pointer(ila->next);
+       }
+
+       return NULL;
+}
+
+/* Exact-match lookup used by the netlink control path: hash on the
+ * identifier, then scan the chain for an entry whose full parameter set
+ * matches @p (ila_cmp_params() == 0).  Returns NULL if none matches.
+ * Must be called with rcu readlock.
+ */
+static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p,
+                                                  struct ila_net *ilan)
+{
+       struct ila_map *ila;
+
+       ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+                                    rht_params);
+       while (ila) {
+               if (!ila_cmp_params(ila, p))
+                       return ila;
+               ila = rcu_access_pointer(ila->next);
+       }
+
+       return NULL;
+}
+
+/* Free a mapping entry after an RCU grace period, so concurrent
+ * rcu-readlock lookups that still hold a pointer remain safe.
+ */
+static inline void ila_release(struct ila_map *ila)
+{
+       kfree_rcu(ila, rcu);
+}
+
+/* rhashtable_free_and_destroy() callback: @ptr is the head entry stored
+ * in a table slot; release it and every entry chained behind it via
+ * ->next.  @arg is unused.
+ */
+static void ila_free_cb(void *ptr, void *arg)
+{
+       struct ila_map *ila = (struct ila_map *)ptr, *next;
+
+       /* Assume rcu_readlock held */
+       while (ila) {
+               /* Grab ->next before ila_release() schedules the free. */
+               next = rcu_access_pointer(ila->next);
+               ila_release(ila);
+               ila = next;
+       }
+}
+
+/* Forward declaration; defined near the end of this file. */
+static int ila_xlat_addr(struct sk_buff *skb, int dir);
+
+/* Netfilter PRE_ROUTING hook: translate the destination locator of an
+ * incoming packet in place.  Always returns NF_ACCEPT -- translation
+ * never drops the packet.
+ */
+static unsigned int
+ila_nf_input(void *priv,
+            struct sk_buff *skb,
+            const struct nf_hook_state *state)
+{
+       ila_xlat_addr(skb, ILA_DIR_IN);
+       return NF_ACCEPT;
+}
+
+/* Single IPv6 PRE_ROUTING hook; priority -1 places it ahead of
+ * priority-0 hooks (netfilter runs hooks in ascending priority order),
+ * so translation happens before e.g. conntrack at the default priority.
+ * Registered lazily per namespace in ila_add_mapping().
+ */
+static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+       {
+               .hook = ila_nf_input,
+               .pf = NFPROTO_IPV6,
+               .hooknum = NF_INET_PRE_ROUTING,
+               .priority = -1,
+       },
+};
+
+/* Insert a new identifier->locator mapping for @net.
+ *
+ * The rhashtable is keyed by identifier only; entries sharing an
+ * identifier hang off the head entry's ->next chain, kept sorted by
+ * descending ila_order() (presumably most-specific match first --
+ * confirm against ila_order()'s definition, which is not visible here).
+ *
+ * Returns 0 on success, -EEXIST for an exact duplicate, -ENOMEM on
+ * allocation failure, or an rhashtable/netfilter error code.
+ */
+static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
+{
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+       struct ila_map *ila, *head;
+       spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+       int err = 0, order;
+
+       if (!ilan->hooks_registered) {
+               /* We defer registering net hooks in the namespace until the
+                * first mapping is added.
+                */
+               err = nf_register_net_hooks(net, ila_nf_hook_ops,
+                                           ARRAY_SIZE(ila_nf_hook_ops));
+               if (err)
+                       return err;
+
+               /* NOTE(review): hooks stay registered even if the mapping
+                * insert below fails; they are torn down in ila_exit_net().
+                * Presumably intentional -- confirm.
+                */
+               ilan->hooks_registered = true;
+       }
+
+       ila = kzalloc(sizeof(*ila), GFP_KERNEL);
+       if (!ila)
+               return -ENOMEM;
+
+       ila->p = *p;
+
+       if (p->ip.locator_match) {
+               /* Precompute checksum difference for translation since we
+                * know both the old identifier and the new one.
+                */
+               ila->p.ip.csum_diff = compute_csum_diff8(
+                       (__be32 *)&p->ip.locator_match,
+                       (__be32 *)&p->ip.locator);
+       }
+
+       order = ila_order(ila);
+
+       /* Per-identifier lock serializes writers; readers use RCU. */
+       spin_lock(lock);
+
+       head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+                                     rht_params);
+       if (!head) {
+               /* New entry for the rhash_table */
+               err = rhashtable_lookup_insert_fast(&ilan->rhash_table,
+                                                   &ila->node, rht_params);
+       } else {
+               struct ila_map *tila = head, *prev = NULL;
+
+               /* Walk the chain: reject exact duplicates, otherwise find
+                * the insertion point that keeps ila_order() descending.
+                */
+               do {
+                       if (!ila_cmp_params(tila, p)) {
+                               err = -EEXIST;
+                               goto out;
+                       }
+
+                       if (order > ila_order(tila))
+                               break;
+
+                       prev = tila;
+                       tila = rcu_dereference_protected(tila->next,
+                               lockdep_is_held(lock));
+               } while (tila);
+
+               if (prev) {
+                       /* Insert in sub list of head */
+                       RCU_INIT_POINTER(ila->next, tila);
+                       rcu_assign_pointer(prev->next, ila);
+               } else {
+                       /* Make this ila new head */
+                       RCU_INIT_POINTER(ila->next, head);
+                       err = rhashtable_replace_fast(&ilan->rhash_table,
+                                                     &head->node,
+                                                     &ila->node, rht_params);
+                       if (err)
+                               goto out;
+               }
+       }
+
+out:
+       spin_unlock(lock);
+
+       /* On any failure the entry was never published to readers, so a
+        * plain kfree (no RCU grace period) is safe.
+        */
+       if (err)
+               kfree(ila);
+
+       return err;
+}
+
+/* Remove the mapping exactly matching @p from @net's table.
+ *
+ * Handles three positions: mid-chain entry (simple unlink), head with a
+ * non-empty chain (promote the next entry into the table slot via
+ * rhashtable_replace_fast), and sole entry (remove the slot).
+ * Returns 0 on success, -ENOENT if no exact match exists, or an
+ * rhashtable error from the replace path.
+ */
+static int ila_del_mapping(struct net *net, struct ila_xlat_params *p)
+{
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+       struct ila_map *ila, *head, *prev;
+       spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+       int err = -ENOENT;
+
+       spin_lock(lock);
+
+       head = rhashtable_lookup_fast(&ilan->rhash_table,
+                                     &p->identifier, rht_params);
+       ila = head;
+
+       prev = NULL;
+
+       while (ila) {
+               /* Skip entries that are not an exact parameter match. */
+               if (ila_cmp_params(ila, p)) {
+                       prev = ila;
+                       ila = rcu_dereference_protected(ila->next,
+                                                       lockdep_is_held(lock));
+                       continue;
+               }
+
+               err = 0;
+
+               if (prev) {
+                       /* Not head, just delete from list */
+                       rcu_assign_pointer(prev->next, ila->next);
+               } else {
+                       /* It is the head. If there is something in the
+                        * sublist we need to make a new head.
+                        */
+                       head = rcu_dereference_protected(ila->next,
+                                                        lockdep_is_held(lock));
+                       if (head) {
+                               /* Put first entry in the sublist into the
+                                * table
+                                */
+                               err = rhashtable_replace_fast(
+                                       &ilan->rhash_table, &ila->node,
+                                       &head->node, rht_params);
+                               /* Replace failed: leave the entry in place
+                                * (not released) and report the error.
+                                */
+                               if (err)
+                                       goto out;
+                       } else {
+                               /* Entry no longer used */
+                               err = rhashtable_remove_fast(&ilan->rhash_table,
+                                                            &ila->node,
+                                                            rht_params);
+                       }
+               }
+
+               /* RCU-deferred free; concurrent readers may still hold it. */
+               ila_release(ila);
+
+               break;
+       }
+
+out:
+       spin_unlock(lock);
+
+       return err;
+}
+
+/* Genl doit handler for ILA_CMD_ADD: parse attributes and insert the
+ * mapping.  Errors from ila_add_mapping() propagate to userspace.
+ */
+static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct ila_xlat_params p;
+       int err;
+
+       err = parse_nl_config(info, &p);
+       if (err)
+               return err;
+
+       return ila_add_mapping(net, &p);
+}
+
+/* Genl doit handler for ILA_CMD_DEL: parse attributes and delete the
+ * mapping.  ila_del_mapping()'s return value is deliberately dropped
+ * (the function returns 0 regardless), so deleting a nonexistent
+ * mapping is not reported as an error -- presumably intentional,
+ * confirm against the uAPI expectations.
+ */
+static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct ila_xlat_params p;
+       int err;
+
+       err = parse_nl_config(info, &p);
+       if (err)
+               return err;
+
+       ila_del_mapping(net, &p);
+
+       return 0;
+}
+
+/* Emit one mapping's parameters as netlink attributes into @msg.
+ * Returns 0 on success, -1 if any nla_put_*() fails (message full);
+ * the caller translates that into -EMSGSIZE.  The __force u64 casts
+ * undo the __be64 annotation for the u64 attribute API.
+ */
+static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
+{
+       if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER,
+                       (__force u64)ila->p.identifier) ||
+           nla_put_u64(msg, ILA_ATTR_LOCATOR,
+                       (__force u64)ila->p.ip.locator) ||
+           nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH,
+                       (__force u64)ila->p.ip.locator_match) ||
+           nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) ||
+           nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir))
+               return -1;
+
+       return 0;
+}
+
+/* Build one complete genl message (header + attributes) for @ila into
+ * @skb.  Returns 0 on success, -ENOMEM if the genl header does not fit,
+ * or -EMSGSIZE if the attributes do not fit (the partial message is
+ * cancelled so @skb stays consistent).
+ */
+static int ila_dump_info(struct ila_map *ila,
+                        u32 portid, u32 seq, u32 flags,
+                        struct sk_buff *skb, u8 cmd)
+{
+       void *hdr;
+
+       hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd);
+       if (!hdr)
+               return -ENOMEM;
+
+       if (ila_fill_info(ila, skb) < 0)
+               goto nla_put_failure;
+
+       genlmsg_end(skb, hdr);
+       return 0;
+
+nla_put_failure:
+       genlmsg_cancel(skb, hdr);
+       return -EMSGSIZE;
+}
+
+/* Genl doit handler for ILA_CMD_GET with explicit parameters: look up
+ * the exact mapping under RCU and reply with a single message.
+ *
+ * NOTE(review): if ila_lookup_by_params() finds nothing, ret remains 0
+ * (from parse_nl_config) and an *empty* msg is handed to
+ * genlmsg_reply() -- userspace receives a reply with no payload rather
+ * than an error such as -ESRCH.  Confirm this is the intended uAPI.
+ */
+static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+       struct sk_buff *msg;
+       struct ila_xlat_params p;
+       struct ila_map *ila;
+       int ret;
+
+       ret = parse_nl_config(info, &p);
+       if (ret)
+               return ret;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       /* Lookup and dump must both happen inside the RCU read section,
+        * since the entry may be freed once we drop it.
+        */
+       rcu_read_lock();
+
+       ila = ila_lookup_by_params(&p, ilan);
+       if (ila) {
+               ret = ila_dump_info(ila,
+                                   info->snd_portid,
+                                   info->snd_seq, 0, msg,
+                                   info->genlhdr->cmd);
+       }
+
+       rcu_read_unlock();
+
+       if (ret < 0)
+               goto out_free;
+
+       return genlmsg_reply(msg, info);
+
+out_free:
+       nlmsg_free(msg);
+       return ret;
+}
+
+/* Dump state stored directly in netlink_callback->args (cast, not
+ * allocated).  Assumes sizeof(struct ila_dump_iter) fits within
+ * cb->args -- TODO(review): confirm against netlink_callback's args
+ * array size.
+ */
+struct ila_dump_iter {
+       struct rhashtable_iter rhiter;
+};
+
+/* Dump .start callback: initialize the rhashtable walker inside
+ * cb->args before the first ila_nl_dump() invocation.
+ */
+static int ila_nl_dump_start(struct netlink_callback *cb)
+{
+       struct net *net = sock_net(cb->skb->sk);
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+
+       return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter);
+}
+
+/* Dump .done callback: tear down the walker created in
+ * ila_nl_dump_start().
+ */
+static int ila_nl_dump_done(struct netlink_callback *cb)
+{
+       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+
+       rhashtable_walk_exit(&iter->rhiter);
+
+       return 0;
+}
+
+/* Dump .dumpit callback: walk every table slot and, for each head
+ * entry, emit the whole ->next chain as NLM_F_MULTI messages.
+ * -EAGAIN from the walker (table resize in progress) means "retry the
+ * step", not failure.  Returns skb->len per netlink-dump convention so
+ * the core knows data was written, or a negative error.
+ */
+static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+       struct rhashtable_iter *rhiter = &iter->rhiter;
+       struct ila_map *ila;
+       int ret;
+
+       ret = rhashtable_walk_start(rhiter);
+       if (ret && ret != -EAGAIN)
+               goto done;
+
+       for (;;) {
+               ila = rhashtable_walk_next(rhiter);
+
+               if (IS_ERR(ila)) {
+                       if (PTR_ERR(ila) == -EAGAIN)
+                               continue;
+                       ret = PTR_ERR(ila);
+                       goto done;
+               } else if (!ila) {
+                       /* End of table. */
+                       break;
+               }
+
+               /* Dump the head entry and everything chained behind it. */
+               while (ila) {
+                       ret =  ila_dump_info(ila, NETLINK_CB(cb->skb).portid,
+                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                                            skb, ILA_CMD_GET);
+                       if (ret)
+                               goto done;
+
+                       ila = rcu_access_pointer(ila->next);
+               }
+       }
+
+       ret = skb->len;
+
+done:
+       rhashtable_walk_stop(rhiter);
+       return ret;
+}
+
+/* Generic netlink command table.  ADD/DEL require CAP_NET_ADMIN
+ * (GENL_ADMIN_PERM); GET carries no flags, so reads/dumps are
+ * unprivileged -- presumably intentional for a read-only query,
+ * confirm.
+ */
+static const struct genl_ops ila_nl_ops[] = {
+       {
+               .cmd = ILA_CMD_ADD,
+               .doit = ila_nl_cmd_add_mapping,
+               .policy = ila_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = ILA_CMD_DEL,
+               .doit = ila_nl_cmd_del_mapping,
+               .policy = ila_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = ILA_CMD_GET,
+               .doit = ila_nl_cmd_get_mapping,
+               .start = ila_nl_dump_start,
+               .dumpit = ila_nl_dump,
+               .done = ila_nl_dump_done,
+               .policy = ila_nl_policy,
+       },
+};
+
+/* NOTE(review): ILA_HASH_TABLE_SIZE is not referenced in this file's
+ * visible code -- presumably consumed by alloc_ila_locks()/rht_params
+ * defined elsewhere; confirm or remove.
+ */
+#define ILA_HASH_TABLE_SIZE 1024
+
+/* Per-namespace init: allocate the writer lock array and set up the
+ * mapping table.  NOTE(review): rhashtable_init()'s return value is
+ * ignored; a failure here would leave an unusable table.  Consider
+ * checking it.
+ */
+static __net_init int ila_init_net(struct net *net)
+{
+       int err;
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+
+       err = alloc_ila_locks(ilan);
+       if (err)
+               return err;
+
+       rhashtable_init(&ilan->rhash_table, &rht_params);
+
+       return 0;
+}
+
+/* Per-namespace teardown: free every mapping (table + chains, via
+ * ila_free_cb), release the lock array, and unregister the netfilter
+ * hooks if ila_add_mapping() ever registered them for this namespace.
+ */
+static __net_exit void ila_exit_net(struct net *net)
+{
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+
+       rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL);
+
+       kvfree(ilan->locks);
+
+       if (ilan->hooks_registered)
+               nf_unregister_net_hooks(net, ila_nf_hook_ops,
+                                       ARRAY_SIZE(ila_nf_hook_ops));
+}
+
+/* Pernet registration: .size makes the core allocate one struct ila_net
+ * per namespace, retrievable via net_generic(net, ila_net_id).
+ */
+static struct pernet_operations ila_net_ops = {
+       .init = ila_init_net,
+       .exit = ila_exit_net,
+       .id   = &ila_net_id,
+       .size = sizeof(struct ila_net),
+};
+
+/* Core translation: read the identifier (low 64 bits) and locator
+ * (high 64 bits) from the packet's IPv6 destination address, look up a
+ * matching mapping under RCU, and rewrite the locator in place via
+ * update_ipv6_locator().  Always returns 0, whether or not a mapping
+ * matched.
+ */
+static int ila_xlat_addr(struct sk_buff *skb, int dir)
+{
+       struct ila_map *ila;
+       struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       struct net *net = dev_net(skb->dev);
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+       __be64 identifier, locator_match;
+       size_t nhoff;
+
+       /* Assumes skb contains a valid IPv6 header that is pulled */
+
+       identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8];
+       locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0];
+       /* NOTE(review): nhoff is set but never used in this function. */
+       nhoff = sizeof(struct ipv6hdr);
+
+       rcu_read_lock();
+
+       ila = ila_lookup_wildcards(identifier, locator_match,
+                                  skb->dev->ifindex, dir, ilan);
+       if (ila)
+               update_ipv6_locator(skb, &ila->p.ip);
+
+       rcu_read_unlock();
+
+       return 0;
+}
+
+/* Exported entry point: translate an incoming packet's destination
+ * (ILA_DIR_IN).  Thin wrapper around ila_xlat_addr().
+ */
+int ila_xlat_incoming(struct sk_buff *skb)
+{
+       return ila_xlat_addr(skb, ILA_DIR_IN);
+}
+EXPORT_SYMBOL(ila_xlat_incoming);
+
+/* Exported entry point: translate an outgoing packet's destination
+ * (ILA_DIR_OUT).  Thin wrapper around ila_xlat_addr().
+ */
+int ila_xlat_outgoing(struct sk_buff *skb)
+{
+       return ila_xlat_addr(skb, ILA_DIR_OUT);
+}
+EXPORT_SYMBOL(ila_xlat_outgoing);
+
+/* Module init for the xlat component: register pernet state first,
+ * then the genl family.  On genl failure the pernet registration is
+ * rolled back so init is all-or-nothing.
+ */
+int ila_xlat_init(void)
+{
+       int ret;
+
+       ret = register_pernet_device(&ila_net_ops);
+       if (ret)
+               goto exit;
+
+       ret = genl_register_family_with_ops(&ila_nl_family,
+                                           ila_nl_ops);
+       if (ret < 0)
+               goto unregister;
+
+       return 0;
+
+unregister:
+       unregister_pernet_device(&ila_net_ops);
+exit:
+       return ret;
+}
+
+/* Module exit: unwind ila_xlat_init() in reverse order (genl family
+ * first, then pernet state, whose ->exit frees all mappings).
+ */
+void ila_xlat_fini(void)
+{
+       genl_unregister_family(&ila_nl_family);
+       unregister_pernet_device(&ila_net_ops);
+}
index e6a7bd15b9b7115ce6a61e3157c637cdaf86ec18..2f748452b4aaa5d74641f11fa988cb8740e17051 100644 (file)
@@ -1322,7 +1322,7 @@ emsgsize:
            headersize == sizeof(struct ipv6hdr) &&
            length < mtu - headersize &&
            !(flags & MSG_MORE) &&
-           rt->dst.dev->features & NETIF_F_V6_CSUM)
+           rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;
 
        if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
index 238e70c3f7b7b432f017c592a6cbaf8258629324..6ce3099288416b3753d6cab8ee70f1a0b97ab94f 100644 (file)
@@ -136,7 +136,8 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 
        if (skb->ip_summed != CHECKSUM_PARTIAL) {
                if (!(rt->rt6i_flags & RTF_LOCAL) &&
-                   (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+                   (!skb->dev || skb->dev->features &
+                    (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) {
                        skb->ip_summed = CHECKSUM_PARTIAL;
                        skb->csum_start = skb_headroom(skb) +
                                          skb_network_offset(skb) +
index c16e3fbf6854754c0e388285312ccc2244bc201b..5382c2662fa2b6a63e21e9bbd129d6d670ba24e1 100644 (file)
@@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = {
        .proto_cgroup           = tcp_proto_cgroup,
 #endif
        .clear_sk               = tcp_v6_clear_sk,
+       .diag_destroy           = tcp_abort,
 };
 
 static const struct inet6_protocol tcpv6_protocol = {
index 010ddeec135f50950d2f9a356bc7211c351dde9a..d952d67f904d1124ed0c5adfa20a51f82207181c 100644 (file)
@@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
        /* Only update csum if we really have to */
        if (sctph->dest != cp->dport || payload_csum ||
            (skb->ip_summed == CHECKSUM_PARTIAL &&
-            !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
+            !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
                sctph->dest = cp->dport;
                sctp_nat_csum(skb, sctph, sctphoff);
        } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
index 59651af8cc2705b39f3ad1ea71ab0b161668af02..81dc1bb6e0168ed17cd2d60e6dc3a394f0f34e76 100644 (file)
@@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 
        cb = &nlk->cb;
        memset(cb, 0, sizeof(*cb));
+       cb->start = control->start;
        cb->dump = control->dump;
        cb->done = control->done;
        cb->nlh = nlh;
@@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 
        mutex_unlock(nlk->cb_mutex);
 
+       if (cb->start)
+               cb->start(cb);
+
        ret = netlink_dump(sk);
        sock_put(sk);
 
index bc0e504f33a68ee92897cb3c45241d4d3348deba..8e63662c6fb0fc236da90883ff34b79d05308fcc 100644 (file)
@@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 }
 EXPORT_SYMBOL(genlmsg_put);
 
+static int genl_lock_start(struct netlink_callback *cb)
+{
+       /* our ops are always const - netlink API doesn't propagate that */
+       const struct genl_ops *ops = cb->data;
+       int rc = 0;
+
+       if (ops->start) {
+               genl_lock();
+               rc = ops->start(cb);
+               genl_unlock();
+       }
+       return rc;
+}
+
 static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 {
        /* our ops are always const - netlink API doesn't propagate that */
@@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
                                .module = family->module,
                                /* we have const, but the netlink API doesn't */
                                .data = (void *)ops,
+                               .start = genl_lock_start,
                                .dump = genl_lock_dumpit,
                                .done = genl_lock_done,
                        };
@@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
                } else {
                        struct netlink_dump_control c = {
                                .module = family->module,
+                               .start = ops->start,
                                .dump = ops->dumpit,
                                .done = ops->done,
                        };
index abe7c2db24120a13992131e2be9a19c70a297de0..9d610eddd19ef2320fc34ae9d91e7426ae5f50f9 100644 (file)
@@ -534,7 +534,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
         * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
         */
        if (!sctp_checksum_disable) {
-               if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
+               if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
                    (dst_xfrm(dst) != NULL) || packet->ipfragok) {
                        sh->checksum = sctp_compute_cksum(nskb, 0);
                } else {
index 3d9ea9a48289af7324d2c96d82938dd978634ce5..010aced44b6b23d0e57799674c6fbcc72af331e5 100644 (file)
@@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net *net)
        /* Max.Burst                - 4 */
        net->sctp.max_burst                     = SCTP_DEFAULT_MAX_BURST;
 
+       /* Enable pf state by default */
+       net->sctp.pf_enable = 1;
+
        /* Association.Max.Retrans  - 10 attempts
         * Path.Max.Retrans         - 5  attempts (per destination address)
         * Max.Init.Retransmits     - 8  attempts
@@ -1419,7 +1422,7 @@ static __init int sctp_init(void)
                if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
                        continue;
                sctp_assoc_hashtable = (struct sctp_hashbucket *)
-                       __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
+                       __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
        } while (!sctp_assoc_hashtable && --order > 0);
        if (!sctp_assoc_hashtable) {
                pr_err("Failed association hash alloc\n");
@@ -1452,7 +1455,7 @@ static __init int sctp_init(void)
                if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
                        continue;
                sctp_port_hashtable = (struct sctp_bind_hashbucket *)
-                       __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
+                       __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
        } while (!sctp_port_hashtable && --order > 0);
        if (!sctp_port_hashtable) {
                pr_err("Failed bind hash alloc\n");
index 6098d4c42fa91287d3cde36ac05d860f76d4fe32..05cd16400e0b8b2d474ed6af310053e297e474a8 100644 (file)
@@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
                                         struct sctp_transport *transport,
                                         int is_hb)
 {
+       struct net *net = sock_net(asoc->base.sk);
+
        /* The check for association's overall error counter exceeding the
         * threshold is done in the state function.
         */
@@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
         * is SCTP_ACTIVE, then mark this transport as Partially Failed,
         * see SCTP Quick Failover Draft, section 5.1
         */
-       if ((transport->state == SCTP_ACTIVE) &&
+       if (net->sctp.pf_enable &&
+          (transport->state == SCTP_ACTIVE) &&
           (asoc->pf_retrans < transport->pathmaxrxt) &&
           (transport->error_count > asoc->pf_retrans)) {
 
index 26d50c565f54223ec28d29358932227b4b22acd1..ccbfc93fb8fecfc41cacfccfd98e61a3e0871375 100644 (file)
@@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = {
                .extra1         = &max_autoclose_min,
                .extra2         = &max_autoclose_max,
        },
+       {
+               .procname       = "pf_enable",
+               .data           = &init_net.sctp.pf_enable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
 
        { /* sentinel */ }
 };
index f34e535e93bdf780d7093c84e32a6288029d5442..df790d3385a22f4d89af1ba3d01212102dec765a 100644 (file)
@@ -723,6 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
                                    u32 filter_mask)
 {
        struct switchdev_vlan_dump dump = {
+               .vlan.obj.orig_dev = dev,
                .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
                .skb = skb,
                .filter_mask = filter_mask,
@@ -757,6 +758,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                  int nlflags)
 {
        struct switchdev_attr attr = {
+               .orig_dev = dev,
                .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
        };
        u16 mode = BRIDGE_MODE_UNDEF;
@@ -778,6 +780,7 @@ static int switchdev_port_br_setflag(struct net_device *dev,
                                     unsigned long brport_flag)
 {
        struct switchdev_attr attr = {
+               .orig_dev = dev,
                .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
        };
        u8 flag = nla_get_u8(nlattr);
@@ -853,6 +856,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
        struct nlattr *attr;
        struct bridge_vlan_info *vinfo;
        struct switchdev_obj_port_vlan vlan = {
+               .obj.orig_dev = dev,
                .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
        };
        int rem;
@@ -975,6 +979,7 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                           u16 vid, u16 nlm_flags)
 {
        struct switchdev_obj_port_fdb fdb = {
+               .obj.orig_dev = dev,
                .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
                .vid = vid,
        };
@@ -1000,6 +1005,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
                           u16 vid)
 {
        struct switchdev_obj_port_fdb fdb = {
+               .obj.orig_dev = dev,
                .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
                .vid = vid,
        };
@@ -1077,6 +1083,7 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                            struct net_device *filter_dev, int idx)
 {
        struct switchdev_fdb_dump dump = {
+               .fdb.obj.orig_dev = dev,
                .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
                .dev = dev,
                .skb = skb,
@@ -1135,6 +1142,7 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
                if (!dev)
                        return NULL;
 
+               attr.orig_dev = dev;
                if (switchdev_port_attr_get(dev, &attr))
                        return NULL;
 
@@ -1194,6 +1202,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
        if (!dev)
                return 0;
 
+       ipv4_fib.obj.orig_dev = dev;
        err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
        if (!err)
                fi->fib_flags |= RTNH_F_OFFLOAD;
@@ -1238,6 +1247,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
        if (!dev)
                return 0;
 
+       ipv4_fib.obj.orig_dev = dev;
        err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
        if (!err)
                fi->fib_flags &= ~RTNH_F_OFFLOAD;
@@ -1270,10 +1280,12 @@ static bool switchdev_port_same_parent_id(struct net_device *a,
                                          struct net_device *b)
 {
        struct switchdev_attr a_attr = {
+               .orig_dev = a,
                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
                .flags = SWITCHDEV_F_NO_RECURSE,
        };
        struct switchdev_attr b_attr = {
+               .orig_dev = b,
                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
                .flags = SWITCHDEV_F_NO_RECURSE,
        };